Add ability to retry image pulling

Sometimes, the registries may intermittently fail to deliver the
images. This is often seen in the CI, though it also happens with
production deployments, even those with internal registries and/or
registry mirrors - due to sheer load when trying to pull the
images from many hosts.

This patch adds two new vars to control retry behaviour.
The defaults have been set to make users happier out of the box. :-)

Change-Id: I81ad7d8642654f8474f11084c6934aab40243d35
This commit is contained in:
Radosław Piliszek 2021-08-07 14:30:55 +00:00
parent 578ca1d647
commit cbb567cb86
5 changed files with 50 additions and 0 deletions

View File

@ -0,0 +1,7 @@
---
# Kolla image pulling settings. These are useful if your registry is not 100%
# reliable (usually due to load). They modify the Ansible image pulling task
# params ``retries`` and ``delay``, respectively.

# Number of retries for the image pulling task.
service_images_pull_retries: 3
# Delay (in seconds) between the retries.
service_images_pull_delay: 5

View File

@ -7,6 +7,10 @@
action: "pull_image"
common_options: "{{ docker_common_options }}"
image: "{{ service.image }}"
retries: "{{ service_images_pull_retries }}"
delay: "{{ service_images_pull_delay }}"
register: result
until: result is success
with_dict: "{{ lookup('vars', (kolla_role_name | default(project_name)) + '_services') | select_services_enabled_and_mapped_to_host }}"
loop_control:
label: "{{ item.key }}"

View File

@ -93,3 +93,9 @@ swift_ks_users:
user: "{{ swift_keystone_user }}"
password: "{{ swift_keystone_password }}"
role: "admin"
# FIXME(yoctozepto): These are copied from service-images-pull role.
# Remove when the Swift role is finally migrated to new style.
service_images_pull_retries: 3
service_images_pull_delay: 5

View File

@ -5,6 +5,10 @@
action: "pull_image"
common_options: "{{ docker_common_options }}"
image: "{{ swift_rsyncd_image_full }}"
retries: "{{ service_images_pull_retries }}"
delay: "{{ service_images_pull_delay }}"
register: result
until: result is success
when: inventory_hostname in groups['swift-account-server'] or
inventory_hostname in groups['swift-container-server'] or
inventory_hostname in groups['swift-object-server']
@ -15,6 +19,10 @@
action: "pull_image"
common_options: "{{ docker_common_options }}"
image: "{{ swift_proxy_server_image_full }}"
retries: "{{ service_images_pull_retries }}"
delay: "{{ service_images_pull_delay }}"
register: result
until: result is success
when: inventory_hostname in groups['swift-proxy-server']
- name: Pulling swift-account image
@ -23,6 +31,10 @@
action: "pull_image"
common_options: "{{ docker_common_options }}"
image: "{{ swift_account_image_full }}"
retries: "{{ service_images_pull_retries }}"
delay: "{{ service_images_pull_delay }}"
register: result
until: result is success
when: inventory_hostname in groups['swift-account-server']
- name: Pulling swift-container image
@ -31,6 +43,10 @@
action: "pull_image"
common_options: "{{ docker_common_options }}"
image: "{{ swift_container_image_full }}"
retries: "{{ service_images_pull_retries }}"
delay: "{{ service_images_pull_delay }}"
register: result
until: result is success
when: inventory_hostname in groups['swift-container-server']
- name: Pulling swift-object image
@ -39,6 +55,10 @@
action: "pull_image"
common_options: "{{ docker_common_options }}"
image: "{{ swift_object_image_full }}"
retries: "{{ service_images_pull_retries }}"
delay: "{{ service_images_pull_delay }}"
register: result
until: result is success
when: inventory_hostname in groups['swift-object-server']
- name: Pulling swift-object-expirer image
@ -47,4 +67,8 @@
action: "pull_image"
common_options: "{{ docker_common_options }}"
image: "{{ swift_object_expirer_image_full }}"
retries: "{{ service_images_pull_retries }}"
delay: "{{ service_images_pull_delay }}"
register: result
until: result is success
when: inventory_hostname in groups['swift-object-server']

View File

@ -0,0 +1,9 @@
---
features:
- |
Adds two new variables ``service_images_pull_retries`` and
``service_images_pull_delay`` which control the behaviour of image
pulling tasks. These are useful if your registry is not 100%
reliable (usually due to load). The defaults have been set to
3 retries and 5 seconds delay to ensure a better default experience
(these are actually Ansible defaults when task retries are enabled).