kolla-ansible/ansible/module_utils/kolla_docker_worker.py
Michal Nasiadka cea076f379 Introduce oneshot docker_restart_policy
docker_restart_policy: no causes systemd units to not get created
and we use it in CI to disable restarts on services.

Introducing oneshot policy to not create systemd unit for oneshot
containers (those that are running bootstrap tasks, like db
bootstrap and don't need a systemd unit), but still create systemd
units for long lived containers but with Restart=No.

Change-Id: I9e0d656f19143ec2fcad7d6d345b2c9387551604
2023-11-14 15:17:50 +00:00

523 lines
19 KiB
Python

# Copyright 2015 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import docker
import json
import os
from ansible.module_utils.kolla_container_worker import COMPARE_CONFIG_CMD
from ansible.module_utils.kolla_container_worker import ContainerWorker
from distutils.version import StrictVersion
def get_docker_client():
return docker.APIClient
class DockerWorker(ContainerWorker):
def __init__(self, module):
super().__init__(module)
options = {
'version': self.params.get('api_version'),
'timeout': self.params.get('client_timeout'),
}
self.dc = get_docker_client()(**options)
self._cgroupns_mode_supported = (
StrictVersion(self.dc._version) >= StrictVersion('1.41'))
self._dimensions_kernel_memory_removed = (
StrictVersion(self.dc._version) >= StrictVersion('1.42'))
if self._dimensions_kernel_memory_removed:
self.dimension_map.pop('kernel_memory', None)
def generate_tls(self):
tls = {'verify': self.params.get('tls_verify')}
tls_cert = self.params.get('tls_cert'),
tls_key = self.params.get('tls_key'),
tls_cacert = self.params.get('tls_cacert')
if tls['verify']:
if tls_cert:
self.check_file(tls_cert)
self.check_file(tls_key)
tls['client_cert'] = (tls_cert, tls_key)
if tls_cacert:
self.check_file(tls_cacert)
tls['verify'] = tls_cacert
return docker.tls.TLSConfig(**tls)
def check_file(self, path):
if not os.path.isfile(path):
self.module.fail_json(
failed=True,
msg='There is no file at "{}"'.format(path)
)
if not os.access(path, os.R_OK):
self.module.fail_json(
failed=True,
msg='Permission denied for file at "{}"'.format(path)
)
def check_image(self):
find_image = ':'.join(self.parse_image())
for image in self.dc.images():
repo_tags = image.get('RepoTags')
if not repo_tags:
continue
for image_name in repo_tags:
if image_name == find_image:
return image
def check_volume(self):
for vol in self.dc.volumes()['Volumes'] or list():
if vol['Name'] == self.params.get('name'):
return vol
def check_container(self):
find_name = '/{}'.format(self.params.get('name'))
for cont in self.dc.containers(all=True):
if find_name in cont['Names']:
return cont
def get_container_info(self):
container = self.check_container()
if not container:
return None
return self.dc.inspect_container(self.params.get('name'))
def compare_pid_mode(self, container_info):
new_pid_mode = self.params.get('pid_mode')
current_pid_mode = container_info['HostConfig'].get('PidMode')
if not current_pid_mode:
current_pid_mode = None
if new_pid_mode != current_pid_mode:
return True
def compare_image(self, container_info=None):
container_info = container_info or self.get_container_info()
parse_repository_tag = docker.utils.parse_repository_tag
if not container_info:
return True
new_image = self.check_image()
current_image = container_info['Image']
if not new_image:
return True
if new_image['Id'] != current_image:
return True
# NOTE(Jeffrey4l) when new image and the current image have
# the same id, but the tag name different.
elif (parse_repository_tag(container_info['Config']['Image']) !=
parse_repository_tag(self.params.get('image'))):
return True
def compare_volumes(self, container_info):
volumes, binds = self.generate_volumes()
current_vols = container_info['Config'].get('Volumes')
current_binds = container_info['HostConfig'].get('Binds')
if not volumes:
volumes = list()
if not current_vols:
current_vols = list()
if not current_binds:
current_binds = list()
if set(volumes).symmetric_difference(set(current_vols)):
return True
new_binds = list()
if binds:
for k, v in binds.items():
new_binds.append("{}:{}:{}".format(k, v['bind'], v['mode']))
if set(new_binds).symmetric_difference(set(current_binds)):
return True
def compare_config(self):
try:
job = self.dc.exec_create(
self.params['name'],
COMPARE_CONFIG_CMD,
user='root',
)
output = self.dc.exec_start(job)
exec_inspect = self.dc.exec_inspect(job)
except docker.errors.APIError as e:
# NOTE(yoctozepto): If we have a client error, then the container
# cannot be used for config check (e.g., is restarting, or stopped
# in the mean time) - assume config is stale = return True.
# Else, propagate the server error back.
if e.is_client_error():
return True
else:
raise
# Exit codes:
# 0: not changed
# 1: changed
# 137: abrupt exit -> changed
# else: error
if exec_inspect['ExitCode'] == 0:
return False
elif exec_inspect['ExitCode'] == 1:
return True
elif exec_inspect['ExitCode'] == 137:
# NOTE(yoctozepto): This is Docker's command exit due to container
# exit. It means the container is unstable so we are better off
# marking it as requiring a restart due to config update.
return True
else:
raise Exception('Failed to compare container configuration: '
'ExitCode: %s Message: %s' %
(exec_inspect['ExitCode'], output))
def get_image_id(self):
full_image = self.params.get('image')
image = self.dc.images(name=full_image, quiet=True)
return image[0] if len(image) == 1 else None
def pull_image(self):
if self.params.get('auth_username'):
self.dc.login(
username=self.params.get('auth_username'),
password=self.params.get('auth_password'),
registry=self.params.get('auth_registry'),
email=self.params.get('auth_email')
)
image, tag = self.parse_image()
old_image_id = self.get_image_id()
statuses = [
json.loads(line.strip().decode('utf-8')) for line in self.dc.pull(
repository=image, tag=tag, stream=True
)
]
for status in reversed(statuses):
if 'error' in status:
if status['error'].endswith('not found'):
self.module.fail_json(
msg="The requested image does not exist: {}:{}".format(
image, tag),
failed=True
)
else:
self.module.fail_json(
msg="Unknown error message: {}".format(
status['error']),
failed=True
)
new_image_id = self.get_image_id()
self.changed = old_image_id != new_image_id
def remove_container(self):
self.changed |= self.systemd.remove_unit_file()
if self.check_container():
self.changed = True
# NOTE(jeffrey4l): in some case, docker failed to remove container
# filesystem and raise error. But the container info is
# disappeared already. If this happens, assume the container is
# removed.
try:
self.dc.remove_container(
container=self.params.get('name'),
force=True
)
self.systemd.remove_unit_file()
except docker.errors.APIError:
if self.check_container():
raise
def parse_dimensions(self, dimensions):
# When the data object contains types such as
# docker.types.Ulimit, Ansible will fail when these are
# returned via exit_json or fail_json. HostConfig is derived from dict,
# but its constructor requires additional arguments.
# to avoid that, here do copy the dimensions and return a new one.
dimensions = dimensions.copy()
supported = {'cpu_period', 'cpu_quota', 'cpu_shares',
'cpuset_cpus', 'cpuset_mems', 'mem_limit',
'mem_reservation', 'memswap_limit',
'kernel_memory', 'blkio_weight', 'ulimits'}
unsupported = set(dimensions) - supported
if unsupported:
self.module.exit_json(failed=True,
msg=repr("Unsupported dimensions"),
unsupported_dimensions=unsupported)
ulimits = dimensions.get('ulimits')
if ulimits:
dimensions['ulimits'] = self.build_ulimits(ulimits)
return dimensions
def build_ulimits(self, ulimits):
ulimits_opt = []
for key, value in ulimits.items():
soft = value.get('soft')
hard = value.get('hard')
ulimits_opt.append(docker.types.Ulimit(name=key,
soft=soft,
hard=hard))
return ulimits_opt
def build_host_config(self, binds):
options = {
'network_mode': 'host',
'ipc_mode': self.params.get('ipc_mode'),
'cap_add': self.params.get('cap_add'),
'security_opt': self.params.get('security_opt'),
'pid_mode': self.params.get('pid_mode'),
'privileged': self.params.get('privileged'),
'tmpfs': self.generate_tmpfs(),
'volumes_from': self.params.get('volumes_from')
}
dimensions = self.params.get('dimensions')
if dimensions:
dimensions = self.parse_dimensions(dimensions)
options.update(dimensions)
if binds:
options['binds'] = binds
host_config = self.dc.create_host_config(**options)
if self._cgroupns_mode_supported:
# NOTE(yoctozepto): python-docker does not support CgroupnsMode
# natively so we stuff it in manually.
cgroupns_mode = self.params.get('cgroupns_mode')
if cgroupns_mode is not None:
host_config['CgroupnsMode'] = cgroupns_mode
# detached containers should only log to journald
if self.params.get('detach'):
options['log_config'] = docker.types.LogConfig(
type=docker.types.LogConfig.types.NONE)
return host_config
def _inject_env_var(self, environment_info):
newenv = {
'KOLLA_SERVICE_NAME': self.params.get('name').replace('_', '-')
}
environment_info.update(newenv)
return environment_info
def _format_env_vars(self):
env = self._inject_env_var(self.params.get('environment'))
return {k: "" if env[k] is None else env[k] for k in env}
def build_container_options(self):
volumes, binds = self.generate_volumes()
options = {
'command': self.params.get('command'),
'detach': self.params.get('detach'),
'environment': self._format_env_vars(),
'host_config': self.build_host_config(binds),
'labels': self.params.get('labels'),
'image': self.params.get('image'),
'name': self.params.get('name'),
'volumes': volumes,
'tty': self.params.get('tty'),
}
healthcheck = self.parse_healthcheck(self.params.get('healthcheck'))
if healthcheck:
options.update(healthcheck)
return options
def create_container(self):
self.changed = True
options = self.build_container_options()
self.dc.create_container(**options)
if self.params.get('restart_policy') != 'oneshot':
self.changed |= self.systemd.create_unit_file()
def recreate_or_restart_container(self):
self.changed = True
container = self.check_container()
# get config_strategy from env
environment = self.params.get('environment')
config_strategy = environment.get('KOLLA_CONFIG_STRATEGY')
if not container:
self.start_container()
return
# If config_strategy is COPY_ONCE or container's parameters are
# changed, try to start a new one.
if config_strategy == 'COPY_ONCE' or self.check_container_differs():
# NOTE(mgoddard): Pull the image if necessary before stopping the
# container, otherwise a failure to pull the image will leave the
# container stopped.
if not self.check_image():
self.pull_image()
self.stop_container()
self.remove_container()
self.start_container()
elif config_strategy == 'COPY_ALWAYS':
self.restart_container()
def start_container(self):
if not self.check_image():
self.pull_image()
container = self.check_container()
if container and self.check_container_differs():
self.stop_container()
self.remove_container()
container = self.check_container()
if not container:
self.create_container()
container = self.check_container()
if not container['Status'].startswith('Up '):
self.changed = True
if self.params.get('restart_policy') == 'oneshot':
self.dc.start(container=self.params.get('name'))
else:
self.systemd.create_unit_file()
if not self.systemd.start():
self.module.fail_json(
changed=True,
msg="Container timed out",
**self.check_container())
# We do not want to detach so we wait around for container to exit
if not self.params.get('detach'):
rc = self.dc.wait(self.params.get('name'))
# NOTE(jeffrey4l): since python docker package 3.0, wait return a
# dict all the time.
if isinstance(rc, dict):
rc = rc['StatusCode']
# Include container's return code, standard output and error in the
# result.
self.result['rc'] = rc
self.result['stdout'] = self.dc.logs(self.params.get('name'),
stdout=True, stderr=False)
self.result['stderr'] = self.dc.logs(self.params.get('name'),
stdout=False, stderr=True)
if self.params.get('remove_on_exit'):
self.stop_container()
self.remove_container()
if rc != 0:
self.module.fail_json(
changed=True,
msg="Container exited with non-zero return code %s" % rc,
**self.result
)
def stop_container(self):
name = self.params.get('name')
graceful_timeout = self.params.get('graceful_timeout')
if not graceful_timeout:
graceful_timeout = 10
container = self.check_container()
if not container:
ignore_missing = self.params.get('ignore_missing')
if not ignore_missing:
self.module.fail_json(
msg="No such container: {} to stop".format(name))
elif not container['Status'].startswith('Exited '):
self.changed = True
if not self.systemd.check_unit_file():
self.dc.stop(name, timeout=graceful_timeout)
else:
self.systemd.stop()
def stop_and_remove_container(self):
container = self.check_container()
if container:
self.stop_container()
self.remove_container()
def restart_container(self):
name = self.params.get('name')
graceful_timeout = self.params.get('graceful_timeout')
if not graceful_timeout:
graceful_timeout = 10
info = self.get_container_info()
if not info:
self.module.fail_json(
msg="No such container: {}".format(name))
else:
self.changed = True
if self.params.get('restart_policy') != 'oneshot':
self.systemd.create_unit_file()
if not self.systemd.restart():
self.module.fail_json(
changed=True,
msg="Container timed out",
**self.check_container())
else:
self.dc.stop(name, timeout=graceful_timeout)
self.dc.start(name)
def create_volume(self):
if not self.check_volume():
self.changed = True
self.dc.create_volume(name=self.params.get('name'), driver='local')
def remove_volume(self):
if self.check_volume():
self.changed = True
try:
self.dc.remove_volume(name=self.params.get('name'))
except docker.errors.APIError as e:
if e.response.status_code == 409:
self.module.fail_json(
failed=True,
msg="Volume named '{}' is currently in-use".format(
self.params.get('name')
)
)
raise
def remove_image(self):
if self.check_image():
self.changed = True
try:
self.dc.remove_image(image=self.params.get('image'))
except docker.errors.APIError as e:
if e.response.status_code == 409:
self.module.fail_json(
failed=True,
msg="Image '{}' is currently in-use".format(
self.params.get('image')
)
)
elif e.response.status_code == 500:
self.module.fail_json(
failed=True,
msg="Server error"
)
raise
def ensure_image(self):
if not self.check_image():
self.pull_image()