1497ab2ab3
MariaDB bootstrap has a phase where the first MariaDB container runs with Galera bootstrap; once a check confirms that WSREP is synced, we restart the container. The bootstrap container is named mariadb and runs with docker_restart_policy: "no", whereas the restarted container should run under systemd. Before this patch the code created a systemd unit, but the unit was initially stopped, so stopping it always succeeded and the container was then killed with SIGKILL on removal (which obviously breaks MariaDB). This patch also improves docker/systemd stops by waiting for the unit/container to really stop, and adds a failing CI check for containers that are killed with signal 9. Closes-Bug: #2029613 Change-Id: I0a03e509ce228a50e081fcab44d2b4831251190c
525 lines
19 KiB
Python
525 lines
19 KiB
Python
# Copyright 2015 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# FIXME(yoctozepto): restart_policy is *not* checked in the container
|
|
|
|
import docker
|
|
import json
|
|
import os
|
|
|
|
from ansible.module_utils.kolla_container_worker import COMPARE_CONFIG_CMD
|
|
from ansible.module_utils.kolla_container_worker import ContainerWorker
|
|
|
|
from distutils.version import StrictVersion
|
|
|
|
|
|
def get_docker_client():
    """Return the docker-py low-level ``APIClient`` class.

    The class object itself is returned, not an instance; the caller is
    expected to call the result with the client options it needs.
    """
    client_cls = docker.APIClient
    return client_cls
|
|
|
|
|
|
class DockerWorker(ContainerWorker):
    """Container worker that talks to Docker through docker-py's APIClient.

    Attributes used here but not defined in this class (``params``,
    ``module``, ``changed``, ``result``, ``systemd``, ``dimension_map``) and
    helpers such as ``parse_image``, ``generate_volumes``,
    ``generate_tmpfs``, ``parse_healthcheck`` and
    ``check_container_differs`` are presumably provided by
    ``ContainerWorker`` -- verify against the base class.
    """

    def __init__(self, module):
        """Create the Docker API client and detect API-version features.

        :param module: object handed to ``ContainerWorker.__init__``
            (presumably the AnsibleModule -- confirm in the base class).
        """
        super().__init__(module)

        options = {
            'version': self.params.get('api_version'),
            'timeout': self.params.get('client_timeout'),
        }

        self.dc = get_docker_client()(**options)

        # Feature flags derived from the negotiated Docker API version.
        self._cgroupns_mode_supported = (
            StrictVersion(self.dc._version) >= StrictVersion('1.41'))
        self._dimensions_kernel_memory_removed = (
            StrictVersion(self.dc._version) >= StrictVersion('1.42'))

        if self._dimensions_kernel_memory_removed:
            self.dimension_map.pop('kernel_memory', None)

    def generate_tls(self):
        """Build a ``docker.tls.TLSConfig`` from the ``tls_*`` parameters.

        When ``tls_verify`` is truthy, the client certificate/key pair and
        the CA certificate (if given) are checked with ``check_file`` before
        being put into the configuration.

        :returns: a ``docker.tls.TLSConfig`` instance.
        """
        tls = {'verify': self.params.get('tls_verify')}
        # NOTE: these must be plain values. A trailing comma here would turn
        # them into 1-tuples, which are always truthy and are not valid
        # paths for check_file().
        tls_cert = self.params.get('tls_cert')
        tls_key = self.params.get('tls_key')
        tls_cacert = self.params.get('tls_cacert')

        if tls['verify']:
            if tls_cert:
                self.check_file(tls_cert)
                self.check_file(tls_key)
                tls['client_cert'] = (tls_cert, tls_key)
            if tls_cacert:
                self.check_file(tls_cacert)
                # A CA path replaces the boolean "verify" flag.
                tls['verify'] = tls_cacert

        return docker.tls.TLSConfig(**tls)

    def check_file(self, path):
        """Fail the module unless ``path`` is an existing, readable file.

        :param path: filesystem path to validate.
        """
        if not os.path.isfile(path):
            self.module.fail_json(
                failed=True,
                msg='There is no file at "{}"'.format(path)
            )
        if not os.access(path, os.R_OK):
            self.module.fail_json(
                failed=True,
                msg='Permission denied for file at "{}"'.format(path)
            )

    def check_image(self):
        """Return the local image entry matching the requested image.

        The name to look for is ``':'.join(self.parse_image())``.

        :returns: the image dict from ``dc.images()`` whose ``RepoTags``
            contains that name, or None when there is no match.
        """
        find_image = ':'.join(self.parse_image())
        for image in self.dc.images():
            repo_tags = image.get('RepoTags')
            if not repo_tags:
                continue
            for image_name in repo_tags:
                if image_name == find_image:
                    return image

    def check_volume(self):
        """Return the volume whose name equals the ``name`` parameter.

        :returns: the volume dict from ``dc.volumes()``, or None.
        """
        # 'Volumes' may be None, hence the fallback to an empty list.
        for vol in self.dc.volumes()['Volumes'] or list():
            if vol['Name'] == self.params.get('name'):
                return vol

    def check_container(self):
        """Return the container (running or not) named by ``name``.

        :returns: the container dict from ``dc.containers(all=True)``,
            or None when no container has that name.
        """
        # Names in the listing are compared with a leading slash.
        find_name = '/{}'.format(self.params.get('name'))
        for cont in self.dc.containers(all=True):
            if find_name in cont['Names']:
                return cont

    def get_container_info(self):
        """Return ``inspect_container`` data for the container, or None.

        :returns: the inspect result, or None if the container is not
            found by ``check_container``.
        """
        container = self.check_container()
        if not container:
            return None
        return self.dc.inspect_container(self.params.get('name'))

    def compare_pid_mode(self, container_info):
        """Tell whether the requested PID mode differs from the container.

        :param container_info: inspect data with a ``HostConfig`` mapping.
        :returns: True when the modes differ, otherwise None.
        """
        new_pid_mode = self.params.get('pid_mode')
        current_pid_mode = container_info['HostConfig'].get('PidMode')
        # Normalise any falsy value (e.g. empty string) to None.
        if not current_pid_mode:
            current_pid_mode = None

        if new_pid_mode != current_pid_mode:
            return True

    def compare_image(self, container_info=None):
        """Tell whether the container runs a different image than requested.

        :param container_info: inspect data; fetched with
            ``get_container_info`` when not given.
        :returns: True when the container is missing, the requested image
            is not present locally, the image ids differ, or the parsed
            repository/tag differ; otherwise None.
        """
        container_info = container_info or self.get_container_info()
        parse_repository_tag = docker.utils.parse_repository_tag
        if not container_info:
            return True
        new_image = self.check_image()
        current_image = container_info['Image']
        if not new_image:
            return True
        if new_image['Id'] != current_image:
            return True
        # NOTE(Jeffrey4l) when new image and the current image have
        # the same id, but the tag name different.
        elif (parse_repository_tag(container_info['Config']['Image']) !=
              parse_repository_tag(self.params.get('image'))):
            return True

    def compare_volumes(self, container_info):
        """Tell whether requested volumes/binds differ from the container.

        :param container_info: inspect data with ``Config`` and
            ``HostConfig`` mappings.
        :returns: True when the volume set or the bind set differs,
            otherwise None.
        """
        volumes, binds = self.generate_volumes()
        current_vols = container_info['Config'].get('Volumes')
        current_binds = container_info['HostConfig'].get('Binds')
        if not volumes:
            volumes = list()
        if not current_vols:
            current_vols = list()
        if not current_binds:
            current_binds = list()

        if set(volumes).symmetric_difference(set(current_vols)):
            return True

        # Render requested binds in the "src:dst:mode" form used by the
        # container's HostConfig so that both sides are comparable.
        new_binds = list()
        if binds:
            for k, v in binds.items():
                new_binds.append("{}:{}:{}".format(k, v['bind'], v['mode']))

        if set(new_binds).symmetric_difference(set(current_binds)):
            return True

    def compare_config(self):
        """Run ``COMPARE_CONFIG_CMD`` in the container and interpret it.

        :returns: False when the exit code is 0, True when it is 1 or 137,
            and True when Docker reports a client error.
        :raises docker.errors.APIError: on a non-client API error.
        :raises Exception: on any other exit code.
        """
        try:
            job = self.dc.exec_create(
                self.params['name'],
                COMPARE_CONFIG_CMD,
                user='root',
            )
            output = self.dc.exec_start(job)
            exec_inspect = self.dc.exec_inspect(job)
        except docker.errors.APIError as e:
            # NOTE(yoctozepto): If we have a client error, then the container
            # cannot be used for config check (e.g., is restarting, or stopped
            # in the mean time) - assume config is stale = return True.
            # Else, propagate the server error back.
            if e.is_client_error():
                return True
            else:
                raise
        # Exit codes:
        # 0: not changed
        # 1: changed
        # 137: abrupt exit -> changed
        # else: error
        if exec_inspect['ExitCode'] == 0:
            return False
        elif exec_inspect['ExitCode'] == 1:
            return True
        elif exec_inspect['ExitCode'] == 137:
            # NOTE(yoctozepto): This is Docker's command exit due to container
            # exit. It means the container is unstable so we are better off
            # marking it as requiring a restart due to config update.
            return True
        else:
            raise Exception('Failed to compare container configuration: '
                            'ExitCode: %s Message: %s' %
                            (exec_inspect['ExitCode'], output))

    def get_image_id(self):
        """Return the id of the requested image if exactly one matches.

        :returns: the single id from ``dc.images(name=..., quiet=True)``,
            or None when zero or several images match.
        """
        full_image = self.params.get('image')

        image = self.dc.images(name=full_image, quiet=True)
        return image[0] if len(image) == 1 else None

    def pull_image(self):
        """Pull the requested image, logging in first when needed.

        Logs in when ``auth_username`` is set, pulls the image as a stream
        and fails the module if any status line carries an ``error`` key.
        Sets ``self.changed`` to whether the image id changed.
        """
        if self.params.get('auth_username'):
            self.dc.login(
                username=self.params.get('auth_username'),
                password=self.params.get('auth_password'),
                registry=self.params.get('auth_registry'),
                email=self.params.get('auth_email')
            )

        image, tag = self.parse_image()
        old_image_id = self.get_image_id()

        # Each streamed line is a JSON document encoded as bytes.
        statuses = [
            json.loads(line.strip().decode('utf-8')) for line in self.dc.pull(
                repository=image, tag=tag, stream=True
            )
        ]

        for status in reversed(statuses):
            if 'error' in status:
                if status['error'].endswith('not found'):
                    self.module.fail_json(
                        msg="The requested image does not exist: {}:{}".format(
                            image, tag),
                        failed=True
                    )
                else:
                    self.module.fail_json(
                        msg="Unknown error message: {}".format(
                            status['error']),
                        failed=True
                    )

        new_image_id = self.get_image_id()
        self.changed = old_image_id != new_image_id

    def remove_container(self):
        """Remove the systemd unit file and force-remove the container.

        :raises docker.errors.APIError: when removal fails and the
            container is still listed afterwards.
        """
        self.changed |= self.systemd.remove_unit_file()
        if self.check_container():
            self.changed = True
            # NOTE(jeffrey4l): in some case, docker failed to remove container
            # filesystem and raise error. But the container info is
            # disappeared already. If this happens, assume the container is
            # removed.
            try:
                self.dc.remove_container(
                    container=self.params.get('name'),
                    force=True
                )
                self.systemd.remove_unit_file()
            except docker.errors.APIError:
                if self.check_container():
                    raise

    def parse_dimensions(self, dimensions):
        """Validate resource dimensions and convert ulimits.

        :param dimensions: mapping of dimension name to value.
        :returns: a shallow copy of ``dimensions`` in which ``ulimits``
            (when present and truthy) is replaced by the list built by
            ``build_ulimits``.

        Exits the module with ``failed=True`` when an unsupported key is
        present.
        """
        # When the data object contains types such as
        # docker.types.Ulimit, Ansible will fail when these are
        # returned via exit_json or fail_json. HostConfig is derived from dict,
        # but its constructor requires additional arguments.
        # to avoid that, here do copy the dimensions and return a new one.
        dimensions = dimensions.copy()

        supported = {'cpu_period', 'cpu_quota', 'cpu_shares',
                     'cpuset_cpus', 'cpuset_mems', 'mem_limit',
                     'mem_reservation', 'memswap_limit',
                     'kernel_memory', 'blkio_weight', 'ulimits'}
        unsupported = set(dimensions) - supported
        if unsupported:
            self.module.exit_json(failed=True,
                                  msg=repr("Unsupported dimensions"),
                                  unsupported_dimensions=unsupported)

        ulimits = dimensions.get('ulimits')
        if ulimits:
            dimensions['ulimits'] = self.build_ulimits(ulimits)

        return dimensions

    def build_ulimits(self, ulimits):
        """Convert a ulimits mapping into ``docker.types.Ulimit`` objects.

        :param ulimits: mapping of limit name to a mapping that may hold
            ``soft`` and ``hard`` values.
        :returns: list of ``docker.types.Ulimit``.
        """
        ulimits_opt = []
        for key, value in ulimits.items():
            soft = value.get('soft')
            hard = value.get('hard')
            ulimits_opt.append(docker.types.Ulimit(name=key,
                                                   soft=soft,
                                                   hard=hard))
        return ulimits_opt

    def build_host_config(self, binds):
        """Build the Docker host configuration for the container.

        :param binds: bind mounts to add to the host config; skipped when
            falsy.
        :returns: the object returned by ``dc.create_host_config``, with
            ``CgroupnsMode`` added when supported and requested.
        """
        options = {
            'network_mode': 'host',
            'ipc_mode': self.params.get('ipc_mode'),
            'cap_add': self.params.get('cap_add'),
            'security_opt': self.params.get('security_opt'),
            'pid_mode': self.params.get('pid_mode'),
            'privileged': self.params.get('privileged'),
            'tmpfs': self.generate_tmpfs(),
            'volumes_from': self.params.get('volumes_from')
        }

        dimensions = self.params.get('dimensions')

        if dimensions:
            dimensions = self.parse_dimensions(dimensions)
            options.update(dimensions)

        if binds:
            options['binds'] = binds

        host_config = self.dc.create_host_config(**options)

        if self._cgroupns_mode_supported:
            # NOTE(yoctozepto): python-docker does not support CgroupnsMode
            # natively so we stuff it in manually.
            cgroupns_mode = self.params.get('cgroupns_mode')
            if cgroupns_mode is not None:
                host_config['CgroupnsMode'] = cgroupns_mode

        # detached containers should only log to journald
        # NOTE(review): host_config has already been built from ``options``
        # above and ``options`` is not used again in this method, so this
        # assignment does not reach the returned host_config. Left unchanged
        # to avoid altering container logging behaviour -- confirm intent.
        if self.params.get('detach'):
            options['log_config'] = docker.types.LogConfig(
                type=docker.types.LogConfig.types.NONE)

        return host_config

    def _inject_env_var(self, environment_info):
        """Add ``KOLLA_SERVICE_NAME`` to the environment mapping.

        The value is the container name with underscores replaced by
        dashes. ``environment_info`` is updated in place and returned.
        """
        newenv = {
            'KOLLA_SERVICE_NAME': self.params.get('name').replace('_', '-')
        }
        environment_info.update(newenv)
        return environment_info

    def _format_env_vars(self):
        """Return the environment with None values replaced by ``""``."""
        env = self._inject_env_var(self.params.get('environment'))
        return {k: "" if env[k] is None else env[k] for k in env}

    def build_container_options(self):
        """Assemble keyword arguments for ``dc.create_container``.

        :returns: dict of options, extended with the result of
            ``parse_healthcheck`` when that is truthy.
        """
        volumes, binds = self.generate_volumes()

        options = {
            'command': self.params.get('command'),
            'detach': self.params.get('detach'),
            'environment': self._format_env_vars(),
            'host_config': self.build_host_config(binds),
            'labels': self.params.get('labels'),
            'image': self.params.get('image'),
            'name': self.params.get('name'),
            'volumes': volumes,
            'tty': self.params.get('tty'),
        }

        healthcheck = self.parse_healthcheck(self.params.get('healthcheck'))
        if healthcheck:
            options.update(healthcheck)

        return options

    def create_container(self):
        """Create the container; add a systemd unit unless policy is 'no'."""
        self.changed = True
        options = self.build_container_options()
        self.dc.create_container(**options)
        if self.params.get('restart_policy') != 'no':
            self.changed |= self.systemd.create_unit_file()

    def recreate_or_restart_container(self):
        """Recreate or restart the container based on config strategy.

        Starts the container when it does not exist. Otherwise recreates it
        when ``KOLLA_CONFIG_STRATEGY`` is ``COPY_ONCE`` or the container
        differs, and restarts it when the strategy is ``COPY_ALWAYS``.
        """
        self.changed = True
        container = self.check_container()
        # get config_strategy from env
        environment = self.params.get('environment')
        config_strategy = environment.get('KOLLA_CONFIG_STRATEGY')

        if not container:
            self.start_container()
            return
        # If config_strategy is COPY_ONCE or container's parameters are
        # changed, try to start a new one.
        if config_strategy == 'COPY_ONCE' or self.check_container_differs():
            # NOTE(mgoddard): Pull the image if necessary before stopping the
            # container, otherwise a failure to pull the image will leave the
            # container stopped.
            if not self.check_image():
                self.pull_image()
            self.stop_container()
            self.remove_container()
            self.start_container()
        elif config_strategy == 'COPY_ALWAYS':
            self.restart_container()

    def start_container(self):
        """Ensure the container exists and is started.

        Pulls the image if missing, recreates a container that differs,
        creates it if absent and starts it (directly through Docker when
        ``restart_policy`` is 'no', otherwise through ``self.systemd``).
        When ``detach`` is falsy, waits for the container to exit, stores
        ``rc``/``stdout``/``stderr`` in ``self.result`` and fails the module
        on a non-zero return code.
        """
        if not self.check_image():
            self.pull_image()

        container = self.check_container()
        if container and self.check_container_differs():
            self.stop_container()
            self.remove_container()
            container = self.check_container()

        if not container:
            self.create_container()
            container = self.check_container()

        if not container['Status'].startswith('Up '):
            self.changed = True
            if self.params.get('restart_policy') == 'no':
                self.dc.start(container=self.params.get('name'))
            else:
                self.systemd.create_unit_file()
                if not self.systemd.start():
                    # The container may have disappeared meanwhile; fall
                    # back to an empty mapping so the failure is still
                    # reported instead of raising TypeError on ``**None``.
                    self.module.fail_json(
                        changed=True,
                        msg="Container timed out",
                        **(self.check_container() or {}))

        # We do not want to detach so we wait around for container to exit
        if not self.params.get('detach'):
            rc = self.dc.wait(self.params.get('name'))
            # NOTE(jeffrey4l): since python docker package 3.0, wait return a
            # dict all the time.
            if isinstance(rc, dict):
                rc = rc['StatusCode']
            # Include container's return code, standard output and error in
            # the result.
            self.result['rc'] = rc
            self.result['stdout'] = self.dc.logs(self.params.get('name'),
                                                 stdout=True, stderr=False)
            self.result['stderr'] = self.dc.logs(self.params.get('name'),
                                                 stdout=False, stderr=True)
            if self.params.get('remove_on_exit'):
                self.stop_container()
                self.remove_container()
            if rc != 0:
                self.module.fail_json(
                    changed=True,
                    msg="Container exited with non-zero return code %s" % rc,
                    **self.result
                )

    def stop_container(self):
        """Stop the container if it exists and has not already exited.

        Uses ``dc.stop`` when ``self.systemd.check_unit_file()`` is falsy,
        otherwise ``self.systemd.stop()``. Fails the module when the
        container is missing and ``ignore_missing`` is falsy.
        """
        name = self.params.get('name')
        # A missing or zero timeout falls back to 10.
        graceful_timeout = self.params.get('graceful_timeout') or 10
        container = self.check_container()
        if not container:
            ignore_missing = self.params.get('ignore_missing')
            if not ignore_missing:
                self.module.fail_json(
                    msg="No such container: {} to stop".format(name))
        elif not container['Status'].startswith('Exited '):
            self.changed = True
            if not self.systemd.check_unit_file():
                self.dc.stop(name, timeout=graceful_timeout)
            else:
                self.systemd.stop()

    def stop_and_remove_container(self):
        """Stop the container if present, then remove it."""
        container = self.check_container()
        if container:
            self.stop_container()
        self.remove_container()

    def restart_container(self):
        """Restart the container.

        Goes through ``self.systemd`` unless ``restart_policy`` is 'no', in
        which case the container is stopped and started through Docker.
        Fails the module when the container does not exist or when the
        systemd restart reports failure.
        """
        name = self.params.get('name')
        # A missing or zero timeout falls back to 10.
        graceful_timeout = self.params.get('graceful_timeout') or 10
        info = self.get_container_info()
        if not info:
            self.module.fail_json(
                msg="No such container: {}".format(name))
        else:
            self.changed = True
            if self.params.get('restart_policy') != 'no':
                self.systemd.create_unit_file()
                if not self.systemd.restart():
                    # Guard against the container having disappeared so
                    # that ``**None`` does not raise TypeError here.
                    self.module.fail_json(
                        changed=True,
                        msg="Container timed out",
                        **(self.check_container() or {}))
            else:
                self.dc.stop(name, timeout=graceful_timeout)
                self.dc.start(name)

    def create_volume(self):
        """Create a ``local`` volume named ``name`` if it does not exist."""
        if not self.check_volume():
            self.changed = True
            self.dc.create_volume(name=self.params.get('name'), driver='local')

    def remove_volume(self):
        """Remove the volume named ``name`` if it exists.

        Fails the module on HTTP 409 from Docker.

        :raises docker.errors.APIError: for any other API error.
        """
        if self.check_volume():
            self.changed = True
            try:
                self.dc.remove_volume(name=self.params.get('name'))
            except docker.errors.APIError as e:
                if e.response.status_code == 409:
                    self.module.fail_json(
                        failed=True,
                        msg="Volume named '{}' is currently in-use".format(
                            self.params.get('name')
                        )
                    )
                raise

    def remove_image(self):
        """Remove the requested image if it is present locally.

        Fails the module on HTTP 409 and on HTTP 500 from Docker.

        :raises docker.errors.APIError: for any other API error.
        """
        if self.check_image():
            self.changed = True
            try:
                self.dc.remove_image(image=self.params.get('image'))
            except docker.errors.APIError as e:
                if e.response.status_code == 409:
                    self.module.fail_json(
                        failed=True,
                        msg="Image '{}' is currently in-use".format(
                            self.params.get('image')
                        )
                    )
                elif e.response.status_code == 500:
                    self.module.fail_json(
                        failed=True,
                        msg="Server error"
                    )
                raise

    def ensure_image(self):
        """Pull the requested image when it is not present locally."""
        if not self.check_image():
            self.pull_image()