Libvirt: escape kube cgroups and pid reaper

This PS runs the Libvirt process as a transient unit on the host,
free from k8s-controlled cgroups. In addition, it switches to the
libvirt/qemu packages provided by the cloud archive.

Change-Id: Idfe9ae6f072acd86f877df0c3dfe3db4c20902d6
Signed-off-by: Pete Birley <pete@port.direct>
This commit is contained in:
Pete Birley 2018-10-01 19:10:47 -05:00 committed by Pete Birley
parent 500698398d
commit 25985f7b43
6 changed files with 152 additions and 8 deletions

View File

@ -30,6 +30,15 @@ if [[ -c /dev/kvm ]]; then
chown root:kvm /dev/kvm
fi
# Set up the cgroups to use when breaking out of the Kubernetes-defined groups.
# CGROUPS is built as a comma-terminated list of those controllers (out of
# cpu, rdma and hugetlb) that have a directory under /sys/fs/cgroup; the
# trailing comma is stripped with ${CGROUPS%,} wherever the list is consumed.
CGROUPS=""
for CGROUP in cpu rdma hugetlb; do
  if [[ -d "/sys/fs/cgroup/${CGROUP}" ]]; then
    CGROUPS="${CGROUPS}${CGROUP},"
  fi
done
# Create the osh-libvirt group under every controller collected above.
cgcreate -g "${CGROUPS%,}:/osh-libvirt"
# We assume that if hugepage count > 0, then hugepages should be exposed to libvirt/qemu
hp_count="$(cat /proc/meminfo | grep HugePages_Total | tr -cd '[:digit:]')"
if [ 0"$hp_count" -gt 0 ]; then
@ -55,11 +64,6 @@ if [ 0"$hp_count" -gt 0 ]; then
# hugepage byte limit quota to zero out. This workaround sets that pod limit
# back to the total number of hugepage bytes available to the baremetal host.
if [ -d /sys/fs/cgroup/hugetlb ]; then
# NOTE(portdirect): Kubelet will always create pod specific cgroups for
# hugetables so if the hugetlb cgroup is enabled, when k8s removes the pod
# it will also remove the hugetlb cgroup for the pod, taking any qemu
# processes with it.
echo "WARN: As the hugetlb cgroup is enabled, it will not be possible to restart the libvirt pod via k8s, without killing VMs."
for limit in $(ls /sys/fs/cgroup/hugetlb/kubepods/hugetlb.*.limit_in_bytes); do
target="/sys/fs/cgroup/hugetlb/$(dirname $(awk -F: '($2~/hugetlb/){print $3}' /proc/self/cgroup))/$(basename $limit)"
# Ensure the write target for the hugepage limit for the pod exists
@ -88,7 +92,8 @@ if [ 0"$hp_count" -gt 0 ]; then
fi
if [ -n "${LIBVIRT_CEPH_CINDER_SECRET_UUID}" ] ; then
libvirtd --listen &
#NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied.
cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen &
tmpsecret=$(mktemp --suffix .xml)
function cleanup {
@ -140,5 +145,6 @@ EOF
# rejoin libvirtd
wait
else
exec libvirtd --listen
#NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied.
exec cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen
fi

View File

@ -51,6 +51,7 @@ spec:
{{ .Values.labels.agent.libvirt.node_selector_key }}: {{ .Values.labels.agent.libvirt.node_selector_value }}
hostNetwork: true
hostPID: true
hostIPC: true
dnsPolicy: ClusterFirstWithHostNet
initContainers:
{{ tuple $envAll "pod_dependency" $mounts_libvirt_init | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}

View File

@ -27,7 +27,7 @@ labels:
images:
tags:
libvirt: docker.io/openstackhelm/libvirt:ubuntu-xenial-1.3.1
libvirt: docker.io/openstackhelm/libvirt:ubuntu-xenial-ocata
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.1
image_repo_sync: docker.io/docker:17.07.0
pull_policy: "IfNotPresent"

View File

@ -0,0 +1,43 @@
# Libvirt/QEMU image for OpenStack-Helm, based on Ubuntu Xenial.
FROM docker.io/ubuntu:xenial
# MAINTAINER is a deprecated instruction; record the maintainer as a label.
LABEL maintainer="pete.birley@att.com"

# Build-time knobs:
#   TARGET_OPENSTACK_VERSION  cloud-archive pocket (xenial-updates/<version>)
#   CEPH_RELEASE              download.ceph.com repository (debian-<release>)
#   PROJECT, UID, GID         service user/group created inside the image
ARG TARGET_OPENSTACK_VERSION=ocata
ARG CEPH_RELEASE=luminous
ARG PROJECT=nova
ARG UID=42424
ARG GID=42424

# Fetch the Ceph release signing key; it is imported and removed in the RUN below.
ADD https://download.ceph.com/keys/release.asc /etc/apt/ceph-release.asc

# Single layer that:
#   1. registers the Ceph and Ubuntu cloud-archive apt repositories
#      (key 5EDB1B62EC4926EA is presumably the cloud-archive signing key -
#      TODO confirm),
#   2. upgrades the base system and installs libvirt, qemu and supporting
#      tools (cgroup-tools provides the cgcreate/cgexec commands),
#   3. creates the ${PROJECT} user/group, its directories, and adds the user
#      to the kvm group,
#   4. cleans apt caches, logs and temporary files to keep the layer small.
RUN set -ex ;\
    export DEBIAN_FRONTEND=noninteractive ;\
    apt-key add /etc/apt/ceph-release.asc ;\
    rm -f /etc/apt/ceph-release.asc ;\
    echo "deb http://download.ceph.com/debian-${CEPH_RELEASE}/ xenial main" | tee /etc/apt/sources.list.d/ceph.list ;\
    apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 5EDB1B62EC4926EA ;\
    echo "deb http://ubuntu-cloud.archive.canonical.com/ubuntu xenial-updates/${TARGET_OPENSTACK_VERSION} main" | tee /etc/apt/sources.list.d/cloud-archive.list ;\
    apt-get update ;\
    apt-get upgrade -y ;\
    apt-get install --no-install-recommends -y \
        ceph-common \
        cgroup-tools \
        dmidecode \
        ebtables \
        iproute2 \
        libvirt-bin \
        pm-utils \
        qemu \
        qemu-block-extra \
        qemu-efi \
        openvswitch-switch ;\
    groupadd -g ${GID} ${PROJECT} ;\
    useradd -u ${UID} -g ${PROJECT} -M -d /var/lib/${PROJECT} -s /usr/sbin/nologin -c "${PROJECT} user" ${PROJECT} ;\
    mkdir -p /etc/${PROJECT} /var/log/${PROJECT} /var/lib/${PROJECT} /var/cache/${PROJECT} ;\
    chown ${PROJECT}:${PROJECT} /etc/${PROJECT} /var/log/${PROJECT} /var/lib/${PROJECT} /var/cache/${PROJECT} ;\
    usermod -a -G kvm ${PROJECT} ;\
    apt-get clean -y ;\
    rm -rf \
        /var/cache/debconf/* \
        /var/lib/apt/lists/* \
        /var/log/* \
        /tmp/* \
        /var/tmp/*

View File

@ -0,0 +1,46 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Recipes need bash, and some environments don't link sh -> bash, so the shell
# is pinned explicitly.
SHELL := /bin/bash

# What goes into the image. Every knob can be overridden from the environment
# or the make command line.
TARGET_OPENSTACK_VERSION ?= ocata
CEPH_RELEASE ?= luminous
DISTRO ?= ubuntu
DISTRO_RELEASE ?= xenial

# Where the image is published and how it is named.
DOCKER_REGISTRY ?= docker.io
IMAGE_PREFIX ?= openstackhelm
IMAGE_NAME ?= libvirt
IMAGE_TAG ?= $(DISTRO)-$(DISTRO_RELEASE)-$(TARGET_OPENSTACK_VERSION)

# Value handed to `docker build --label`.
LABEL ?= putlabelshere

# Fully qualified image reference, expanded once here.
IMAGE := $(DOCKER_REGISTRY)/$(IMAGE_PREFIX)/$(IMAGE_NAME):$(IMAGE_TAG)
# Build the libvirt Docker image for this project. As the first rule in the
# file, `images` is also the default goal.
.PHONY: images
images: build_$(IMAGE_NAME)

# Make targets intended for use by the primary targets above.
#
# Runs `docker build` against the Dockerfile matching the selected distro and
# release, passing the OpenStack and Ceph release selections through as build
# args and tagging the result as $(IMAGE). The Dockerfile name is derived from
# DISTRO_RELEASE rather than hard-coded, so an overridden release can no longer
# build the xenial Dockerfile under a tag that claims a different release.
# Recipe lines must start with a hard TAB.
.PHONY: build_$(IMAGE_NAME)
build_$(IMAGE_NAME):
	docker build \
		--network=host \
		--force-rm \
		--file=./Dockerfile.$(DISTRO).$(DISTRO_RELEASE) \
		--build-arg TARGET_OPENSTACK_VERSION="$(TARGET_OPENSTACK_VERSION)" \
		--build-arg CEPH_RELEASE="$(CEPH_RELEASE)" \
		--label $(LABEL) \
		-t $(IMAGE) \
		.

View File

@ -0,0 +1,48 @@
Libvirt Container
=================
This container builds a small image with Libvirt for use with OpenStack-Helm.
Instructions
------------
OS Specific Host setup:
~~~~~~~~~~~~~~~~~~~~~~~
Ubuntu:
^^^^^^^
From a freshly provisioned Ubuntu 16.04 LTS host run:
.. code:: bash
sudo apt-get update -y
sudo apt-get install -y \
docker.io \
git
Build the Libvirt Image
~~~~~~~~~~~~~~~~~~~~~~~
A known good image is published to Docker Hub on a fairly regular basis. If
you wish to build your own image, run the following from the root directory of
the OpenStack-Helm repo:
.. code:: bash
TARGET_OPENSTACK_VERSION=ocata
DISTRO=ubuntu
DISTRO_RELEASE=xenial
CEPH_RELEASE=luminous
sudo docker build \
--network=host \
--force-rm \
--pull \
--no-cache \
--file=./tools/images/libvirt/Dockerfile.${DISTRO}.xenial \
--build-arg TARGET_OPENSTACK_VERSION="${TARGET_OPENSTACK_VERSION}" \
--build-arg CEPH_RELEASE="${CEPH_RELEASE}" \
-t docker.io/openstackhelm/libvirt:${DISTRO}-${DISTRO_RELEASE}-${TARGET_OPENSTACK_VERSION} \
tools/images/libvirt
sudo docker push docker.io/openstackhelm/libvirt:${DISTRO}-${DISTRO_RELEASE}-${TARGET_OPENSTACK_VERSION}