Libvirt: escape kube cgroups and pid reaper
This PS moves to run the Libvirt process as a transient unit on the host, free from k8s controlled cgroups. In addition it also uses the cloud archive provided libvirt/qemu packages. Change-Id: Idfe9ae6f072acd86f877df0c3dfe3db4c20902d6 Signed-off-by: Pete Birley <pete@port.direct>
This commit is contained in:
parent
500698398d
commit
25985f7b43
@ -30,6 +30,15 @@ if [[ -c /dev/kvm ]]; then
|
|||||||
chown root:kvm /dev/kvm
|
chown root:kvm /dev/kvm
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Set up the cgroups to use when breaking out of the Kubernetes defined groups.
#
# CGROUPS is built as a comma-terminated list (e.g. "cpu,hugetlb,") of the
# controllers from the candidate set that have a directory under
# /sys/fs/cgroup on this host. Consumers strip the trailing comma with
# ${CGROUPS%,} when building a "<controllers>:<path>" spec.
CGROUPS=""
for CGROUP in cpu rdma hugetlb; do
  if [ -d "/sys/fs/cgroup/${CGROUP}" ]; then
    CGROUPS+="${CGROUP},"
  fi
done

# With no controller found the spec would collapse to ":/osh-libvirt", which
# names no controller at all. Stop here with an explicit message rather than
# letting a later cgroup command fail with a less obvious error.
if [ -z "${CGROUPS}" ]; then
  echo "ERROR: none of the cpu, rdma or hugetlb cgroup controllers were found under /sys/fs/cgroup" >&2
  exit 1
fi

# Create the osh-libvirt group in every controller that was discovered.
cgcreate -g "${CGROUPS%,}:/osh-libvirt"
|
||||||
|
|
||||||
# We assume that if hugepage count > 0, then hugepages should be exposed to libvirt/qemu
|
# We assume that if hugepage count > 0, then hugepages should be exposed to libvirt/qemu
|
||||||
hp_count="$(cat /proc/meminfo | grep HugePages_Total | tr -cd '[:digit:]')"
|
hp_count="$(cat /proc/meminfo | grep HugePages_Total | tr -cd '[:digit:]')"
|
||||||
if [ 0"$hp_count" -gt 0 ]; then
|
if [ 0"$hp_count" -gt 0 ]; then
|
||||||
@ -55,11 +64,6 @@ if [ 0"$hp_count" -gt 0 ]; then
|
|||||||
# hugepage byte limit quota to zero out. This workaround sets that pod limit
|
# hugepage byte limit quota to zero out. This workaround sets that pod limit
|
||||||
# back to the total number of hugepage bytes available to the baremetal host.
|
# back to the total number of hugepage bytes available to the baremetal host.
|
||||||
if [ -d /sys/fs/cgroup/hugetlb ]; then
|
if [ -d /sys/fs/cgroup/hugetlb ]; then
|
||||||
# NOTE(portdirect): Kubelet will always create pod specific cgroups for
|
|
||||||
# hugetables so if the hugetlb cgroup is enabled, when k8s removes the pod
|
|
||||||
# it will also remove the hugetlb cgroup for the pod, taking any qemu
|
|
||||||
# processes with it.
|
|
||||||
echo "WARN: As the hugetlb cgroup is enabled, it will not be possible to restart the libvirt pod via k8s, without killing VMs."
|
|
||||||
for limit in $(ls /sys/fs/cgroup/hugetlb/kubepods/hugetlb.*.limit_in_bytes); do
|
for limit in $(ls /sys/fs/cgroup/hugetlb/kubepods/hugetlb.*.limit_in_bytes); do
|
||||||
target="/sys/fs/cgroup/hugetlb/$(dirname $(awk -F: '($2~/hugetlb/){print $3}' /proc/self/cgroup))/$(basename $limit)"
|
target="/sys/fs/cgroup/hugetlb/$(dirname $(awk -F: '($2~/hugetlb/){print $3}' /proc/self/cgroup))/$(basename $limit)"
|
||||||
# Ensure the write target for the hugepage limit for the pod exists
|
# Ensure the write target for the hugepage limit for the pod exists
|
||||||
@ -88,7 +92,8 @@ if [ 0"$hp_count" -gt 0 ]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -n "${LIBVIRT_CEPH_CINDER_SECRET_UUID}" ] ; then
|
if [ -n "${LIBVIRT_CEPH_CINDER_SECRET_UUID}" ] ; then
|
||||||
libvirtd --listen &
|
#NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied.
|
||||||
|
cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen &
|
||||||
|
|
||||||
tmpsecret=$(mktemp --suffix .xml)
|
tmpsecret=$(mktemp --suffix .xml)
|
||||||
function cleanup {
|
function cleanup {
|
||||||
@ -140,5 +145,6 @@ EOF
|
|||||||
# rejoin libvirtd
|
# rejoin libvirtd
|
||||||
wait
|
wait
|
||||||
else
|
else
|
||||||
exec libvirtd --listen
|
#NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied.
|
||||||
|
exec cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen
|
||||||
fi
|
fi
|
||||||
|
@ -51,6 +51,7 @@ spec:
|
|||||||
{{ .Values.labels.agent.libvirt.node_selector_key }}: {{ .Values.labels.agent.libvirt.node_selector_value }}
|
{{ .Values.labels.agent.libvirt.node_selector_key }}: {{ .Values.labels.agent.libvirt.node_selector_value }}
|
||||||
hostNetwork: true
|
hostNetwork: true
|
||||||
hostPID: true
|
hostPID: true
|
||||||
|
hostIPC: true
|
||||||
dnsPolicy: ClusterFirstWithHostNet
|
dnsPolicy: ClusterFirstWithHostNet
|
||||||
initContainers:
|
initContainers:
|
||||||
{{ tuple $envAll "pod_dependency" $mounts_libvirt_init | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
|
{{ tuple $envAll "pod_dependency" $mounts_libvirt_init | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
|
||||||
|
@ -27,7 +27,7 @@ labels:
|
|||||||
|
|
||||||
images:
|
images:
|
||||||
tags:
|
tags:
|
||||||
libvirt: docker.io/openstackhelm/libvirt:ubuntu-xenial-1.3.1
|
libvirt: docker.io/openstackhelm/libvirt:ubuntu-xenial-ocata
|
||||||
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.1
|
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.1
|
||||||
image_repo_sync: docker.io/docker:17.07.0
|
image_repo_sync: docker.io/docker:17.07.0
|
||||||
pull_policy: "IfNotPresent"
|
pull_policy: "IfNotPresent"
|
||||||
|
43
tools/images/libvirt/Dockerfile.ubuntu.xenial
Normal file
43
tools/images/libvirt/Dockerfile.ubuntu.xenial
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# Libvirt/QEMU image for OpenStack-Helm, based on Ubuntu Xenial with packages
# taken from the Ubuntu Cloud Archive and the upstream Ceph repository.
FROM docker.io/ubuntu:xenial
# MAINTAINER is a deprecated instruction; a label carries the same contact.
LABEL maintainer="pete.birley@att.com"

# Build-time parameters:
#   TARGET_OPENSTACK_VERSION - Ubuntu Cloud Archive pocket to install from.
#   CEPH_RELEASE             - Ceph release whose apt repository is enabled.
#   PROJECT                  - name of the service user/group and its directories.
#   UID / GID                - numeric ids given to that user and group.
ARG TARGET_OPENSTACK_VERSION=ocata
ARG CEPH_RELEASE=luminous
ARG PROJECT=nova
ARG UID=42424
ARG GID=42424

# Fetch the Ceph release signing key; it is imported and then removed below.
ADD https://download.ceph.com/keys/release.asc /etc/apt/ceph-release.asc
# Single layer: enable the Ceph and Cloud Archive repositories, install the
# virtualisation stack, create the ${PROJECT} user (member of kvm) with its
# directories, then strip apt lists, logs and temp files from the layer.
RUN set -ex ;\
    export DEBIAN_FRONTEND=noninteractive ;\
    apt-key add /etc/apt/ceph-release.asc ;\
    rm -f /etc/apt/ceph-release.asc ;\
    echo "deb http://download.ceph.com/debian-${CEPH_RELEASE}/ xenial main" | tee /etc/apt/sources.list.d/ceph.list ;\
    apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 5EDB1B62EC4926EA ;\
    echo "deb http://ubuntu-cloud.archive.canonical.com/ubuntu xenial-updates/${TARGET_OPENSTACK_VERSION} main" | tee /etc/apt/sources.list.d/cloud-archive.list ;\
    apt-get update ;\
    apt-get upgrade -y ;\
    apt-get install --no-install-recommends -y \
        ceph-common \
        cgroup-tools \
        dmidecode \
        ebtables \
        iproute2 \
        libvirt-bin \
        pm-utils \
        qemu \
        qemu-block-extra \
        qemu-efi \
        openvswitch-switch ;\
    groupadd -g ${GID} ${PROJECT} ;\
    useradd -u ${UID} -g ${PROJECT} -M -d /var/lib/${PROJECT} -s /usr/sbin/nologin -c "${PROJECT} user" ${PROJECT} ;\
    mkdir -p /etc/${PROJECT} /var/log/${PROJECT} /var/lib/${PROJECT} /var/cache/${PROJECT} ;\
    chown ${PROJECT}:${PROJECT} /etc/${PROJECT} /var/log/${PROJECT} /var/lib/${PROJECT} /var/cache/${PROJECT} ;\
    usermod -a -G kvm ${PROJECT} ;\
    apt-get clean -y ;\
    rm -rf \
        /var/cache/debconf/* \
        /var/lib/apt/lists/* \
        /var/log/* \
        /tmp/* \
        /var/tmp/*
|
46
tools/images/libvirt/Makefile
Normal file
46
tools/images/libvirt/Makefile
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
# Copyright 2017 The Openstack-Helm Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# It's necessary to set this because some environments don't link sh -> bash.
|
||||||
|
SHELL := /bin/bash
|
||||||
|
|
||||||
|
TARGET_OPENSTACK_VERSION ?= ocata
|
||||||
|
DISTRO ?= ubuntu
|
||||||
|
DISTRO_RELEASE ?= xenial
|
||||||
|
CEPH_RELEASE ?= luminous
|
||||||
|
|
||||||
|
DOCKER_REGISTRY ?= docker.io
|
||||||
|
IMAGE_NAME ?= libvirt
|
||||||
|
IMAGE_PREFIX ?= openstackhelm
|
||||||
|
IMAGE_TAG ?= $(DISTRO)-$(DISTRO_RELEASE)-$(TARGET_OPENSTACK_VERSION)
|
||||||
|
LABEL ?= putlabelshere
|
||||||
|
|
||||||
|
IMAGE := ${DOCKER_REGISTRY}/${IMAGE_PREFIX}/${IMAGE_NAME}:${IMAGE_TAG}
|
||||||
|
|
||||||
|
# Build libvirt Docker image for this project
|
||||||
|
.PHONY: images
|
||||||
|
images: build_$(IMAGE_NAME)
|
||||||
|
|
||||||
|
# Make targets intended for use by the primary targets above.
|
||||||
|
.PHONY: build_$(IMAGE_NAME)
|
||||||
|
# Build the image from the Dockerfile matching DISTRO and DISTRO_RELEASE.
# The Dockerfile name follows DISTRO_RELEASE (default: xenial) so that it
# always agrees with the release component of IMAGE_TAG; overriding
# DISTRO_RELEASE without a matching Dockerfile now fails instead of
# silently building the xenial image under a different tag.
build_$(IMAGE_NAME):
	docker build \
		--network=host \
		--force-rm \
		--file=./Dockerfile.$(DISTRO).$(DISTRO_RELEASE) \
		--build-arg TARGET_OPENSTACK_VERSION="$(TARGET_OPENSTACK_VERSION)" \
		--build-arg CEPH_RELEASE="$(CEPH_RELEASE)" \
		--label $(LABEL) \
		-t $(IMAGE) \
		.
|
48
tools/images/libvirt/README.rst
Normal file
48
tools/images/libvirt/README.rst
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
Libvirt Container
|
||||||
|
=================
|
||||||
|
|
||||||
|
This container builds a small image with Libvirt for use with OpenStack-Helm.
|
||||||
|
|
||||||
|
Instructions
|
||||||
|
------------
|
||||||
|
|
||||||
|
OS Specific Host setup:
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Ubuntu:
|
||||||
|
^^^^^^^
|
||||||
|
|
||||||
|
From a freshly provisioned Ubuntu 16.04 LTS host run:
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
sudo apt-get update -y
|
||||||
|
sudo apt-get install -y \
|
||||||
|
docker.io \
|
||||||
|
git
|
||||||
|
|
||||||
|
Build the Libvirt Image
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
A known good image is published to Docker Hub on a fairly regular basis, but if
|
||||||
|
you wish to build your own image, from the root directory of the OpenStack-Helm
|
||||||
|
repo run:
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
TARGET_OPENSTACK_VERSION=ocata
|
||||||
|
DISTRO=ubuntu
|
||||||
|
DISTRO_RELEASE=xenial
|
||||||
|
CEPH_RELEASE=luminous
|
||||||
|
|
||||||
|
sudo docker build \
|
||||||
|
--network=host \
|
||||||
|
--force-rm \
|
||||||
|
--pull \
|
||||||
|
--no-cache \
|
||||||
|
--file=./tools/images/libvirt/Dockerfile.${DISTRO}.xenial \
|
||||||
|
--build-arg TARGET_OPENSTACK_VERSION="${TARGET_OPENSTACK_VERSION}" \
|
||||||
|
--build-arg CEPH_RELEASE="${CEPH_RELEASE}" \
|
||||||
|
-t docker.io/openstackhelm/libvirt:${DISTRO}-${DISTRO_RELEASE}-${TARGET_OPENSTACK_VERSION} \
|
||||||
|
tools/images/libvirt
|
||||||
|
sudo docker push docker.io/openstackhelm/libvirt:${DISTRO}-${DISTRO_RELEASE}-${TARGET_OPENSTACK_VERSION}
|
Loading…
Reference in New Issue
Block a user