Use multinode nodeset for compute-kit jobs

For recent releases we use 32GB nodes for compute-kit jobs, but the
number of such nodes is extremely limited, so we switch these jobs to
multinode nodesets.

We deploy K8s using kubeadm and then apply labels to the K8s nodes so
charts can use these labels for node selectors. We deploy the L3 agent
only on the node where we run the test scripts.

This is because we want the test virtual router to always be created
on this node. Otherwise an L2 overlay would have to be created to
emulate the provider network (will be implemented later).

Glance is deployed without a storage backend (will be fixed later).

Change-Id: Id2eb639fb67d41006940a7d7b45a865b2f1124f7
Vladimir Kozhukalov 2023-07-20 14:59:23 +03:00
parent 91c8a5baf2
commit 2024cc361e
12 changed files with 562 additions and 5 deletions


@@ -0,0 +1,48 @@
#!/bin/bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -ex
# Add labels to the core namespaces & nodes
kubectl label --overwrite namespace default name=default
kubectl label --overwrite namespace kube-system name=kube-system
kubectl label --overwrite namespace kube-public name=kube-public
kubectl label --overwrite nodes --all openstack-control-plane=enabled
kubectl label --overwrite nodes --all openstack-compute-node=enabled
kubectl label --overwrite nodes --all openvswitch=enabled
kubectl label --overwrite nodes --all linuxbridge=enabled
kubectl label --overwrite nodes --all ceph-mon=enabled
kubectl label --overwrite nodes --all ceph-osd=enabled
kubectl label --overwrite nodes --all ceph-mds=enabled
kubectl label --overwrite nodes --all ceph-rgw=enabled
kubectl label --overwrite nodes --all ceph-mgr=enabled
# We deploy the L3 agent only on the node where we run the test scripts.
# This way the test virtual router is created only on this node,
# and we don't need an L2 overlay (will be implemented later).
kubectl label --overwrite nodes -l "node-role.kubernetes.io/control-plane" l3-agent=enabled
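# The neutron chart consumes this label via its L3 agent node selector
# when MULTINODE=yes (see the compute-kit.sh change below).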
for NAMESPACE in ceph openstack osh-infra; do
  tee /tmp/${NAMESPACE}-ns.yaml << EOF
apiVersion: v1
kind: Namespace
metadata:
  labels:
    kubernetes.io/metadata.name: ${NAMESPACE}
    name: ${NAMESPACE}
  name: ${NAMESPACE}
EOF
  kubectl apply -f /tmp/${NAMESPACE}-ns.yaml
done
make all
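
A quick way to sanity-check the labels applied by this script; a hedged sketch, assuming kubectl already points at the freshly bootstrapped cluster:

# Hedged sketch: verify node labels before deploying any charts.
kubectl get nodes -L openstack-control-plane,openstack-compute-node,openvswitch,l3-agent
# Exactly one node (the control-plane/test node) should carry the l3-agent label.
kubectl get nodes -l l3-agent=enabled -o name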


@@ -13,6 +13,7 @@
 # under the License.
 set -xe
+: ${MULTINODE:="no"}
 : ${RUN_HELM_TESTS:="yes"}
 export OS_CLOUD=openstack_helm
@@ -70,7 +71,7 @@ make neutron
 tee /tmp/neutron.yaml << EOF
 network:
   interface:
-    tunnel: docker0
+    tunnel: null
 conf:
   neutron:
     DEFAULT:
@@ -91,6 +92,17 @@ conf:
     linux_bridge:
       bridge_mappings: public:br-ex
 EOF
+if [[ $MULTINODE == "yes" ]]; then
+  tee -a /tmp/neutron.yaml << EOF
+labels:
+  agent:
+    l3:
+      node_selector_key: l3-agent
+      node_selector_value: enabled
+EOF
+fi
 helm upgrade --install neutron ./neutron \
   --namespace=openstack \
   --values=/tmp/neutron.yaml \
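
Once the chart is deployed with these overrides, the scheduling constraint can be spot-checked; a hedged sketch, where the daemonset name neutron-l3-agent and the pod labels are assumptions based on the chart's usual naming conventions:

# Hedged sketch: confirm the L3 agent only runs on the labeled node.
kubectl -n openstack get ds neutron-l3-agent -o jsonpath='{.spec.template.spec.nodeSelector}'  # assumed name
kubectl -n openstack get pods -l application=neutron,component=l3-agent -o wide                # assumed labels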


@@ -0,0 +1,72 @@
- hosts: all
  become: true
  gather_facts: true
  tasks:
    - name: Remove old docker packages
      apt:
        pkg:
          - docker.io
          - docker-doc
          - docker-compose
          - podman-docker
          - containerd
          - runc
        state: absent
    - name: Ensure dependencies are installed
      apt:
        name:
          - apt-transport-https
          - ca-certificates
          - gnupg2
        state: present
    - name: Add Docker apt repository key
      apt_key:
        url: https://download.docker.com/linux/ubuntu/gpg
        keyring: /etc/apt/trusted.gpg.d/docker.gpg
        state: present
    - name: Get dpkg arch
      command: dpkg --print-architecture
      register: dpkg_architecture
    - name: Add Docker apt repository
      apt_repository:
        repo: deb [arch="{{ dpkg_architecture.stdout }}" signed-by=/etc/apt/trusted.gpg.d/docker.gpg] https://download.docker.com/linux/ubuntu "{{ ansible_distribution_release }}" stable
        state: present
        filename: docker.list
    - name: Install docker packages
      apt:
        pkg:
          - docker-ce
          - docker-ce-cli
          - containerd.io
          - docker-buildx-plugin
          - docker-compose-plugin
        state: present
        update_cache: true
    - name: Configure Docker daemon
      copy:
        src: files/daemon.json
        dest: /etc/docker/daemon.json
    - name: Remove /etc/containerd/config.toml
      file:
        path: /etc/containerd/config.toml
        state: absent
      ignore_errors: true
    - name: Restart containerd
      service:
        name: containerd
        daemon_reload: yes
        state: restarted
    - name: Restart docker
      service:
        name: docker
        daemon_reload: yes
        state: restarted
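
After this playbook runs, two things matter for kubeadm: Docker must use the systemd cgroup driver (to match the kubelet) and containerd must come back up with its default, CRI-enabled configuration (the stock containerd.io config it replaces disables CRI). A hedged sanity check:

# Hedged sketch: post-playbook sanity checks on the container runtime.
docker info --format '{{ .CgroupDriver }}'    # expected: systemd
systemctl is-active containerd docker         # expected: active / active
test ! -f /etc/containerd/config.toml && echo "containerd runs on built-in defaults"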


@@ -0,0 +1,200 @@
- hosts: all
  become: true
  gather_facts: true
  roles:
    - clear-firewall
  tasks:
    - name: Load necessary modules
      modprobe:
        name: "{{ item }}"
        state: present
      with_items:
        - overlay
        - br_netfilter
    - name: Configure sysctl
      sysctl:
        name: "{{ item }}"
        value: "1"
        state: present
      loop:
        - net.ipv6.conf.default.disable_ipv6
        - net.ipv6.conf.all.disable_ipv6
        - net.ipv6.conf.lo.disable_ipv6
        - net.bridge.bridge-nf-call-iptables
        - net.bridge.bridge-nf-call-ip6tables
        - net.ipv4.ip_forward
      ignore_errors: true
    - name: Remove swapfile from /etc/fstab
      mount:
        name: "{{ item }}"
        fstype: swap
        state: absent
      with_items:
        - swap
        - none
    - name: Disable swap
      command: swapoff -a
      when: ansible_swaptotal_mb > 0
    - name: Ensure dependencies are installed
      apt:
        name:
          - apt-transport-https
          - ca-certificates
          - gnupg2
          - ipvsadm
          - jq
        state: present
    - name: Add Kubernetes apt repository key
      apt_key:
        url: https://packages.cloud.google.com/apt/doc/apt-key.gpg
        state: present
    - name: Add Kubernetes apt repository
      apt_repository:
        repo: deb https://apt.kubernetes.io/ kubernetes-xenial main
        state: present
        filename: kubernetes.list
    - name: Install Kubernetes binaries
      apt:
        state: present
        update_cache: true
        allow_downgrade: true
        pkg:
          - "kubelet={{ kube_version }}"
          - "kubeadm={{ kube_version }}"
          - "kubectl={{ kube_version }}"
    - name: Restart kubelet
      service:
        name: kubelet
        daemon_reload: yes
        state: restarted
    - name: Disable systemd-resolved
      service:
        name: systemd-resolved
        enabled: false
        state: stopped
    - name: Configure resolv.conf
      copy:
        src: files/resolv.conf
        dest: "{{ item }}"
      loop:
        - /etc/resolv.conf
        - /run/systemd/resolve/resolv.conf

- hosts: primary
  become: true
  tasks:
    - name: Mount tmpfs to /var/lib/etcd
      mount:
        path: /var/lib/etcd
        src: tmpfs
        fstype: tmpfs
        opts: size=1g
        state: mounted
    - name: Prepare kubeadm config
      copy:
        src: files/kubeadm_config.yaml
        dest: /tmp/kubeadm_config.yaml
    - name: Initialize the Kubernetes cluster using kubeadm
      command: kubeadm init --config /tmp/kubeadm_config.yaml
    - name: Setup kubeconfig for zuul user
      shell: |
        mkdir -p /home/zuul/.kube
        cp -i /etc/kubernetes/admin.conf /home/zuul/.kube/config
        chown zuul:zuul /home/zuul/.kube/config
      args:
        executable: /bin/bash

- hosts: all
  tasks:
    # We download the Calico manifest on all nodes because we then want to
    # download Calico images BEFORE deploying it
    - name: Download Calico manifest
      shell: |
        curl -LSs https://docs.projectcalico.org/archive/{{ calico_version }}/manifests/calico.yaml -o /tmp/calico.yaml
        sed -i -e 's#docker.io/calico/#quay.io/calico/#g' /tmp/calico.yaml
      args:
        executable: /bin/bash
    # Download images needed for Calico before applying the manifests, so that
    # the `kubectl wait` timeout for `k8s-app=kube-dns` isn't reached due to
    # slow download speeds
    - name: Download Calico images
      shell: |
        awk '/image:/ { print $2 }' /tmp/calico.yaml | xargs -I{} sudo docker pull {}
      args:
        executable: /bin/bash

- hosts: primary
  tasks:
    - name: Deploy Calico
      command: kubectl apply -f /tmp/calico.yaml
    - name: Wait for Calico pods ready
      command: kubectl -n kube-system wait --timeout=240s --for=condition=Ready pods -l k8s-app=calico-node
    - name: Prepare Calico patch
      copy:
        src: files/calico_patch.yaml
        dest: /tmp/calico_patch.yaml
    - name: Patch Calico
      command: kubectl -n kube-system patch daemonset calico-node --patch-file /tmp/calico_patch.yaml
    - name: Wait for Calico pods ready
      command: kubectl -n kube-system wait --timeout=240s --for=condition=Ready pods -l k8s-app=calico-node
    - name: Generate join command
      command: kubeadm token create --print-join-command
      register: join_command

- hosts: nodes
  become: true
  tasks:
    - name: Join node to cluster
      command: "{{ hostvars['primary']['join_command'].stdout_lines[0] }}"

- hosts: primary
  tasks:
    - name: Wait for Calico pods ready
      command: kubectl -n kube-system wait --timeout=240s --for=condition=Ready pods -l k8s-app=calico-node
    - name: Wait for Coredns pods ready
      command: kubectl -n kube-system wait --timeout=240s --for=condition=Ready pods -l k8s-app=kube-dns
    - name: Untaint Kubernetes control plane node
      command: kubectl taint nodes -l 'node-role.kubernetes.io/control-plane' node-role.kubernetes.io/control-plane-

- hosts: all
  become: true
  tasks:
    - name: Add coredns to /etc/resolv.conf
      lineinfile:
        line: nameserver 10.96.0.10
        path: /etc/resolv.conf
        state: present
        insertbefore: "BOF"

- hosts: primary
  tasks:
    - name: Enable recursive queries for coredns
      shell: |
        PATCH=$(mktemp)
        kubectl get configmap coredns -n kube-system -o json | jq -r "{data: .data}" | sed 's/ready\\n/header \{\\n response set ra\\n \}\\n ready\\n/g' > "${PATCH}"
        kubectl patch configmap coredns -n kube-system --patch-file "${PATCH}"
        kubectl set image deployment coredns -n kube-system "coredns=registry.k8s.io/coredns/coredns:v1.9.4"
        kubectl rollout restart -n kube-system deployment/coredns
        rm -f "${PATCH}"
      args:
        executable: /bin/bash
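
The sed expression above injects a header block ahead of the ready plugin so CoreDNS sets the RA (recursion available) flag in its responses. A hedged way to verify that the Corefile patch and the image bump landed:

# Hedged sketch: verify the CoreDNS Corefile patch and rollout.
kubectl -n kube-system get configmap coredns -o jsonpath='{.data.Corefile}'   # expect a "header { response set ra }" block
kubectl -n kube-system rollout status deployment/coredns --timeout=120s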


@@ -0,0 +1,72 @@
- hosts: all
  become: true
  gather_facts: true
  roles:
    - ensure-python
    - ensure-pip
  tasks:
    - name: Add Ceph apt repository key
      apt_key:
        url: https://download.ceph.com/keys/release.asc
        state: present
    - name: Add Ceph apt repository
      apt_repository:
        repo: deb https://download.ceph.com/debian-reef/ "{{ ansible_distribution_release }}" main
        state: present
        filename: ceph.list
    - name: Install necessary packages
      apt:
        pkg:
          - socat
          - jq
          - util-linux
          - bridge-utils
          - iptables
          - conntrack
          - libffi-dev
          - ipvsadm
          - make
          - bc
          - git-review
          - notary
          - ceph-common
          - rbd-nbd
          - nfs-common
          - ethtool
          - python3-dev
          - ca-certificates
          - git
          - nmap
          - curl
          - uuid-runtime
          - net-tools
          - less
          - telnet
          - tcpdump
          - vim
          - lvm2
    - name: Install Yq
      shell: |
        wget https://github.com/mikefarah/yq/releases/download/{{ yq_version }}/yq_linux_amd64.tar.gz -O - | tar xz && mv yq_linux_amd64 /usr/local/bin/yq
      args:
        executable: /bin/bash

- hosts: primary
  become: true
  tasks:
    - name: Install Helm
      shell: |
        TMP_DIR=$(mktemp -d)
        curl -sSL https://get.helm.sh/helm-{{ helm_version }}-linux-amd64.tar.gz | tar -zxv --strip-components=1 -C ${TMP_DIR}
        mv "${TMP_DIR}"/helm /usr/local/bin/helm
        rm -rf "${TMP_DIR}"
      args:
        executable: /bin/bash
    # This is to improve build time
    - name: Remove stable Helm repo
      command: helm repo remove stable
      ignore_errors: true


@ -0,0 +1,21 @@
spec:
template:
metadata:
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "9091"
spec:
containers:
- name: calico-node
env:
- name: FELIX_PROMETHEUSMETRICSENABLED
value: "true"
- name: FELIX_PROMETHEUSMETRICSPORT
value: "9091"
- name: FELIX_IGNORELOOSERPF
value: "true"
# We assign IP on br-ex interface while testing the deployed Openstack cluster and
# we need Calico to skip this interface while discovering the
# network changes on the host to prevent announcing unnecessary networks.
- name: IP_AUTODETECTION_METHOD
value: "skip-interface=br-ex"


@@ -0,0 +1,9 @@
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2",
  "live-restore": true
}


@@ -0,0 +1,12 @@
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
---
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
networking:
  serviceSubnet: "10.96.0.0/16"
  podSubnet: "10.244.0.0/24" # --pod-network-cidr
  dnsDomain: "cluster.local"
...
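
For reference, the ClusterConfiguration above corresponds roughly to the following CLI invocation; a hedged sketch, and the KubeProxyConfiguration part (mode: ipvs) has no flag equivalent, which is why the playbook passes --config instead:

# Hedged sketch: approximate CLI equivalent of the ClusterConfiguration.
kubeadm init \
  --service-cidr=10.96.0.0/16 \
  --pod-network-cidr=10.244.0.0/24 \
  --service-dns-domain=cluster.local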


@@ -0,0 +1,4 @@
nameserver 8.8.8.8
nameserver 8.8.4.4
search svc.cluster.local cluster.local
options ndots:5 timeout:1 attempts:1


@@ -0,0 +1,3 @@
- hosts: all
  roles:
    - start-zuul-console


@@ -0,0 +1,44 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
- hosts: all
  tasks:
    - name: Override images
      include_role:
        name: override-images
      when: buildset_registry is defined
    - name: Use docker mirror
      include_role:
        name: use-docker-mirror

- hosts: primary
  tasks:
    - name: Create directory for run artifacts
      file:
        path: "/tmp/artifacts"
        state: directory
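    # Each gate_scripts item is either a single script or a nested list of
    # scripts; [item] | flatten normalizes both shapes, and the ternary below
    # dispatches a single script to osh-run-script and a group to
    # osh-run-script-set (both roles come from openstack-helm-infra).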
    - name: Run gate scripts
      include_role:
        name: "{{ ([item] | flatten | length == 1) | ternary('osh-run-script', 'osh-run-script-set') }}"
      vars:
        workload: "{{ [item] | flatten }}"
      loop: "{{ gate_scripts }}"
    - name: Download artifacts to executor
      synchronize:
        src: "/tmp/artifacts"
        dest: "{{ zuul.executor.log_root }}/{{ inventory_hostname }}"
        mode: pull
      ignore_errors: True
...


@@ -36,6 +36,68 @@
       - openstack/openstack-helm
     nodeset: openstack-helm-single-node
+
+- job:
+    name: openstack-helm-deploy-multinode
+    abstract: true
+    roles:
+      - zuul: openstack/openstack-helm-infra
+      - zuul: zuul/zuul-jobs
+    required-projects:
+      - openstack/openstack-helm
+      - openstack/openstack-helm-infra
+    irrelevant-files:
+      - ^.*\.rst$
+      - ^doc/.*$
+      - ^releasenotes/.*$
+    timeout: 7200
+    pre-run:
+      - tools/gate/playbooks/prepare-hosts.yaml
+    post-run: tools/gate/playbooks/osh-infra-collect-logs.yaml
+    run:
+      - tools/gate/playbooks/deploy-prerequisites.yaml
+      # The zuul-jobs ensure-docker role installs containerd.io with a config
+      # that is not compatible with Kubernetes due to the unimplemented CRI v1
+      # runtime API, so we install Docker with our own playbook.
+      - tools/gate/playbooks/deploy-docker.yaml
+      - tools/gate/playbooks/deploy-k8s.yaml
+      - tools/gate/playbooks/run-scripts.yaml
+    # multinode nodeset
+    nodeset: openstack-helm-ubuntu
+    vars:
+      # The available k8s package versions are listed at
+      # https://packages.cloud.google.com/apt/dists/kubernetes-xenial/main/binary-amd64/Packages
+      kube_version: "1.26.3-00"
+      calico_version: "v3.25"
+      helm_version: "v3.6.3"
+      yq_version: "v4.6.0"
+      zuul_osh_infra_relative_path: ../openstack-helm-infra
+      gate_scripts_relative_path: ../openstack-helm
+
+- job:
+    name: openstack-helm-compute-kit-multinode
+    parent: openstack-helm-deploy-multinode
+    vars:
+      run_helm_tests: "no"
+      gate_scripts:
+        - ./tools/deployment/common/prepare-k8s.sh
+        - ./tools/deployment/common/setup-client.sh
+        - ./tools/deployment/component/common/ingress.sh
+        - - ./tools/deployment/component/common/rabbitmq.sh
+          - ./tools/deployment/component/common/mariadb.sh
+          - ./tools/deployment/component/common/memcached.sh
+        - ./tools/deployment/component/keystone/keystone.sh
+        - - ./tools/deployment/component/heat/heat.sh
+          - export GLANCE_BACKEND=memory; ./tools/deployment/component/glance/glance.sh
+        - ./tools/deployment/component/compute-kit/openvswitch.sh
+        - ./tools/deployment/component/compute-kit/libvirt.sh
+        - export MULTINODE=yes; ./tools/deployment/component/compute-kit/compute-kit.sh
+        - export OSH_TEST_TIMEOUT=1200; ./tools/deployment/common/run-helm-tests.sh neutron
+        - ./tools/deployment/common/run-helm-tests.sh nova
+        - ./tools/deployment/common/run-helm-tests.sh glance
+        - ./tools/deployment/common/run-helm-tests.sh keystone
+        - ./tools/deployment/developer/common/170-setup-gateway.sh
+        - ./tools/deployment/developer/common/900-use-it.sh
+        - ./tools/deployment/common/force-cronjob-run.sh
 - job:
     name: openstack-helm-bandit
     parent: openstack-helm-chart-deploy
@@ -271,8 +333,7 @@
 - job:
     name: openstack-helm-compute-kit-zed-ubuntu_focal
-    parent: openstack-helm-compute-kit
-    nodeset: openstack-helm-single-32GB-focal-tmp
+    parent: openstack-helm-compute-kit-multinode
     vars:
       osh_params:
         openstack_release: zed
@@ -302,8 +363,7 @@
 - job:
     name: openstack-helm-compute-kit-2023-1-ubuntu_focal
-    parent: openstack-helm-compute-kit
-    nodeset: openstack-helm-single-32GB-focal-tmp
+    parent: openstack-helm-compute-kit-multinode
     vars:
       osh_params:
         openstack_release: "2023.1"