From 2024cc361e1c282fc8ee7c38c4dcbdbb71fd3910 Mon Sep 17 00:00:00 2001
From: Vladimir Kozhukalov
Date: Thu, 20 Jul 2023 14:59:23 +0300
Subject: [PATCH] Use multinode nodeset for compute-kit jobs

For recent releases we use 32GB nodes for compute-kit jobs.
The number of such nodes is extremely limited. So we'd better
use multinode nodesets for compute-kit jobs.

We deploy K8s using kubeadm and then we set labels to the
K8s nodes so charts can use these labels for node selectors.

We deploy L3 agent only on the node where we run test scripts.
This is because we want test virtual router to be always created
on this node. Otherwise the L2 overlay needs to be created to emulate
provider network (will be implemented later).

Glance is deployed w/o backend storage (will be fixed later).

Change-Id: Id2eb639fb67d41006940a7d7b45a865b2f1124f7
---
 tools/deployment/common/prepare-k8s.sh        |  48 +++++
 .../component/compute-kit/compute-kit.sh      |  14 +-
 tools/gate/playbooks/deploy-docker.yaml       |  72 +++++++
 tools/gate/playbooks/deploy-k8s.yaml          | 200 ++++++++++++++++++
 .../gate/playbooks/deploy-prerequisites.yaml  |  72 +++++++
 tools/gate/playbooks/files/calico_patch.yaml  |  21 ++
 tools/gate/playbooks/files/daemon.json        |   9 +
 .../gate/playbooks/files/kubeadm_config.yaml  |  12 ++
 tools/gate/playbooks/files/resolv.conf        |   4 +
 tools/gate/playbooks/prepare-hosts.yaml       |   3 +
 tools/gate/playbooks/run-scripts.yaml         |  44 ++++
 zuul.d/jobs-openstack-helm.yaml               |  68 +++++-
 12 files changed, 562 insertions(+), 5 deletions(-)
 create mode 100755 tools/deployment/common/prepare-k8s.sh
 create mode 100644 tools/gate/playbooks/deploy-docker.yaml
 create mode 100644 tools/gate/playbooks/deploy-k8s.yaml
 create mode 100644 tools/gate/playbooks/deploy-prerequisites.yaml
 create mode 100644 tools/gate/playbooks/files/calico_patch.yaml
 create mode 100644 tools/gate/playbooks/files/daemon.json
 create mode 100644 tools/gate/playbooks/files/kubeadm_config.yaml
 create mode 100644 tools/gate/playbooks/files/resolv.conf
 create mode
100644 tools/gate/playbooks/prepare-hosts.yaml
 create mode 100644 tools/gate/playbooks/run-scripts.yaml

diff --git a/tools/deployment/common/prepare-k8s.sh b/tools/deployment/common/prepare-k8s.sh
new file mode 100755
index 0000000000..a4d3724cf5
--- /dev/null
+++ b/tools/deployment/common/prepare-k8s.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -ex
+
+# Add labels to the core namespaces & nodes
+kubectl label --overwrite namespace default name=default
+kubectl label --overwrite namespace kube-system name=kube-system
+kubectl label --overwrite namespace kube-public name=kube-public
+kubectl label --overwrite nodes --all openstack-control-plane=enabled
+kubectl label --overwrite nodes --all openstack-compute-node=enabled
+kubectl label --overwrite nodes --all openvswitch=enabled
+kubectl label --overwrite nodes --all linuxbridge=enabled
+kubectl label --overwrite nodes --all ceph-mon=enabled
+kubectl label --overwrite nodes --all ceph-osd=enabled
+kubectl label --overwrite nodes --all ceph-mds=enabled
+kubectl label --overwrite nodes --all ceph-rgw=enabled
+kubectl label --overwrite nodes --all ceph-mgr=enabled
+# We deploy l3 agent only on the node where we run test scripts.
+# In this case virtual router will be created only on this node
+# and we don't need L2 overlay (will be implemented later).
+kubectl label --overwrite nodes -l "node-role.kubernetes.io/control-plane" l3-agent=enabled
+
+for NAMESPACE in ceph openstack osh-infra; do
+tee /tmp/${NAMESPACE}-ns.yaml << EOF
+apiVersion: v1
+kind: Namespace
+metadata:
+  labels:
+    kubernetes.io/metadata.name: ${NAMESPACE}
+    name: ${NAMESPACE}
+  name: ${NAMESPACE}
+EOF
+
+kubectl apply -f /tmp/${NAMESPACE}-ns.yaml
+done
+
+make all
diff --git a/tools/deployment/component/compute-kit/compute-kit.sh b/tools/deployment/component/compute-kit/compute-kit.sh
index 585e0dcac3..377521123d 100755
--- a/tools/deployment/component/compute-kit/compute-kit.sh
+++ b/tools/deployment/component/compute-kit/compute-kit.sh
@@ -13,6 +13,7 @@
 # under the License.
 set -xe
 
+: ${MULTINODE:="no"}
 : ${RUN_HELM_TESTS:="yes"}
 
 export OS_CLOUD=openstack_helm
@@ -70,7 +71,7 @@ make neutron
 tee /tmp/neutron.yaml << EOF
 network:
   interface:
-    tunnel: docker0
+    tunnel: null
 conf:
   neutron:
     DEFAULT:
@@ -91,6 +92,17 @@ conf:
       linux_bridge:
         bridge_mappings: public:br-ex
 EOF
+
+if [[ $MULTINODE == "yes" ]]; then
+  tee -a /tmp/neutron.yaml << EOF
+labels:
+  agent:
+    l3:
+      node_selector_key: l3-agent
+      node_selector_value: enabled
+EOF
+fi
+
 helm upgrade --install neutron ./neutron \
     --namespace=openstack \
     --values=/tmp/neutron.yaml \
diff --git a/tools/gate/playbooks/deploy-docker.yaml b/tools/gate/playbooks/deploy-docker.yaml
new file mode 100644
index 0000000000..ba4b704ad7
--- /dev/null
+++ b/tools/gate/playbooks/deploy-docker.yaml
@@ -0,0 +1,72 @@
+- hosts: all
+  become: true
+  gather_facts: true
+  tasks:
+    - name: Remove old docker packages
+      apt:
+        pkg:
+          - docker.io
+          - docker-doc
+          - docker-compose
+          - podman-docker
+          - containerd
+          - runc
+        state: absent
+
+    - name: Ensure dependencies are installed
+      apt:
+        name:
+          - apt-transport-https
+          - ca-certificates
+          - gnupg2
+        state: present
+
+    - name: Add Docker apt repository key
+      apt_key:
+        url: https://download.docker.com/linux/ubuntu/gpg
+        keyring: /etc/apt/trusted.gpg.d/docker.gpg
+        state: present
+
+    - name: Get dpkg arch
+      command: dpkg --print-architecture
+      register: dpkg_architecture
+
+    - name: Add Docker apt repository
+      apt_repository:
+        repo: deb [arch="{{ dpkg_architecture.stdout }}" signed-by=/etc/apt/trusted.gpg.d/docker.gpg] https://download.docker.com/linux/ubuntu "{{ ansible_distribution_release }}" stable
+        state: present
+        filename: docker  # apt_repository appends ".list" itself
+
+    - name: Install docker packages
+      apt:
+        pkg:
+          - docker-ce
+          - docker-ce-cli
+          - containerd.io
+          - docker-buildx-plugin
+          - docker-compose-plugin
+        state: present
+        update_cache: true
+
+    - name: Configure Docker daemon
+      copy:
+        src: files/daemon.json
+        dest: /etc/docker/daemon.json
+
+    - name: Remove /etc/containerd/config.toml
+      file:
+        path: /etc/containerd/config.toml
+        state: absent
+      ignore_errors: true
+
+    - name: Restart containerd
+      service:
+        name: containerd
+        daemon_reload: yes
+        state: restarted
+
+    - name: Restart docker
+      service:
+        name: docker
+        daemon_reload: yes
+        state: restarted
diff --git a/tools/gate/playbooks/deploy-k8s.yaml b/tools/gate/playbooks/deploy-k8s.yaml
new file mode 100644
index 0000000000..7994632e28
--- /dev/null
+++ b/tools/gate/playbooks/deploy-k8s.yaml
@@ -0,0 +1,200 @@
+- hosts: all
+  become: true
+  gather_facts: true
+  roles:
+    - clear-firewall
+  tasks:
+    - name: Load necessary modules
+      modprobe:
+        name: "{{ item }}"
+        state: present
+      with_items:
+        - overlay
+        - br_netfilter
+
+    - name: Configure sysctl
+      sysctl:
+        name: "{{ item }}"
+        value: "1"
+        state: present
+      loop:
+        - net.ipv6.conf.default.disable_ipv6
+        - net.ipv6.conf.all.disable_ipv6
+        - net.ipv6.conf.lo.disable_ipv6
+        - net.bridge.bridge-nf-call-iptables
+        - net.bridge.bridge-nf-call-ip6tables
+        - net.ipv4.ip_forward
+      ignore_errors: true
+
+    - name: Remove swapfile from /etc/fstab
+      mount:
+        name: "{{ item }}"
+        fstype: swap
+        state: absent
+      with_items:
+        - swap
+        - none
+
+    - name: Disable swap
+      command: swapoff -a
+      when: ansible_swaptotal_mb > 0
+
+    - name: Ensure dependencies are installed
+      apt:
+        name:
+          - apt-transport-https
+          - ca-certificates
+          - gnupg2
+          - ipvsadm
+          - jq
+        state: present
+
+    - name: Add Kubernetes apt repository key
+      apt_key:
+        url: https://packages.cloud.google.com/apt/doc/apt-key.gpg
+        state: present
+
+    - name: Add Kubernetes apt repository
+      apt_repository:
+        repo: deb https://apt.kubernetes.io/ kubernetes-xenial main
+        state: present
+        filename: kubernetes  # apt_repository appends ".list" itself
+
+    - name: Install Kubernetes binaries
+      apt:
+        state: present
+        update_cache: true
+        allow_downgrade: true
+        pkg:
+          - "kubelet={{ kube_version }}"
+          - "kubeadm={{ kube_version }}"
+          - "kubectl={{ kube_version }}"
+
+    - name: Restart kubelet
+      service:
+        name: kubelet
+        daemon_reload: yes
+        state: restarted
+
+    - name: Disable systemd-resolved
+      service:
+        name: systemd-resolved
+        enabled: false
+        state: stopped
+
+    - name: Configure resolv.conf
+      copy:
+        src: files/resolv.conf
+        dest: "{{ item }}"
+      loop:
+        - /etc/resolv.conf
+        - /run/systemd/resolve/resolv.conf
+
+- hosts: primary
+  become: true
+  tasks:
+    - name: Mount tmpfs to /var/lib/etcd
+      mount:
+        path: /var/lib/etcd
+        src: tmpfs
+        fstype: tmpfs
+        opts: size=1g
+        state: mounted
+
+    - name: Prepare kubeadm config
+      copy:
+        src: files/kubeadm_config.yaml
+        dest: /tmp/kubeadm_config.yaml
+
+    - name: Initialize the Kubernetes cluster using kubeadm
+      command: kubeadm init --config /tmp/kubeadm_config.yaml
+
+    - name: Setup kubeconfig for zuul user
+      shell: |
+        mkdir -p /home/zuul/.kube
+        cp -f /etc/kubernetes/admin.conf /home/zuul/.kube/config  # -f, not -i: nobody can answer a prompt here
+        chown zuul:zuul /home/zuul/.kube/config
+      args:
+        executable: /bin/bash
+
+- hosts: all
+  tasks:
+    # We download Calico manifest on all nodes because we then want to download
+    # Calico images BEFORE deploying it
+    - name: Download Calico manifest
+      shell: |
+        curl -LSs https://docs.projectcalico.org/archive/{{ calico_version }}/manifests/calico.yaml -o /tmp/calico.yaml
+        sed -i -e 's#docker.io/calico/#quay.io/calico/#g' /tmp/calico.yaml
+      args:
+        executable: /bin/bash
+
+    # Download images needed for calico before applying manifests, so that `kubectl wait` timeout
+    # for `k8s-app=kube-dns` isn't reached by slow download speeds
+    - name: Download Calico images
+      shell: |
+        awk '/image:/ { print $2 }' /tmp/calico.yaml | xargs -I{} sudo docker pull {}
+      args:
+        executable: /bin/bash
+
+- hosts: primary
+  tasks:
+    - name: Deploy Calico
+      command: kubectl apply -f /tmp/calico.yaml
+
+    - name: Wait for Calico pods ready
+      command: kubectl -n kube-system wait --timeout=240s --for=condition=Ready pods -l k8s-app=calico-node
+
+    - name: Prepare Calico patch
+      copy:
+        src: files/calico_patch.yaml
+        dest: /tmp/calico_patch.yaml
+
+    - name: Patch Calico
+      command: kubectl -n kube-system patch daemonset calico-node --patch-file /tmp/calico_patch.yaml
+
+    - name: Wait for Calico pods ready
+      command: kubectl -n kube-system wait --timeout=240s --for=condition=Ready pods -l k8s-app=calico-node
+
+    - name: Generate join command
+      command: kubeadm token create --print-join-command
+      register: join_command
+
+- hosts: nodes
+  become: true
+  tasks:
+    - name: Join node to cluster
+      command: "{{ hostvars['primary']['join_command'].stdout_lines[0] }}"
+
+- hosts: primary
+  tasks:
+    - name: Wait for Calico pods ready
+      command: kubectl -n kube-system wait --timeout=240s --for=condition=Ready pods -l k8s-app=calico-node
+
+    - name: Wait for Coredns pods ready
+      command: kubectl -n kube-system wait --timeout=240s --for=condition=Ready pods -l k8s-app=kube-dns
+
+    - name: Untaint Kubernetes control plane node
+      command: kubectl taint nodes -l 'node-role.kubernetes.io/control-plane' node-role.kubernetes.io/control-plane-
+
+- hosts: all
+  become: true
+  tasks:
+    - name: Add coredns to /etc/resolv.conf
+      lineinfile:
+        line: nameserver 10.96.0.10
+        path: /etc/resolv.conf
+        state: present
+        insertbefore: "BOF"
+
+- hosts: primary
+  tasks:
+    - name: Enable recursive queries for coredns
+      shell: |
+        PATCH=$(mktemp)
+        kubectl get configmap coredns -n kube-system -o json | jq -r "{data: .data}" | sed 's/ready\\n/header \{\\n response set ra\\n \}\\n ready\\n/g' > "${PATCH}"
+        kubectl patch configmap coredns -n kube-system --patch-file "${PATCH}"
+        kubectl set image deployment coredns -n kube-system "coredns=registry.k8s.io/coredns/coredns:v1.9.4"
+        kubectl rollout restart -n kube-system deployment/coredns
+        rm -f "${PATCH}"
+      args:
+        executable: /bin/bash
diff --git a/tools/gate/playbooks/deploy-prerequisites.yaml b/tools/gate/playbooks/deploy-prerequisites.yaml
new file mode 100644
index 0000000000..fa1f2116b9
--- /dev/null
+++ b/tools/gate/playbooks/deploy-prerequisites.yaml
@@ -0,0 +1,72 @@
+- hosts: all
+  become: true
+  gather_facts: true
+  roles:
+    - ensure-python
+    - ensure-pip
+  tasks:
+    - name: Add Ceph apt repository key
+      apt_key:
+        url: https://download.ceph.com/keys/release.asc
+        state: present
+
+    - name: Add Ceph apt repository
+      apt_repository:
+        repo: deb https://download.ceph.com/debian-reef/ "{{ ansible_distribution_release }}" main
+        state: present
+        filename: ceph  # apt_repository appends ".list" itself
+
+    - name: Install necessary packages
+      apt:
+        pkg:
+          - socat
+          - jq
+          - util-linux
+          - bridge-utils
+          - iptables
+          - conntrack
+          - libffi-dev
+          - ipvsadm
+          - make
+          - bc
+          - git-review
+          - notary
+          - ceph-common
+          - rbd-nbd
+          - nfs-common
+          - ethtool
+          - python3-dev
+          - ca-certificates
+          - git
+          - nmap
+          - curl
+          - uuid-runtime
+          - net-tools
+          - less
+          - telnet
+          - tcpdump
+          - vim
+          - lvm2
+
+    - name: Install Yq
+      shell: |
+        wget https://github.com/mikefarah/yq/releases/download/{{ yq_version }}/yq_linux_amd64.tar.gz -O - | tar xz && mv yq_linux_amd64 /usr/local/bin/yq
+      args:
+        executable: /bin/bash
+
+- hosts: primary
+  become: true
+  tasks:
+    - name: Install Helm
+      shell: |
+        TMP_DIR=$(mktemp -d)
+        curl -sSL https://get.helm.sh/helm-{{ helm_version }}-linux-amd64.tar.gz | tar -zxv --strip-components=1 -C "${TMP_DIR}"
+        mv "${TMP_DIR}"/helm /usr/local/bin/helm
+        rm -rf "${TMP_DIR}"
+      args:
+        executable: /bin/bash
+
+    # This is to improve build time
+    - name: Remove stable Helm repo
+      command: helm repo remove stable
+      ignore_errors: true
diff --git a/tools/gate/playbooks/files/calico_patch.yaml b/tools/gate/playbooks/files/calico_patch.yaml
new file mode 100644
index 0000000000..52c909c1fe
--- /dev/null
+++ b/tools/gate/playbooks/files/calico_patch.yaml
@@ -0,0 +1,21 @@
+spec:
+  template:
+    metadata:
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "9091"
+    spec:
+      containers:
+        - name: calico-node
+          env:
+            - name: FELIX_PROMETHEUSMETRICSENABLED
+              value: "true"
+            - name: FELIX_PROMETHEUSMETRICSPORT
+              value: "9091"
+            - name: FELIX_IGNORELOOSERPF
+              value: "true"
+            # We assign IP on br-ex interface while testing the deployed Openstack cluster and
+            # we need Calico to skip this interface while discovering the
+            # network changes on the host to prevent announcing unnecessary networks.
+            - name: IP_AUTODETECTION_METHOD
+              value: "skip-interface=br-ex"
diff --git a/tools/gate/playbooks/files/daemon.json b/tools/gate/playbooks/files/daemon.json
new file mode 100644
index 0000000000..2547992479
--- /dev/null
+++ b/tools/gate/playbooks/files/daemon.json
@@ -0,0 +1,9 @@
+{
+  "exec-opts": ["native.cgroupdriver=systemd"],
+  "log-driver": "json-file",
+  "log-opts": {
+    "max-size": "100m"
+  },
+  "storage-driver": "overlay2",
+  "live-restore": true
+}
diff --git a/tools/gate/playbooks/files/kubeadm_config.yaml b/tools/gate/playbooks/files/kubeadm_config.yaml
new file mode 100644
index 0000000000..eaf088192b
--- /dev/null
+++ b/tools/gate/playbooks/files/kubeadm_config.yaml
@@ -0,0 +1,12 @@
+---
+apiVersion: kubeproxy.config.k8s.io/v1alpha1
+kind: KubeProxyConfiguration
+mode: ipvs
+---
+apiVersion: kubeadm.k8s.io/v1beta3  # v1beta2 is deprecated and removed in kubeadm 1.27
+kind: ClusterConfiguration
+networking:
+  serviceSubnet: "10.96.0.0/16"
+  podSubnet: "10.244.0.0/24" # --pod-network-cidr
+  dnsDomain: "cluster.local"
+...
diff --git a/tools/gate/playbooks/files/resolv.conf b/tools/gate/playbooks/files/resolv.conf
new file mode 100644
index 0000000000..5f9818c771
--- /dev/null
+++ b/tools/gate/playbooks/files/resolv.conf
@@ -0,0 +1,4 @@
+nameserver 8.8.8.8
+nameserver 8.8.4.4
+search svc.cluster.local cluster.local
+options ndots:5 timeout:1 attempts:1
diff --git a/tools/gate/playbooks/prepare-hosts.yaml b/tools/gate/playbooks/prepare-hosts.yaml
new file mode 100644
index 0000000000..3ebc3ae7b1
--- /dev/null
+++ b/tools/gate/playbooks/prepare-hosts.yaml
@@ -0,0 +1,3 @@
+- hosts: all
+  roles:
+    - start-zuul-console
diff --git a/tools/gate/playbooks/run-scripts.yaml b/tools/gate/playbooks/run-scripts.yaml
new file mode 100644
index 0000000000..56a2fc0c8a
--- /dev/null
+++ b/tools/gate/playbooks/run-scripts.yaml
@@ -0,0 +1,44 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+- hosts: all
+  tasks:
+    - name: Override images
+      include_role:
+        name: override-images
+      when: buildset_registry is defined
+    - name: Use docker mirror
+      include_role:
+        name: use-docker-mirror
+
+- hosts: primary
+  tasks:
+    - name: "creating directory for run artifacts"
+      file:
+        path: "/tmp/artifacts"
+        state: directory
+
+    - name: Run gate scripts
+      include_role:
+        name: "{{ ([item] | flatten | length == 1) | ternary('osh-run-script', 'osh-run-script-set') }}"
+      vars:
+        workload: "{{ [item] | flatten }}"
+      loop: "{{ gate_scripts }}"
+
+    - name: "Downloads artifacts to executor"
+      synchronize:
+        src: "/tmp/artifacts"
+        dest: "{{ zuul.executor.log_root }}/{{ inventory_hostname }}"
+        mode: pull
+      ignore_errors: True
+...
diff --git a/zuul.d/jobs-openstack-helm.yaml b/zuul.d/jobs-openstack-helm.yaml
index e0049ea1b2..af06d988d6 100644
--- a/zuul.d/jobs-openstack-helm.yaml
+++ b/zuul.d/jobs-openstack-helm.yaml
@@ -36,6 +36,68 @@
       - openstack/openstack-helm
     nodeset: openstack-helm-single-node
 
+- job:
+    name: openstack-helm-deploy-multinode
+    abstract: true
+    roles:
+      - zuul: openstack/openstack-helm-infra
+      - zuul: zuul/zuul-jobs
+    required-projects:
+      - openstack/openstack-helm
+      - openstack/openstack-helm-infra
+    irrelevant-files:
+      - ^.*\.rst$
+      - ^doc/.*$
+      - ^releasenotes/.*$
+    timeout: 7200
+    pre-run:
+      - tools/gate/playbooks/prepare-hosts.yaml
+    post-run: tools/gate/playbooks/osh-infra-collect-logs.yaml
+    run:
+      - tools/gate/playbooks/deploy-prerequisites.yaml
+      # zuul-jobs/roles/ensure-docker role installs containerd.io with the config that
+      # is not compatible with Kubernetes due to unimplemented CRI v1 runtime API
+      - tools/gate/playbooks/deploy-docker.yaml
+      - tools/gate/playbooks/deploy-k8s.yaml
+      - tools/gate/playbooks/run-scripts.yaml
+    # multinode nodeset
+    nodeset: openstack-helm-ubuntu
+    vars:
+      # the k8s package versions are available here
+      # https://packages.cloud.google.com/apt/dists/kubernetes-xenial/main/binary-amd64/Packages
+      kube_version: "1.26.3-00"
+      calico_version: "v3.25"
+      helm_version: "v3.6.3"
+      yq_version: "v4.6.0"
+      zuul_osh_infra_relative_path: ../openstack-helm-infra
+      gate_scripts_relative_path: ../openstack-helm
+
+- job:
+    name: openstack-helm-compute-kit-multinode
+    parent: openstack-helm-deploy-multinode
+    vars:
+      run_helm_tests: "no"
+      gate_scripts:
+        - ./tools/deployment/common/prepare-k8s.sh
+        - ./tools/deployment/common/setup-client.sh
+        - ./tools/deployment/component/common/ingress.sh
+        - - ./tools/deployment/component/common/rabbitmq.sh
+          - ./tools/deployment/component/common/mariadb.sh
+          - ./tools/deployment/component/common/memcached.sh
+        - ./tools/deployment/component/keystone/keystone.sh
+        - - ./tools/deployment/component/heat/heat.sh
+          - export GLANCE_BACKEND=memory; ./tools/deployment/component/glance/glance.sh
+        - ./tools/deployment/component/compute-kit/openvswitch.sh
+        - ./tools/deployment/component/compute-kit/libvirt.sh
+        - export MULTINODE=yes; ./tools/deployment/component/compute-kit/compute-kit.sh
+        - export OSH_TEST_TIMEOUT=1200;./tools/deployment/common/run-helm-tests.sh neutron
+        - ./tools/deployment/common/run-helm-tests.sh nova
+        - ./tools/deployment/common/run-helm-tests.sh glance
+        - ./tools/deployment/common/run-helm-tests.sh keystone
+        - ./tools/deployment/developer/common/170-setup-gateway.sh
+        - ./tools/deployment/developer/common/900-use-it.sh
+        - ./tools/deployment/common/force-cronjob-run.sh
+
 - job:
     name: openstack-helm-bandit
     parent: openstack-helm-chart-deploy
@@ -271,8 +333,7 @@
 
 - job:
     name: openstack-helm-compute-kit-zed-ubuntu_focal
-    parent: openstack-helm-compute-kit
-    nodeset: openstack-helm-single-32GB-focal-tmp
+    parent: openstack-helm-compute-kit-multinode
     vars:
       osh_params:
         openstack_release: zed
@@ -302,8 +363,7 @@
 
 - job:
     name: openstack-helm-compute-kit-2023-1-ubuntu_focal
-    parent: openstack-helm-compute-kit
-    nodeset: openstack-helm-single-32GB-focal-tmp
+    parent: openstack-helm-compute-kit-multinode
     vars:
       osh_params:
         openstack_release: "2023.1"