From 510cea0c23458142a6cb919ce993ceeaae65d8c9 Mon Sep 17 00:00:00 2001 From: Vladimir Kozhukalov Date: Thu, 16 Nov 2023 18:51:00 -0600 Subject: [PATCH] Deploy Ceph on multi-node envs - When we deploy Ceph on a multi-node env we have to prepare the loop devices on all nodes. For this we moved the loop device setup to the deploy-env Ansible role. For simplicity we need the same device on all nodes, so we create a loop device with a big minor number (/dev/loop100 by default), assuming that only low minor numbers are likely to be in use. - For test jobs we don't need to use different devices for OSD data and metadata. There is no benefit from this in the test environment. So let's keep it simple and put both OSD data and metadata on the same device. - On a multi-node env the Ceph cluster needs its members to be able to see each other, so let's use the pod network CIDR for the Ceph public and cluster networks. Change-Id: I493b6c31d97ff2fc4992c6bb1994d0c73320cd7b --- roles/deploy-env/defaults/main.yaml | 7 ++++ roles/deploy-env/files/kubeadm_config.yaml | 4 +-- roles/deploy-env/files/loop-setup.service | 18 ++++++++++ roles/deploy-env/handlers/main.yaml | 9 +++++ roles/deploy-env/tasks/control-plane.yaml | 2 +- roles/deploy-env/tasks/loopback_devices.yaml | 33 +++++++++++++++++++ roles/deploy-env/tasks/main.yaml | 5 +++ roles/osh-run-script-set/defaults/main.yaml | 5 +-- roles/osh-run-script-set/tasks/main.yaml | 3 +- roles/osh-run-script/defaults/main.yaml | 5 +-- roles/osh-run-script/tasks/main.yaml | 3 +- .../020-ceph.sh => ceph/ceph.sh} | 29 ++++++++-------- .../openstack-support-rook/020-ceph.sh | 7 +--- .../deployment/openstack-support/020-ceph.sh | 1 - zuul.d/jobs.yaml | 17 ++++++---- 15 files changed, 111 insertions(+), 37 deletions(-) create mode 100644 roles/deploy-env/files/loop-setup.service create mode 100644 roles/deploy-env/handlers/main.yaml create mode 100644 roles/deploy-env/tasks/loopback_devices.yaml rename tools/deployment/{osh-infra-logging/020-ceph.sh => ceph/ceph.sh} (90%) delete mode 120000 
tools/deployment/openstack-support/020-ceph.sh diff --git a/roles/deploy-env/defaults/main.yaml b/roles/deploy-env/defaults/main.yaml index 365e32669..4a7c95529 100644 --- a/roles/deploy-env/defaults/main.yaml +++ b/roles/deploy-env/defaults/main.yaml @@ -13,8 +13,15 @@ kubectl: user: zuul group: zuul +kubeadm: + pod_network_cidr: "10.244.0.0/24" + service_cidr: "10.96.0.0/16" docker: root_path: /var/lib/docker containerd: root_path: /var/lib/containerd +loopback_setup: false +loopback_device: /dev/loop100 +loopback_image: /var/lib/openstack-helm/ceph-loop.img +loopback_image_size: 12G ... diff --git a/roles/deploy-env/files/kubeadm_config.yaml b/roles/deploy-env/files/kubeadm_config.yaml index 25b1adcf2..147b0c6ef 100644 --- a/roles/deploy-env/files/kubeadm_config.yaml +++ b/roles/deploy-env/files/kubeadm_config.yaml @@ -7,7 +7,7 @@ mode: ipvs apiVersion: kubeadm.k8s.io/v1beta2 kind: ClusterConfiguration networking: - serviceSubnet: "10.96.0.0/16" - podSubnet: "10.244.0.0/24" # --pod-network-cidr + serviceSubnet: "{{ kubeadm.service_cidr }}" # --service-cidr + podSubnet: "{{ kubeadm.pod_network_cidr }}" # --pod-network-cidr dnsDomain: "cluster.local" ... 
diff --git a/roles/deploy-env/files/loop-setup.service b/roles/deploy-env/files/loop-setup.service new file mode 100644 index 000000000..d4d6e3f09 --- /dev/null +++ b/roles/deploy-env/files/loop-setup.service @@ -0,0 +1,18 @@ +[Unit] +Description=Setup loop devices +DefaultDependencies=no +Conflicts=umount.target +Before=local-fs.target +After=systemd-udevd.service +Requires=systemd-udevd.service + +[Service] +Type=oneshot +ExecStart=/sbin/losetup {{ loopback_device }} '{{ loopback_image }}' +ExecStop=/sbin/losetup -d {{ loopback_device }} +TimeoutSec=60 +RemainAfterExit=yes + +[Install] +WantedBy=local-fs.target +Also=systemd-udevd.service diff --git a/roles/deploy-env/handlers/main.yaml b/roles/deploy-env/handlers/main.yaml new file mode 100644 index 000000000..e9846b0ee --- /dev/null +++ b/roles/deploy-env/handlers/main.yaml @@ -0,0 +1,9 @@ +--- +- name: Systemd reload + shell: systemctl daemon-reload + +- name: Restart loop-setup + service: + name: loop-setup + state: restarted +... 
diff --git a/roles/deploy-env/tasks/control-plane.yaml b/roles/deploy-env/tasks/control-plane.yaml index 8c2f9997c..e9d7422ca 100644 --- a/roles/deploy-env/tasks/control-plane.yaml +++ b/roles/deploy-env/tasks/control-plane.yaml @@ -8,7 +8,7 @@ state: mounted - name: Prepare kubeadm config - copy: + template: src: files/kubeadm_config.yaml dest: /tmp/kubeadm_config.yaml diff --git a/roles/deploy-env/tasks/loopback_devices.yaml b/roles/deploy-env/tasks/loopback_devices.yaml new file mode 100644 index 000000000..54cbff6e5 --- /dev/null +++ b/roles/deploy-env/tasks/loopback_devices.yaml @@ -0,0 +1,33 @@ +--- +- name: Create loop device image + shell: | + mkdir -p {{ loopback_image | dirname }} + truncate -s {{ loopback_image_size }} {{ loopback_image }} + +- name: Create loop device + shell: | + mknod {{ loopback_device }} b $(grep loop /proc/devices | cut -c3) {{ loopback_device | regex_search('[0-9]+') }} + +- name: Create loop-setup systemd unit + template: + src: files/loop-setup.service + dest: /etc/systemd/system/loop-setup.service + notify: + - Systemd reload + +- name: Systemd reload + shell: systemctl daemon-reload + +- name: Configure loop-setup systemd unit + service: + name: loop-setup + enabled: yes + state: started + notify: + - Systemd reload + - Restart loop-setup + +- name: Check {{ loopback_device }} is attached + shell: | + losetup | grep -i {{ loopback_device }} +... diff --git a/roles/deploy-env/tasks/main.yaml b/roles/deploy-env/tasks/main.yaml index e6a4d0d28..003335a38 100644 --- a/roles/deploy-env/tasks/main.yaml +++ b/roles/deploy-env/tasks/main.yaml @@ -41,4 +41,9 @@ path: /etc/resolv.conf state: present insertbefore: "BOF" + +- name: Loop devices + include_tasks: + file: loopback_devices.yaml + when: loopback_setup ... 
diff --git a/roles/osh-run-script-set/defaults/main.yaml b/roles/osh-run-script-set/defaults/main.yaml index 20896a467..6f555bb1a 100644 --- a/roles/osh-run-script-set/defaults/main.yaml +++ b/roles/osh-run-script-set/defaults/main.yaml @@ -11,8 +11,9 @@ # limitations under the License. --- -ceph: - loopback_path: "/var/lib/openstack-helm" +ceph_osd_data_device: "/dev/loop0" +kubeadm: + pod_network_cidr: "10.244.0.0/24" osh_params: container_distro_name: ubuntu container_distro_version: focal diff --git a/roles/osh-run-script-set/tasks/main.yaml b/roles/osh-run-script-set/tasks/main.yaml index 6ae8c6e2b..3bddbb92c 100644 --- a/roles/osh-run-script-set/tasks/main.yaml +++ b/roles/osh-run-script-set/tasks/main.yaml @@ -23,7 +23,8 @@ args: chdir: "{{ zuul.project.src_dir }}/{{ gate_scripts_relative_path }}" environment: - CEPH_LOOPBACK_PATH: "{{ ceph.loopback_path }}" + CEPH_OSD_DATA_DEVICE: "{{ ceph_osd_data_device }}" + POD_NETWORK_CIDR: "{{ kubeadm.pod_network_cidr }}" zuul_site_mirror_fqdn: "{{ zuul_site_mirror_fqdn }}" OSH_EXTRA_HELM_ARGS: "{{ zuul_osh_extra_helm_args_relative_path | default('') }}" OSH_PATH: "{{ zuul_osh_relative_path | default('../openstack-helm/') }}" diff --git a/roles/osh-run-script/defaults/main.yaml b/roles/osh-run-script/defaults/main.yaml index 20896a467..6f555bb1a 100644 --- a/roles/osh-run-script/defaults/main.yaml +++ b/roles/osh-run-script/defaults/main.yaml @@ -11,8 +11,9 @@ # limitations under the License. 
--- -ceph: - loopback_path: "/var/lib/openstack-helm" +ceph_osd_data_device: "/dev/loop0" +kubeadm: + pod_network_cidr: "10.244.0.0/24" osh_params: container_distro_name: ubuntu container_distro_version: focal diff --git a/roles/osh-run-script/tasks/main.yaml b/roles/osh-run-script/tasks/main.yaml index 8789c7a07..844f6b359 100644 --- a/roles/osh-run-script/tasks/main.yaml +++ b/roles/osh-run-script/tasks/main.yaml @@ -20,7 +20,8 @@ args: chdir: "{{ zuul.project.src_dir }}/{{ gate_scripts_relative_path }}" environment: - CEPH_LOOPBACK_PATH: "{{ ceph.loopback_path }}" + CEPH_OSD_DATA_DEVICE: "{{ ceph_osd_data_device }}" + POD_NETWORK_CIDR: "{{ kubeadm.pod_network_cidr }}" zuul_site_mirror_fqdn: "{{ zuul_site_mirror_fqdn }}" OSH_EXTRA_HELM_ARGS: "{{ zuul_osh_extra_helm_args_relative_path | default('') }}" OSH_PATH: "{{ zuul_osh_relative_path | default('../openstack-helm/') }}" diff --git a/tools/deployment/osh-infra-logging/020-ceph.sh b/tools/deployment/ceph/ceph.sh similarity index 90% rename from tools/deployment/osh-infra-logging/020-ceph.sh rename to tools/deployment/ceph/ceph.sh index 188625436..ba6f5cd67 100755 --- a/tools/deployment/osh-infra-logging/020-ceph.sh +++ b/tools/deployment/ceph/ceph.sh @@ -14,17 +14,16 @@ set -xe -# setup loopback devices for ceph -free_loop_devices=( $(ls -1 /dev/loop[0-7] | while read loopdev; do losetup | grep -q $loopdev || echo $loopdev; done) ) -./tools/deployment/common/setup-ceph-loopback-device.sh \ - --ceph-osd-data ${CEPH_OSD_DATA_DEVICE:=${free_loop_devices[0]}} \ - --ceph-osd-dbwal ${CEPH_OSD_DB_WAL_DEVICE:=${free_loop_devices[1]}} +: ${CEPH_OSD_DATA_DEVICE:="/dev/loop100"} +: ${POD_NETWORK_CIDR:="10.244.0.0/24"} #NOTE: Lint and package chart for CHART in ceph-mon ceph-osd ceph-client ceph-provisioners; do make "${CHART}" done +NUMBER_OF_OSDS="$(kubectl get nodes -l ceph-osd=enabled --no-headers | wc -l)" + #NOTE: Deploy command : ${OSH_EXTRA_HELM_ARGS:=""} [ -s /tmp/ceph-fs-uuid.txt ] || uuidgen > 
/tmp/ceph-fs-uuid.txt @@ -54,8 +53,8 @@ endpoints: metrics: default: 9283 network: - public: 172.17.0.1/16 - cluster: 172.17.0.1/16 + public: "${POD_NETWORK_CIDR}" + cluster: "${POD_NETWORK_CIDR}" port: mon: 6789 rgw: 8088 @@ -83,8 +82,8 @@ conf: crush: tunables: ${CRUSH_TUNABLES} target: - osd: 1 - final_osd: 1 + osd: ${NUMBER_OF_OSDS} + final_osd: ${NUMBER_OF_OSDS} pg_per_osd: 100 default: crush_rule: same_host @@ -174,12 +173,12 @@ conf: - data: type: bluestore location: ${CEPH_OSD_DATA_DEVICE} - block_db: - location: ${CEPH_OSD_DB_WAL_DEVICE} - size: "5GB" - block_wal: - location: ${CEPH_OSD_DB_WAL_DEVICE} - size: "2GB" + # block_db: + # location: ${CEPH_OSD_DB_WAL_DEVICE} + # size: "5GB" + # block_wal: + # location: ${CEPH_OSD_DB_WAL_DEVICE} + # size: "2GB" pod: replicas: diff --git a/tools/deployment/openstack-support-rook/020-ceph.sh b/tools/deployment/openstack-support-rook/020-ceph.sh index 503088c94..bae24d949 100755 --- a/tools/deployment/openstack-support-rook/020-ceph.sh +++ b/tools/deployment/openstack-support-rook/020-ceph.sh @@ -17,11 +17,7 @@ set -xe # Specify the Rook release tag to use for the Rook operator here ROOK_RELEASE=v1.12.4 -# setup loopback devices for ceph -free_loop_devices=( $(ls -1 /dev/loop[0-7] | while read loopdev; do losetup | grep -q $loopdev || echo $loopdev; done) ) -./tools/deployment/common/setup-ceph-loopback-device.sh \ - --ceph-osd-data ${CEPH_OSD_DATA_DEVICE:=${free_loop_devices[0]}} \ - --ceph-osd-dbwal ${CEPH_OSD_DB_WAL_DEVICE:=${free_loop_devices[1]}} +: ${CEPH_OSD_DATA_DEVICE:="/dev/loop100"} #NOTE: Deploy command : ${OSH_EXTRA_HELM_ARGS:=""} @@ -499,7 +495,6 @@ cephClusterSpec: devices: - name: "${CEPH_OSD_DATA_DEVICE}" config: - metadataDevice: "${CEPH_OSD_DB_WAL_DEVICE}" databaseSizeMB: "5120" walSizeMB: "2048" disruptionManagement: diff --git a/tools/deployment/openstack-support/020-ceph.sh b/tools/deployment/openstack-support/020-ceph.sh deleted file mode 120000 index 1ab828eed..000000000 --- 
a/tools/deployment/openstack-support/020-ceph.sh +++ /dev/null @@ -1 +0,0 @@ -../osh-infra-logging/020-ceph.sh \ No newline at end of file diff --git a/zuul.d/jobs.yaml b/zuul.d/jobs.yaml index fadf0c4a2..ebae4df06 100644 --- a/zuul.d/jobs.yaml +++ b/zuul.d/jobs.yaml @@ -92,8 +92,13 @@ root_path: "/opt/ext_vol/docker" containerd: root_path: "/opt/ext_vol/containerd" - ceph: - loopback_path: "/opt/ext_vol/openstack-helm" + kubeadm: + pod_network_cidr: "10.244.0.0/24" + service_cidr: "10.96.0.0/16" + loopback_setup: true + loopback_device: /dev/loop100 + loopback_image: "/opt/ext_vol/openstack-helm/ceph-loop.img" + ceph_osd_data_device: /dev/loop100 # the k8s package versions are available here # https://packages.cloud.google.com/apt/dists/kubernetes-xenial/main/binary-amd64/Packages kube_version: "1.26.3-00" @@ -108,7 +113,7 @@ - job: name: openstack-helm-infra-logging parent: openstack-helm-infra-deploy - nodeset: openstack-helm-1node-ubuntu_focal + nodeset: openstack-helm-3nodes-ubuntu_focal vars: osh_params: openstack_release: "2023.1" @@ -117,7 +122,7 @@ gate_scripts: - ./tools/deployment/osh-infra-logging/000-prepare-k8s.sh - ./tools/deployment/osh-infra-logging/010-ingress.sh - - ./tools/deployment/osh-infra-logging/020-ceph.sh + - ./tools/deployment/ceph/ceph.sh - ./tools/deployment/osh-infra-logging/025-ceph-ns-activate.sh - ./tools/deployment/osh-infra-logging/030-radosgw-osh-infra.sh - ./tools/deployment/osh-infra-logging/040-ldap.sh @@ -194,7 +199,7 @@ - ./tools/deployment/openstack-support/000-prepare-k8s.sh - ./tools/deployment/openstack-support/007-namespace-config.sh - ./tools/deployment/openstack-support/010-ingress.sh - - ./tools/deployment/openstack-support/020-ceph.sh + - ./tools/deployment/ceph/ceph.sh - ./tools/deployment/openstack-support/025-ceph-ns-activate.sh - ./tools/deployment/openstack-support/030-rabbitmq.sh - ./tools/deployment/openstack-support/070-mariadb.sh @@ -250,7 +255,7 @@ - 
./tools/deployment/openstack-support/000-prepare-k8s.sh - ./tools/deployment/openstack-support/007-namespace-config.sh - ./tools/deployment/openstack-support/010-ingress.sh - - ./tools/deployment/openstack-support/020-ceph.sh + - ./tools/deployment/ceph/ceph.sh - ./tools/deployment/openstack-support/025-ceph-ns-activate.sh - ./tools/deployment/openstack-support/030-rabbitmq.sh - ./tools/deployment/openstack-support/070-mariadb.sh