diff --git a/playbookconfig/src/playbooks/roles/recover-ceph-data/files/recover_cephfs.sh b/playbookconfig/src/playbooks/roles/recover-ceph-data/files/recover_cephfs.sh
index d1bf4b6e5..8b050d467 100755
--- a/playbookconfig/src/playbooks/roles/recover-ceph-data/files/recover_cephfs.sh
+++ b/playbookconfig/src/playbooks/roles/recover-ceph-data/files/recover_cephfs.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 #
-# Copyright (c) 2021-2023 Wind River Systems, Inc.
+# Copyright (c) 2021-2024 Wind River Systems, Inc.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -15,6 +15,8 @@
 # - https://github.com/ansible/ansible/issues/70092
 sleep 2
 
+CEPH_BIN=/usr/bin/ceph
+
 FS_NAME=kube-cephfs
 DATA_POOL_NAME=kube-cephfs-data
 METADATA_POOL_NAME=kube-cephfs-metadata
@@ -29,13 +31,13 @@ set -x
 /etc/init.d/ceph stop mds
 
 # Check if the filesystem for the system RWX provisioner is present
-ceph fs ls | grep ${FS_NAME}
+${CEPH_BIN} fs ls | grep ${FS_NAME}
 if [ $? -ne 0 ]; then
     # Use existing metadata/data pools to recover cephfs
-    ceph fs new ${FS_NAME} ${METADATA_POOL_NAME} ${DATA_POOL_NAME} --force
+    ${CEPH_BIN} fs new ${FS_NAME} ${METADATA_POOL_NAME} ${DATA_POOL_NAME} --force
 
     # Recover MDS state from filesystem
-    ceph fs reset ${FS_NAME} --yes-i-really-mean-it
+    ${CEPH_BIN} fs reset ${FS_NAME} --yes-i-really-mean-it
 
     # Try to recover from some common errors
     cephfs-journal-tool --rank=${FS_NAME}:0 event recover_dentries summary
diff --git a/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml b/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml
index 6d52c834b..42829d2e1 100644
--- a/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml
+++ b/playbookconfig/src/playbooks/roles/recover-ceph-data/tasks/main.yml
@@ -1,6 +1,6 @@
 ---
 #
-# Copyright (c) 2019-2023 Wind River Systems, Inc.
+# Copyright (c) 2019-2024 Wind River Systems, Inc.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -257,7 +257,7 @@
   - debug: var=prepare_ceph_partitions.stdout_lines
 
   - name: Bring up ceph-mon
-    command: /etc/init.d/ceph start mon
+    command: /etc/init.d/ceph start mon.{{ mon_name }}
 
   # Recover ceph-data from every osd with ceph-objectore-tool
   - name: Recover ceph-data
@@ -267,7 +267,7 @@
   - debug: var=ceph_data_out.stdout_lines
 
   - name: Bring down ceph-mon
-    command: /etc/init.d/ceph stop mon
+    command: /etc/init.d/ceph stop mon.{{ mon_name }}
 
   - name: Delete store.db file from ceph-mon
     file:
@@ -306,10 +306,10 @@
       state: absent
 
   - name: Bring up ceph Monitor
-    command: /etc/init.d/ceph start mon
+    command: /etc/init.d/ceph start mon.{{ mon_name }}
 
   - name: Wait for ceph monitor to be up
-    shell: timeout 15 ceph -s
+    shell: timeout 15 /usr/bin/ceph -s
     retries: 5
     delay: 2
     register: result
@@ -329,14 +329,14 @@
     until: result.rc == 0
 
   - name: Enable Ceph Msgr v2 protocol
-    shell: ceph mon enable-msgr2
+    shell: /usr/bin/ceph mon enable-msgr2
     retries: 5
     delay: 2
     register: result
     until: result.rc == 0
 
   - name: Wait for V2 protocol to be enabled
-    shell: ceph -s
+    shell: /usr/bin/ceph -s
     register: result
     until: "'1 monitors have not enabled msgr2' not in result"
     retries: 30
@@ -346,7 +346,7 @@
     command: /usr/bin/ceph-mgr --cluster ceph --id controller-0 # start ceph-mgr
 
   - name: Wait for ceph-mgr to detect Ceph's pools
-    shell: ceph -s
+    shell: /usr/bin/ceph -s
     register: result
     until: "'0 pools' not in result"
     retries: 30
@@ -362,8 +362,8 @@
       path: "/etc/ceph/ceph.client.guest.keyring"
       state: touch
 
-  - name: Restart ceph one more time to pick latest changes
-    command: /etc/init.d/ceph restart
+  - name: Restart ceph-mon one more time to pick latest changes
+    command: /etc/init.d/ceph restart mon.{{ mon_name }}
 
   - name: Check and recover CephFs filesystem
     script: recover_cephfs.sh