Adjust the Ceph restore to support AIO-DX with 3 Ceph monitors

Adjust the Backup and Restore (BnR) playbooks to recover Ceph correctly
now that AIO-DX runs 3 Ceph monitors.

Test Plan:
  PASS: BnR AIO-SX
  PASS: BnR AIO-DX with Ceph network using management
  PASS: BnR AIO-DX with Ceph network using cluster-host
  PASS: BnR Standard 2+2
  PASS: BnR Storage 2+2+2

Story: 2011122
Task: 50127

Depends-on: https://review.opendev.org/c/starlingx/stx-puppet/+/914912

Change-Id: Icfa39fa3b5804438c5c666b223e6694fa957b4fa
Signed-off-by: Felipe Sanches Zanoni <Felipe.SanchesZanoni@windriver.com>
This commit is contained in:
Felipe Sanches Zanoni 2024-04-24 08:46:27 -03:00
parent b39144710b
commit be0a00b0e0
2 changed files with 16 additions and 14 deletions

View File

@ -1,6 +1,6 @@
#!/bin/sh
#
# Copyright (c) 2021-2023 Wind River Systems, Inc.
# Copyright (c) 2021-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -15,6 +15,8 @@
# - https://github.com/ansible/ansible/issues/70092
sleep 2
CEPH_BIN=/usr/bin/ceph
FS_NAME=kube-cephfs
DATA_POOL_NAME=kube-cephfs-data
METADATA_POOL_NAME=kube-cephfs-metadata
@ -29,13 +31,13 @@ set -x
/etc/init.d/ceph stop mds
# Check if the filesystem for the system RWX provisioner is present
ceph fs ls | grep ${FS_NAME}
${CEPH_BIN} fs ls | grep ${FS_NAME}
if [ $? -ne 0 ]; then
# Use existing metadata/data pools to recover cephfs
ceph fs new ${FS_NAME} ${METADATA_POOL_NAME} ${DATA_POOL_NAME} --force
${CEPH_BIN} fs new ${FS_NAME} ${METADATA_POOL_NAME} ${DATA_POOL_NAME} --force
# Recover MDS state from filesystem
ceph fs reset ${FS_NAME} --yes-i-really-mean-it
${CEPH_BIN} fs reset ${FS_NAME} --yes-i-really-mean-it
# Try to recover from some common errors
cephfs-journal-tool --rank=${FS_NAME}:0 event recover_dentries summary

View File

@ -1,6 +1,6 @@
---
#
# Copyright (c) 2019-2023 Wind River Systems, Inc.
# Copyright (c) 2019-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -257,7 +257,7 @@
- debug: var=prepare_ceph_partitions.stdout_lines
- name: Bring up ceph-mon
command: /etc/init.d/ceph start mon
command: /etc/init.d/ceph start mon.{{ mon_name }}
# Recover ceph-data from every osd with ceph-objectstore-tool
- name: Recover ceph-data
@ -267,7 +267,7 @@
- debug: var=ceph_data_out.stdout_lines
- name: Bring down ceph-mon
command: /etc/init.d/ceph stop mon
command: /etc/init.d/ceph stop mon.{{ mon_name }}
- name: Delete store.db file from ceph-mon
file:
@ -306,10 +306,10 @@
state: absent
- name: Bring up ceph Monitor
command: /etc/init.d/ceph start mon
command: /etc/init.d/ceph start mon.{{ mon_name }}
- name: Wait for ceph monitor to be up
shell: timeout 15 ceph -s
shell: timeout 15 /usr/bin/ceph -s
retries: 5
delay: 2
register: result
@ -329,14 +329,14 @@
until: result.rc == 0
- name: Enable Ceph Msgr v2 protocol
shell: ceph mon enable-msgr2
shell: /usr/bin/ceph mon enable-msgr2
retries: 5
delay: 2
register: result
until: result.rc == 0
- name: Wait for V2 protocol to be enabled
shell: ceph -s
shell: /usr/bin/ceph -s
register: result
until: "'1 monitors have not enabled msgr2' not in result"
retries: 30
@ -346,7 +346,7 @@
command: /usr/bin/ceph-mgr --cluster ceph --id controller-0 - start ceph-mgr
- name: Wait for ceph-mgr to detect Ceph's pools
shell: ceph -s
shell: /usr/bin/ceph -s
register: result
until: "'0 pools' not in result"
retries: 30
@ -362,8 +362,8 @@
path: "/etc/ceph/ceph.client.guest.keyring"
state: touch
- name: Restart ceph one more time to pick latest changes
command: /etc/init.d/ceph restart
- name: Restart ceph-mon one more time to pick latest changes
command: /etc/init.d/ceph restart mon.{{ mon_name }}
- name: Check and recover CephFs filesystem
script: recover_cephfs.sh