Allow external ceph journals and fix bootstrap
This allows us to specify external journals for OSDs, which can greatly improve performance when the external journals are on solid-state drives. The new lookup and startup methods fix the previous races that prevented OSDs from being created properly. This retains the same functionality as before and is completely compatible with the previous method and labels; however, it does set new labels for all newly bootstrapped OSDs. This is due to a limitation on the length of a GPT partition name. Closes-Bug: #1558853 DocImpact Partially-Implements: blueprint ceph-improvements Change-Id: I61fd10cb35c67dabc53bd82270f26909ef51fc38
This commit is contained in:
parent
6d0cd535d3
commit
5250a00781
@ -2,7 +2,7 @@
|
||||
- name: Looking up disks to bootstrap for Ceph
|
||||
command: docker exec -t kolla_toolbox /usr/bin/ansible localhost
|
||||
-m find_disks
|
||||
-a "partition_name='KOLLA_CEPH_OSD_BOOTSTRAP'"
|
||||
-a "partition_name='KOLLA_CEPH_OSD_BOOTSTRAP' match_mode='prefix'"
|
||||
register: osd_lookup
|
||||
changed_when: "{{ osd_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}"
|
||||
failed_when: osd_lookup.stdout.split()[2] != 'SUCCESS'
|
||||
@ -14,7 +14,7 @@
|
||||
- name: Looking up disks to bootstrap for Ceph
|
||||
command: docker exec -t kolla_toolbox /usr/bin/ansible localhost
|
||||
-m find_disks
|
||||
-a "partition_name='KOLLA_CEPH_OSD_CACHE_BOOTSTRAP'"
|
||||
-a "partition_name='KOLLA_CEPH_OSD_CACHE_BOOTSTRAP' match_mode='prefix'"
|
||||
register: osd_cache_lookup
|
||||
changed_when: "{{ osd_cache_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_cache_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}"
|
||||
failed_when: osd_cache_lookup.stdout.split()[2] != 'SUCCESS'
|
||||
@ -32,6 +32,12 @@
|
||||
KOLLA_BOOTSTRAP:
|
||||
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
|
||||
OSD_DEV: "{{ item.1.device }}"
|
||||
OSD_PARTITION: "{{ item.1.partition }}"
|
||||
OSD_PARTITION_NUM: "{{ item.1.partition_num }}"
|
||||
JOURNAL_DEV: "{{ item.1.journal_device }}"
|
||||
JOURNAL_PARTITION: "{{ item.1.journal }}"
|
||||
JOURNAL_PARTITION_NUM: "{{ item.1.journal_num }}"
|
||||
USE_EXTERNAL_JOURNAL: "{{ item.1.external_journal | bool }}"
|
||||
OSD_FILESYSTEM: "{{ ceph_osd_filesystem }}"
|
||||
OSD_INITIAL_WEIGHT: "{{ osd_initial_weight }}"
|
||||
HOSTNAME: "{{ hostvars[inventory_hostname]['ansible_' + storage_interface]['ipv4']['address'] }}"
|
||||
@ -56,6 +62,12 @@
|
||||
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
|
||||
CEPH_CACHE:
|
||||
OSD_DEV: "{{ item.1.device }}"
|
||||
OSD_PARTITION: "{{ item.1.partition }}"
|
||||
OSD_PARTITION_NUM: "{{ item.1.partition_num }}"
|
||||
JOURNAL_DEV: "{{ item.1.journal_device }}"
|
||||
JOURNAL_PARTITION: "{{ item.1.journal }}"
|
||||
JOURNAL_PARTITION_NUM: "{{ item.1.journal_num }}"
|
||||
USE_EXTERNAL_JOURNAL: "{{ item.1.external_journal | bool }}"
|
||||
OSD_FILESYSTEM: "{{ ceph_osd_filesystem }}"
|
||||
OSD_INITIAL_WEIGHT: "{{ osd_initial_weight }}"
|
||||
HOSTNAME: "{{ hostvars[inventory_hostname]['ansible_' + storage_interface]['ipv4']['address'] }}"
|
||||
|
@ -2,7 +2,7 @@
|
||||
- name: Looking up OSDs for Ceph
|
||||
command: docker exec -t kolla_toolbox /usr/bin/ansible localhost
|
||||
-m find_disks
|
||||
-a "partition_name='KOLLA_CEPH_DATA'"
|
||||
-a "partition_name='KOLLA_CEPH_DATA' match_mode='prefix'"
|
||||
register: osd_lookup
|
||||
changed_when: "{{ osd_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}"
|
||||
failed_when: osd_lookup.stdout.split()[2] != 'SUCCESS'
|
||||
@ -34,7 +34,7 @@
|
||||
environment:
|
||||
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
|
||||
OSD_ID: "{{ item.0.stdout }}"
|
||||
OSD_DEV: "{{ item.1['device'] }}"
|
||||
JOURNAL_PARTITION: "{{ item.1.journal }}"
|
||||
image: "{{ ceph_osd_image_full }}"
|
||||
name: "ceph_osd_{{ item.0.stdout }}"
|
||||
pid_mode: "host"
|
||||
|
@ -3,28 +3,22 @@
|
||||
# Bootstrap and exit if KOLLA_BOOTSTRAP variable is set. This catches all cases
|
||||
# of the KOLLA_BOOTSTRAP variable being set, including empty.
|
||||
if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then
|
||||
# NOTE(SamYaple): Static gpt partcodes
|
||||
CEPH_JOURNAL_TYPE_CODE="45B0969E-9B03-4F30-B4C6-B4B80CEFF106"
|
||||
CEPH_OSD_TYPE_CODE="4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D"
|
||||
|
||||
# Wait for ceph quorum before proceeding
|
||||
ceph quorum_status
|
||||
|
||||
# Formatting disk for ceph
|
||||
sgdisk --zap-all -- "${OSD_DEV}"
|
||||
sgdisk --new=2:1M:5G --change-name=2:KOLLA_CEPH_JOURNAL --typecode=2:45B0969E-9B03-4F30-B4C6-B4B80CEFF106 --mbrtogpt -- "${OSD_DEV}"
|
||||
sgdisk --largest-new=1 --change-name=1:KOLLA_CEPH_DATA --typecode=1:4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D -- "${OSD_DEV}"
|
||||
# This command may throw errors that we can safely ignore
|
||||
partprobe || true
|
||||
if [[ "${USE_EXTERNAL_JOURNAL}" == "False" ]]; then
|
||||
# Formatting disk for ceph
|
||||
sgdisk --zap-all -- "${OSD_DEV}"
|
||||
sgdisk --new=2:1M:5G -- "${JOURNAL_DEV}"
|
||||
sgdisk --largest-new=1 -- "${OSD_DEV}"
|
||||
# NOTE(SamYaple): This command may throw errors that we can safely ignore
|
||||
partprobe || true
|
||||
|
||||
count=0
|
||||
while [[ "${OSD_PARTITION}x" == "x" ]]; do
|
||||
if [[ "${count}" -gt 5 ]]; then
|
||||
echo "Could not find OSD Partition"
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
# We look up the appropriate device path with partition.
|
||||
OSD_PARTITION=$(ls "${OSD_DEV}"* | egrep "${OSD_DEV}p?1")
|
||||
count=$(( count + 1 ))
|
||||
done
|
||||
JOURNAL_PARTITION="${OSD_PARTITION%?}2"
|
||||
fi
|
||||
|
||||
OSD_ID=$(ceph osd create)
|
||||
OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
|
||||
@ -56,10 +50,13 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then
|
||||
|
||||
# Adding osd to crush map
|
||||
ceph osd crush add "${OSD_ID}" "${OSD_INITIAL_WEIGHT}" host="${HOSTNAME}${CEPH_ROOT_NAME:+-${CEPH_ROOT_NAME}}"
|
||||
|
||||
# Setting partition name based on ${OSD_ID}
|
||||
sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_DEV}"
|
||||
sgdisk "--change-name=${JOURNAL_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}_J" "--typecode=${JOURNAL_PARTITION_NUM}:${CEPH_JOURNAL_TYPE_CODE}" -- "${JOURNAL_DEV}"
|
||||
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# We look up the appropriate journal since we cannot rely on symlinks
|
||||
JOURNAL_PARTITION=$(ls "${OSD_DEV}"* | egrep "${OSD_DEV}p?2")
|
||||
OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
|
||||
ARGS="-i ${OSD_ID} --osd-journal ${JOURNAL_PARTITION} -k ${OSD_DIR}/keyring"
|
||||
|
@ -66,6 +66,61 @@ EXAMPLES = '''
|
||||
|
||||
import json
|
||||
import pyudev
|
||||
import re
|
||||
|
||||
|
||||
def is_dev_matched_by_name(dev, name, mode):
    """Tell whether a block device's name matches ``name`` under ``mode``.

    The name compared is the ``ID_PART_ENTRY_NAME`` property when the
    device's ``DEVTYPE`` is ``'partition'``, and the ``ID_FS_LABEL``
    property otherwise; a missing property is treated as an empty string.

    :param dev: mapping-like device object exposing ``get(key, default)``
    :param name: the name (or name prefix) to look for
    :param mode: ``'strict'`` for an exact match, ``'prefix'`` for a
                 ``startswith`` match
    :returns: the result of the comparison; ``False`` for any other mode
    """
    is_partition = dev.get('DEVTYPE', '') == 'partition'
    name_property = 'ID_PART_ENTRY_NAME' if is_partition else 'ID_FS_LABEL'
    found_name = dev.get(name_property, '')

    if mode == 'prefix':
        return found_name.startswith(name)
    if mode == 'strict':
        return found_name == name
    # Unknown match modes never match.
    return False
|
||||
|
||||
|
||||
def find_disk(ct, name, match_mode):
    """Yield the block devices whose name matches ``name``.

    :param ct: context object providing ``list_devices(subsystem=...)``
               (``main`` passes a ``pyudev.Context``)
    :param name: the name (or name prefix) to look for
    :param match_mode: matching mode handed to ``is_dev_matched_by_name``
    :returns: a generator over the matching devices, in listing order
    """
    block_devices = ct.list_devices(subsystem='block')
    for candidate in block_devices:
        if not is_dev_matched_by_name(candidate, name, match_mode):
            continue
        yield candidate
|
||||
|
||||
|
||||
def _trailing_number(device_node):
    """Return the run of digits ending ``device_node`` ('' when absent)."""
    match = re.search(r'\d+$', device_node)
    return match.group(0) if match else ''


def extract_disk_info(ct, dev, name):
    """Yield a dict describing ``dev`` and, for partitions, its journal.

    Every yielded dict carries ``fs_uuid``, ``fs_label`` and ``device``.
    When ``dev`` has ``DEVTYPE == 'partition'`` it additionally carries
    ``partition``, ``partition_num``, ``external_journal``, ``journal``,
    ``journal_device`` and ``journal_num``:

    * if the partition name equals ``name`` exactly, the journal is taken
      to be partition 2 of the same parent device (co-located journal);
    * otherwise the journal is the first partition whose name is this
      partition's ``ID_PART_ENTRY_NAME`` with ``'_J'`` appended.

    Nothing is yielded when ``dev`` is falsy, or when an external journal
    is expected but no matching journal partition is found.

    :param ct: context object, used to search for the journal partition
    :param dev: device object exposing ``get``, ``device_node`` and
                ``find_parent``
    :param name: the name the lookup was started with
    :returns: a generator yielding at most one info dict
    """
    if not dev:
        return
    kwargs = dict()
    kwargs['fs_uuid'] = dev.get('ID_FS_UUID', '')
    kwargs['fs_label'] = dev.get('ID_FS_LABEL', '')
    if dev.get('DEVTYPE', '') == 'partition':
        kwargs['device'] = dev.find_parent('block').device_node
        kwargs['partition'] = dev.device_node
        kwargs['partition_num'] = _trailing_number(dev.device_node)
        if is_dev_matched_by_name(dev, name, 'strict'):
            kwargs['external_journal'] = False
            # Swap the whole trailing partition number for 2 rather than
            # only the last character, so the path always agrees with
            # journal_num even for multi-digit partition numbers.
            kwargs['journal'] = re.sub(r'\d+$', '2', dev.device_node)
            kwargs['journal_device'] = kwargs['device']
            # NOTE: an int here, while the external-journal branch
            # produces a string; kept as-is for output compatibility.
            kwargs['journal_num'] = 2
        else:
            kwargs['external_journal'] = True
            journal_name = dev.get('ID_PART_ENTRY_NAME', '') + '_J'
            # Only the first matching journal partition is used.
            for journal in find_disk(ct, journal_name, 'strict'):
                kwargs['journal'] = journal.device_node
                kwargs['journal_device'] = \
                    journal.find_parent('block').device_node
                kwargs['journal_num'] = _trailing_number(journal.device_node)
                break
            if 'journal' not in kwargs:
                # NOTE(SamYaple): Journal not found, not returning info
                return
    else:
        kwargs['device'] = dev.device_node
    yield kwargs
|
||||
|
||||
|
||||
def main():
|
||||
@ -78,33 +133,14 @@ def main():
|
||||
match_mode = module.params.get('match_mode')
|
||||
name = module.params.get('name')
|
||||
|
||||
def is_dev_matched_by_name(dev, name):
|
||||
if dev.get('DEVTYPE', '') == 'partition':
|
||||
dev_name = dev.get('ID_PART_ENTRY_NAME', '')
|
||||
else:
|
||||
dev_name = dev.get('ID_FS_LABEL', '')
|
||||
|
||||
if match_mode == 'strict':
|
||||
return dev_name == name
|
||||
elif match_mode == 'prefix':
|
||||
return dev_name.startswith(name)
|
||||
else:
|
||||
return False
|
||||
|
||||
try:
|
||||
ret = list()
|
||||
ct = pyudev.Context()
|
||||
for dev in ct.list_devices(subsystem='block'):
|
||||
if is_dev_matched_by_name(dev, name):
|
||||
fs_uuid = dev.get('ID_FS_UUID', '')
|
||||
fs_label = dev.get('ID_FS_LABEL', '')
|
||||
if dev.get('DEVTYPE', '') == 'partition':
|
||||
device_node = dev.find_parent('block').device_node
|
||||
else:
|
||||
device_node = dev.device_node
|
||||
ret.append({'device': device_node,
|
||||
'fs_uuid': fs_uuid,
|
||||
'fs_label': fs_label})
|
||||
for dev in find_disk(ct, name, match_mode):
|
||||
for info in extract_disk_info(ct, dev, name):
|
||||
if info:
|
||||
ret.append(info)
|
||||
|
||||
module.exit_json(disks=json.dumps(ret))
|
||||
except Exception as e:
|
||||
module.exit_json(failed=True, msg=repr(e))
|
||||
|
Loading…
Reference in New Issue
Block a user