93699cb464
Provision will fail to create new md device if any of disks belongs to inactive md device from time to time. Change-Id: I9abea747e21963b830c1fc27699cc0d756a8c58c Closes-Bug: #1390492
165 lines
6.5 KiB
Python
165 lines
6.5 KiB
Python
# Copyright 2014 Mirantis, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from fuel_agent import errors
|
|
from fuel_agent.openstack.common import log as logging
|
|
from fuel_agent.utils import hardware_utils as hu
|
|
from fuel_agent.utils import utils
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
def mddetail_parse(output):
|
|
md = {}
|
|
h, v = output.split('Number Major Minor RaidDevice State')
|
|
for line in h.split('\n'):
|
|
line = line.strip()
|
|
if not line:
|
|
continue
|
|
for pattern in ('Version', 'Raid Level', 'Raid Devices',
|
|
'Active Devices', 'Spare Devices',
|
|
'Failed Devices', 'State', 'UUID'):
|
|
if line.startswith(pattern):
|
|
md[pattern] = line.split()[-1]
|
|
md['devices'] = []
|
|
for line in v.split('\n'):
|
|
line = line.strip()
|
|
if not line:
|
|
continue
|
|
md['devices'].append(line.split()[-1])
|
|
return md
|
|
|
|
|
|
def get_mdnames(output=None):
|
|
mdnames = []
|
|
if not output:
|
|
with open('/proc/mdstat') as f:
|
|
output = f.read()
|
|
for line in output.split('\n'):
|
|
if line.startswith('md'):
|
|
mdnames.append('/dev/%s' % line.split()[0])
|
|
return mdnames
|
|
|
|
|
|
def mddisplay(names=None):
|
|
mdnames = names or get_mdnames()
|
|
mds = []
|
|
for mdname in mdnames:
|
|
md = {'name': mdname}
|
|
try:
|
|
output = utils.execute('mdadm', '--detail', mdname,
|
|
check_exit_code=[0])[0]
|
|
md.update(mddetail_parse(output))
|
|
except errors.ProcessExecutionError:
|
|
continue
|
|
finally:
|
|
mds.append(md)
|
|
LOG.debug('Found md devices: {0}'.format(mds))
|
|
return mds
|
|
|
|
|
|
def mdcreate(mdname, level, device, *args):
|
|
mds = mddisplay()
|
|
|
|
# check if md device already exists
|
|
if filter(lambda x: x['name'] == mdname, mds):
|
|
raise errors.MDAlreadyExistsError(
|
|
'Error while creating md: md %s already exists' % mdname)
|
|
|
|
# check if level argument is valid
|
|
supported_levels = ('0', '1', 'raid0', 'raid1', 'stripe', 'mirror')
|
|
if level not in supported_levels:
|
|
raise errors.MDWrongSpecError(
|
|
'Error while creating md device: '
|
|
'level must be one of: %s' % ', '.join(supported_levels))
|
|
|
|
devices = [device] + list(args)
|
|
|
|
# check if all necessary devices exist
|
|
if not set(devices).issubset(
|
|
set([bd['device'] for bd in hu.list_block_devices(disks=False)])):
|
|
raise errors.MDNotFoundError(
|
|
'Error while creating md: at least one of devices is not found')
|
|
|
|
# check if devices are not parts of some md array
|
|
if set(devices) & \
|
|
set(reduce(lambda x, y: x + y,
|
|
[md.get('devices', []) for md in mds], [])):
|
|
raise errors.MDDeviceDuplicationError(
|
|
'Error while creating md: at least one of devices is '
|
|
'already in belongs to some md')
|
|
|
|
#FIXME: mdadm will ask user to continue creating if any device appears to
|
|
# be a part of raid array. Superblock zeroing helps to avoid that.
|
|
map(mdclean, devices)
|
|
utils.execute('mdadm', '--create', '--force', mdname, '-e0.90',
|
|
'--level=%s' % level,
|
|
'--raid-devices=%s' % len(devices), *devices,
|
|
check_exit_code=[0])
|
|
|
|
|
|
def mdremove(mdname):
|
|
# check if md exists
|
|
if mdname not in get_mdnames():
|
|
raise errors.MDNotFoundError(
|
|
'Error while removing md: md %s not found' % mdname)
|
|
#FIXME: The issue faced was quiet hard to reproduce and to figure out the
|
|
# root cause. For unknown reason already removed md device is
|
|
# unexpectedly returning back after a while from time to time making
|
|
# new md device creation to fail.
|
|
# Still the actual reason of its failure is unknown, but after a
|
|
# searching on a web a mention was found about a race in udev
|
|
# http://dev.bizo.com/2012/07/mdadm-device-or-resource-busy.html
|
|
# The article recommends to disable udev's queue entirely during md
|
|
# device manipulation which sounds rather unappropriate for our case.
|
|
# And the link to original post on mailing list suggests to execute
|
|
# `udevadm settle` before removing the md device.
|
|
# here -> http://permalink.gmane.org/gmane.linux.raid/34027
|
|
# So, what was done. `udevadm settle` calls were placed just
|
|
# before any of `mdadm` calls and the analizyng the logs was started.
|
|
# According to the manual `settle` is an option that "Watches the
|
|
# udev event queue, and exits if all current events are handled".
|
|
# That means it will wait for udev's finishing of processing the
|
|
# events. According to the logs noticeable delay had been recognized
|
|
# between `udevadm settle` and the next `mdadm` call.
|
|
# The delay was about 150-200ms or even bigger. It was appeared
|
|
# right before the `mdadm --stop` call. That just means that udev was
|
|
# too busy with events when we start to modifiy md devices hard.
|
|
# Thus `udevadm settle` is helping to avoid the later failure and
|
|
# to prevent strange behaviour of md device.
|
|
utils.execute('udevadm', 'settle', '--quiet', check_exit_code=[0])
|
|
utils.execute('mdadm', '--stop', mdname, check_exit_code=[0])
|
|
utils.execute('mdadm', '--remove', mdname, check_exit_code=[0, 1])
|
|
|
|
|
|
def mdclean(device):
|
|
# we don't care if device actually exists or not
|
|
utils.execute('mdadm', '--zero-superblock', '--force', device,
|
|
check_exit_code=[0])
|
|
|
|
|
|
def mdclean_all():
|
|
LOG.debug('Trying to wipe out all md devices')
|
|
for md in mddisplay():
|
|
mdremove(md['name'])
|
|
for dev in md.get('devices', []):
|
|
mdclean(dev)
|
|
# second attempt, remove stale inactive devices
|
|
for md in mddisplay():
|
|
mdremove(md['name'])
|
|
mds = mddisplay()
|
|
if len(mds) > 0:
|
|
raise errors.MDRemovingError(
|
|
'Error while removing mds: few devices still presented %s' % mds)
|