Merge "Fix workaround for the "device is busy" problem"

This commit is contained in:
Jenkins 2014-07-04 11:34:22 +00:00 committed by Gerrit Code Review
commit 9ba13b875f
5 changed files with 219 additions and 29 deletions

View File

@ -652,6 +652,25 @@
#db_max_retries=20 #db_max_retries=20
[disk_partitioner]
#
# Options defined in ironic.common.disk_partitioner
#
# After Ironic has completed creating the partition table, it
# continues to check for activity on the attached iSCSI device
# status at this interval prior to copying the image to the
# node, in seconds (integer value)
#check_device_interval=1
# The maximum number of times to check that the device is not
# accessed by another process. If the device is still busy
# after that, the disk partitioning will be treated as having
# failed. (integer value)
#check_device_max_retries=20
[glance] [glance]
# #

View File

@ -13,4 +13,5 @@ mkswap: CommandFilter, mkswap, root
mkfs: CommandFilter, mkfs, root mkfs: CommandFilter, mkfs, root
# ironic/common/disk_partitioner.py # ironic/common/disk_partitioner.py
fuser: CommandFilter, fuser, root
parted: CommandFilter, parted, root parted: CommandFilter, parted, root

View File

@ -14,11 +14,35 @@
# under the License. # under the License.
import re import re
import time
from oslo.config import cfg
from ironic.common import exception
from ironic.common import utils from ironic.common import utils
from ironic.openstack.common import log as logging from ironic.openstack.common import log as logging
from ironic.openstack.common import loopingcall
from ironic.openstack.common import processutils
opts = [
cfg.IntOpt('check_device_interval',
default=1,
help='After Ironic has completed creating the partition table, '
'it continues to check for activity on the attached iSCSI '
'device status at this interval prior to copying the image'
' to the node, in seconds'),
cfg.IntOpt('check_device_max_retries',
default=20,
help='The maximum number of times to check that the device is '
'not accessed by another process. If the device is still '
'busy after that, the disk partitioning will be treated as'
' having failed.'),
]
CONF = cfg.CONF
opt_group = cfg.OptGroup(name='disk_partitioner',
title='Options for the disk partitioner')
CONF.register_group(opt_group)
CONF.register_opts(opts, opt_group)
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
@ -41,6 +65,7 @@ class DiskPartitioner(object):
self._disk_label = disk_label self._disk_label = disk_label
self._alignment = alignment self._alignment = alignment
self._partitions = [] self._partitions = []
self._fuser_pids_re = re.compile(r'((\d)+\s*)+')
def _exec(self, *args): def _exec(self, *args):
# NOTE(lucasagomes): utils.execute() is already a wrapper on top # NOTE(lucasagomes): utils.execute() is already a wrapper on top
@ -82,6 +107,30 @@ class DiskPartitioner(object):
""" """
return enumerate(self._partitions, 1) return enumerate(self._partitions, 1)
def _wait_for_disk_to_become_available(self, retries, max_retries, pids,
stderr):
retries[0] += 1
if retries[0] > max_retries:
raise loopingcall.LoopingCallDone()
try:
# NOTE(ifarkas): fuser returns a non-zero return code if none of
# the specified files is accessed
out, err = utils.execute('fuser', self._device,
check_exit_code=[0, 1], run_as_root=True)
if not out and not err:
raise loopingcall.LoopingCallDone()
else:
if err:
stderr[0] = err
if out:
pids_match = re.search(self._fuser_pids_re, out)
pids[0] = pids_match.group()
except processutils.ProcessExecutionError as exc:
LOG.warning(_('Failed to check the device %(device)s with fuser: '
' %(err)s') % {'device': self._device, 'err': exc})
def commit(self): def commit(self):
"""Write to the disk.""" """Write to the disk."""
cmd_args = ['mklabel', self._disk_label] cmd_args = ['mklabel', self._disk_label]
@ -97,10 +146,31 @@ class DiskPartitioner(object):
start = end start = end
self._exec(*cmd_args) self._exec(*cmd_args)
# TODO(lucasagomes): Do not sleep, use another mechanism to avoid
# the "device is busy" problem. lsof, fuser... retries = [0]
# avoid "device is busy" pids = ['']
time.sleep(3) fuser_err = ['']
interval = CONF.disk_partitioner.check_device_interval
max_retries = CONF.disk_partitioner.check_device_max_retries
timer = loopingcall.FixedIntervalLoopingCall(
self._wait_for_disk_to_become_available,
retries, max_retries, pids, fuser_err)
timer.start(interval=interval).wait()
if retries[0] > max_retries:
if pids[0]:
raise exception.InstanceDeployFailure(
_('Disk partitioning failed on device %(device)s. '
'Processes with the following PIDs are holding it: '
'%(pids)s. Time out waiting for completion.')
% {'device': self._device, 'pids': pids[0]})
else:
raise exception.InstanceDeployFailure(
_('Disk partitioning failed on device %(device)s. Fuser '
'exited with "%(fuser_err)s". Time out waiting for '
'completion.')
% {'device': self._device, 'fuser_err': fuser_err[0]})
_PARTED_PRINT_RE = re.compile(r"^\d+:([\d\.]+)MiB:" _PARTED_PRINT_RE = re.compile(r"^\d+:([\d\.]+)MiB:"

View File

@ -14,10 +14,8 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import itertools
import time
import fixtures import fixtures
import itertools
import mock import mock
import os import os
import tempfile import tempfile
@ -490,7 +488,6 @@ class WorkOnDiskTestCase(tests_base.TestCase):
commit=True) commit=True)
@mock.patch.object(time, 'sleep', lambda _: None)
@mock.patch.object(common_utils, 'execute') @mock.patch.object(common_utils, 'execute')
class MakePartitionsTestCase(tests_base.TestCase): class MakePartitionsTestCase(tests_base.TestCase):
@ -504,13 +501,19 @@ class MakePartitionsTestCase(tests_base.TestCase):
'--', 'unit', 'MiB', 'mklabel', 'msdos'] '--', 'unit', 'MiB', 'mklabel', 'msdos']
def test_make_partitions(self, mock_exc): def test_make_partitions(self, mock_exc):
expected_mkpart = ['mkpart', 'primary', 'linux-swap', '1', '513', mock_exc.return_value = (None, None)
'mkpart', 'primary', '', '513', '1537']
cmd = self.parted_static_cmd + expected_mkpart
utils.make_partitions(self.dev, self.root_mb, self.swap_mb, utils.make_partitions(self.dev, self.root_mb, self.swap_mb,
self.ephemeral_mb) self.ephemeral_mb)
mock_exc.assert_called_once_with(*cmd, run_as_root=True,
expected_mkpart = ['mkpart', 'primary', 'linux-swap', '1', '513',
'mkpart', 'primary', '', '513', '1537']
parted_cmd = self.parted_static_cmd + expected_mkpart
parted_call = mock.call(*parted_cmd, run_as_root=True,
check_exit_code=[0]) check_exit_code=[0])
fuser_cmd = ['fuser', 'fake-dev']
fuser_call = mock.call(*fuser_cmd, run_as_root=True,
check_exit_code=[0, 1])
mock_exc.assert_has_calls([parted_call, fuser_call])
def test_make_partitions_with_ephemeral(self, mock_exc): def test_make_partitions_with_ephemeral(self, mock_exc):
self.ephemeral_mb = 2048 self.ephemeral_mb = 2048
@ -518,10 +521,12 @@ class MakePartitionsTestCase(tests_base.TestCase):
'mkpart', 'primary', 'linux-swap', '2049', '2561', 'mkpart', 'primary', 'linux-swap', '2049', '2561',
'mkpart', 'primary', '', '2561', '3585'] 'mkpart', 'primary', '', '2561', '3585']
cmd = self.parted_static_cmd + expected_mkpart cmd = self.parted_static_cmd + expected_mkpart
mock_exc.return_value = (None, None)
utils.make_partitions(self.dev, self.root_mb, self.swap_mb, utils.make_partitions(self.dev, self.root_mb, self.swap_mb,
self.ephemeral_mb) self.ephemeral_mb)
mock_exc.assert_called_once_with(*cmd, run_as_root=True,
check_exit_code=[0]) parted_call = mock.call(*cmd, run_as_root=True, check_exit_code=[0])
mock_exc.assert_has_calls(parted_call)
@mock.patch.object(utils, 'get_dev_block_size') @mock.patch.object(utils, 'get_dev_block_size')
@ -592,8 +597,6 @@ class GetDeviceBlockSizeTestCase(tests_base.TestCase):
@mock.patch.object(utils, 'block_uuid', lambda p: 'uuid') @mock.patch.object(utils, 'block_uuid', lambda p: 'uuid')
@mock.patch.object(utils, 'dd', lambda *_: None) @mock.patch.object(utils, 'dd', lambda *_: None)
@mock.patch.object(common_utils, 'mkfs', lambda *_: None) @mock.patch.object(common_utils, 'mkfs', lambda *_: None)
# TODO(dtantsur): remove once https://review.openstack.org/90126 lands
@mock.patch.object(time, 'sleep', lambda *_: None)
# NOTE(dtantsur): destroy_disk_metadata resets file size, disabling it # NOTE(dtantsur): destroy_disk_metadata resets file size, disabling it
@mock.patch.object(utils, 'destroy_disk_metadata', lambda *_: None) @mock.patch.object(utils, 'destroy_disk_metadata', lambda *_: None)
class RealFilePartitioningTestCase(tests_base.TestCase): class RealFilePartitioningTestCase(tests_base.TestCase):
@ -611,8 +614,10 @@ class RealFilePartitioningTestCase(tests_base.TestCase):
common_utils.execute('parted', '--version') common_utils.execute('parted', '--version')
except OSError as exc: except OSError as exc:
self.skipTest('parted utility was not found: %s' % exc) self.skipTest('parted utility was not found: %s' % exc)
self.file = tempfile.NamedTemporaryFile() self.file = tempfile.NamedTemporaryFile(delete=False)
self.addCleanup(lambda: self.file.close()) # NOTE(ifarkas): the file needs to be closed, so fuser won't report
# any usage
self.file.close()
# NOTE(dtantsur): 20 MiB file with zeros # NOTE(dtantsur): 20 MiB file with zeros
common_utils.execute('dd', 'if=/dev/zero', 'of=%s' % self.file.name, common_utils.execute('dd', 'if=/dev/zero', 'of=%s' % self.file.name,
'bs=1', 'count=0', 'seek=20MiB') 'bs=1', 'count=0', 'seek=20MiB')

View File

@ -13,20 +13,27 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import time import fixtures
import mock import mock
from testtools.matchers import HasLength from testtools.matchers import HasLength
from ironic.common import disk_partitioner from ironic.common import disk_partitioner
from ironic.common import exception
from ironic.common import utils from ironic.common import utils
from ironic.tests import base from ironic.tests import base
@mock.patch.object(time, 'sleep', lambda _: None)
@mock.patch.object(utils, 'execute', lambda _: None)
class DiskPartitionerTestCase(base.TestCase): class DiskPartitionerTestCase(base.TestCase):
def setUp(self):
super(DiskPartitionerTestCase, self).setUp()
def noop(*args, **kwargs):
pass
self.useFixture(fixtures.MonkeyPatch('eventlet.greenthread.sleep',
noop))
def test_add_partition(self): def test_add_partition(self):
dp = disk_partitioner.DiskPartitioner('/dev/fake') dp = disk_partitioner.DiskPartitioner('/dev/fake')
dp.add_partition(1024) dp.add_partition(1024)
@ -49,7 +56,8 @@ class DiskPartitionerTestCase(base.TestCase):
self.assertEqual(expected, partitions) self.assertEqual(expected, partitions)
@mock.patch.object(disk_partitioner.DiskPartitioner, '_exec') @mock.patch.object(disk_partitioner.DiskPartitioner, '_exec')
def test_commit(self, mock_exec): @mock.patch.object(utils, 'execute')
def test_commit(self, mock_utils_exc, mock_disk_partitioner_exec):
dp = disk_partitioner.DiskPartitioner('/dev/fake') dp = disk_partitioner.DiskPartitioner('/dev/fake')
fake_parts = [(1, {'bootable': False, fake_parts = [(1, {'bootable': False,
'fs_type': 'fake-fs-type', 'fs_type': 'fake-fs-type',
@ -61,11 +69,98 @@ class DiskPartitionerTestCase(base.TestCase):
'size': 1})] 'size': 1})]
with mock.patch.object(dp, 'get_partitions') as mock_gp: with mock.patch.object(dp, 'get_partitions') as mock_gp:
mock_gp.return_value = fake_parts mock_gp.return_value = fake_parts
mock_utils_exc.return_value = (None, None)
dp.commit() dp.commit()
mock_exec.assert_called_once_with('mklabel', 'msdos',
mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos',
'mkpart', 'fake-type', 'fake-fs-type', '1', '2', 'mkpart', 'fake-type', 'fake-fs-type', '1', '2',
'mkpart', 'fake-type', 'fake-fs-type', '2', '3', 'mkpart', 'fake-type', 'fake-fs-type', '2', '3',
'set', '2', 'boot', 'on') 'set', '2', 'boot', 'on')
mock_utils_exc.assert_called_once_with('fuser', '/dev/fake',
run_as_root=True, check_exit_code=[0, 1])
@mock.patch.object(disk_partitioner.DiskPartitioner, '_exec')
@mock.patch.object(utils, 'execute')
def test_commit_with_device_is_busy_once(self, mock_utils_exc,
mock_disk_partitioner_exec):
dp = disk_partitioner.DiskPartitioner('/dev/fake')
fake_parts = [(1, {'bootable': False,
'fs_type': 'fake-fs-type',
'type': 'fake-type',
'size': 1}),
(2, {'bootable': True,
'fs_type': 'fake-fs-type',
'type': 'fake-type',
'size': 1})]
fuser_outputs = [("/dev/fake: 10000 10001", None), (None, None)]
with mock.patch.object(dp, 'get_partitions') as mock_gp:
mock_gp.return_value = fake_parts
mock_utils_exc.side_effect = fuser_outputs
dp.commit()
mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos',
'mkpart', 'fake-type', 'fake-fs-type', '1', '2',
'mkpart', 'fake-type', 'fake-fs-type', '2', '3',
'set', '2', 'boot', 'on')
mock_utils_exc.assert_called_with('fuser', '/dev/fake',
run_as_root=True, check_exit_code=[0, 1])
self.assertEqual(2, mock_utils_exc.call_count)
@mock.patch.object(disk_partitioner.DiskPartitioner, '_exec')
@mock.patch.object(utils, 'execute')
def test_commit_with_device_is_always_busy(self, mock_utils_exc,
mock_disk_partitioner_exec):
dp = disk_partitioner.DiskPartitioner('/dev/fake')
fake_parts = [(1, {'bootable': False,
'fs_type': 'fake-fs-type',
'type': 'fake-type',
'size': 1}),
(2, {'bootable': True,
'fs_type': 'fake-fs-type',
'type': 'fake-type',
'size': 1})]
with mock.patch.object(dp, 'get_partitions') as mock_gp:
mock_gp.return_value = fake_parts
mock_utils_exc.return_value = ("/dev/fake: 10000 10001", None)
self.assertRaises(exception.InstanceDeployFailure, dp.commit)
mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos',
'mkpart', 'fake-type', 'fake-fs-type', '1', '2',
'mkpart', 'fake-type', 'fake-fs-type', '2', '3',
'set', '2', 'boot', 'on')
mock_utils_exc.assert_called_with('fuser', '/dev/fake',
run_as_root=True, check_exit_code=[0, 1])
self.assertEqual(20, mock_utils_exc.call_count)
@mock.patch.object(disk_partitioner.DiskPartitioner, '_exec')
@mock.patch.object(utils, 'execute')
def test_commit_with_device_disconnected(self, mock_utils_exc,
mock_disk_partitioner_exec):
dp = disk_partitioner.DiskPartitioner('/dev/fake')
fake_parts = [(1, {'bootable': False,
'fs_type': 'fake-fs-type',
'type': 'fake-type',
'size': 1}),
(2, {'bootable': True,
'fs_type': 'fake-fs-type',
'type': 'fake-type',
'size': 1})]
with mock.patch.object(dp, 'get_partitions') as mock_gp:
mock_gp.return_value = fake_parts
mock_utils_exc.return_value = (None, "Specified filename /dev/fake"
" does not exist.")
self.assertRaises(exception.InstanceDeployFailure, dp.commit)
mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos',
'mkpart', 'fake-type', 'fake-fs-type', '1', '2',
'mkpart', 'fake-type', 'fake-fs-type', '2', '3',
'set', '2', 'boot', 'on')
mock_utils_exc.assert_called_with('fuser', '/dev/fake',
run_as_root=True, check_exit_code=[0, 1])
self.assertEqual(20, mock_utils_exc.call_count)
@mock.patch.object(utils, 'execute') @mock.patch.object(utils, 'execute')