Fix workaround for the "device is busy" problem
Right after partitioning, we have a sleep(3) to avoid the "device is busy" problem. A less error-prone solution is to check with fuser, whether there is any process currently using the disk. The following config options are introduced under the 'disk_partitioner' group: - check_device_interval: seconds between consecutive checks for the device status - check_device_max_retries: number of retries for checking the device status Change-Id: Icb73db5aeb16d1abc390975c2bd8bf4b486566e5 Implements: blueprint device-status-check-with-fuser Closes-Bug: #1301975
This commit is contained in:
parent
510b967870
commit
5166aab73a
@ -652,6 +652,25 @@
|
|||||||
#db_max_retries=20
|
#db_max_retries=20
|
||||||
|
|
||||||
|
|
||||||
|
[disk_partitioner]
|
||||||
|
|
||||||
|
#
|
||||||
|
# Options defined in ironic.common.disk_partitioner
|
||||||
|
#
|
||||||
|
|
||||||
|
# After Ironic has completed creating the partition table, it
|
||||||
|
# continues to check for activity on the attached iSCSI device
|
||||||
|
# status at this interval prior to copying the image to the
|
||||||
|
# node, in seconds (integer value)
|
||||||
|
#check_device_interval=1
|
||||||
|
|
||||||
|
# The maximum number of times to check that the device is not
|
||||||
|
# accessed by another process. If the device is still busy
|
||||||
|
# after that, the disk partitioning will be treated as having
|
||||||
|
# failed. (integer value)
|
||||||
|
#check_device_max_retries=20
|
||||||
|
|
||||||
|
|
||||||
[glance]
|
[glance]
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -13,4 +13,5 @@ mkswap: CommandFilter, mkswap, root
|
|||||||
mkfs: CommandFilter, mkfs, root
|
mkfs: CommandFilter, mkfs, root
|
||||||
|
|
||||||
# ironic/common/disk_partitioner.py
|
# ironic/common/disk_partitioner.py
|
||||||
|
fuser: CommandFilter, fuser, root
|
||||||
parted: CommandFilter, parted, root
|
parted: CommandFilter, parted, root
|
||||||
|
@ -14,11 +14,35 @@
|
|||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
|
from oslo.config import cfg
|
||||||
|
|
||||||
|
from ironic.common import exception
|
||||||
from ironic.common import utils
|
from ironic.common import utils
|
||||||
from ironic.openstack.common import log as logging
|
from ironic.openstack.common import log as logging
|
||||||
|
from ironic.openstack.common import loopingcall
|
||||||
|
from ironic.openstack.common import processutils
|
||||||
|
|
||||||
|
opts = [
|
||||||
|
cfg.IntOpt('check_device_interval',
|
||||||
|
default=1,
|
||||||
|
help='After Ironic has completed creating the partition table, '
|
||||||
|
'it continues to check for activity on the attached iSCSI '
|
||||||
|
'device status at this interval prior to copying the image'
|
||||||
|
' to the node, in seconds'),
|
||||||
|
cfg.IntOpt('check_device_max_retries',
|
||||||
|
default=20,
|
||||||
|
help='The maximum number of times to check that the device is '
|
||||||
|
'not accessed by another process. If the device is still '
|
||||||
|
'busy after that, the disk partitioning will be treated as'
|
||||||
|
' having failed.'),
|
||||||
|
]
|
||||||
|
|
||||||
|
CONF = cfg.CONF
|
||||||
|
opt_group = cfg.OptGroup(name='disk_partitioner',
|
||||||
|
title='Options for the disk partitioner')
|
||||||
|
CONF.register_group(opt_group)
|
||||||
|
CONF.register_opts(opts, opt_group)
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -41,6 +65,7 @@ class DiskPartitioner(object):
|
|||||||
self._disk_label = disk_label
|
self._disk_label = disk_label
|
||||||
self._alignment = alignment
|
self._alignment = alignment
|
||||||
self._partitions = []
|
self._partitions = []
|
||||||
|
self._fuser_pids_re = re.compile(r'((\d)+\s*)+')
|
||||||
|
|
||||||
def _exec(self, *args):
|
def _exec(self, *args):
|
||||||
# NOTE(lucasagomes): utils.execute() is already a wrapper on top
|
# NOTE(lucasagomes): utils.execute() is already a wrapper on top
|
||||||
@ -82,6 +107,30 @@ class DiskPartitioner(object):
|
|||||||
"""
|
"""
|
||||||
return enumerate(self._partitions, 1)
|
return enumerate(self._partitions, 1)
|
||||||
|
|
||||||
|
def _wait_for_disk_to_become_available(self, retries, max_retries, pids,
|
||||||
|
stderr):
|
||||||
|
retries[0] += 1
|
||||||
|
if retries[0] > max_retries:
|
||||||
|
raise loopingcall.LoopingCallDone()
|
||||||
|
|
||||||
|
try:
|
||||||
|
# NOTE(ifarkas): fuser returns a non-zero return code if none of
|
||||||
|
# the specified files is accessed
|
||||||
|
out, err = utils.execute('fuser', self._device,
|
||||||
|
check_exit_code=[0, 1], run_as_root=True)
|
||||||
|
|
||||||
|
if not out and not err:
|
||||||
|
raise loopingcall.LoopingCallDone()
|
||||||
|
else:
|
||||||
|
if err:
|
||||||
|
stderr[0] = err
|
||||||
|
if out:
|
||||||
|
pids_match = re.search(self._fuser_pids_re, out)
|
||||||
|
pids[0] = pids_match.group()
|
||||||
|
except processutils.ProcessExecutionError as exc:
|
||||||
|
LOG.warning(_('Failed to check the device %(device)s with fuser: '
|
||||||
|
' %(err)s') % {'device': self._device, 'err': exc})
|
||||||
|
|
||||||
def commit(self):
|
def commit(self):
|
||||||
"""Write to the disk."""
|
"""Write to the disk."""
|
||||||
cmd_args = ['mklabel', self._disk_label]
|
cmd_args = ['mklabel', self._disk_label]
|
||||||
@ -97,10 +146,31 @@ class DiskPartitioner(object):
|
|||||||
start = end
|
start = end
|
||||||
|
|
||||||
self._exec(*cmd_args)
|
self._exec(*cmd_args)
|
||||||
# TODO(lucasagomes): Do not sleep, use another mechanism to avoid
|
|
||||||
# the "device is busy" problem. lsof, fuser...
|
retries = [0]
|
||||||
# avoid "device is busy"
|
pids = ['']
|
||||||
time.sleep(3)
|
fuser_err = ['']
|
||||||
|
interval = CONF.disk_partitioner.check_device_interval
|
||||||
|
max_retries = CONF.disk_partitioner.check_device_max_retries
|
||||||
|
|
||||||
|
timer = loopingcall.FixedIntervalLoopingCall(
|
||||||
|
self._wait_for_disk_to_become_available,
|
||||||
|
retries, max_retries, pids, fuser_err)
|
||||||
|
timer.start(interval=interval).wait()
|
||||||
|
|
||||||
|
if retries[0] > max_retries:
|
||||||
|
if pids[0]:
|
||||||
|
raise exception.InstanceDeployFailure(
|
||||||
|
_('Disk partitioning failed on device %(device)s. '
|
||||||
|
'Processes with the following PIDs are holding it: '
|
||||||
|
'%(pids)s. Time out waiting for completion.')
|
||||||
|
% {'device': self._device, 'pids': pids[0]})
|
||||||
|
else:
|
||||||
|
raise exception.InstanceDeployFailure(
|
||||||
|
_('Disk partitioning failed on device %(device)s. Fuser '
|
||||||
|
'exited with "%(fuser_err)s". Time out waiting for '
|
||||||
|
'completion.')
|
||||||
|
% {'device': self._device, 'fuser_err': fuser_err[0]})
|
||||||
|
|
||||||
|
|
||||||
_PARTED_PRINT_RE = re.compile(r"^\d+:([\d\.]+)MiB:"
|
_PARTED_PRINT_RE = re.compile(r"^\d+:([\d\.]+)MiB:"
|
||||||
|
@ -14,10 +14,8 @@
|
|||||||
# License for the specific language governing permissions and limitations
|
# License for the specific language governing permissions and limitations
|
||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
import itertools
|
|
||||||
import time
|
|
||||||
|
|
||||||
import fixtures
|
import fixtures
|
||||||
|
import itertools
|
||||||
import mock
|
import mock
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
@ -490,7 +488,6 @@ class WorkOnDiskTestCase(tests_base.TestCase):
|
|||||||
commit=True)
|
commit=True)
|
||||||
|
|
||||||
|
|
||||||
@mock.patch.object(time, 'sleep', lambda _: None)
|
|
||||||
@mock.patch.object(common_utils, 'execute')
|
@mock.patch.object(common_utils, 'execute')
|
||||||
class MakePartitionsTestCase(tests_base.TestCase):
|
class MakePartitionsTestCase(tests_base.TestCase):
|
||||||
|
|
||||||
@ -504,13 +501,19 @@ class MakePartitionsTestCase(tests_base.TestCase):
|
|||||||
'--', 'unit', 'MiB', 'mklabel', 'msdos']
|
'--', 'unit', 'MiB', 'mklabel', 'msdos']
|
||||||
|
|
||||||
def test_make_partitions(self, mock_exc):
|
def test_make_partitions(self, mock_exc):
|
||||||
expected_mkpart = ['mkpart', 'primary', 'linux-swap', '1', '513',
|
mock_exc.return_value = (None, None)
|
||||||
'mkpart', 'primary', '', '513', '1537']
|
|
||||||
cmd = self.parted_static_cmd + expected_mkpart
|
|
||||||
utils.make_partitions(self.dev, self.root_mb, self.swap_mb,
|
utils.make_partitions(self.dev, self.root_mb, self.swap_mb,
|
||||||
self.ephemeral_mb)
|
self.ephemeral_mb)
|
||||||
mock_exc.assert_called_once_with(*cmd, run_as_root=True,
|
|
||||||
|
expected_mkpart = ['mkpart', 'primary', 'linux-swap', '1', '513',
|
||||||
|
'mkpart', 'primary', '', '513', '1537']
|
||||||
|
parted_cmd = self.parted_static_cmd + expected_mkpart
|
||||||
|
parted_call = mock.call(*parted_cmd, run_as_root=True,
|
||||||
check_exit_code=[0])
|
check_exit_code=[0])
|
||||||
|
fuser_cmd = ['fuser', 'fake-dev']
|
||||||
|
fuser_call = mock.call(*fuser_cmd, run_as_root=True,
|
||||||
|
check_exit_code=[0, 1])
|
||||||
|
mock_exc.assert_has_calls([parted_call, fuser_call])
|
||||||
|
|
||||||
def test_make_partitions_with_ephemeral(self, mock_exc):
|
def test_make_partitions_with_ephemeral(self, mock_exc):
|
||||||
self.ephemeral_mb = 2048
|
self.ephemeral_mb = 2048
|
||||||
@ -518,10 +521,12 @@ class MakePartitionsTestCase(tests_base.TestCase):
|
|||||||
'mkpart', 'primary', 'linux-swap', '2049', '2561',
|
'mkpart', 'primary', 'linux-swap', '2049', '2561',
|
||||||
'mkpart', 'primary', '', '2561', '3585']
|
'mkpart', 'primary', '', '2561', '3585']
|
||||||
cmd = self.parted_static_cmd + expected_mkpart
|
cmd = self.parted_static_cmd + expected_mkpart
|
||||||
|
mock_exc.return_value = (None, None)
|
||||||
utils.make_partitions(self.dev, self.root_mb, self.swap_mb,
|
utils.make_partitions(self.dev, self.root_mb, self.swap_mb,
|
||||||
self.ephemeral_mb)
|
self.ephemeral_mb)
|
||||||
mock_exc.assert_called_once_with(*cmd, run_as_root=True,
|
|
||||||
check_exit_code=[0])
|
parted_call = mock.call(*cmd, run_as_root=True, check_exit_code=[0])
|
||||||
|
mock_exc.assert_has_calls(parted_call)
|
||||||
|
|
||||||
|
|
||||||
@mock.patch.object(utils, 'get_dev_block_size')
|
@mock.patch.object(utils, 'get_dev_block_size')
|
||||||
@ -592,8 +597,6 @@ class GetDeviceBlockSizeTestCase(tests_base.TestCase):
|
|||||||
@mock.patch.object(utils, 'block_uuid', lambda p: 'uuid')
|
@mock.patch.object(utils, 'block_uuid', lambda p: 'uuid')
|
||||||
@mock.patch.object(utils, 'dd', lambda *_: None)
|
@mock.patch.object(utils, 'dd', lambda *_: None)
|
||||||
@mock.patch.object(common_utils, 'mkfs', lambda *_: None)
|
@mock.patch.object(common_utils, 'mkfs', lambda *_: None)
|
||||||
# TODO(dtantsur): remove once https://review.openstack.org/90126 lands
|
|
||||||
@mock.patch.object(time, 'sleep', lambda *_: None)
|
|
||||||
# NOTE(dtantsur): destroy_disk_metadata resets file size, disabling it
|
# NOTE(dtantsur): destroy_disk_metadata resets file size, disabling it
|
||||||
@mock.patch.object(utils, 'destroy_disk_metadata', lambda *_: None)
|
@mock.patch.object(utils, 'destroy_disk_metadata', lambda *_: None)
|
||||||
class RealFilePartitioningTestCase(tests_base.TestCase):
|
class RealFilePartitioningTestCase(tests_base.TestCase):
|
||||||
@ -611,8 +614,10 @@ class RealFilePartitioningTestCase(tests_base.TestCase):
|
|||||||
common_utils.execute('parted', '--version')
|
common_utils.execute('parted', '--version')
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
self.skipTest('parted utility was not found: %s' % exc)
|
self.skipTest('parted utility was not found: %s' % exc)
|
||||||
self.file = tempfile.NamedTemporaryFile()
|
self.file = tempfile.NamedTemporaryFile(delete=False)
|
||||||
self.addCleanup(lambda: self.file.close())
|
# NOTE(ifarkas): the file needs to be closed, so fuser won't report
|
||||||
|
# any usage
|
||||||
|
self.file.close()
|
||||||
# NOTE(dtantsur): 20 MiB file with zeros
|
# NOTE(dtantsur): 20 MiB file with zeros
|
||||||
common_utils.execute('dd', 'if=/dev/zero', 'of=%s' % self.file.name,
|
common_utils.execute('dd', 'if=/dev/zero', 'of=%s' % self.file.name,
|
||||||
'bs=1', 'count=0', 'seek=20MiB')
|
'bs=1', 'count=0', 'seek=20MiB')
|
||||||
|
@ -13,20 +13,27 @@
|
|||||||
# License for the specific language governing permissions and limitations
|
# License for the specific language governing permissions and limitations
|
||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
import time
|
import fixtures
|
||||||
|
|
||||||
import mock
|
import mock
|
||||||
from testtools.matchers import HasLength
|
from testtools.matchers import HasLength
|
||||||
|
|
||||||
from ironic.common import disk_partitioner
|
from ironic.common import disk_partitioner
|
||||||
|
from ironic.common import exception
|
||||||
from ironic.common import utils
|
from ironic.common import utils
|
||||||
from ironic.tests import base
|
from ironic.tests import base
|
||||||
|
|
||||||
|
|
||||||
@mock.patch.object(time, 'sleep', lambda _: None)
|
|
||||||
@mock.patch.object(utils, 'execute', lambda _: None)
|
|
||||||
class DiskPartitionerTestCase(base.TestCase):
|
class DiskPartitionerTestCase(base.TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
super(DiskPartitionerTestCase, self).setUp()
|
||||||
|
|
||||||
|
def noop(*args, **kwargs):
|
||||||
|
pass
|
||||||
|
|
||||||
|
self.useFixture(fixtures.MonkeyPatch('eventlet.greenthread.sleep',
|
||||||
|
noop))
|
||||||
|
|
||||||
def test_add_partition(self):
|
def test_add_partition(self):
|
||||||
dp = disk_partitioner.DiskPartitioner('/dev/fake')
|
dp = disk_partitioner.DiskPartitioner('/dev/fake')
|
||||||
dp.add_partition(1024)
|
dp.add_partition(1024)
|
||||||
@ -49,7 +56,8 @@ class DiskPartitionerTestCase(base.TestCase):
|
|||||||
self.assertEqual(expected, partitions)
|
self.assertEqual(expected, partitions)
|
||||||
|
|
||||||
@mock.patch.object(disk_partitioner.DiskPartitioner, '_exec')
|
@mock.patch.object(disk_partitioner.DiskPartitioner, '_exec')
|
||||||
def test_commit(self, mock_exec):
|
@mock.patch.object(utils, 'execute')
|
||||||
|
def test_commit(self, mock_utils_exc, mock_disk_partitioner_exec):
|
||||||
dp = disk_partitioner.DiskPartitioner('/dev/fake')
|
dp = disk_partitioner.DiskPartitioner('/dev/fake')
|
||||||
fake_parts = [(1, {'bootable': False,
|
fake_parts = [(1, {'bootable': False,
|
||||||
'fs_type': 'fake-fs-type',
|
'fs_type': 'fake-fs-type',
|
||||||
@ -61,11 +69,98 @@ class DiskPartitionerTestCase(base.TestCase):
|
|||||||
'size': 1})]
|
'size': 1})]
|
||||||
with mock.patch.object(dp, 'get_partitions') as mock_gp:
|
with mock.patch.object(dp, 'get_partitions') as mock_gp:
|
||||||
mock_gp.return_value = fake_parts
|
mock_gp.return_value = fake_parts
|
||||||
|
mock_utils_exc.return_value = (None, None)
|
||||||
dp.commit()
|
dp.commit()
|
||||||
mock_exec.assert_called_once_with('mklabel', 'msdos',
|
|
||||||
|
mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos',
|
||||||
'mkpart', 'fake-type', 'fake-fs-type', '1', '2',
|
'mkpart', 'fake-type', 'fake-fs-type', '1', '2',
|
||||||
'mkpart', 'fake-type', 'fake-fs-type', '2', '3',
|
'mkpart', 'fake-type', 'fake-fs-type', '2', '3',
|
||||||
'set', '2', 'boot', 'on')
|
'set', '2', 'boot', 'on')
|
||||||
|
mock_utils_exc.assert_called_once_with('fuser', '/dev/fake',
|
||||||
|
run_as_root=True, check_exit_code=[0, 1])
|
||||||
|
|
||||||
|
@mock.patch.object(disk_partitioner.DiskPartitioner, '_exec')
|
||||||
|
@mock.patch.object(utils, 'execute')
|
||||||
|
def test_commit_with_device_is_busy_once(self, mock_utils_exc,
|
||||||
|
mock_disk_partitioner_exec):
|
||||||
|
dp = disk_partitioner.DiskPartitioner('/dev/fake')
|
||||||
|
fake_parts = [(1, {'bootable': False,
|
||||||
|
'fs_type': 'fake-fs-type',
|
||||||
|
'type': 'fake-type',
|
||||||
|
'size': 1}),
|
||||||
|
(2, {'bootable': True,
|
||||||
|
'fs_type': 'fake-fs-type',
|
||||||
|
'type': 'fake-type',
|
||||||
|
'size': 1})]
|
||||||
|
fuser_outputs = [("/dev/fake: 10000 10001", None), (None, None)]
|
||||||
|
|
||||||
|
with mock.patch.object(dp, 'get_partitions') as mock_gp:
|
||||||
|
mock_gp.return_value = fake_parts
|
||||||
|
mock_utils_exc.side_effect = fuser_outputs
|
||||||
|
dp.commit()
|
||||||
|
|
||||||
|
mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos',
|
||||||
|
'mkpart', 'fake-type', 'fake-fs-type', '1', '2',
|
||||||
|
'mkpart', 'fake-type', 'fake-fs-type', '2', '3',
|
||||||
|
'set', '2', 'boot', 'on')
|
||||||
|
mock_utils_exc.assert_called_with('fuser', '/dev/fake',
|
||||||
|
run_as_root=True, check_exit_code=[0, 1])
|
||||||
|
self.assertEqual(2, mock_utils_exc.call_count)
|
||||||
|
|
||||||
|
@mock.patch.object(disk_partitioner.DiskPartitioner, '_exec')
|
||||||
|
@mock.patch.object(utils, 'execute')
|
||||||
|
def test_commit_with_device_is_always_busy(self, mock_utils_exc,
|
||||||
|
mock_disk_partitioner_exec):
|
||||||
|
dp = disk_partitioner.DiskPartitioner('/dev/fake')
|
||||||
|
fake_parts = [(1, {'bootable': False,
|
||||||
|
'fs_type': 'fake-fs-type',
|
||||||
|
'type': 'fake-type',
|
||||||
|
'size': 1}),
|
||||||
|
(2, {'bootable': True,
|
||||||
|
'fs_type': 'fake-fs-type',
|
||||||
|
'type': 'fake-type',
|
||||||
|
'size': 1})]
|
||||||
|
|
||||||
|
with mock.patch.object(dp, 'get_partitions') as mock_gp:
|
||||||
|
mock_gp.return_value = fake_parts
|
||||||
|
mock_utils_exc.return_value = ("/dev/fake: 10000 10001", None)
|
||||||
|
self.assertRaises(exception.InstanceDeployFailure, dp.commit)
|
||||||
|
|
||||||
|
mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos',
|
||||||
|
'mkpart', 'fake-type', 'fake-fs-type', '1', '2',
|
||||||
|
'mkpart', 'fake-type', 'fake-fs-type', '2', '3',
|
||||||
|
'set', '2', 'boot', 'on')
|
||||||
|
mock_utils_exc.assert_called_with('fuser', '/dev/fake',
|
||||||
|
run_as_root=True, check_exit_code=[0, 1])
|
||||||
|
self.assertEqual(20, mock_utils_exc.call_count)
|
||||||
|
|
||||||
|
@mock.patch.object(disk_partitioner.DiskPartitioner, '_exec')
|
||||||
|
@mock.patch.object(utils, 'execute')
|
||||||
|
def test_commit_with_device_disconnected(self, mock_utils_exc,
|
||||||
|
mock_disk_partitioner_exec):
|
||||||
|
dp = disk_partitioner.DiskPartitioner('/dev/fake')
|
||||||
|
fake_parts = [(1, {'bootable': False,
|
||||||
|
'fs_type': 'fake-fs-type',
|
||||||
|
'type': 'fake-type',
|
||||||
|
'size': 1}),
|
||||||
|
(2, {'bootable': True,
|
||||||
|
'fs_type': 'fake-fs-type',
|
||||||
|
'type': 'fake-type',
|
||||||
|
'size': 1})]
|
||||||
|
|
||||||
|
with mock.patch.object(dp, 'get_partitions') as mock_gp:
|
||||||
|
mock_gp.return_value = fake_parts
|
||||||
|
mock_utils_exc.return_value = (None, "Specified filename /dev/fake"
|
||||||
|
" does not exist.")
|
||||||
|
self.assertRaises(exception.InstanceDeployFailure, dp.commit)
|
||||||
|
|
||||||
|
mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos',
|
||||||
|
'mkpart', 'fake-type', 'fake-fs-type', '1', '2',
|
||||||
|
'mkpart', 'fake-type', 'fake-fs-type', '2', '3',
|
||||||
|
'set', '2', 'boot', 'on')
|
||||||
|
mock_utils_exc.assert_called_with('fuser', '/dev/fake',
|
||||||
|
run_as_root=True, check_exit_code=[0, 1])
|
||||||
|
self.assertEqual(20, mock_utils_exc.call_count)
|
||||||
|
|
||||||
|
|
||||||
@mock.patch.object(utils, 'execute')
|
@mock.patch.object(utils, 'execute')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user