From da0b679764cd8cf98d7245b393aabc49d964cce1 Mon Sep 17 00:00:00 2001 From: Vladimir Kozhukalov Date: Fri, 10 Oct 2014 17:58:38 +0400 Subject: [PATCH] Removed sleep in execute util in fuel-agent Sometimes when parted performs some action it fails to re-read updated partition table on a disk. It occurs mostly when you run parted command several times without delay. We had sleep(1) in execute utility as a fast workaround. Looks like the issue can be addressed with just additional call or partprobe right after failed parted command. This patch impelemnts this assumption. But it is still not a real fix, it is again just a workaround. Real fix is to launch parted one time for every disk passing to it a whole table (all partition at once). We are going to implement this as soon as possible, right after addressing most prioritized issues. Change-Id: I539800271260c3d3f1b3560f8111ba8b6cacc9c4 Implements: blueprint image-based-provisioning --- fuel_agent/tests/test_partition_utils.py | 57 ++++++++++++++++---- fuel_agent/utils/partition_utils.py | 69 ++++++++++++++++++++---- fuel_agent/utils/utils.py | 5 -- 3 files changed, 107 insertions(+), 24 deletions(-) diff --git a/fuel_agent/tests/test_partition_utils.py b/fuel_agent/tests/test_partition_utils.py index 207cbc0..c1da8f4 100644 --- a/fuel_agent/tests/test_partition_utils.py +++ b/fuel_agent/tests/test_partition_utils.py @@ -29,22 +29,28 @@ class TestPartitionUtils(test_base.BaseTestCase): pu.wipe('/dev/fake') mock_label.assert_called_once_with('/dev/fake') + @mock.patch.object(pu, 'reread_partitions') @mock.patch.object(utils, 'execute') - def test_make_label(self, mock_exec): + def test_make_label(self, mock_exec, mock_rerd): # should run parted OS command # in order to create label on a device + mock_exec.return_value = ('out', '') # gpt by default pu.make_label('/dev/fake') mock_exec.assert_called_once_with( - 'parted', '-s', '/dev/fake', 'mklabel', 'gpt', check_exit_code=[0]) + 'parted', '-s', '/dev/fake', + 'mklabel', 'gpt', check_exit_code=[0, 1]) + mock_rerd.assert_called_once_with('/dev/fake', out='out') mock_exec.reset_mock() + mock_rerd.reset_mock() # label is set explicitly pu.make_label('/dev/fake', label='msdos') mock_exec.assert_called_once_with( 'parted', '-s', '/dev/fake', - 'mklabel', 'msdos', check_exit_code=[0]) + 'mklabel', 'msdos', check_exit_code=[0, 1]) + mock_rerd.assert_called_once_with('/dev/fake', out='out') def test_make_label_wrong_label(self): # should check if label is valid @@ -52,23 +58,28 @@ class TestPartitionUtils(test_base.BaseTestCase): self.assertRaises(errors.WrongPartitionLabelError, pu.make_label, '/dev/fake', 'wrong') + @mock.patch.object(pu, 'reread_partitions') @mock.patch.object(utils, 'execute') - def test_set_partition_flag(self, mock_exec): + def test_set_partition_flag(self, mock_exec, mock_rerd): # should run parted OS command # in order to set flag on a partition + mock_exec.return_value = ('out', '') # default state is 'on' pu.set_partition_flag('/dev/fake', 1, 'boot') mock_exec.assert_called_once_with( 'parted', '-s', '/dev/fake', 'set', '1', 'boot', 'on', - check_exit_code=[0]) + check_exit_code=[0, 1]) + mock_rerd.assert_called_once_with('/dev/fake', out='out') mock_exec.reset_mock() + mock_rerd.reset_mock() # if state argument is given use it pu.set_partition_flag('/dev/fake', 1, 'boot', state='off') mock_exec.assert_called_once_with( 'parted', '-s', '/dev/fake', 'set', '1', 'boot', 'off', - check_exit_code=[0]) + check_exit_code=[0, 1]) + mock_rerd.assert_called_once_with('/dev/fake', out='out') @mock.patch.object(utils, 'execute') def test_set_partition_flag_wrong_flag(self, mock_exec): @@ -86,11 +97,14 @@ class TestPartitionUtils(test_base.BaseTestCase): pu.set_partition_flag, '/dev/fake', 1, 'boot', state='wrong') + @mock.patch.object(pu, 'reread_partitions') @mock.patch.object(pu, 'info') @mock.patch.object(utils, 'execute') - def test_make_partition(self, mock_exec, mock_info): + def test_make_partition(self, mock_exec, mock_info, mock_rerd): # should run parted OS command # in order to create new partition + mock_exec.return_value = ('out', '') + mock_info.return_value = { 'parts': [ {'begin': 0, 'end': 1000, 'fstype': 'free'}, @@ -103,7 +117,8 @@ class TestPartitionUtils(test_base.BaseTestCase): '-s', '/dev/fake', 'unit', 'MiB', 'mkpart', 'primary', '100', '200', - check_exit_code=[0]) + check_exit_code=[0, 1]) + mock_rerd.assert_called_once_with('/dev/fake', out='out') @mock.patch.object(utils, 'execute') def test_make_partition_wrong_ptype(self, mock_exec): @@ -141,11 +156,13 @@ class TestPartitionUtils(test_base.BaseTestCase): self.assertEqual(mock_info.call_args_list, [mock.call('/dev/fake')] * 3) + @mock.patch.object(pu, 'reread_partitions') @mock.patch.object(pu, 'info') @mock.patch.object(utils, 'execute') - def test_remove_partition(self, mock_exec, mock_info): + def test_remove_partition(self, mock_exec, mock_info, mock_rerd): # should run parted OS command # in order to remove partition + mock_exec.return_value = ('out', '') mock_info.return_value = { 'parts': [ { @@ -167,6 +184,7 @@ class TestPartitionUtils(test_base.BaseTestCase): pu.remove_partition('/dev/fake', 1) mock_exec.assert_called_once_with( 'parted', '-s', '/dev/fake', 'rm', '1', check_exit_code=[0]) + mock_rerd.assert_called_once_with('/dev/fake', out='out') @mock.patch.object(pu, 'info') @mock.patch.object(utils, 'execute') @@ -231,3 +249,24 @@ class TestPartitionUtils(test_base.BaseTestCase): mock_exec.assert_called_once_with('parted', '-s', '/dev/fake', '-m', 'unit', 'MiB', 'print', 'free', check_exit_code=[0, 1]) + + @mock.patch.object(utils, 'execute') + def test_reread_partitions_ok(self, mock_exec): + pu.reread_partitions('/dev/fake', out='') + self.assertEqual(mock_exec.call_args_list, []) + + @mock.patch.object(utils, 'execute') + def test_reread_partitions_device_busy(self, mock_exec): + mock_exec.return_value = ('', '') + pu.reread_partitions('/dev/fake', out='_Device or resource busy_') + mock_exec_expected = [ + mock.call('partprobe', '/dev/fake', check_exit_code=[0, 1]), + mock.call('partx', '-a', '/dev/fake', check_exit_code=[0, 1]) + ] + self.assertEqual(mock_exec.call_args_list, mock_exec_expected) + + @mock.patch.object(utils, 'execute') + def test_reread_partitions_timeout(self, mock_exec): + self.assertRaises(errors.BaseError, pu.reread_partitions, + '/dev/fake', out='Device or resource busy', + timeout=-40) diff --git a/fuel_agent/utils/partition_utils.py b/fuel_agent/utils/partition_utils.py index abbaa07..f911d89 100644 --- a/fuel_agent/utils/partition_utils.py +++ b/fuel_agent/utils/partition_utils.py @@ -12,9 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import time + from fuel_agent import errors +from fuel_agent.openstack.common import log as logging from fuel_agent.utils import utils +LOG = logging.getLogger(__name__) + def parse_partition_info(output): lines = output.split('\n') @@ -48,11 +53,16 @@ def info(dev): 'unit', 'MiB', 'print', 'free', check_exit_code=[0, 1])[0] - return parse_partition_info(output) + LOG.debug('Info output: \n%s' % output) + result = parse_partition_info(output) + LOG.debug('Info result: %s' % result) + return result def wipe(dev): # making an empty new table is equivalent to wiping the old one + LOG.debug('Wiping partition table on %s (we assume it is equal ' + 'to creating a new one)' % dev) make_label(dev) @@ -64,11 +74,15 @@ def make_label(dev, label='gpt'): :returns: None """ + LOG.debug('Trying to create %s partition table on device %s' % + (label, dev)) if label not in ('gpt', 'msdos'): raise errors.WrongPartitionLabelError( 'Wrong partition label type: %s' % label) - utils.execute('parted', '-s', dev, 'mklabel', label, - check_exit_code=[0]) + out, err = utils.execute('parted', '-s', dev, 'mklabel', label, + check_exit_code=[0, 1]) + LOG.debug('Parted output: \n%s' % out) + reread_partitions(dev, out=out) def set_partition_flag(dev, num, flag, state='on'): @@ -82,6 +96,8 @@ def set_partition_flag(dev, num, flag, state='on'): :returns: None """ + LOG.debug('Trying to set partition flag: dev=%s num=%s flag=%s state=%s' % + (dev, num, flag, state)) # parted supports more flags but we are interested in # setting only this subset of them. # not all of these flags are compatible with one another. @@ -91,8 +107,10 @@ def set_partition_flag(dev, num, flag, state='on'): if state not in ('on', 'off'): raise errors.WrongPartitionSchemeError( 'Wrong partition flag state: %s' % state) - utils.execute('parted', '-s', dev, 'set', str(num), - flag, state, check_exit_code=[0]) + out, err = utils.execute('parted', '-s', dev, 'set', str(num), + flag, state, check_exit_code=[0, 1]) + LOG.debug('Parted output: \n%s' % out) + reread_partitions(dev, out=out) def set_gpt_type(dev, num, type_guid): @@ -107,11 +125,15 @@ def set_gpt_type(dev, num, type_guid): :returns: None """ # TODO(kozhukalov): check whether type_guid is valid + LOG.debug('Setting partition GUID: dev=%s num=%s guid=%s' % + (dev, num, type_guid)) utils.execute('sgdisk', '--typecode=%s:%s' % (num, type_guid), dev, check_exit_code=[0]) def make_partition(dev, begin, end, ptype): + LOG.debug('Trying to create a partition: dev=%s begin=%s end=%s' % + (dev, begin, end)) if ptype not in ('primary', 'logical'): raise errors.WrongPartitionSchemeError( 'Wrong partition type: %s' % ptype) @@ -126,15 +148,42 @@ def make_partition(dev, begin, end, ptype): end <= x['end'] for x in info(dev)['parts']): raise errors.WrongPartitionSchemeError( 'Invalid boundaries: begin and end ' - 'are not inside available free space' - ) + 'are not inside available free space') - utils.execute('parted', '-a', 'optimal', '-s', dev, 'unit', 'MiB', - 'mkpart', ptype, str(begin), str(end), check_exit_code=[0]) + out, err = utils.execute( + 'parted', '-a', 'optimal', '-s', dev, 'unit', 'MiB', + 'mkpart', ptype, str(begin), str(end), check_exit_code=[0, 1]) + LOG.debug('Parted output: \n%s' % out) + reread_partitions(dev, out=out) def remove_partition(dev, num): + LOG.debug('Trying to remove partition: dev=%s num=%s' % (dev, num)) if not any(x['fstype'] != 'free' and x['num'] == num for x in info(dev)['parts']): raise errors.PartitionNotFoundError('Partition %s not found' % num) - utils.execute('parted', '-s', dev, 'rm', str(num), check_exit_code=[0]) + out, err = utils.execute('parted', '-s', dev, 'rm', + str(num), check_exit_code=[0]) + reread_partitions(dev, out=out) + + +def reread_partitions(dev, out='Device or resource busy', timeout=30): + # The reason for this method to exist is that old versions of parted + # use ioctl(fd, BLKRRPART, NULL) to tell Linux to re-read partitions. + # This system call does not work sometimes. So we try to re-read partition + # table several times. Besides partprobe uses BLKPG instead, which + # is better than BLKRRPART for this case. BLKRRPART tells Linux to re-read + # partitions while BLKPG tells Linux which partitions are available + # BLKPG is usually used as a fallback system call. + begin = time.time() + while 'Device or resource busy' in out: + if time.time() > begin + timeout: + raise errors.BaseError('Unable to re-read partition table on' + 'device %s' % dev) + LOG.debug('Last time output contained "Device or resource busy". ' + 'Trying to re-read partition table on device %s' % dev) + out, err = utils.execute('partprobe', dev, check_exit_code=[0, 1]) + LOG.debug('Partprobe output: \n%s' % out) + pout, perr = utils.execute('partx', '-a', dev, check_exit_code=[0, 1]) + LOG.debug('Partx output: \n%s' % pout) + time.sleep(1) diff --git a/fuel_agent/utils/utils.py b/fuel_agent/utils/utils.py index f7b79e3..3d03f4e 100644 --- a/fuel_agent/utils/utils.py +++ b/fuel_agent/utils/utils.py @@ -18,7 +18,6 @@ import os import re import shlex import subprocess -import time import jinja2 import stevedore.driver @@ -75,10 +74,6 @@ def execute(*cmd, **kwargs): stderr=e, cmd=command) if len(process) >= 2: process[-2].stdout.close() - #FIXME(agordeev): added sleep for preventing parted failures - #TODO(agordeev): figure out the better way to be ensure that partition - # information was updated properly - time.sleep(1) stdout, stderr = process[-1].communicate() if not ignore_exit_code and process[-1].returncode not in check_exit_code: raise errors.ProcessExecutionError(exit_code=process[-1].returncode,