IPA: Do a soft power off at the end of deployment
For IPA we currently do a hard reboot at the end of the deployment, but that doesn't always play nice with some systems specially when they have to deal with UEFI. So this patch is changing the code in Ironic to actually tell the ramdisk to power off itself instead of hard powering off the machine from BMC. This will give the kernel the necessary time to finish sync'ing up whatever it has to sync before rebooting the node. Depends-On: I2a5f984af26bbbe03002bb8c367c8c6af8d91434 Co-Authored-By: Lucas Alvares Gomes <lucasagomes@gmail.com> Closes-Bug: #1451310 Change-Id: I831d8dc1d15c82a0caab94173c2b9f147017500f
This commit is contained in:
parent
309707bab0
commit
7adbf0622c
@ -22,6 +22,7 @@ import time
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log
|
||||
from oslo_utils import excutils
|
||||
import retrying
|
||||
|
||||
from ironic.common import boot_devices
|
||||
from ironic.common import exception
|
||||
@ -43,6 +44,15 @@ agent_opts = [
|
||||
cfg.IntOpt('heartbeat_timeout',
|
||||
default=300,
|
||||
help='Maximum interval (in seconds) for agent heartbeats.'),
|
||||
cfg.IntOpt('post_deploy_get_power_state_retries',
|
||||
default=6,
|
||||
help='Number of times to retry getting power state to check '
|
||||
'if bare metal node has been powered off after a soft '
|
||||
'power off.'),
|
||||
cfg.IntOpt('post_deploy_get_power_state_retry_interval',
|
||||
default=5,
|
||||
help='Amount of time (in seconds) to wait between polling '
|
||||
'power state after trigger soft poweroff.'),
|
||||
]
|
||||
|
||||
CONF = cfg.CONF
|
||||
@ -438,11 +448,37 @@ class BaseAgentVendor(base.VendorInterface):
|
||||
:param task: a TaskManager object containing the node
|
||||
:raises: InstanceDeployFailure, if node reboot failed.
|
||||
"""
|
||||
wait = CONF.agent.post_deploy_get_power_state_retry_interval * 1000
|
||||
attempts = CONF.agent.post_deploy_get_power_state_retries + 1
|
||||
|
||||
@retrying.retry(
|
||||
stop_max_attempt_number=attempts,
|
||||
retry_on_result=lambda state: state != states.POWER_OFF,
|
||||
wait_fixed=wait
|
||||
)
|
||||
def _wait_until_powered_off(task):
|
||||
return task.driver.power.get_power_state(task)
|
||||
|
||||
node = task.node
|
||||
|
||||
try:
|
||||
manager_utils.node_power_action(task, states.REBOOT)
|
||||
try:
|
||||
self._client.power_off(node)
|
||||
_wait_until_powered_off(task)
|
||||
except Exception as e:
|
||||
LOG.warning(
|
||||
_LW('Failed to soft power off node %(node_uuid)s '
|
||||
'in at least %(timeout)d seconds. Error: %(error)s'),
|
||||
{'node_uuid': node.uuid,
|
||||
'timeout': wait * (attempts - 1),
|
||||
'error': e})
|
||||
manager_utils.node_power_action(task, states.REBOOT)
|
||||
else:
|
||||
manager_utils.node_power_action(task, states.POWER_ON)
|
||||
except Exception as e:
|
||||
msg = (_('Error rebooting node %(node)s. Error: %(error)s') %
|
||||
{'node': task.node.uuid, 'error': e})
|
||||
msg = (_('Error rebooting node %(node)s after deploy. '
|
||||
'Error: %(error)s') %
|
||||
{'node': node.uuid, 'error': e})
|
||||
self._log_and_raise_deployment_error(task, msg)
|
||||
|
||||
task.process_event('done')
|
||||
|
@ -142,3 +142,9 @@ class AgentClient(object):
|
||||
method='clean.execute_clean_step',
|
||||
params=params,
|
||||
wait=False)
|
||||
|
||||
def power_off(self, node):
|
||||
"""Soft powers off the bare metal node by shutting down ramdisk OS."""
|
||||
return self._command(node=node,
|
||||
method='standby.power_off',
|
||||
params={})
|
||||
|
@ -12,6 +12,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import types
|
||||
|
||||
import mock
|
||||
from oslo_config import cfg
|
||||
|
||||
@ -22,8 +24,10 @@ from ironic.common import keystone
|
||||
from ironic.common import pxe_utils
|
||||
from ironic.common import states
|
||||
from ironic.conductor import task_manager
|
||||
from ironic.conductor import utils as manager_utils
|
||||
from ironic.drivers.modules import agent
|
||||
from ironic.drivers.modules import agent_client
|
||||
from ironic.drivers.modules import fake
|
||||
from ironic.tests.conductor import utils as mgr_utils
|
||||
from ironic.tests.db import base as db_base
|
||||
from ironic.tests.db import utils as db_utils
|
||||
@ -421,12 +425,17 @@ class TestAgentVendor(db_base.DbTestCase):
|
||||
self.assertEqual(states.ACTIVE,
|
||||
task.node.target_provision_state)
|
||||
|
||||
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
@mock.patch.object(fake.FakePower, 'get_power_state',
|
||||
spec=types.FunctionType)
|
||||
@mock.patch.object(agent_client.AgentClient, 'power_off',
|
||||
spec=types.FunctionType)
|
||||
@mock.patch('ironic.conductor.utils.node_set_boot_device', autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.agent.AgentVendorInterface'
|
||||
'.check_deploy_success', autospec=True)
|
||||
def test_reboot_to_instance(self, check_deploy_mock, bootdev_mock,
|
||||
power_mock):
|
||||
power_off_mock, get_power_state_mock,
|
||||
node_power_action_mock):
|
||||
check_deploy_mock.return_value = None
|
||||
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
@ -435,11 +444,15 @@ class TestAgentVendor(db_base.DbTestCase):
|
||||
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=False) as task:
|
||||
get_power_state_mock.return_value = states.POWER_OFF
|
||||
self.passthru.reboot_to_instance(task)
|
||||
|
||||
check_deploy_mock.assert_called_once_with(mock.ANY, task.node)
|
||||
bootdev_mock.assert_called_once_with(task, 'disk', persistent=True)
|
||||
power_mock.assert_called_once_with(task, states.REBOOT)
|
||||
power_off_mock.assert_called_once_with(task.node)
|
||||
get_power_state_mock.assert_called_once_with(task)
|
||||
node_power_action_mock.assert_called_once_with(
|
||||
task, states.POWER_ON)
|
||||
self.assertEqual(states.ACTIVE, task.node.provision_state)
|
||||
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import time
|
||||
import types
|
||||
|
||||
import mock
|
||||
@ -27,6 +28,7 @@ from ironic.conductor import utils as manager_utils
|
||||
from ironic.drivers.modules import agent_base_vendor
|
||||
from ironic.drivers.modules import agent_client
|
||||
from ironic.drivers.modules import deploy_utils
|
||||
from ironic.drivers.modules import fake
|
||||
from ironic import objects
|
||||
from ironic.tests.conductor import utils as mgr_utils
|
||||
from ironic.tests.db import base as db_base
|
||||
@ -332,31 +334,118 @@ class TestBaseAgentVendor(db_base.DbTestCase):
|
||||
self.assertIsInstance(driver_routes, dict)
|
||||
self.assertEqual(expected, list(driver_routes))
|
||||
|
||||
@mock.patch.object(time, 'sleep', lambda seconds: None)
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
def test_reboot_and_finish_deploy_success(self, node_power_action_mock):
|
||||
@mock.patch.object(fake.FakePower, 'get_power_state',
|
||||
spec=types.FunctionType)
|
||||
@mock.patch.object(agent_client.AgentClient, 'power_off',
|
||||
spec=types.FunctionType)
|
||||
def test_reboot_and_finish_deploy(self, power_off_mock,
|
||||
get_power_state_mock,
|
||||
node_power_action_mock):
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
self.node.target_provision_state = states.ACTIVE
|
||||
self.node.save()
|
||||
with task_manager.acquire(self.context, self.node['uuid'],
|
||||
shared=False) as task:
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
get_power_state_mock.side_effect = [states.POWER_ON,
|
||||
states.POWER_OFF]
|
||||
self.passthru.reboot_and_finish_deploy(task)
|
||||
node_power_action_mock.assert_called_once_with(task, states.REBOOT)
|
||||
power_off_mock.assert_called_once_with(task.node)
|
||||
self.assertEqual(2, get_power_state_mock.call_count)
|
||||
node_power_action_mock.assert_called_once_with(
|
||||
task, states.POWER_ON)
|
||||
self.assertEqual(states.ACTIVE, task.node.provision_state)
|
||||
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
|
||||
|
||||
@mock.patch.object(time, 'sleep', lambda seconds: None)
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
@mock.patch.object(fake.FakePower, 'get_power_state',
|
||||
spec=types.FunctionType)
|
||||
@mock.patch.object(agent_client.AgentClient, 'power_off',
|
||||
spec=types.FunctionType)
|
||||
def test_reboot_and_finish_deploy_soft_poweroff_doesnt_complete(
|
||||
self, power_off_mock, get_power_state_mock,
|
||||
node_power_action_mock):
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
self.node.target_provision_state = states.ACTIVE
|
||||
self.node.save()
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
get_power_state_mock.return_value = states.POWER_ON
|
||||
self.passthru.reboot_and_finish_deploy(task)
|
||||
power_off_mock.assert_called_once_with(task.node)
|
||||
self.assertEqual(7, get_power_state_mock.call_count)
|
||||
node_power_action_mock.assert_called_once_with(
|
||||
task, states.REBOOT)
|
||||
self.assertEqual(states.ACTIVE, task.node.provision_state)
|
||||
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
|
||||
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
def test_reboot_and_finish_deploy_reboot_failure(self,
|
||||
node_power_action_mock):
|
||||
exc = exception.PowerStateFailure(pstate=states.REBOOT)
|
||||
@mock.patch.object(agent_client.AgentClient, 'power_off',
|
||||
spec=types.FunctionType)
|
||||
def test_reboot_and_finish_deploy_soft_poweroff_fails(
|
||||
self, power_off_mock, node_power_action_mock):
|
||||
power_off_mock.side_effect = RuntimeError("boom")
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
self.node.target_provision_state = states.ACTIVE
|
||||
self.node.save()
|
||||
node_power_action_mock.side_effect = exc
|
||||
with task_manager.acquire(self.context, self.node['uuid'],
|
||||
shared=False) as task:
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
self.passthru.reboot_and_finish_deploy(task)
|
||||
power_off_mock.assert_called_once_with(task.node)
|
||||
node_power_action_mock.assert_called_once_with(
|
||||
task, states.REBOOT)
|
||||
self.assertEqual(states.ACTIVE, task.node.provision_state)
|
||||
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
|
||||
|
||||
@mock.patch.object(time, 'sleep', lambda seconds: None)
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
@mock.patch.object(fake.FakePower, 'get_power_state',
|
||||
spec=types.FunctionType)
|
||||
@mock.patch.object(agent_client.AgentClient, 'power_off',
|
||||
spec=types.FunctionType)
|
||||
def test_reboot_and_finish_deploy_get_power_state_fails(
|
||||
self, power_off_mock, get_power_state_mock,
|
||||
node_power_action_mock):
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
self.node.target_provision_state = states.ACTIVE
|
||||
self.node.save()
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
get_power_state_mock.side_effect = RuntimeError("boom")
|
||||
self.passthru.reboot_and_finish_deploy(task)
|
||||
power_off_mock.assert_called_once_with(task.node)
|
||||
self.assertEqual(7, get_power_state_mock.call_count)
|
||||
node_power_action_mock.assert_called_once_with(
|
||||
task, states.REBOOT)
|
||||
self.assertEqual(states.ACTIVE, task.node.provision_state)
|
||||
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
|
||||
|
||||
@mock.patch.object(time, 'sleep', lambda seconds: None)
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
@mock.patch.object(fake.FakePower, 'get_power_state',
|
||||
spec=types.FunctionType)
|
||||
@mock.patch.object(agent_client.AgentClient, 'power_off',
|
||||
spec=types.FunctionType)
|
||||
def test_reboot_and_finish_deploy_power_action_fails(
|
||||
self, power_off_mock, get_power_state_mock,
|
||||
node_power_action_mock):
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
self.node.target_provision_state = states.ACTIVE
|
||||
self.node.save()
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
get_power_state_mock.return_value = states.POWER_ON
|
||||
node_power_action_mock.side_effect = RuntimeError("boom")
|
||||
self.assertRaises(exception.InstanceDeployFailure,
|
||||
self.passthru.reboot_and_finish_deploy, task)
|
||||
node_power_action_mock.assert_any_call(task, states.REBOOT)
|
||||
self.passthru.reboot_and_finish_deploy,
|
||||
task)
|
||||
power_off_mock.assert_called_once_with(task.node)
|
||||
self.assertEqual(7, get_power_state_mock.call_count)
|
||||
node_power_action_mock.assert_has_calls([
|
||||
mock.call(task, states.REBOOT),
|
||||
mock.call(task, states.POWER_OFF)])
|
||||
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
|
||||
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
||||
|
||||
|
@ -210,3 +210,9 @@ class TestAgentClient(base.TestCase):
|
||||
self.client._command.assert_called_once_with(
|
||||
node=self.node, method='clean.execute_clean_step',
|
||||
params=expected_params, wait=False)
|
||||
|
||||
def test_power_off(self):
|
||||
self.client._command = mock.MagicMock(spec_set=[])
|
||||
self.client.power_off(self.node)
|
||||
self.client._command.assert_called_once_with(
|
||||
node=self.node, method='standby.power_off', params={})
|
||||
|
Loading…
x
Reference in New Issue
Block a user