Merge "Move heartbeat processing to separate mixin class"
This commit is contained in:
commit
82a4b288ba
@ -206,8 +206,8 @@ def log_and_raise_deployment_error(task, msg):
|
||||
raise exception.InstanceDeployFailure(msg)
|
||||
|
||||
|
||||
class AgentDeployMixin(object):
|
||||
"""Mixin with deploy methods."""
|
||||
class HeartbeatMixin(object):
|
||||
"""Mixin class implementing heartbeat processing."""
|
||||
|
||||
def __init__(self):
|
||||
self._client = _get_client()
|
||||
@ -240,8 +240,104 @@ class AgentDeployMixin(object):
|
||||
|
||||
"""
|
||||
|
||||
@METRICS.timer('AgentDeployMixin._refresh_clean_steps')
|
||||
def _refresh_clean_steps(self, task):
|
||||
def refresh_clean_steps(self, task):
|
||||
"""Refresh the node's cached clean steps
|
||||
|
||||
:param task: a TaskManager instance
|
||||
|
||||
"""
|
||||
|
||||
def continue_cleaning(self, task):
|
||||
"""Start the next cleaning step if the previous one is complete.
|
||||
|
||||
:param task: a TaskManager instance
|
||||
|
||||
"""
|
||||
|
||||
@METRICS.timer('HeartbeatMixin.heartbeat')
|
||||
def heartbeat(self, task, callback_url):
|
||||
"""Process a heartbeat.
|
||||
|
||||
:param task: task to work with.
|
||||
:param callback_url: agent HTTP API URL.
|
||||
"""
|
||||
# TODO(dtantsur): upgrade lock only if we actually take action other
|
||||
# than updating the last timestamp.
|
||||
task.upgrade_lock()
|
||||
|
||||
node = task.node
|
||||
LOG.debug('Heartbeat from node %s', node.uuid)
|
||||
|
||||
driver_internal_info = node.driver_internal_info
|
||||
driver_internal_info['agent_url'] = callback_url
|
||||
|
||||
# TODO(rloo): 'agent_last_heartbeat' was deprecated since it wasn't
|
||||
# being used so remove that entry if it exists.
|
||||
# Hopefully all nodes will have been updated by Pike, so
|
||||
# we can delete this code then.
|
||||
driver_internal_info.pop('agent_last_heartbeat', None)
|
||||
|
||||
node.driver_internal_info = driver_internal_info
|
||||
node.save()
|
||||
|
||||
# Async call backs don't set error state on their own
|
||||
# TODO(jimrollenhagen) improve error messages here
|
||||
msg = _('Failed checking if deploy is done.')
|
||||
try:
|
||||
if node.maintenance:
|
||||
# this shouldn't happen often, but skip the rest if it does.
|
||||
LOG.debug('Heartbeat from node %(node)s in maintenance mode; '
|
||||
'not taking any action.', {'node': node.uuid})
|
||||
return
|
||||
elif (node.provision_state == states.DEPLOYWAIT and
|
||||
not self.deploy_has_started(task)):
|
||||
msg = _('Node failed to deploy.')
|
||||
self.continue_deploy(task)
|
||||
elif (node.provision_state == states.DEPLOYWAIT and
|
||||
self.deploy_is_done(task)):
|
||||
msg = _('Node failed to move to active state.')
|
||||
self.reboot_to_instance(task)
|
||||
elif (node.provision_state == states.DEPLOYWAIT and
|
||||
self.deploy_has_started(task)):
|
||||
node.touch_provisioning()
|
||||
elif node.provision_state == states.CLEANWAIT:
|
||||
node.touch_provisioning()
|
||||
try:
|
||||
if not node.clean_step:
|
||||
LOG.debug('Node %s just booted to start cleaning.',
|
||||
node.uuid)
|
||||
msg = _('Node failed to start the first cleaning '
|
||||
'step.')
|
||||
# First, cache the clean steps
|
||||
self.refresh_clean_steps(task)
|
||||
# Then set/verify node clean steps and start cleaning
|
||||
manager_utils.set_node_cleaning_steps(task)
|
||||
_notify_conductor_resume_clean(task)
|
||||
else:
|
||||
msg = _('Node failed to check cleaning progress.')
|
||||
self.continue_cleaning(task)
|
||||
except exception.NoFreeConductorWorker:
|
||||
# waiting for the next heartbeat, node.last_error and
|
||||
# logging message is filled already via conductor's hook
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
err_info = {'node': node.uuid, 'msg': msg, 'e': e}
|
||||
last_error = _('Asynchronous exception for node %(node)s: '
|
||||
'%(msg)s Exception: %(e)s') % err_info
|
||||
LOG.exception(last_error)
|
||||
if node.provision_state in (states.CLEANING, states.CLEANWAIT):
|
||||
manager_utils.cleaning_error_handler(task, last_error)
|
||||
elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT):
|
||||
deploy_utils.set_failed_state(
|
||||
task, last_error, collect_logs=bool(self._client))
|
||||
|
||||
|
||||
class AgentDeployMixin(HeartbeatMixin):
|
||||
"""Mixin with deploy methods."""
|
||||
|
||||
@METRICS.timer('AgentDeployMixin.refresh_clean_steps')
|
||||
def refresh_clean_steps(self, task):
|
||||
"""Refresh the node's cached clean steps from the booted agent.
|
||||
|
||||
Gets the node's clean steps from the booted agent and caches them.
|
||||
@ -353,7 +449,7 @@ class AgentDeployMixin(object):
|
||||
elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH':
|
||||
# Cache the new clean steps (and 'hardware_manager_version')
|
||||
try:
|
||||
self._refresh_clean_steps(task)
|
||||
self.refresh_clean_steps(task)
|
||||
except exception.NodeCleaningFailure as e:
|
||||
msg = (_('Could not continue cleaning on node '
|
||||
'%(node)s: %(err)s.') %
|
||||
@ -431,83 +527,6 @@ class AgentDeployMixin(object):
|
||||
LOG.error(msg)
|
||||
return manager_utils.cleaning_error_handler(task, msg)
|
||||
|
||||
@METRICS.timer('AgentDeployMixin.heartbeat')
|
||||
def heartbeat(self, task, callback_url):
|
||||
"""Process a heartbeat.
|
||||
|
||||
:param task: task to work with.
|
||||
:param callback_url: agent HTTP API URL.
|
||||
"""
|
||||
# TODO(dtantsur): upgrade lock only if we actually take action other
|
||||
# than updating the last timestamp.
|
||||
task.upgrade_lock()
|
||||
|
||||
node = task.node
|
||||
LOG.debug('Heartbeat from node %s', node.uuid)
|
||||
|
||||
driver_internal_info = node.driver_internal_info
|
||||
driver_internal_info['agent_url'] = callback_url
|
||||
|
||||
# TODO(rloo): 'agent_last_heartbeat' was deprecated since it wasn't
|
||||
# being used so remove that entry if it exists.
|
||||
# Hopefully all nodes will have been updated by Pike, so
|
||||
# we can delete this code then.
|
||||
driver_internal_info.pop('agent_last_heartbeat', None)
|
||||
|
||||
node.driver_internal_info = driver_internal_info
|
||||
node.save()
|
||||
|
||||
# Async call backs don't set error state on their own
|
||||
# TODO(jimrollenhagen) improve error messages here
|
||||
msg = _('Failed checking if deploy is done.')
|
||||
try:
|
||||
if node.maintenance:
|
||||
# this shouldn't happen often, but skip the rest if it does.
|
||||
LOG.debug('Heartbeat from node %(node)s in maintenance mode; '
|
||||
'not taking any action.', {'node': node.uuid})
|
||||
return
|
||||
elif (node.provision_state == states.DEPLOYWAIT and
|
||||
not self.deploy_has_started(task)):
|
||||
msg = _('Node failed to deploy.')
|
||||
self.continue_deploy(task)
|
||||
elif (node.provision_state == states.DEPLOYWAIT and
|
||||
self.deploy_is_done(task)):
|
||||
msg = _('Node failed to move to active state.')
|
||||
self.reboot_to_instance(task)
|
||||
elif (node.provision_state == states.DEPLOYWAIT and
|
||||
self.deploy_has_started(task)):
|
||||
node.touch_provisioning()
|
||||
elif node.provision_state == states.CLEANWAIT:
|
||||
node.touch_provisioning()
|
||||
try:
|
||||
if not node.clean_step:
|
||||
LOG.debug('Node %s just booted to start cleaning.',
|
||||
node.uuid)
|
||||
msg = _('Node failed to start the first cleaning '
|
||||
'step.')
|
||||
# First, cache the clean steps
|
||||
self._refresh_clean_steps(task)
|
||||
# Then set/verify node clean steps and start cleaning
|
||||
manager_utils.set_node_cleaning_steps(task)
|
||||
_notify_conductor_resume_clean(task)
|
||||
else:
|
||||
msg = _('Node failed to check cleaning progress.')
|
||||
self.continue_cleaning(task)
|
||||
except exception.NoFreeConductorWorker:
|
||||
# waiting for the next heartbeat, node.last_error and
|
||||
# logging message is filled already via conductor's hook
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
err_info = {'node': node.uuid, 'msg': msg, 'e': e}
|
||||
last_error = _('Asynchronous exception for node %(node)s: '
|
||||
'%(msg)s Exception: %(e)s') % err_info
|
||||
LOG.exception(last_error)
|
||||
if node.provision_state in (states.CLEANING, states.CLEANWAIT):
|
||||
manager_utils.cleaning_error_handler(task, last_error)
|
||||
elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT):
|
||||
deploy_utils.set_failed_state(task, last_error)
|
||||
|
||||
@METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy')
|
||||
def reboot_and_finish_deploy(self, task):
|
||||
"""Helper method to trigger reboot on the node and finish deploy.
|
||||
|
@ -58,12 +58,16 @@ class AgentDeployMixinBaseTest(db_base.DbTestCase):
|
||||
self.node = object_utils.create_test_node(self.context, **n)
|
||||
|
||||
|
||||
class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
def setUp(self):
|
||||
super(HeartbeatMixinTest, self).setUp()
|
||||
self.deploy = agent_base_vendor.HeartbeatMixin()
|
||||
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'deploy_has_started', autospec=True)
|
||||
@mock.patch.object(deploy_utils, 'set_failed_state', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin, 'deploy_is_done',
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_is_done',
|
||||
autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.LOG, 'exception', autospec=True)
|
||||
def test_heartbeat_deploy_done_fails(self, log_mock, done_mock,
|
||||
@ -75,16 +79,17 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
task.node.provision_state = states.DEPLOYWAIT
|
||||
task.node.target_provision_state = states.ACTIVE
|
||||
self.deploy.heartbeat(task, 'http://127.0.0.1:8080')
|
||||
failed_mock.assert_called_once_with(task, mock.ANY)
|
||||
failed_mock.assert_called_once_with(
|
||||
task, mock.ANY, collect_logs=True)
|
||||
log_mock.assert_called_once_with(
|
||||
'Asynchronous exception for node '
|
||||
'1be26c0b-03f2-4d2e-ae87-c02d7f33c123: Failed checking if deploy '
|
||||
'is done. Exception: LlamaException')
|
||||
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'deploy_has_started', autospec=True)
|
||||
@mock.patch.object(deploy_utils, 'set_failed_state', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin, 'deploy_is_done',
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_is_done',
|
||||
autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.LOG, 'exception', autospec=True)
|
||||
def test_heartbeat_deploy_done_raises_with_event(self, log_mock, done_mock,
|
||||
@ -114,8 +119,8 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
'is done. Exception: LlamaException')
|
||||
|
||||
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
'_refresh_clean_steps', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'refresh_clean_steps', autospec=True)
|
||||
@mock.patch.object(manager_utils, 'set_node_cleaning_steps', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
|
||||
autospec=True)
|
||||
@ -135,8 +140,8 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
|
||||
@mock.patch.object(manager_utils, 'cleaning_error_handler')
|
||||
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
'_refresh_clean_steps', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'refresh_clean_steps', autospec=True)
|
||||
@mock.patch.object(manager_utils, 'set_node_cleaning_steps', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
|
||||
autospec=True)
|
||||
@ -169,7 +174,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
failed_mock.side_effect = None
|
||||
|
||||
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'continue_cleaning', autospec=True)
|
||||
def test_heartbeat_continue_cleaning(self, mock_continue, mock_touch):
|
||||
self.node.clean_step = {
|
||||
@ -188,7 +193,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
mock_continue.assert_called_once_with(mock.ANY, task)
|
||||
|
||||
@mock.patch.object(manager_utils, 'cleaning_error_handler')
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'continue_cleaning', autospec=True)
|
||||
def test_heartbeat_continue_cleaning_fails(self, mock_continue,
|
||||
mock_handler):
|
||||
@ -211,7 +216,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
mock_handler.assert_called_once_with(task, mock.ANY)
|
||||
|
||||
@mock.patch.object(manager_utils, 'cleaning_error_handler')
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'continue_cleaning', autospec=True)
|
||||
def test_heartbeat_continue_cleaning_no_worker(self, mock_continue,
|
||||
mock_handler):
|
||||
@ -233,9 +238,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
mock_continue.assert_called_once_with(mock.ANY, task)
|
||||
self.assertFalse(mock_handler.called)
|
||||
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin, 'continue_deploy',
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy',
|
||||
autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'reboot_to_instance', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
|
||||
autospec=True)
|
||||
@ -258,9 +263,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
node.refresh()
|
||||
self.assertNotIn('agent_last_heartbeat', node.driver_internal_info)
|
||||
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin, 'continue_deploy',
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy',
|
||||
autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'reboot_to_instance', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
|
||||
autospec=True)
|
||||
@ -281,7 +286,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
self.assertEqual(0, cd_mock.call_count)
|
||||
|
||||
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'deploy_has_started', autospec=True)
|
||||
def test_heartbeat_touch_provisioning(self, mock_deploy_started,
|
||||
mock_touch):
|
||||
@ -295,6 +300,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
|
||||
mock_touch.assert_called_once_with(mock.ANY)
|
||||
|
||||
|
||||
class AgentDeployMixinTest(AgentDeployMixinBaseTest):
|
||||
|
||||
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
|
||||
@mock.patch.object(time, 'sleep', lambda seconds: None)
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
@ -966,7 +974,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
|
||||
autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
'_refresh_clean_steps', autospec=True)
|
||||
'refresh_clean_steps', autospec=True)
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
|
||||
autospec=True)
|
||||
def _test_continue_cleaning_clean_version_mismatch(
|
||||
@ -1005,7 +1013,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
|
||||
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
|
||||
autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
|
||||
'_refresh_clean_steps', autospec=True)
|
||||
'refresh_clean_steps', autospec=True)
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
|
||||
autospec=True)
|
||||
def test_continue_cleaning_clean_version_mismatch_fail(
|
||||
@ -1155,13 +1163,13 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
|
||||
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_clean_steps',
|
||||
autospec=True)
|
||||
def test__refresh_clean_steps(self, client_mock):
|
||||
def test_refresh_clean_steps(self, client_mock):
|
||||
client_mock.return_value = {
|
||||
'command_result': self.clean_steps}
|
||||
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.deploy._refresh_clean_steps(task)
|
||||
self.deploy.refresh_clean_steps(task)
|
||||
|
||||
client_mock.assert_called_once_with(mock.ANY, task.node,
|
||||
task.ports)
|
||||
@ -1182,7 +1190,7 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
|
||||
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_clean_steps',
|
||||
autospec=True)
|
||||
def test__refresh_clean_steps_missing_steps(self, client_mock):
|
||||
def test_refresh_clean_steps_missing_steps(self, client_mock):
|
||||
del self.clean_steps['clean_steps']
|
||||
client_mock.return_value = {
|
||||
'command_result': self.clean_steps}
|
||||
@ -1191,14 +1199,14 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.assertRaisesRegex(exception.NodeCleaningFailure,
|
||||
'invalid result',
|
||||
self.deploy._refresh_clean_steps,
|
||||
self.deploy.refresh_clean_steps,
|
||||
task)
|
||||
client_mock.assert_called_once_with(mock.ANY, task.node,
|
||||
task.ports)
|
||||
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_clean_steps',
|
||||
autospec=True)
|
||||
def test__refresh_clean_steps_missing_interface(self, client_mock):
|
||||
def test_refresh_clean_steps_missing_interface(self, client_mock):
|
||||
step = self.clean_steps['clean_steps']['SpecificHardwareManager'][1]
|
||||
del step['interface']
|
||||
client_mock.return_value = {
|
||||
@ -1208,7 +1216,7 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.assertRaisesRegex(exception.NodeCleaningFailure,
|
||||
'invalid clean step',
|
||||
self.deploy._refresh_clean_steps,
|
||||
self.deploy.refresh_clean_steps,
|
||||
task)
|
||||
client_mock.assert_called_once_with(mock.ANY, task.node,
|
||||
task.ports)
|
||||
|
Loading…
x
Reference in New Issue
Block a user