Merge "Move heartbeat processing to separate mixin class"

This commit is contained in:
Jenkins 2017-03-15 16:43:21 +00:00 committed by Gerrit Code Review
commit 82a4b288ba
2 changed files with 135 additions and 108 deletions

View File

@ -206,8 +206,8 @@ def log_and_raise_deployment_error(task, msg):
raise exception.InstanceDeployFailure(msg)
class AgentDeployMixin(object):
"""Mixin with deploy methods."""
class HeartbeatMixin(object):
"""Mixin class implementing heartbeat processing."""
def __init__(self):
self._client = _get_client()
@ -240,8 +240,104 @@ class AgentDeployMixin(object):
"""
@METRICS.timer('AgentDeployMixin._refresh_clean_steps')
def _refresh_clean_steps(self, task):
def refresh_clean_steps(self, task):
"""Refresh the node's cached clean steps
:param task: a TaskManager instance
"""
def continue_cleaning(self, task):
"""Start the next cleaning step if the previous one is complete.
:param task: a TaskManager instance
"""
@METRICS.timer('HeartbeatMixin.heartbeat')
def heartbeat(self, task, callback_url):
"""Process a heartbeat.
:param task: task to work with.
:param callback_url: agent HTTP API URL.
"""
# TODO(dtantsur): upgrade lock only if we actually take action other
# than updating the last timestamp.
task.upgrade_lock()
node = task.node
LOG.debug('Heartbeat from node %s', node.uuid)
driver_internal_info = node.driver_internal_info
driver_internal_info['agent_url'] = callback_url
# TODO(rloo): 'agent_last_heartbeat' was deprecated since it wasn't
# being used so remove that entry if it exists.
# Hopefully all nodes will have been updated by Pike, so
# we can delete this code then.
driver_internal_info.pop('agent_last_heartbeat', None)
node.driver_internal_info = driver_internal_info
node.save()
# Async call backs don't set error state on their own
# TODO(jimrollenhagen) improve error messages here
msg = _('Failed checking if deploy is done.')
try:
if node.maintenance:
# this shouldn't happen often, but skip the rest if it does.
LOG.debug('Heartbeat from node %(node)s in maintenance mode; '
'not taking any action.', {'node': node.uuid})
return
elif (node.provision_state == states.DEPLOYWAIT and
not self.deploy_has_started(task)):
msg = _('Node failed to deploy.')
self.continue_deploy(task)
elif (node.provision_state == states.DEPLOYWAIT and
self.deploy_is_done(task)):
msg = _('Node failed to move to active state.')
self.reboot_to_instance(task)
elif (node.provision_state == states.DEPLOYWAIT and
self.deploy_has_started(task)):
node.touch_provisioning()
elif node.provision_state == states.CLEANWAIT:
node.touch_provisioning()
try:
if not node.clean_step:
LOG.debug('Node %s just booted to start cleaning.',
node.uuid)
msg = _('Node failed to start the first cleaning '
'step.')
# First, cache the clean steps
self.refresh_clean_steps(task)
# Then set/verify node clean steps and start cleaning
manager_utils.set_node_cleaning_steps(task)
_notify_conductor_resume_clean(task)
else:
msg = _('Node failed to check cleaning progress.')
self.continue_cleaning(task)
except exception.NoFreeConductorWorker:
# waiting for the next heartbeat, node.last_error and
# logging message is filled already via conductor's hook
pass
except Exception as e:
err_info = {'node': node.uuid, 'msg': msg, 'e': e}
last_error = _('Asynchronous exception for node %(node)s: '
'%(msg)s Exception: %(e)s') % err_info
LOG.exception(last_error)
if node.provision_state in (states.CLEANING, states.CLEANWAIT):
manager_utils.cleaning_error_handler(task, last_error)
elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT):
deploy_utils.set_failed_state(
task, last_error, collect_logs=bool(self._client))
class AgentDeployMixin(HeartbeatMixin):
"""Mixin with deploy methods."""
@METRICS.timer('AgentDeployMixin.refresh_clean_steps')
def refresh_clean_steps(self, task):
"""Refresh the node's cached clean steps from the booted agent.
Gets the node's clean steps from the booted agent and caches them.
@ -353,7 +449,7 @@ class AgentDeployMixin(object):
elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH':
# Cache the new clean steps (and 'hardware_manager_version')
try:
self._refresh_clean_steps(task)
self.refresh_clean_steps(task)
except exception.NodeCleaningFailure as e:
msg = (_('Could not continue cleaning on node '
'%(node)s: %(err)s.') %
@ -431,83 +527,6 @@ class AgentDeployMixin(object):
LOG.error(msg)
return manager_utils.cleaning_error_handler(task, msg)
@METRICS.timer('AgentDeployMixin.heartbeat')
def heartbeat(self, task, callback_url):
"""Process a heartbeat.
:param task: task to work with.
:param callback_url: agent HTTP API URL.
"""
# TODO(dtantsur): upgrade lock only if we actually take action other
# than updating the last timestamp.
task.upgrade_lock()
node = task.node
LOG.debug('Heartbeat from node %s', node.uuid)
driver_internal_info = node.driver_internal_info
driver_internal_info['agent_url'] = callback_url
# TODO(rloo): 'agent_last_heartbeat' was deprecated since it wasn't
# being used so remove that entry if it exists.
# Hopefully all nodes will have been updated by Pike, so
# we can delete this code then.
driver_internal_info.pop('agent_last_heartbeat', None)
node.driver_internal_info = driver_internal_info
node.save()
# Async call backs don't set error state on their own
# TODO(jimrollenhagen) improve error messages here
msg = _('Failed checking if deploy is done.')
try:
if node.maintenance:
# this shouldn't happen often, but skip the rest if it does.
LOG.debug('Heartbeat from node %(node)s in maintenance mode; '
'not taking any action.', {'node': node.uuid})
return
elif (node.provision_state == states.DEPLOYWAIT and
not self.deploy_has_started(task)):
msg = _('Node failed to deploy.')
self.continue_deploy(task)
elif (node.provision_state == states.DEPLOYWAIT and
self.deploy_is_done(task)):
msg = _('Node failed to move to active state.')
self.reboot_to_instance(task)
elif (node.provision_state == states.DEPLOYWAIT and
self.deploy_has_started(task)):
node.touch_provisioning()
elif node.provision_state == states.CLEANWAIT:
node.touch_provisioning()
try:
if not node.clean_step:
LOG.debug('Node %s just booted to start cleaning.',
node.uuid)
msg = _('Node failed to start the first cleaning '
'step.')
# First, cache the clean steps
self._refresh_clean_steps(task)
# Then set/verify node clean steps and start cleaning
manager_utils.set_node_cleaning_steps(task)
_notify_conductor_resume_clean(task)
else:
msg = _('Node failed to check cleaning progress.')
self.continue_cleaning(task)
except exception.NoFreeConductorWorker:
# waiting for the next heartbeat, node.last_error and
# logging message is filled already via conductor's hook
pass
except Exception as e:
err_info = {'node': node.uuid, 'msg': msg, 'e': e}
last_error = _('Asynchronous exception for node %(node)s: '
'%(msg)s Exception: %(e)s') % err_info
LOG.exception(last_error)
if node.provision_state in (states.CLEANING, states.CLEANWAIT):
manager_utils.cleaning_error_handler(task, last_error)
elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT):
deploy_utils.set_failed_state(task, last_error)
@METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy')
def reboot_and_finish_deploy(self, task):
"""Helper method to trigger reboot on the node and finish deploy.

View File

@ -58,12 +58,16 @@ class AgentDeployMixinBaseTest(db_base.DbTestCase):
self.node = object_utils.create_test_node(self.context, **n)
class TestHeartbeat(AgentDeployMixinBaseTest):
class HeartbeatMixinTest(AgentDeployMixinBaseTest):
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
def setUp(self):
super(HeartbeatMixinTest, self).setUp()
self.deploy = agent_base_vendor.HeartbeatMixin()
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
'deploy_has_started', autospec=True)
@mock.patch.object(deploy_utils, 'set_failed_state', autospec=True)
@mock.patch.object(agent_base_vendor.AgentDeployMixin, 'deploy_is_done',
@mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_is_done',
autospec=True)
@mock.patch.object(agent_base_vendor.LOG, 'exception', autospec=True)
def test_heartbeat_deploy_done_fails(self, log_mock, done_mock,
@ -75,16 +79,17 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
task.node.provision_state = states.DEPLOYWAIT
task.node.target_provision_state = states.ACTIVE
self.deploy.heartbeat(task, 'http://127.0.0.1:8080')
failed_mock.assert_called_once_with(task, mock.ANY)
failed_mock.assert_called_once_with(
task, mock.ANY, collect_logs=True)
log_mock.assert_called_once_with(
'Asynchronous exception for node '
'1be26c0b-03f2-4d2e-ae87-c02d7f33c123: Failed checking if deploy '
'is done. Exception: LlamaException')
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
'deploy_has_started', autospec=True)
@mock.patch.object(deploy_utils, 'set_failed_state', autospec=True)
@mock.patch.object(agent_base_vendor.AgentDeployMixin, 'deploy_is_done',
@mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_is_done',
autospec=True)
@mock.patch.object(agent_base_vendor.LOG, 'exception', autospec=True)
def test_heartbeat_deploy_done_raises_with_event(self, log_mock, done_mock,
@ -114,8 +119,8 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
'is done. Exception: LlamaException')
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
'_refresh_clean_steps', autospec=True)
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
'refresh_clean_steps', autospec=True)
@mock.patch.object(manager_utils, 'set_node_cleaning_steps', autospec=True)
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
autospec=True)
@ -135,8 +140,8 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
@mock.patch.object(manager_utils, 'cleaning_error_handler')
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
'_refresh_clean_steps', autospec=True)
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
'refresh_clean_steps', autospec=True)
@mock.patch.object(manager_utils, 'set_node_cleaning_steps', autospec=True)
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
autospec=True)
@ -169,7 +174,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
failed_mock.side_effect = None
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
'continue_cleaning', autospec=True)
def test_heartbeat_continue_cleaning(self, mock_continue, mock_touch):
self.node.clean_step = {
@ -188,7 +193,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
mock_continue.assert_called_once_with(mock.ANY, task)
@mock.patch.object(manager_utils, 'cleaning_error_handler')
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
'continue_cleaning', autospec=True)
def test_heartbeat_continue_cleaning_fails(self, mock_continue,
mock_handler):
@ -211,7 +216,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
mock_handler.assert_called_once_with(task, mock.ANY)
@mock.patch.object(manager_utils, 'cleaning_error_handler')
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
'continue_cleaning', autospec=True)
def test_heartbeat_continue_cleaning_no_worker(self, mock_continue,
mock_handler):
@ -233,9 +238,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
mock_continue.assert_called_once_with(mock.ANY, task)
self.assertFalse(mock_handler.called)
@mock.patch.object(agent_base_vendor.AgentDeployMixin, 'continue_deploy',
@mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy',
autospec=True)
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
'reboot_to_instance', autospec=True)
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
autospec=True)
@ -258,9 +263,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
node.refresh()
self.assertNotIn('agent_last_heartbeat', node.driver_internal_info)
@mock.patch.object(agent_base_vendor.AgentDeployMixin, 'continue_deploy',
@mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy',
autospec=True)
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
'reboot_to_instance', autospec=True)
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
autospec=True)
@ -281,7 +286,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
self.assertEqual(0, cd_mock.call_count)
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
'deploy_has_started', autospec=True)
def test_heartbeat_touch_provisioning(self, mock_deploy_started,
mock_touch):
@ -295,6 +300,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
mock_touch.assert_called_once_with(mock.ANY)
class AgentDeployMixinTest(AgentDeployMixinBaseTest):
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@ -966,7 +974,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
autospec=True)
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
'_refresh_clean_steps', autospec=True)
'refresh_clean_steps', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def _test_continue_cleaning_clean_version_mismatch(
@ -1005,7 +1013,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
@mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean',
autospec=True)
@mock.patch.object(agent_base_vendor.AgentDeployMixin,
'_refresh_clean_steps', autospec=True)
'refresh_clean_steps', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_continue_cleaning_clean_version_mismatch_fail(
@ -1155,13 +1163,13 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
@mock.patch.object(agent_client.AgentClient, 'get_clean_steps',
autospec=True)
def test__refresh_clean_steps(self, client_mock):
def test_refresh_clean_steps(self, client_mock):
client_mock.return_value = {
'command_result': self.clean_steps}
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
self.deploy._refresh_clean_steps(task)
self.deploy.refresh_clean_steps(task)
client_mock.assert_called_once_with(mock.ANY, task.node,
task.ports)
@ -1182,7 +1190,7 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
@mock.patch.object(agent_client.AgentClient, 'get_clean_steps',
autospec=True)
def test__refresh_clean_steps_missing_steps(self, client_mock):
def test_refresh_clean_steps_missing_steps(self, client_mock):
del self.clean_steps['clean_steps']
client_mock.return_value = {
'command_result': self.clean_steps}
@ -1191,14 +1199,14 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
self.context, self.node.uuid, shared=False) as task:
self.assertRaisesRegex(exception.NodeCleaningFailure,
'invalid result',
self.deploy._refresh_clean_steps,
self.deploy.refresh_clean_steps,
task)
client_mock.assert_called_once_with(mock.ANY, task.node,
task.ports)
@mock.patch.object(agent_client.AgentClient, 'get_clean_steps',
autospec=True)
def test__refresh_clean_steps_missing_interface(self, client_mock):
def test_refresh_clean_steps_missing_interface(self, client_mock):
step = self.clean_steps['clean_steps']['SpecificHardwareManager'][1]
del step['interface']
client_mock.return_value = {
@ -1208,7 +1216,7 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
self.context, self.node.uuid, shared=False) as task:
self.assertRaisesRegex(exception.NodeCleaningFailure,
'invalid clean step',
self.deploy._refresh_clean_steps,
self.deploy.refresh_clean_steps,
task)
client_mock.assert_called_once_with(mock.ANY, task.node,
task.ports)