diff --git a/ironic/conductor/base_manager.py b/ironic/conductor/base_manager.py index 361cf0904a..4d1a132e5e 100644 --- a/ironic/conductor/base_manager.py +++ b/ironic/conductor/base_manager.py @@ -15,6 +15,7 @@ import inspect import threading +import eventlet import futurist from futurist import periodics from futurist import rejection @@ -176,6 +177,13 @@ class BaseConductorManager(object): states.DEPLOYING, 'provision_updated_at', last_error=last_error) + # Start consoles if it set enabled in a greenthread. + try: + self._spawn_worker(self._start_consoles, + ironic_context.get_admin_context()) + except exception.NoFreeConductorWorker: + LOG.warning(_LW('Failed to start worker for restarting consoles.')) + # Spawn a dedicated greenthread for the keepalive try: self._spawn_worker(self._conductor_service_record_keepalive) @@ -371,3 +379,48 @@ class BaseConductorManager(object): workers_count += 1 if workers_count >= CONF.conductor.periodic_max_workers: break + + def _start_consoles(self, context): + """Start consoles if set enabled. + + :param: context: request context + """ + filters = {'console_enabled': True} + + node_iter = self.iter_nodes(filters=filters) + + for node_uuid, driver in node_iter: + try: + with task_manager.acquire(context, node_uuid, shared=False, + purpose='start console') as task: + try: + LOG.debug('Trying to start console of node %(node)s', + {'node': node_uuid}) + task.driver.console.start_console(task) + LOG.info(_LI('Successfully started console of node ' + '%(node)s'), {'node': node_uuid}) + except Exception as err: + msg = (_('Failed to start console of node %(node)s ' + 'while starting the conductor, so changing ' + 'the console_enabled status to False, error: ' + '%(err)s') + % {'node': node_uuid, 'err': err}) + LOG.error(msg) + # If starting console failed, set node console_enabled + # back to False and set node's last error. 
+ task.node.last_error = msg + task.node.console_enabled = False + task.node.save() + except exception.NodeLocked: + LOG.warning(_LW('Node %(node)s is locked while trying to ' + 'start console on conductor startup'), + {'node': node_uuid}) + continue + except exception.NodeNotFound: + LOG.warning(_LW("During starting console on conductor " + "startup, node %(node)s was not found"), + {'node': node_uuid}) + continue + finally: + # Yield on every iteration + eventlet.sleep(0) diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py index 5e25c0d3c0..7efd4e4080 100644 --- a/ironic/db/sqlalchemy/api.py +++ b/ironic/db/sqlalchemy/api.py @@ -220,6 +220,8 @@ class Connection(api.Connection): (datetime.timedelta( seconds=filters['inspection_started_before']))) query = query.filter(models.Node.inspection_started_at < limit) + if 'console_enabled' in filters: + query = query.filter_by(console_enabled=filters['console_enabled']) return query diff --git a/ironic/drivers/base.py b/ironic/drivers/base.py index 02c3a75066..62a9cd1794 100644 --- a/ironic/drivers/base.py +++ b/ironic/drivers/base.py @@ -544,6 +544,8 @@ class ConsoleInterface(object): def start_console(self, task): """Start a remote console for the task's node. + This method should not raise an exception if console already started. + :param task: a TaskManager instance containing the node to act on. 
""" diff --git a/ironic/tests/unit/conductor/test_base_manager.py b/ironic/tests/unit/conductor/test_base_manager.py index 1245b65375..c069b4a883 100644 --- a/ironic/tests/unit/conductor/test_base_manager.py +++ b/ironic/tests/unit/conductor/test_base_manager.py @@ -18,11 +18,13 @@ from futurist import periodics import mock from oslo_config import cfg from oslo_db import exception as db_exception +from oslo_utils import uuidutils from ironic.common import driver_factory from ironic.common import exception from ironic.conductor import base_manager from ironic.conductor import manager +from ironic.conductor import task_manager from ironic.drivers import base as drivers_base from ironic import objects from ironic.tests import base as tests_base @@ -218,3 +220,84 @@ class ManagerSpawnWorkerTestCase(tests_base.TestCase): self.assertRaises(exception.NoFreeConductorWorker, self.service._spawn_worker, 'fake') + + +class StartConsolesTestCase(mgr_utils.ServiceSetUpMixin, + tests_db_base.DbTestCase): + def test__start_consoles(self): + obj_utils.create_test_node(self.context, + driver='fake', + console_enabled=True) + obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver='fake', + console_enabled=True + ) + obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver='fake' + ) + self._start_service() + with mock.patch.object(self.driver.console, + 'start_console') as mock_start_console: + self.service._start_consoles(self.context) + self.assertEqual(2, mock_start_console.call_count) + + def test__start_consoles_no_console_enabled(self): + obj_utils.create_test_node(self.context, + driver='fake', + console_enabled=False) + self._start_service() + with mock.patch.object(self.driver.console, + 'start_console') as mock_start_console: + self.service._start_consoles(self.context) + self.assertFalse(mock_start_console.called) + + def test__start_consoles_failed(self): + test_node = obj_utils.create_test_node(self.context, 
+ driver='fake', + console_enabled=True) + self._start_service() + with mock.patch.object(self.driver.console, + 'start_console') as mock_start_console: + mock_start_console.side_effect = Exception() + self.service._start_consoles(self.context) + mock_start_console.assert_called_once_with(mock.ANY) + test_node.refresh() + self.assertFalse(test_node.console_enabled) + self.assertIsNotNone(test_node.last_error) + + @mock.patch.object(base_manager, 'LOG') + def test__start_consoles_node_locked(self, log_mock): + test_node = obj_utils.create_test_node(self.context, + driver='fake', + console_enabled=True, + reservation='fake-host') + self._start_service() + with mock.patch.object(self.driver.console, + 'start_console') as mock_start_console: + self.service._start_consoles(self.context) + self.assertFalse(mock_start_console.called) + test_node.refresh() + self.assertTrue(test_node.console_enabled) + self.assertIsNone(test_node.last_error) + self.assertTrue(log_mock.warning.called) + + @mock.patch.object(base_manager, 'LOG') + def test__start_consoles_node_not_found(self, log_mock): + test_node = obj_utils.create_test_node(self.context, + driver='fake', + console_enabled=True) + self._start_service() + with mock.patch.object(task_manager, 'acquire') as mock_acquire: + mock_acquire.side_effect = exception.NodeNotFound(node='not found') + with mock.patch.object(self.driver.console, + 'start_console') as mock_start_console: + self.service._start_consoles(self.context) + self.assertFalse(mock_start_console.called) + test_node.refresh() + self.assertTrue(test_node.console_enabled) + self.assertIsNone(test_node.last_error) + self.assertTrue(log_mock.warning.called) diff --git a/releasenotes/notes/restart-console-on-conductor-startup-5cff6128c325b18e.yaml b/releasenotes/notes/restart-console-on-conductor-startup-5cff6128c325b18e.yaml new file mode 100644 index 0000000000..eb0d2771a4 --- /dev/null +++ 
b/releasenotes/notes/restart-console-on-conductor-startup-5cff6128c325b18e.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - A node's console could be marked as enabled while the corresponding console + service was not running when the conductor started. The conductor now tries to start + the consoles of such nodes on startup so that the reported status is consistent.