From 8add0e4c55fe5b753c5ae9f1ef34bea8dd6403ef Mon Sep 17 00:00:00 2001 From: Joshua Kraitberg Date: Fri, 19 Jul 2024 16:02:50 -0400 Subject: [PATCH] Implement sw-deploy-strategy rollback for AIO-SX Adds sequence and initial sw-deploy abort code. Initial support is only for AIO-SX. Called like so: sw-manager sw-deploy-strategy create --rollback TEST PLAN PASS: AIO-SX sw-deploy-strategy (major) * Trigger from deploy-failed Depends-On: https://review.opendev.org/c/starlingx/nfv/+/925899 Depends-On: https://review.opendev.org/c/starlingx/nfv/+/926587 Story: 2011045 Task: 50880 Change-Id: If051dac9847aaefb1542e96c1824a18c739e2ce7 Signed-off-by: Joshua Kraitberg --- nfv/nfv-client/scripts/sw-manager.completion | 5 + .../nfvi_plugins/nfvi_infrastructure_api.py | 168 ++++++- .../nfv_plugins/nfvi_plugins/openstack/usm.py | 35 +- .../tests/test_sw_deploy_strategy.py | 438 ++++++++++++++++++ .../nfv_vim/directors/_host_director.py | 8 +- nfv/nfv-vim/nfv_vim/nfvi/__init__.py | 2 + .../nfvi/_nfvi_infrastructure_module.py | 22 +- .../nfv_vim/nfvi/objects/v1/_upgrade.py | 40 ++ nfv/nfv-vim/nfv_vim/strategy/__init__.py | 2 + nfv/nfv-vim/nfv_vim/strategy/_strategy.py | 179 ++++++- .../nfv_vim/strategy/_strategy_stages.py | 2 + .../nfv_vim/strategy/_strategy_steps.py | 216 ++++++++- 12 files changed, 1088 insertions(+), 29 deletions(-) diff --git a/nfv/nfv-client/scripts/sw-manager.completion b/nfv/nfv-client/scripts/sw-manager.completion index 6d25d841..9a13c98f 100755 --- a/nfv/nfv-client/scripts/sw-manager.completion +++ b/nfv/nfv-client/scripts/sw-manager.completion @@ -53,6 +53,7 @@ function _swmanager() --max-parallel-worker-hosts --instance-action --alarm-restrictions + --rollback " local createopt=${prev} case "$createopt" in @@ -80,6 +81,10 @@ function _swmanager() COMPREPLY=($(compgen -W "strict relaxed permissive" -- ${cur})) return 0 ;; + --rollback) + COMPREPLY=($(compgen -W "${createopts}" -- ${cur})) + return 0 + ;; *) ;; esac diff --git 
a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py index 02f5fa12..9750a000 100755 --- a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py +++ b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py @@ -2686,6 +2686,166 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): callback.send(response) callback.close() + def sw_deploy_abort(self, future, callback): + """ + Abort a USM software deployment + """ + response = dict() + response['completed'] = False + response['reason'] = '' + response['complete-data'] = '' + + try: + future.set_timeouts(config.CONF.get('nfvi-timeouts', None)) + + if self._platform_token is None or \ + self._platform_token.is_expired(): + future.work(openstack.get_token, self._platform_directory) + future.result = (yield) + + if not future.result.is_complete() or \ + future.result.data is None: + DLOG.error("OpenStack get-token did not complete.") + return + + self._platform_token = future.result.data + + future.work(usm.sw_deploy_abort, self._platform_token) + future.result = (yield) + + if not future.result.is_complete(): + DLOG.error("USM software deploy abort did not complete.") + return + + response['complete-data'] = future.result.data + + future.work(usm.sw_deploy_get_upgrade_obj, self._platform_token, None) + future.result = (yield) + + if not future.result.is_complete(): + error_msg = ( + "Could not obtain deployment information from USM, " + "check /var/log/nfv-vim.log or /var/log/software.log for more information." 
+ ) + response['error-message'] = error_msg + return + + response['result-data'] = future.result.data + response['completed'] = True + + except exceptions.OpenStackRestAPIException as e: + x = json.loads(e.http_response_body) + error_msg = x.get("error", x.get("info")) + if httplib.UNAUTHORIZED == e.http_status_code: + response['error-code'] = nfvi.NFVI_ERROR_CODE.TOKEN_EXPIRED + if self._platform_token is not None: + self._platform_token.set_expired() + + elif httplib.NOT_ACCEPTABLE == e.http_status_code: + if not error_msg: + error_msg = ( + "Unknown error while trying software deploy abort, " + "check /var/log/nfv-vim.log or /var/log/software.log for more information." + ) + else: + error_msg = f"Software deploy abort was rejected: {error_msg}" + + elif not error_msg: + error_msg = f"Caught exception while trying software deploy abort, error={e}" + + if error_msg: + response["error-message"] = error_msg.strip() + DLOG.exception(error_msg) + + except Exception as e: + error_msg = f"Caught exception while trying software deploy abort, error={e}" + response["error-message"] = error_msg + DLOG.exception(error_msg) + + finally: + callback.send(response) + callback.close() + + def sw_deploy_activate_rollback(self, future, callback): + """ + Activate rollback of a USM software deployment + """ + response = dict() + response['completed'] = False + response['reason'] = '' + response['complete-data'] = '' + + try: + future.set_timeouts(config.CONF.get('nfvi-timeouts', None)) + + if self._platform_token is None or \ + self._platform_token.is_expired(): + future.work(openstack.get_token, self._platform_directory) + future.result = (yield) + + if not future.result.is_complete() or \ + future.result.data is None: + DLOG.error("OpenStack get-token did not complete.") + return + + self._platform_token = future.result.data + + future.work(usm.sw_deploy_activate_rollback, self._platform_token) + future.result = (yield) + + if not future.result.is_complete(): + DLOG.error("USM 
software deploy activate did not complete.") + return + + response['complete-data'] = future.result.data + + future.work(usm.sw_deploy_get_upgrade_obj, self._platform_token, None) + future.result = (yield) + + if not future.result.is_complete(): + error_msg = ( + "Could not obtain deployment information from USM, " + "check /var/log/nfv-vim.log or /var/log/software.log for more information." + ) + response['error-message'] = error_msg + return + + response['result-data'] = future.result.data + response['completed'] = True + + except exceptions.OpenStackRestAPIException as e: + x = json.loads(e.http_response_body) + error_msg = x.get("error", x.get("info")) + if httplib.UNAUTHORIZED == e.http_status_code: + response['error-code'] = nfvi.NFVI_ERROR_CODE.TOKEN_EXPIRED + if self._platform_token is not None: + self._platform_token.set_expired() + + elif httplib.NOT_ACCEPTABLE == e.http_status_code: + if not error_msg: + error_msg = ( + "Unknown error while trying software deploy activate-rollback, " + "check /var/log/nfv-vim.log or /var/log/software.log for more information." 
+ ) + else: + error_msg = f"Software deploy activate was rejected: {error_msg}" + + elif not error_msg: + error_msg = f"Caught exception while trying software deploy activate-rollback, error={e}" + + if error_msg: + response["error-message"] = error_msg.strip() + DLOG.exception(error_msg) + + except Exception as e: + error_msg = f"Caught exception while trying software deploy activate-rollback, error={e}" + response["error-message"] = error_msg + DLOG.exception(error_msg) + + finally: + callback.send(response) + callback.close() + def delete_host_services(self, future, host_uuid, host_name, host_personality, callback): """ @@ -3816,7 +3976,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): callback.send(response) callback.close() - def upgrade_host(self, future, host_uuid, host_name, callback): + def upgrade_host(self, future, host_uuid, host_name, rollback, callback): """ Upgrade a host """ @@ -3826,6 +3986,8 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): response['host_name'] = host_name response['reason'] = '' response['complete-data'] = '' + kind = "deploy" if not rollback else "rollback" + method = usm.sw_deploy_execute if not rollback else usm.sw_deploy_rollback try: future.set_timeouts(config.CONF.get('nfvi-timeouts', None)) @@ -3843,10 +4005,10 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): self._platform_token = future.result.data - future.work(usm.sw_deploy_execute, self._platform_token, host_name) + future.work(method, self._platform_token, host_name) future.result = (yield) if not future.result.is_complete(): - DLOG.error("USM software deploy host %s did not complete." % host_name) + DLOG.error(f"USM software {kind} host %s did not complete." 
% host_name) return response['completed'] = True diff --git a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/usm.py b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/usm.py index 97d9875f..d234a6a0 100755 --- a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/usm.py +++ b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/usm.py @@ -67,7 +67,7 @@ def sw_deploy_get_releases(token): Query USM for information about all releases """ - uri = f"release" # noqa:F541 pylint: disable=W1309 + uri = "release" # noqa:F541 pylint: disable=W1309 url = _usm_api_cmd(token, uri) response = _api_get(token, url) return response @@ -130,6 +130,17 @@ def sw_deploy_execute(token, host_name): return response +def sw_deploy_rollback(token, host_name): + """ + Ask USM to rollback a deployment on a host + """ + + uri = f"deploy_host/{host_name}/rollback" + url = _usm_api_cmd(token, uri) + response = _api_post(token, url, {}) + return response + + def sw_deploy_activate(token): """ Ask USM activate a deployment @@ -152,6 +163,28 @@ def sw_deploy_complete(token): return response +def sw_deploy_abort(token): + """ + Ask USM abort a deployment + """ + + uri = f"deploy/abort" # noqa:F541 pylint: disable=W1309 + url = _usm_api_cmd(token, uri) + response = _api_post(token, url, {}) + return response + + +def sw_deploy_activate_rollback(token): + """ + Ask USM activate rollback a deployment + """ + + uri = f"deploy/activate-rollback" # noqa:F541 pylint: disable=W1309 + url = _usm_api_cmd(token, uri) + response = _api_post(token, url, {}) + return response + + def sw_deploy_get_upgrade_obj(token, release): """Quickly gather all information about a software deployment""" diff --git a/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_deploy_strategy.py b/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_deploy_strategy.py index 84157379..cedb66d8 100755 --- a/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_deploy_strategy.py +++ b/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_deploy_strategy.py @@ -1826,3 
+1826,441 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase): sw_update_testcase.validate_strategy_persists(strategy) sw_update_testcase.validate_phase(apply_phase, expected_results) + + def test_sw_deploy_strategy_aiosx_rollback_from_complete(self): + """ + Test the sw_deploy strategy apply phase: + - aio-sx + - major + - complete + Verify: + - Pass + """ + + release = '888.8' + _, strategy = self._gen_aiosx_hosts_and_strategy( + release=None, + rollback=True, + nfvi_upgrade=nfvi.objects.v1.Upgrade( + release, + { + 'state': 'deploying', + 'reboot_required': True, + 'sw_version': MAJOR_RELEASE_UPGRADE, + }, + { + 'state': 'completed', + 'from_release': INITIAL_RELEASE, + 'to_release': MAJOR_RELEASE_UPGRADE, + }, + None, + ) + ) + + fake_upgrade_obj = SwUpgrade() + strategy.sw_update_obj = fake_upgrade_obj + + strategy.build_complete(common_strategy.STRATEGY_RESULT.SUCCESS, "") + apply_phase = strategy.apply_phase.as_dict() + + expected_results = { + 'total_stages': 3, + 'stages': [ + { + 'name': 'sw-upgrade-rollback-start', + 'total_steps': 3, + 'steps': [ + {'name': 'query-alarms'}, + {'name': 'sw-deploy-abort'}, + {'name': 'sw-deploy-activate-rollback'}, + ], + }, + { + 'name': 'sw-upgrade-worker-hosts', + 'total_steps': 6, + 'steps': [ + {'name': 'query-alarms'}, + {'name': 'lock-hosts', 'entity_names': ['controller-0']}, + {'name': 'upgrade-hosts', 'entity_names': ['controller-0']}, + {'name': 'system-stabilize', 'timeout': 15}, + {'name': 'unlock-hosts'}, + {'name': 'wait-alarms-clear', 'timeout': 2400}, + ] + }, + { + 'name': 'sw-upgrade-rollback-complete', + 'total_steps': 1, + 'steps': [ + {'name': 'query-alarms'}, + ], + }, + ], + } + + sw_update_testcase.validate_strategy_persists(strategy) + sw_update_testcase.validate_phase(apply_phase, expected_results) + + def test_sw_deploy_strategy_aiosx_rollback_from_active_done(self): + """ + Test the sw_deploy strategy apply phase: + - aio-sx + - major + - activate-done + Verify: + - 
Pass + """ + + release = '888.8' + _, strategy = self._gen_aiosx_hosts_and_strategy( + release=None, + rollback=True, + nfvi_upgrade=nfvi.objects.v1.Upgrade( + release, + { + 'state': 'deploying', + 'reboot_required': True, + 'sw_version': MAJOR_RELEASE_UPGRADE, + }, + { + 'state': 'activate-done', + 'from_release': INITIAL_RELEASE, + 'to_release': MAJOR_RELEASE_UPGRADE, + }, + None, + ) + ) + + fake_upgrade_obj = SwUpgrade() + strategy.sw_update_obj = fake_upgrade_obj + + strategy.build_complete(common_strategy.STRATEGY_RESULT.SUCCESS, "") + apply_phase = strategy.apply_phase.as_dict() + + expected_results = { + 'total_stages': 3, + 'stages': [ + { + 'name': 'sw-upgrade-rollback-start', + 'total_steps': 3, + 'steps': [ + {'name': 'query-alarms'}, + {'name': 'sw-deploy-abort'}, + {'name': 'sw-deploy-activate-rollback'}, + ], + }, + { + 'name': 'sw-upgrade-worker-hosts', + 'total_steps': 6, + 'steps': [ + {'name': 'query-alarms'}, + {'name': 'lock-hosts', 'entity_names': ['controller-0']}, + {'name': 'upgrade-hosts', 'entity_names': ['controller-0']}, + {'name': 'system-stabilize', 'timeout': 15}, + {'name': 'unlock-hosts'}, + {'name': 'wait-alarms-clear', 'timeout': 2400}, + ] + }, + { + 'name': 'sw-upgrade-rollback-complete', + 'total_steps': 1, + 'steps': [ + {'name': 'query-alarms'}, + ], + }, + ], + } + + sw_update_testcase.validate_strategy_persists(strategy) + sw_update_testcase.validate_phase(apply_phase, expected_results) + + def test_sw_deploy_strategy_aiosx_rollback_from_activate_failed(self): + """ + Test the sw_deploy strategy apply phase: + - aio-sx + - major + - activate-failed + Verify: + - Pass + """ + + release = '888.8' + _, strategy = self._gen_aiosx_hosts_and_strategy( + release=None, + rollback=True, + nfvi_upgrade=nfvi.objects.v1.Upgrade( + release, + { + 'state': 'deploying', + 'reboot_required': True, + 'sw_version': MAJOR_RELEASE_UPGRADE, + }, + { + 'state': 'activate-failed', + 'from_release': INITIAL_RELEASE, + 'to_release': 
MAJOR_RELEASE_UPGRADE, + }, + None, + ) + ) + + fake_upgrade_obj = SwUpgrade() + strategy.sw_update_obj = fake_upgrade_obj + + strategy.build_complete(common_strategy.STRATEGY_RESULT.SUCCESS, "") + apply_phase = strategy.apply_phase.as_dict() + + expected_results = { + 'total_stages': 3, + 'stages': [ + { + 'name': 'sw-upgrade-rollback-start', + 'total_steps': 3, + 'steps': [ + {'name': 'query-alarms'}, + {'name': 'sw-deploy-abort'}, + {'name': 'sw-deploy-activate-rollback'}, + ], + }, + { + 'name': 'sw-upgrade-worker-hosts', + 'total_steps': 6, + 'steps': [ + {'name': 'query-alarms'}, + {'name': 'lock-hosts', 'entity_names': ['controller-0']}, + {'name': 'upgrade-hosts', 'entity_names': ['controller-0']}, + {'name': 'system-stabilize', 'timeout': 15}, + {'name': 'unlock-hosts'}, + {'name': 'wait-alarms-clear', 'timeout': 2400}, + ] + }, + { + 'name': 'sw-upgrade-rollback-complete', + 'total_steps': 1, + 'steps': [ + {'name': 'query-alarms'}, + ], + }, + ], + } + + sw_update_testcase.validate_strategy_persists(strategy) + sw_update_testcase.validate_phase(apply_phase, expected_results) + + def test_sw_deploy_strategy_aiosx_rollback_from_host_done(self): + """ + Test the sw_deploy strategy apply phase: + - aio-sx + - major + - host-done + Verify: + - Pass + """ + + release = '888.8' + _, strategy = self._gen_aiosx_hosts_and_strategy( + release=None, + rollback=True, + nfvi_upgrade=nfvi.objects.v1.Upgrade( + release, + { + 'state': 'deploying', + 'reboot_required': True, + 'sw_version': MAJOR_RELEASE_UPGRADE, + }, + { + 'state': 'host-done', + 'from_release': INITIAL_RELEASE, + 'to_release': MAJOR_RELEASE_UPGRADE, + }, + None, + ) + ) + + fake_upgrade_obj = SwUpgrade() + strategy.sw_update_obj = fake_upgrade_obj + + strategy.build_complete(common_strategy.STRATEGY_RESULT.SUCCESS, "") + apply_phase = strategy.apply_phase.as_dict() + + expected_results = { + 'total_stages': 3, + 'stages': [ + { + 'name': 'sw-upgrade-rollback-start', + 'total_steps': 3, + 'steps': [ + 
{'name': 'query-alarms'}, + {'name': 'sw-deploy-abort'}, + {'name': 'sw-deploy-activate-rollback'}, + ], + }, + { + 'name': 'sw-upgrade-worker-hosts', + 'total_steps': 6, + 'steps': [ + {'name': 'query-alarms'}, + {'name': 'lock-hosts', 'entity_names': ['controller-0']}, + {'name': 'upgrade-hosts', 'entity_names': ['controller-0']}, + {'name': 'system-stabilize', 'timeout': 15}, + {'name': 'unlock-hosts'}, + {'name': 'wait-alarms-clear', 'timeout': 2400}, + ] + }, + { + 'name': 'sw-upgrade-rollback-complete', + 'total_steps': 1, + 'steps': [ + {'name': 'query-alarms'}, + ], + }, + ], + } + + sw_update_testcase.validate_strategy_persists(strategy) + sw_update_testcase.validate_phase(apply_phase, expected_results) + + def test_sw_deploy_strategy_aiosx_rollback_from_host_failed(self): + """ + Test the sw_deploy strategy apply phase: + - aio-sx + - major + - host-failed + Verify: + - Pass + """ + + release = '888.8' + _, strategy = self._gen_aiosx_hosts_and_strategy( + release=None, + rollback=True, + nfvi_upgrade=nfvi.objects.v1.Upgrade( + release, + { + 'state': 'deploying', + 'reboot_required': True, + 'sw_version': MAJOR_RELEASE_UPGRADE, + }, + { + 'state': 'host-failed', + 'from_release': INITIAL_RELEASE, + 'to_release': MAJOR_RELEASE_UPGRADE, + }, + None, + ) + ) + + fake_upgrade_obj = SwUpgrade() + strategy.sw_update_obj = fake_upgrade_obj + + strategy.build_complete(common_strategy.STRATEGY_RESULT.SUCCESS, "") + apply_phase = strategy.apply_phase.as_dict() + + expected_results = { + 'total_stages': 3, + 'stages': [ + { + 'name': 'sw-upgrade-rollback-start', + 'total_steps': 3, + 'steps': [ + {'name': 'query-alarms'}, + {'name': 'sw-deploy-abort'}, + {'name': 'sw-deploy-activate-rollback'}, + ], + }, + { + 'name': 'sw-upgrade-worker-hosts', + 'total_steps': 6, + 'steps': [ + {'name': 'query-alarms'}, + {'name': 'lock-hosts', 'entity_names': ['controller-0']}, + {'name': 'upgrade-hosts', 'entity_names': ['controller-0']}, + {'name': 'system-stabilize', 'timeout': 
15}, + {'name': 'unlock-hosts'}, + {'name': 'wait-alarms-clear', 'timeout': 2400}, + ] + }, + { + 'name': 'sw-upgrade-rollback-complete', + 'total_steps': 1, + 'steps': [ + {'name': 'query-alarms'}, + ], + }, + ], + } + + sw_update_testcase.validate_strategy_persists(strategy) + sw_update_testcase.validate_phase(apply_phase, expected_results) + + def test_sw_deploy_strategy_aiosx_rollback_from_host(self): + """ + Test the sw_deploy strategy apply phase: + - aio-sx + - major + - host + Verify: + - Pass + """ + + release = '888.8' + _, strategy = self._gen_aiosx_hosts_and_strategy( + release=None, + rollback=True, + nfvi_upgrade=nfvi.objects.v1.Upgrade( + release, + { + 'state': 'deploying', + 'reboot_required': True, + 'sw_version': MAJOR_RELEASE_UPGRADE, + }, + { + 'state': 'host', + 'from_release': INITIAL_RELEASE, + 'to_release': MAJOR_RELEASE_UPGRADE, + }, + None, + ) + ) + + fake_upgrade_obj = SwUpgrade() + strategy.sw_update_obj = fake_upgrade_obj + + strategy.build_complete(common_strategy.STRATEGY_RESULT.SUCCESS, "") + apply_phase = strategy.apply_phase.as_dict() + + expected_results = { + 'total_stages': 3, + 'stages': [ + { + 'name': 'sw-upgrade-rollback-start', + 'total_steps': 3, + 'steps': [ + {'name': 'query-alarms'}, + {'name': 'sw-deploy-abort'}, + {'name': 'sw-deploy-activate-rollback'}, + ], + }, + { + 'name': 'sw-upgrade-worker-hosts', + 'total_steps': 6, + 'steps': [ + {'name': 'query-alarms'}, + {'name': 'lock-hosts', 'entity_names': ['controller-0']}, + {'name': 'upgrade-hosts', 'entity_names': ['controller-0']}, + {'name': 'system-stabilize', 'timeout': 15}, + {'name': 'unlock-hosts'}, + {'name': 'wait-alarms-clear', 'timeout': 2400}, + ] + }, + { + 'name': 'sw-upgrade-rollback-complete', + 'total_steps': 1, + 'steps': [ + {'name': 'query-alarms'}, + ], + }, + ], + } + + sw_update_testcase.validate_strategy_persists(strategy) + sw_update_testcase.validate_phase(apply_phase, expected_results) diff --git 
a/nfv/nfv-vim/nfv_vim/directors/_host_director.py b/nfv/nfv-vim/nfv_vim/directors/_host_director.py index 54629aaa..7ee32fce 100755 --- a/nfv/nfv-vim/nfv_vim/directors/_host_director.py +++ b/nfv/nfv-vim/nfv_vim/directors/_host_director.py @@ -305,11 +305,11 @@ class HostDirector(object): sw_mgmt_director = directors.get_sw_mgmt_director() sw_mgmt_director.host_upgrade_changed(result) - def _nfvi_upgrade_host(self, host_uuid, host_name): + def _nfvi_upgrade_host(self, host_uuid, host_name, rollback): """ NFVI Upgrade Host """ - nfvi.nfvi_upgrade_host(host_uuid, host_name, + nfvi.nfvi_upgrade_host(host_uuid, host_name, rollback, self._nfvi_upgrade_host_callback()) @coroutine @@ -680,7 +680,7 @@ class HostDirector(object): return host_operation - def upgrade_hosts(self, host_names): + def upgrade_hosts(self, host_names, rollback): """ Upgrade a list of hosts """ @@ -705,7 +705,7 @@ class HostDirector(object): return host_operation host_operation.add_host(host.name, OPERATION_STATE.INPROGRESS) - self._nfvi_upgrade_host(host.uuid, host.name) + self._nfvi_upgrade_host(host.uuid, host.name, rollback=rollback) if host_operation.is_inprogress(): self._host_operation = host_operation diff --git a/nfv/nfv-vim/nfv_vim/nfvi/__init__.py b/nfv/nfv-vim/nfv_vim/nfvi/__init__.py index 422d9a99..a9a2d5e4 100755 --- a/nfv/nfv-vim/nfv_vim/nfvi/__init__.py +++ b/nfv/nfv-vim/nfv_vim/nfvi/__init__.py @@ -151,6 +151,8 @@ from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_register_host_state_ch from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_register_host_update_callback # noqa: F401 from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_register_host_upgrade_callback # noqa: F401 from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_register_sw_update_get_callback # noqa: F401 +from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_sw_deploy_abort # noqa: F401 +from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_sw_deploy_activate_rollback # noqa: F401 
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_sw_deploy_precheck # noqa: F401 from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_swact_from_host # noqa: F401 from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_unlock_host # noqa: F401 diff --git a/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py b/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py index cf4ee014..fea1e9ae 100755 --- a/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py +++ b/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py @@ -439,6 +439,24 @@ def nfvi_upgrade_complete(release, callback): return cmd_id +def nfvi_sw_deploy_abort(callback): + """ + Software deploy abort + """ + cmd_id = _infrastructure_plugin.invoke_plugin('sw_deploy_abort', + callback=callback) + return cmd_id + + +def nfvi_sw_deploy_activate_rollback(callback): + """ + Software deploy activate rollback + """ + cmd_id = _infrastructure_plugin.invoke_plugin('sw_deploy_activate_rollback', + callback=callback) + return cmd_id + + def nfvi_disable_container_host_services(host_uuid, host_name, host_personality, host_offline, callback): @@ -578,12 +596,12 @@ def nfvi_reboot_host(host_uuid, host_name, callback): return cmd_id -def nfvi_upgrade_host(host_uuid, host_name, callback): +def nfvi_upgrade_host(host_uuid, host_name, rollback, callback): """ Upgrade a host """ cmd_id = _infrastructure_plugin.invoke_plugin('upgrade_host', host_uuid, - host_name, callback=callback) + host_name, rollback, callback=callback) return cmd_id diff --git a/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_upgrade.py b/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_upgrade.py index 321ef236..230d7e4e 100755 --- a/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_upgrade.py +++ b/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_upgrade.py @@ -137,6 +137,38 @@ class Upgrade(ObjectData): def is_activate_failed(self): return self.deploy_state == usm_states.DEPLOY_STATES.ACTIVATE_FAILED.value + @property + def is_rollback(self): + return self.deploy_state and 
"rollback" in self.deploy_state + + @property + def is_activate_rollback(self): + return self.deploy_state == usm_states.DEPLOY_STATES.ACTIVATE_ROLLBACK.value + + @property + def is_activate_rollback_pending(self): + return self.deploy_state == usm_states.DEPLOY_STATES.ACTIVATE_ROLLBACK_PENDING.value + + @property + def is_activate_rollback_done(self): + return self.deploy_state == usm_states.DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE.value + + @property + def is_activate_rollback_failed(self): + return self.deploy_state == usm_states.DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED.value + + @property + def is_rollback_hosts(self): + return self.deploy_state == usm_states.DEPLOY_STATES.HOST_ROLLBACK.value + + @property + def is_rollback_hosts_done(self): + return self.deploy_state == usm_states.DEPLOY_STATES.HOST_ROLLBACK_DONE.value + + @property + def is_rollback_hosts_failed(self): + return self.deploy_state == usm_states.DEPLOY_STATES.HOST_ROLLBACK_FAILED.value + @property def is_deploy_completed(self): return self.deploy_state == usm_states.DEPLOY_STATES.COMPLETED.value @@ -155,3 +187,11 @@ class Upgrade(ObjectData): for v in self.hosts_info: if v["hostname"] == hostname: return v["host_state"] == usm_states.DEPLOY_HOST_STATES.DEPLOYED.value + + def is_host_pending(self, hostname): + if not self.hosts_info: + return None + + for v in self.hosts_info: + if v["hostname"] == hostname: + return v["host_state"] == usm_states.DEPLOY_HOST_STATES.PENDING.value diff --git a/nfv/nfv-vim/nfv_vim/strategy/__init__.py b/nfv/nfv-vim/nfv_vim/strategy/__init__.py index 008ae29a..21be0d8e 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/__init__.py +++ b/nfv/nfv-vim/nfv_vim/strategy/__init__.py @@ -58,6 +58,8 @@ from nfv_vim.strategy._strategy_steps import StartInstancesStep # noqa: F401 from nfv_vim.strategy._strategy_steps import StopInstancesStep # noqa: F401 from nfv_vim.strategy._strategy_steps import STRATEGY_STEP_NAME # noqa: F401 from nfv_vim.strategy._strategy_steps import SwactHostsStep # 
noqa: F401 +from nfv_vim.strategy._strategy_steps import SwDeployAbortStep # noqa: F401 +from nfv_vim.strategy._strategy_steps import SwDeployActivateRollbackStep # noqa: F401 from nfv_vim.strategy._strategy_steps import SwDeployPrecheckStep # noqa: F401 from nfv_vim.strategy._strategy_steps import SwPatchHostsStep # noqa: F401 from nfv_vim.strategy._strategy_steps import SystemConfigUpdateHostsStep # noqa: F401 diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy.py index fd9cbf78..16232f63 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy.py +++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy.py @@ -1259,8 +1259,16 @@ class UpdateWorkerHostsMixin(object): hosts_to_lock = list() hosts_to_reboot = list() if reboot: - hosts_to_lock = [x for x in host_list if not x.is_locked()] - hosts_to_reboot = [x for x in host_list if x.is_locked()] + if ( + isinstance(self, SwUpgradeStrategy) and + any(HOST_PERSONALITY.CONTROLLER in v.personality for v in host_list) + ): + # Always lock/unlock controllers during rollback/upgrade + hosts_to_lock = host_list + hosts_to_reboot = [] + else: + hosts_to_lock = [x for x in host_list if not x.is_locked()] + hosts_to_reboot = [x for x in host_list if x.is_locked()] stage = strategy.StrategyStage(strategy_stage_name) @@ -1828,14 +1836,12 @@ class SwUpgradeStrategy( super(SwUpgradeStrategy, self).build() def _build_rollback(self): - reason = "Rollback not supported yet." 
- DLOG.warn(reason) - self._state = strategy.STRATEGY_STATE.BUILD_FAILED - self.build_phase.result = strategy.STRATEGY_PHASE_RESULT.FAILED - self.build_phase.result_reason = reason - self.sw_update_obj.strategy_build_complete( - False, self.build_phase.result_reason) - self.save() + from nfv_vim import strategy + + stage = strategy.StrategyStage(strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_QUERY) + stage.add_step(strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms)) + stage.add_step(strategy.QueryUpgradeStep(release=None)) + self.build_phase.add_stage(stage) super(SwUpgradeStrategy, self).build() def build(self): @@ -2109,15 +2115,152 @@ class SwUpgradeStrategy( self.sw_update_obj.strategy_build_complete(True, '') self.save() + def _add_rollback_start_stage(self): + """ + Add rollback start strategy stage + """ + + from nfv_vim import strategy + + stage = strategy.StrategyStage(strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_ROLLBACK_START) + + stage.add_step(strategy.QueryAlarmsStep(fail_on_alarms=False, ignore_alarms=self._ignore_alarms)) + stage.add_step(strategy.SwDeployAbortStep()) + stage.add_step(strategy.SwDeployActivateRollbackStep()) + self.apply_phase.add_stage(stage) + + def _add_rollback_hosts_stages(self): + """ + Add rollback hosts strategy stage + """ + + from nfv_vim import strategy + from nfv_vim import tables + + host_table = tables.tables_get_host_table() + reboot_required = self.nfvi_upgrade.reboot_required + controller_strategy = self._add_controller_strategy_stages + controllers_hosts = list() + storage_hosts = list() + worker_hosts = list() + + for host in host_table.values(): + if self.nfvi_upgrade.is_host_pending(host.name): + DLOG.info(f"Skipping host-rollback for pending host: {host.name}") + continue + + if HOST_PERSONALITY.CONTROLLER in host.personality: + controllers_hosts.append(host) + if HOST_PERSONALITY.WORKER in host.personality: + # We need to use this strategy on AIO type + controller_strategy = self._add_worker_strategy_stages + + 
elif HOST_PERSONALITY.STORAGE in host.personality:
+                storage_hosts.append(host)
+
+            elif HOST_PERSONALITY.WORKER in host.personality:
+                worker_hosts.append(host)
+
+            else:
+                DLOG.error(f"Unsupported personality for host {host.name}.")
+                self._state = strategy.STRATEGY_STATE.BUILD_FAILED
+                self.build_phase.result = \
+                    strategy.STRATEGY_PHASE_RESULT.FAILED
+                self.build_phase.result_reason = \
+                    'Unsupported personality for host'
+                self.sw_update_obj.strategy_build_complete(
+                    False, self.build_phase.result_reason)
+                self.save()
+                return
+
+        # Sort the controller such that host other than
+        # current local_host_name is the first element in the list.
+        # This sorting is to reduce the number of swact required since
+        # sw-deploy patch release orchestration can start on host that
+        # is currently active.
+        local_host_name = get_local_host_name()
+        controllers_hosts = sorted(
+            controllers_hosts,
+            key=lambda x: x.name == local_host_name,
+        )
+
+        strategy_pairs = [
+            (self._add_worker_strategy_stages, worker_hosts),
+            (self._add_storage_strategy_stages, storage_hosts),
+            (controller_strategy, controllers_hosts),
+        ]
+
+        for stage_func, host_list in strategy_pairs:
+            if host_list:
+                success, reason = stage_func(host_list, reboot_required)
+                if not success:
+                    self._state = strategy.STRATEGY_STATE.BUILD_FAILED
+                    self.build_phase.result = \
+                        strategy.STRATEGY_PHASE_RESULT.FAILED
+                    self.build_phase.result_reason = reason
+                    self.sw_update_obj.strategy_build_complete(
+                        False, self.build_phase.result_reason)
+                    self.save()
+                    return
+
+    def _add_rollback_complete_stage(self):
+        """
+        Add rollback complete strategy stage
+        """
+        from nfv_vim import strategy
+
+        stage = strategy.StrategyStage(strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_ROLLBACK_COMPLETE)
+
+        stage.add_step(strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms))
+        self.apply_phase.add_stage(stage)
+
     def _build_complete_rollback(self, result, result_reason):
-        reason = "Rollback not supported yet."
-        DLOG.warn(reason)
-        self._state = strategy.STRATEGY_STATE.BUILD_FAILED
-        self.build_phase.result = strategy.STRATEGY_PHASE_RESULT.FAILED
-        self.build_phase.result_reason = reason
-        self.sw_update_obj.strategy_build_complete(
-            False, self.build_phase.result_reason)
-        self.save()
+        """
+        Rollback Strategy Build Complete
+
+        Reports a build failure when the base build did not succeed or when
+        the deployment is not eligible for rollback (release missing or
+        unavailable, not deploying, not a single controller, not a major
+        release). Otherwise adds the rollback start, hosts and complete
+        stages, reports the build as complete and saves the strategy.
+        """
+        from nfv_vim import strategy
+
+        reason = ""
+        result, result_reason = \
+            super(SwUpgradeStrategy, self).build_complete(result, result_reason)
+
+        DLOG.info("Build Complete Callback, result=%s, reason=%s."
+                  % (result, result_reason))
+
+        if result not in [strategy.STRATEGY_RESULT.SUCCESS, strategy.STRATEGY_RESULT.DEGRADED]:
+            # Report the failure only; a failed build must not also be
+            # reported as successful.
+            self.sw_update_obj.strategy_build_complete(
+                False, self.build_phase.result_reason)
+            self.save()
+            return
+
+        if not self.nfvi_upgrade.release_info or self.nfvi_upgrade.is_unavailable:
+            reason = "Software release does not exist or is unavailable."
+
+        elif not self.nfvi_upgrade.is_deploying:
+            reason = (
+                "Software release must be deploying for a rollback, "
+                f"found={self.nfvi_upgrade.release_info}."
+            )
+
+        elif not self._single_controller:
+            reason = "Rollback only supported for AIO-SX currently"
+
+        elif not self.nfvi_upgrade.major_release:
+            reason = "Rollback only supported for major releases currently"
+
+        if reason:
+            DLOG.warn(reason)
+            self._state = strategy.STRATEGY_STATE.BUILD_FAILED
+            self.build_phase.result = strategy.STRATEGY_PHASE_RESULT.FAILED
+            self.build_phase.result_reason = reason
+            self.sw_update_obj.strategy_build_complete(
+                False, self.build_phase.result_reason)
+            self.save()
+            return
+
+        # Unlike with normal deployments we will defer skip logic to the steps
+        self._add_rollback_start_stage()
+        self._add_rollback_hosts_stages()
+        self._add_rollback_complete_stage()
+
+        # All stages were added: report the successful build and persist it.
+        # NOTE(review): assumes the caller does not already do this - confirm.
+        self.sw_update_obj.strategy_build_complete(True, '')
+        self.save()
 
     def build_complete(self, result, result_reason):
         """
diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy_stages.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy_stages.py
index 07c9c0f2..ea7c2e0b 100755
--- a/nfv/nfv-vim/nfv_vim/strategy/_strategy_stages.py
+++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy_stages.py
@@ -28,10 +28,12 @@ class StrategyStageNames(Constants):
     # upgrade stages
     SW_UPGRADE_QUERY = Constant('sw-upgrade-query')
     SW_UPGRADE_START = Constant('sw-upgrade-start')
+    SW_UPGRADE_ROLLBACK_START = Constant('sw-upgrade-rollback-start')
     SW_UPGRADE_CONTROLLERS = Constant('sw-upgrade-controllers')
     SW_UPGRADE_STORAGE_HOSTS = Constant('sw-upgrade-storage-hosts')
     SW_UPGRADE_WORKER_HOSTS = Constant('sw-upgrade-worker-hosts')
     SW_UPGRADE_COMPLETE = Constant('sw-upgrade-complete')
+    SW_UPGRADE_ROLLBACK_COMPLETE = Constant('sw-upgrade-rollback-complete')
     # firmware update stages
     FW_UPDATE_QUERY = Constant('fw-update-query')
     FW_UPDATE_HOSTS_QUERY = Constant('fw-update-hosts-query')
diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py
index 213e6a33..515b8b38 100755
--- a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py
+++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py
@@ -41,6 +41,8 @@ class StrategyStepNames(Constants):
SW_DEPLOY_PRECHECK = Constant('sw-deploy-precheck')
     START_UPGRADE = Constant('start-upgrade')
     ACTIVATE_UPGRADE = Constant('activate-upgrade')
+    SW_DEPLOY_ABORT = Constant('sw-deploy-abort')
+    SW_DEPLOY_ACTIVATE_ROLLBACK = Constant('sw-deploy-activate-rollback')
     COMPLETE_UPGRADE = Constant('complete-upgrade')
     SWACT_HOSTS = Constant('swact-hosts')
     SW_PATCH_HOSTS = Constant('sw-patch-hosts')
@@ -1096,7 +1098,7 @@ class UpgradeHostsStep(strategy.StrategyStep):
         DLOG.info("Step (%s) apply for hosts %s." % (self._name,
                                                      self._host_names))
         host_director = directors.get_host_director()
-        operation = host_director.upgrade_hosts(self._host_names)
+        operation = host_director.upgrade_hosts(self._host_names, self.strategy._rollback)
         if operation.is_inprogress():
             return strategy.STRATEGY_STEP_RESULT.WAIT, ""
         elif operation.is_failed():
@@ -1516,6 +1518,212 @@ class UpgradeCompleteStep(strategy.StrategyStep):
         return data
 
 
+class SwDeployAbortStep(strategy.StrategyStep):
+    """
+    Software Deploy Abort - Strategy Step
+    """
+    def __init__(self):
+        super(SwDeployAbortStep, self).__init__(
+            STRATEGY_STEP_NAME.SW_DEPLOY_ABORT, timeout_in_secs=60)
+
+    @coroutine
+    def _sw_deploy_abort_callback(self):
+        """
+        Handle SW-Deploy-Abort Callback
+        """
+
+        response = (yield)
+        DLOG.debug("Handle SW-Deploy-Abort callback: response=%s." % response)
+
+        if response['completed']:
+            DLOG.debug("sw-deploy abort completed")
+            self.strategy.nfvi_upgrade = response['result-data']  # refresh cached deploy state
+            result = strategy.STRATEGY_STEP_RESULT.SUCCESS
+            self.stage.step_complete(result, '')
+        else:
+            reason = response.get("error-message",
+                "Unknown error while trying software deploy abort, "
+                "check /var/log/nfv-vim.log or /var/log/software.log for more information."
+            )
+            result = strategy.STRATEGY_STEP_RESULT.FAILED
+            detailed_reason = str(response)
+            self.phase.result_complete_response(detailed_reason)
+            self.stage.step_complete(result, reason)
+
+    def apply(self):
+        """
+        Software Deploy Abort (skipped when a rollback is already in progress)
+        """
+        from nfv_vim import nfvi
+
+        DLOG.info("Step (%s) apply." % self._name)
+
+        if self.strategy.nfvi_upgrade.is_rollback:
+            reason = "Rollback already in progress, skipping abort call"
+            result = strategy.STRATEGY_STEP_RESULT.SUCCESS
+            DLOG.info(reason)
+            return result, reason
+
+        nfvi.nfvi_sw_deploy_abort(self._sw_deploy_abort_callback())
+        return strategy.STRATEGY_STEP_RESULT.WAIT, ""  # completed later by the callback
+
+    def from_dict(self, data):
+        """
+        Returns the sw-deploy abort step object initialized using the given
+        dictionary
+        """
+        super(SwDeployAbortStep, self).from_dict(data)
+        return self
+
+    def as_dict(self):
+        """
+        Represent the sw-deploy abort step as a dictionary
+        """
+        data = super(SwDeployAbortStep, self).as_dict()
+        data['entity_type'] = ''
+        data['entity_names'] = list()
+        data['entity_uuids'] = list()
+        return data
+
+
+class SwDeployActivateRollbackStep(strategy.StrategyStep):
+    """
+    Software Deploy Activate-Rollback - Strategy Step
+    """
+    def __init__(self):
+        super(SwDeployActivateRollbackStep, self).__init__(
+            STRATEGY_STEP_NAME.SW_DEPLOY_ACTIVATE_ROLLBACK, timeout_in_secs=1830)
+
+        self._query_inprogress = False  # True while an nfvi_get_upgrade query is outstanding
+
+    @coroutine
+    def _activate_rollback_callback(self):
+        """
+        Activate-Rollback request callback, only a failed request ends the step here
+        """
+
+        response = (yield)
+        DLOG.debug("Activate-Rollback callback response=%s." % response)
+
+        if not response['completed']:
+            reason = response.get("error-message",
+                "Unknown error while trying software deploy activate-rollback, "
+                "check /var/log/nfv-vim.log or /var/log/software.log for more information."
+            )
+            result = strategy.STRATEGY_STEP_RESULT.FAILED
+            detailed_reason = str(response)
+            self.phase.result_complete_response(detailed_reason)
+            self.stage.step_complete(result, reason)
+
+    @coroutine
+    def _handle_activate_rollback_callback(self):
+        """
+        Handle the deploy state query issued from handle_event
+        """
+
+        response = (yield)
+        DLOG.debug("Handle Activate-Rollback callback response=%s." % response)
+
+        self._query_inprogress = False
+
+        if not response['completed']:
+            # State query failed; leave the step as is, the next audit event queries again
+            return
+
+        self.strategy.nfvi_upgrade = response['result-data']
+
+        if self.strategy.nfvi_upgrade.is_activate_rollback_done:
+            DLOG.debug("Handle Activate-Rollback callback, deploy activate is done")
+            reason = ""
+            result = strategy.STRATEGY_STEP_RESULT.SUCCESS
+            self.stage.step_complete(result, reason)
+
+        elif self.strategy.nfvi_upgrade.is_activate_rollback_failed:
+            reason = (
+                "Failed software deploy activate-rollback, "
+                "check /var/log/nfv-vim.log or /var/log/software.log for more information."
+            )
+            result = strategy.STRATEGY_STEP_RESULT.FAILED
+            detailed_reason = str(response)
+            self.phase.result_complete_response(detailed_reason)
+            self.stage.step_complete(result, reason)
+
+        elif not self.strategy.nfvi_upgrade.is_activate_rollback:
+            reason = (
+                "Unknown error while doing software deploy activate-rollback, "
+                "check /var/log/nfv-vim.log or /var/log/software.log for more information."
+            )
+            result = strategy.STRATEGY_STEP_RESULT.FAILED
+            detailed_reason = str(response)
+            self.phase.result_complete_response(detailed_reason)
+            self.stage.step_complete(result, reason)
+
+        else:
+            DLOG.debug("Handle Activate-Rollback callback, deploy activate in progress...")
+
+    def apply(self):
+        """
+        Software Deploy Activate-Rollback
+        """
+        from nfv_vim import nfvi
+
+        DLOG.info("Step (%s) apply." % self._name)
+
+        result = strategy.STRATEGY_STEP_RESULT.WAIT
+        reason = ""
+
+        if self.strategy.nfvi_upgrade.is_activate_rollback:
+            DLOG.info("Deployment already activating, skipping activate call")
+        elif self.strategy.nfvi_upgrade.is_activate_rollback_done:
+            DLOG.info("Deployment already activated, skipping activate call")
+            result = strategy.STRATEGY_STEP_RESULT.SUCCESS
+        elif (
+            self.strategy.nfvi_upgrade.is_activate_rollback_pending or
+            self.strategy.nfvi_upgrade.is_activate_rollback_failed
+        ):
+            nfvi.nfvi_sw_deploy_activate_rollback(self._activate_rollback_callback())
+        else:
+            DLOG.info("software deploy activate-rollback not required, skipping call")
+            result = strategy.STRATEGY_STEP_RESULT.SUCCESS
+
+        return result, reason
+
+    def handle_event(self, event, event_data=None):
+        """
+        Handle Host events, each host audit triggers a deploy state query
+        """
+        from nfv_vim import nfvi
+
+        DLOG.debug("Step (%s) handle event (%s)." % (self._name, event))
+
+        if event == STRATEGY_EVENT.HOST_AUDIT:
+            if not self._query_inprogress:
+                self._query_inprogress = True
+                nfvi.nfvi_get_upgrade(None, self._handle_activate_rollback_callback())
+            return True
+
+        return False
+
+    def from_dict(self, data):
+        """
+        Returns the activate-rollback step object initialized using the given
+        dictionary
+        """
+        super(SwDeployActivateRollbackStep, self).from_dict(data)
+        self._query_inprogress = False  # not restored from data; always starts cleared
+        return self
+
+    def as_dict(self):
+        """
+        Represent the activate-rollback step as a dictionary
+        """
+        data = super(SwDeployActivateRollbackStep, self).as_dict()
+        data['entity_type'] = ''
+        data['entity_names'] = list()
+        data['entity_uuids'] = list()
+        return data
+
+
 class MigrateInstancesFromHostStep(strategy.StrategyStep):
     """
     Migrate Instances From Host - Strategy Step
@@ -5302,6 +5510,12 @@ def strategy_step_rebuild_from_dict(data):
     elif STRATEGY_STEP_NAME.COMPLETE_UPGRADE == data['name']:
         step_obj = object.__new__(UpgradeCompleteStep)
 
+    elif STRATEGY_STEP_NAME.SW_DEPLOY_ABORT == data['name']:
+        step_obj = object.__new__(SwDeployAbortStep)
+
+    elif STRATEGY_STEP_NAME.SW_DEPLOY_ACTIVATE_ROLLBACK == data['name']:
+        step_obj = object.__new__(SwDeployActivateRollbackStep)
+
     elif STRATEGY_STEP_NAME.SW_PATCH_HOSTS == data['name']:
         step_obj = object.__new__(SwPatchHostsStep)