Implement sw-deploy-strategy rollback for AIO-SX

Adds the rollback strategy sequence and the initial sw-deploy abort code.
Initial support is for AIO-SX only.

Called like so:
sw-manager sw-deploy-strategy create --rollback

TEST PLAN
PASS: AIO-SX sw-deploy-strategy (major)
* Trigger from deploy-failed

Depends-On: https://review.opendev.org/c/starlingx/nfv/+/925899
Depends-On: https://review.opendev.org/c/starlingx/nfv/+/926587
Story: 2011045
Task: 50880
Change-Id: If051dac9847aaefb1542e96c1824a18c739e2ce7
Signed-off-by: Joshua Kraitberg <joshua.kraitberg@windriver.com>
This commit is contained in:
Joshua Kraitberg 2024-07-19 16:02:50 -04:00
parent aa5f4303b3
commit 8add0e4c55
12 changed files with 1088 additions and 29 deletions

View File

@ -53,6 +53,7 @@ function _swmanager()
--max-parallel-worker-hosts
--instance-action
--alarm-restrictions
--rollback
"
local createopt=${prev}
case "$createopt" in
@ -80,6 +81,10 @@ function _swmanager()
COMPREPLY=($(compgen -W "strict relaxed permissive" -- ${cur}))
return 0
;;
--rollback)
COMPREPLY=($(compgen -W "${createopts}" -- ${cur}))
return 0
;;
*)
;;
esac

View File

@ -2686,6 +2686,166 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
callback.send(response)
callback.close()
def sw_deploy_abort(self, future, callback):
    """
    Abort a USM software deployment

    Coroutine: after each operation is scheduled with ``future.work`` the
    coroutine yields and stores the value sent back in ``future.result``.
    The response dictionary is always sent to ``callback`` (and the
    callback closed) on exit, whether or not the abort succeeded.

    Response keys set here: 'completed', 'reason', 'complete-data', and
    on success 'result-data'; on failure possibly 'error-message' and
    'error-code'.
    """
    response = dict()
    response['completed'] = False
    response['reason'] = ''
    response['complete-data'] = ''

    try:
        future.set_timeouts(config.CONF.get('nfvi-timeouts', None))

        # Refresh the platform token only when missing or expired
        if self._platform_token is None or \
                self._platform_token.is_expired():
            future.work(openstack.get_token, self._platform_directory)
            future.result = (yield)

            if not future.result.is_complete() or \
                    future.result.data is None:
                DLOG.error("OpenStack get-token did not complete.")
                return

            self._platform_token = future.result.data

        future.work(usm.sw_deploy_abort, self._platform_token)
        future.result = (yield)
        if not future.result.is_complete():
            DLOG.error("USM software deploy abort did not complete.")
            return

        response['complete-data'] = future.result.data

        # Re-query the deployment so the caller gets the post-abort state
        future.work(usm.sw_deploy_get_upgrade_obj, self._platform_token, None)
        future.result = (yield)
        if not future.result.is_complete():
            error_msg = (
                "Could not obtain deployment information from USM, "
                "check /var/log/nfv-vim.log or /var/log/software.log for more information."
            )
            response['error-message'] = error_msg
            return

        response['result-data'] = future.result.data
        response['completed'] = True

    except exceptions.OpenStackRestAPIException as e:
        # The error body is not guaranteed to be a JSON object; a parse
        # failure here must not escape the handler and mask the real error.
        try:
            body = json.loads(e.http_response_body)
        except (TypeError, ValueError):
            body = None
        if not isinstance(body, dict):
            body = {}

        error_msg = body.get("error", body.get("info"))
        if httplib.UNAUTHORIZED == e.http_status_code:
            response['error-code'] = nfvi.NFVI_ERROR_CODE.TOKEN_EXPIRED
            if self._platform_token is not None:
                self._platform_token.set_expired()

        elif httplib.NOT_ACCEPTABLE == e.http_status_code:
            if not error_msg:
                error_msg = (
                    "Unknown error while trying software deploy abort, "
                    "check /var/log/nfv-vim.log or /var/log/software.log for more information."
                )
            else:
                error_msg = f"Software deploy abort was rejected: {error_msg}"

        elif not error_msg:
            error_msg = f"Caught exception while trying software deploy abort, error={e}"

        if error_msg:
            response["error-message"] = error_msg.strip()

        # Always log the failure, even when the server supplied no message
        DLOG.exception(
            error_msg or
            f"Caught exception while trying software deploy abort, error={e}")

    except Exception as e:
        error_msg = f"Caught exception while trying software deploy abort, error={e}"
        response["error-message"] = error_msg
        DLOG.exception(error_msg)

    finally:
        callback.send(response)
        callback.close()
def sw_deploy_activate_rollback(self, future, callback):
    """
    Activate the rollback of a USM software deployment

    Coroutine: after each operation is scheduled with ``future.work`` the
    coroutine yields and stores the value sent back in ``future.result``.
    The response dictionary is always sent to ``callback`` (and the
    callback closed) on exit, whether or not the request succeeded.

    Response keys set here: 'completed', 'reason', 'complete-data', and
    on success 'result-data'; on failure possibly 'error-message' and
    'error-code'.
    """
    response = dict()
    response['completed'] = False
    response['reason'] = ''
    response['complete-data'] = ''

    try:
        future.set_timeouts(config.CONF.get('nfvi-timeouts', None))

        # Refresh the platform token only when missing or expired
        if self._platform_token is None or \
                self._platform_token.is_expired():
            future.work(openstack.get_token, self._platform_directory)
            future.result = (yield)

            if not future.result.is_complete() or \
                    future.result.data is None:
                DLOG.error("OpenStack get-token did not complete.")
                return

            self._platform_token = future.result.data

        future.work(usm.sw_deploy_activate_rollback, self._platform_token)
        future.result = (yield)
        if not future.result.is_complete():
            DLOG.error("USM software deploy activate-rollback did not complete.")
            return

        response['complete-data'] = future.result.data

        # Re-query the deployment so the caller gets the current state
        future.work(usm.sw_deploy_get_upgrade_obj, self._platform_token, None)
        future.result = (yield)
        if not future.result.is_complete():
            error_msg = (
                "Could not obtain deployment information from USM, "
                "check /var/log/nfv-vim.log or /var/log/software.log for more information."
            )
            response['error-message'] = error_msg
            return

        response['result-data'] = future.result.data
        response['completed'] = True

    except exceptions.OpenStackRestAPIException as e:
        # The error body is not guaranteed to be a JSON object; a parse
        # failure here must not escape the handler and mask the real error.
        try:
            body = json.loads(e.http_response_body)
        except (TypeError, ValueError):
            body = None
        if not isinstance(body, dict):
            body = {}

        error_msg = body.get("error", body.get("info"))
        if httplib.UNAUTHORIZED == e.http_status_code:
            response['error-code'] = nfvi.NFVI_ERROR_CODE.TOKEN_EXPIRED
            if self._platform_token is not None:
                self._platform_token.set_expired()

        elif httplib.NOT_ACCEPTABLE == e.http_status_code:
            if not error_msg:
                error_msg = (
                    "Unknown error while trying software deploy activate-rollback, "
                    "check /var/log/nfv-vim.log or /var/log/software.log for more information."
                )
            else:
                error_msg = f"Software deploy activate-rollback was rejected: {error_msg}"

        elif not error_msg:
            error_msg = f"Caught exception while trying software deploy activate-rollback, error={e}"

        if error_msg:
            response["error-message"] = error_msg.strip()

        # Always log the failure, even when the server supplied no message
        DLOG.exception(
            error_msg or
            f"Caught exception while trying software deploy activate-rollback, error={e}")

    except Exception as e:
        error_msg = f"Caught exception while trying software deploy activate-rollback, error={e}"
        response["error-message"] = error_msg
        DLOG.exception(error_msg)

    finally:
        callback.send(response)
        callback.close()
def delete_host_services(self, future, host_uuid, host_name,
host_personality, callback):
"""
@ -3816,7 +3976,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
callback.send(response)
callback.close()
def upgrade_host(self, future, host_uuid, host_name, callback):
def upgrade_host(self, future, host_uuid, host_name, rollback, callback):
"""
Upgrade a host
"""
@ -3826,6 +3986,8 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
response['host_name'] = host_name
response['reason'] = ''
response['complete-data'] = ''
kind = "deploy" if not rollback else "rollback"
method = usm.sw_deploy_execute if not rollback else usm.sw_deploy_rollback
try:
future.set_timeouts(config.CONF.get('nfvi-timeouts', None))
@ -3843,10 +4005,10 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
self._platform_token = future.result.data
future.work(usm.sw_deploy_execute, self._platform_token, host_name)
future.work(method, self._platform_token, host_name)
future.result = (yield)
if not future.result.is_complete():
DLOG.error("USM software deploy host %s did not complete." % host_name)
DLOG.error(f"USM software {kind} host %s did not complete." % host_name)
return
response['completed'] = True

View File

@ -67,7 +67,7 @@ def sw_deploy_get_releases(token):
Query USM for information about all releases
"""
uri = f"release" # noqa:F541 pylint: disable=W1309
uri = "release" # noqa:F541 pylint: disable=W1309
url = _usm_api_cmd(token, uri)
response = _api_get(token, url)
return response
@ -130,6 +130,17 @@ def sw_deploy_execute(token, host_name):
return response
def sw_deploy_rollback(token, host_name):
    """
    Ask USM to roll back a deployment on the given host

    POSTs an empty payload to the host's ``deploy_host/<host_name>/rollback``
    endpoint and returns the response from the POST helper.
    """
    endpoint = _usm_api_cmd(token, f"deploy_host/{host_name}/rollback")
    return _api_post(token, endpoint, {})
def sw_deploy_activate(token):
"""
Ask USM activate a deployment
@ -152,6 +163,28 @@ def sw_deploy_complete(token):
return response
def sw_deploy_abort(token):
    """
    Ask USM to abort a deployment

    POSTs an empty payload to the ``deploy/abort`` endpoint and returns
    the response from the POST helper.
    """
    # Plain literal: the URI has no placeholders, so no f-string (and no
    # lint suppression) is needed.
    uri = "deploy/abort"
    url = _usm_api_cmd(token, uri)
    response = _api_post(token, url, {})
    return response
def sw_deploy_activate_rollback(token):
    """
    Ask USM to activate the rollback of a deployment

    POSTs an empty payload to the ``deploy/activate-rollback`` endpoint and
    returns the response from the POST helper.
    """
    # Plain literal: the URI has no placeholders, so no f-string (and no
    # lint suppression) is needed.
    uri = "deploy/activate-rollback"
    url = _usm_api_cmd(token, uri)
    response = _api_post(token, url, {})
    return response
def sw_deploy_get_upgrade_obj(token, release):
"""Quickly gather all information about a software deployment"""

View File

@ -1826,3 +1826,441 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
sw_update_testcase.validate_strategy_persists(strategy)
sw_update_testcase.validate_phase(apply_phase, expected_results)
def _check_aiosx_rollback_apply_phase(self, deploy_state):
    """
    Build an AIO-SX major-release rollback strategy whose deployment is in
    ``deploy_state`` and verify the generated apply phase.

    Every rollback test below expects the same three stages; only the
    deploy state the rollback starts from differs.
    """
    release = '888.8'
    _, strategy = self._gen_aiosx_hosts_and_strategy(
        release=None,
        rollback=True,
        nfvi_upgrade=nfvi.objects.v1.Upgrade(
            release,
            {
                'state': 'deploying',
                'reboot_required': True,
                'sw_version': MAJOR_RELEASE_UPGRADE,
            },
            {
                'state': deploy_state,
                'from_release': INITIAL_RELEASE,
                'to_release': MAJOR_RELEASE_UPGRADE,
            },
            None,
        )
    )

    fake_upgrade_obj = SwUpgrade()
    strategy.sw_update_obj = fake_upgrade_obj

    strategy.build_complete(common_strategy.STRATEGY_RESULT.SUCCESS, "")

    apply_phase = strategy.apply_phase.as_dict()

    expected_results = {
        'total_stages': 3,
        'stages': [
            {
                'name': 'sw-upgrade-rollback-start',
                'total_steps': 3,
                'steps': [
                    {'name': 'query-alarms'},
                    {'name': 'sw-deploy-abort'},
                    {'name': 'sw-deploy-activate-rollback'},
                ],
            },
            {
                'name': 'sw-upgrade-worker-hosts',
                'total_steps': 6,
                'steps': [
                    {'name': 'query-alarms'},
                    {'name': 'lock-hosts', 'entity_names': ['controller-0']},
                    {'name': 'upgrade-hosts', 'entity_names': ['controller-0']},
                    {'name': 'system-stabilize', 'timeout': 15},
                    {'name': 'unlock-hosts'},
                    {'name': 'wait-alarms-clear', 'timeout': 2400},
                ]
            },
            {
                'name': 'sw-upgrade-rollback-complete',
                'total_steps': 1,
                'steps': [
                    {'name': 'query-alarms'},
                ],
            },
        ],
    }

    sw_update_testcase.validate_strategy_persists(strategy)
    sw_update_testcase.validate_phase(apply_phase, expected_results)


def test_sw_deploy_strategy_aiosx_rollback_from_complete(self):
    """
    Test the sw_deploy strategy apply phase:
    - aio-sx
    - major
    - completed
    Verify:
    - Pass
    """
    self._check_aiosx_rollback_apply_phase('completed')


def test_sw_deploy_strategy_aiosx_rollback_from_active_done(self):
    """
    Test the sw_deploy strategy apply phase:
    - aio-sx
    - major
    - activate-done
    Verify:
    - Pass
    """
    self._check_aiosx_rollback_apply_phase('activate-done')


def test_sw_deploy_strategy_aiosx_rollback_from_activate_failed(self):
    """
    Test the sw_deploy strategy apply phase:
    - aio-sx
    - major
    - activate-failed
    Verify:
    - Pass
    """
    self._check_aiosx_rollback_apply_phase('activate-failed')


def test_sw_deploy_strategy_aiosx_rollback_from_host_done(self):
    """
    Test the sw_deploy strategy apply phase:
    - aio-sx
    - major
    - host-done
    Verify:
    - Pass
    """
    self._check_aiosx_rollback_apply_phase('host-done')


def test_sw_deploy_strategy_aiosx_rollback_from_host_failed(self):
    """
    Test the sw_deploy strategy apply phase:
    - aio-sx
    - major
    - host-failed
    Verify:
    - Pass
    """
    self._check_aiosx_rollback_apply_phase('host-failed')


def test_sw_deploy_strategy_aiosx_rollback_from_host(self):
    """
    Test the sw_deploy strategy apply phase:
    - aio-sx
    - major
    - host
    Verify:
    - Pass
    """
    self._check_aiosx_rollback_apply_phase('host')

View File

@ -305,11 +305,11 @@ class HostDirector(object):
sw_mgmt_director = directors.get_sw_mgmt_director()
sw_mgmt_director.host_upgrade_changed(result)
def _nfvi_upgrade_host(self, host_uuid, host_name):
def _nfvi_upgrade_host(self, host_uuid, host_name, rollback):
"""
NFVI Upgrade Host
"""
nfvi.nfvi_upgrade_host(host_uuid, host_name,
nfvi.nfvi_upgrade_host(host_uuid, host_name, rollback,
self._nfvi_upgrade_host_callback())
@coroutine
@ -680,7 +680,7 @@ class HostDirector(object):
return host_operation
def upgrade_hosts(self, host_names):
def upgrade_hosts(self, host_names, rollback):
"""
Upgrade a list of hosts
"""
@ -705,7 +705,7 @@ class HostDirector(object):
return host_operation
host_operation.add_host(host.name, OPERATION_STATE.INPROGRESS)
self._nfvi_upgrade_host(host.uuid, host.name)
self._nfvi_upgrade_host(host.uuid, host.name, rollback=rollback)
if host_operation.is_inprogress():
self._host_operation = host_operation

View File

@ -151,6 +151,8 @@ from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_register_host_state_ch
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_register_host_update_callback # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_register_host_upgrade_callback # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_register_sw_update_get_callback # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_sw_deploy_abort # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_sw_deploy_activate_rollback # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_sw_deploy_precheck # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_swact_from_host # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_unlock_host # noqa: F401

View File

@ -439,6 +439,24 @@ def nfvi_upgrade_complete(release, callback):
return cmd_id
def nfvi_sw_deploy_abort(callback):
    """
    Software deploy abort

    Invokes the infrastructure plugin's 'sw_deploy_abort' command with the
    given callback and returns the command id from the plugin invocation.
    """
    return _infrastructure_plugin.invoke_plugin(
        'sw_deploy_abort', callback=callback)
def nfvi_sw_deploy_activate_rollback(callback):
    """
    Software deploy activate rollback

    Invokes the infrastructure plugin's 'sw_deploy_activate_rollback'
    command with the given callback and returns the command id from the
    plugin invocation.
    """
    return _infrastructure_plugin.invoke_plugin(
        'sw_deploy_activate_rollback', callback=callback)
def nfvi_disable_container_host_services(host_uuid, host_name,
host_personality, host_offline,
callback):
@ -578,12 +596,12 @@ def nfvi_reboot_host(host_uuid, host_name, callback):
return cmd_id
def nfvi_upgrade_host(host_uuid, host_name, callback):
def nfvi_upgrade_host(host_uuid, host_name, rollback, callback):
"""
Upgrade a host
"""
cmd_id = _infrastructure_plugin.invoke_plugin('upgrade_host', host_uuid,
host_name, callback=callback)
host_name, rollback, callback=callback)
return cmd_id

View File

@ -137,6 +137,38 @@ class Upgrade(ObjectData):
def is_activate_failed(self):
return self.deploy_state == usm_states.DEPLOY_STATES.ACTIVATE_FAILED.value
@property
def is_rollback(self):
    """
    True when the deploy state contains the text "rollback".

    Always returns a bool: a missing or empty deploy state yields False
    rather than leaking the falsy value itself (None or ""), matching the
    other state properties on this class.
    """
    state = self.deploy_state
    return bool(state) and "rollback" in state
@property
def is_activate_rollback(self):
    """True when the deploy state equals DEPLOY_STATES.ACTIVATE_ROLLBACK."""
    current_state = self.deploy_state
    return current_state == usm_states.DEPLOY_STATES.ACTIVATE_ROLLBACK.value
@property
def is_activate_rollback_pending(self):
    """True when the deploy state equals DEPLOY_STATES.ACTIVATE_ROLLBACK_PENDING."""
    current_state = self.deploy_state
    return current_state == usm_states.DEPLOY_STATES.ACTIVATE_ROLLBACK_PENDING.value
@property
def is_activate_rollback_done(self):
    """True when the deploy state equals DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE."""
    current_state = self.deploy_state
    return current_state == usm_states.DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE.value
@property
def is_activate_rollback_failed(self):
    """True when the deploy state equals DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED."""
    current_state = self.deploy_state
    return current_state == usm_states.DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED.value
@property
def is_rollback_hosts(self):
    """True when the deploy state equals DEPLOY_STATES.HOST_ROLLBACK."""
    current_state = self.deploy_state
    return current_state == usm_states.DEPLOY_STATES.HOST_ROLLBACK.value
@property
def is_rollback_hosts_done(self):
    """True when the deploy state equals DEPLOY_STATES.HOST_ROLLBACK_DONE."""
    current_state = self.deploy_state
    return current_state == usm_states.DEPLOY_STATES.HOST_ROLLBACK_DONE.value
@property
def is_rollback_hosts_failed(self):
    """True when the deploy state equals DEPLOY_STATES.HOST_ROLLBACK_FAILED."""
    current_state = self.deploy_state
    return current_state == usm_states.DEPLOY_STATES.HOST_ROLLBACK_FAILED.value
@property
def is_deploy_completed(self):
    """True when the deploy state equals DEPLOY_STATES.COMPLETED."""
    current_state = self.deploy_state
    return current_state == usm_states.DEPLOY_STATES.COMPLETED.value
@ -155,3 +187,11 @@ class Upgrade(ObjectData):
for v in self.hosts_info:
if v["hostname"] == hostname:
return v["host_state"] == usm_states.DEPLOY_HOST_STATES.DEPLOYED.value
def is_host_pending(self, hostname):
    """
    Report whether the named host is in the PENDING deploy-host state.

    Returns True/False for a host found in ``hosts_info``; returns None
    when ``hosts_info`` is empty/unset or has no entry for ``hostname``.
    """
    if self.hosts_info:
        for entry in self.hosts_info:
            if entry["hostname"] == hostname:
                return entry["host_state"] == usm_states.DEPLOY_HOST_STATES.PENDING.value
    return None

View File

@ -58,6 +58,8 @@ from nfv_vim.strategy._strategy_steps import StartInstancesStep # noqa: F401
from nfv_vim.strategy._strategy_steps import StopInstancesStep # noqa: F401
from nfv_vim.strategy._strategy_steps import STRATEGY_STEP_NAME # noqa: F401
from nfv_vim.strategy._strategy_steps import SwactHostsStep # noqa: F401
from nfv_vim.strategy._strategy_steps import SwDeployAbortStep # noqa: F401
from nfv_vim.strategy._strategy_steps import SwDeployActivateRollbackStep # noqa: F401
from nfv_vim.strategy._strategy_steps import SwDeployPrecheckStep # noqa: F401
from nfv_vim.strategy._strategy_steps import SwPatchHostsStep # noqa: F401
from nfv_vim.strategy._strategy_steps import SystemConfigUpdateHostsStep # noqa: F401

View File

@ -1259,6 +1259,14 @@ class UpdateWorkerHostsMixin(object):
hosts_to_lock = list()
hosts_to_reboot = list()
if reboot:
if (
isinstance(self, SwUpgradeStrategy) and
any(HOST_PERSONALITY.CONTROLLER in v.personality for v in host_list)
):
# Always lock/unlock controllers during rollback/upgrade
hosts_to_lock = host_list
hosts_to_reboot = []
else:
hosts_to_lock = [x for x in host_list if not x.is_locked()]
hosts_to_reboot = [x for x in host_list if x.is_locked()]
@ -1828,14 +1836,12 @@ class SwUpgradeStrategy(
super(SwUpgradeStrategy, self).build()
def _build_rollback(self):
reason = "Rollback not supported yet."
DLOG.warn(reason)
self._state = strategy.STRATEGY_STATE.BUILD_FAILED
self.build_phase.result = strategy.STRATEGY_PHASE_RESULT.FAILED
self.build_phase.result_reason = reason
self.sw_update_obj.strategy_build_complete(
False, self.build_phase.result_reason)
self.save()
from nfv_vim import strategy
stage = strategy.StrategyStage(strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_QUERY)
stage.add_step(strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms))
stage.add_step(strategy.QueryUpgradeStep(release=None))
self.build_phase.add_stage(stage)
super(SwUpgradeStrategy, self).build()
def build(self):
@ -2109,8 +2115,139 @@ class SwUpgradeStrategy(
self.sw_update_obj.strategy_build_complete(True, '')
self.save()
def _add_rollback_start_stage(self):
    """
    Add the rollback start stage to the apply phase

    The stage runs, in order: an alarm query that does not fail on alarms,
    the software deploy abort step, and the activate-rollback step.
    """
    from nfv_vim import strategy

    start_stage = strategy.StrategyStage(
        strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_ROLLBACK_START)

    alarm_query = strategy.QueryAlarmsStep(
        fail_on_alarms=False, ignore_alarms=self._ignore_alarms)
    start_stage.add_step(alarm_query)

    abort_step = strategy.SwDeployAbortStep()
    start_stage.add_step(abort_step)

    activate_rollback_step = strategy.SwDeployActivateRollbackStep()
    start_stage.add_step(activate_rollback_step)

    self.apply_phase.add_stage(start_stage)
def _add_rollback_hosts_stages(self):
    """
    Add the rollback hosts strategy stages

    Groups the hosts in the host table by personality (skipping hosts
    whose deploy-host state is still pending) and adds stages for worker,
    storage and controller hosts, in that order. On an unsupported
    personality, or when a stage builder reports failure, the build is
    marked failed, saved, and the method returns early.
    """
    from nfv_vim import strategy
    from nfv_vim import tables

    host_table = tables.tables_get_host_table()

    reboot_required = self.nfvi_upgrade.reboot_required
    controller_strategy = self._add_controller_strategy_stages
    controllers_hosts = list()
    storage_hosts = list()
    worker_hosts = list()

    for host in host_table.values():
        if self.nfvi_upgrade.is_host_pending(host.name):
            # f-string so the host name is actually interpolated in the log
            DLOG.info(f"Skipping host-rollback for pending host: {host.name}")
            continue

        if HOST_PERSONALITY.CONTROLLER in host.personality:
            controllers_hosts.append(host)
            if HOST_PERSONALITY.WORKER in host.personality:
                # We need to use this strategy on AIO type
                controller_strategy = self._add_worker_strategy_stages

        elif HOST_PERSONALITY.STORAGE in host.personality:
            storage_hosts.append(host)

        elif HOST_PERSONALITY.WORKER in host.personality:
            worker_hosts.append(host)

        else:
            DLOG.error(f"Unsupported personality for host {host.name}.")
            self._state = strategy.STRATEGY_STATE.BUILD_FAILED
            self.build_phase.result = \
                strategy.STRATEGY_PHASE_RESULT.FAILED
            self.build_phase.result_reason = \
                'Unsupported personality for host'
            self.sw_update_obj.strategy_build_complete(
                False, self.build_phase.result_reason)
            self.save()
            return

    # Sort the controller such that host other than
    # current local_host_name is the first element in the list.
    # This sorting is to reduce the number of swact required since
    # sw-deploy patch release orchestration can start on host that
    # is currently active.
    local_host_name = get_local_host_name()
    controllers_hosts = sorted(
        controllers_hosts,
        key=lambda x: x.name == local_host_name,
    )

    strategy_pairs = [
        (self._add_worker_strategy_stages, worker_hosts),
        (self._add_storage_strategy_stages, storage_hosts),
        (controller_strategy, controllers_hosts),
    ]

    for stage_func, host_list in strategy_pairs:
        if host_list:
            success, reason = stage_func(host_list, reboot_required)
            if not success:
                self._state = strategy.STRATEGY_STATE.BUILD_FAILED
                self.build_phase.result = \
                    strategy.STRATEGY_PHASE_RESULT.FAILED
                self.build_phase.result_reason = reason
                self.sw_update_obj.strategy_build_complete(
                    False, self.build_phase.result_reason)
                self.save()
                return
def _add_rollback_complete_stage(self):
    """
    Add the rollback complete stage to the apply phase

    The stage contains a single alarm query step that honours the
    strategy's ignore-alarms list.
    """
    from nfv_vim import strategy

    complete_stage = strategy.StrategyStage(
        strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_ROLLBACK_COMPLETE)
    alarm_query = strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms)
    complete_stage.add_step(alarm_query)
    self.apply_phase.add_stage(complete_stage)
def _build_complete_rollback(self, result, result_reason):
reason = "Rollback not supported yet."
from nfv_vim import strategy
reason = ""
result, result_reason = \
super(SwUpgradeStrategy, self).build_complete(result, result_reason)
DLOG.info("Build Complete Callback, result=%s, reason=%s."
% (result, result_reason))
if result not in [strategy.STRATEGY_RESULT.SUCCESS, strategy.STRATEGY_RESULT.DEGRADED]:
self.sw_update_obj.strategy_build_complete(
False, self.build_phase.result_reason)
self.sw_update_obj.strategy_build_complete(True, '')
self.save()
return
if not self.nfvi_upgrade.release_info or self.nfvi_upgrade.is_unavailable:
reason = "Software release does not exist or is unavailable."
elif not self.nfvi_upgrade.is_deploying:
reason = (
"Software release must be deploying for a rollback, " +
f"found={self.nfvi_upgrade.release_info}."
)
elif not self._single_controller:
reason = "Rollback only supported for AIO-SX currently"
elif not self.nfvi_upgrade.major_release:
reason = "Rollback only supported for major releases currently"
if reason:
DLOG.warn(reason)
self._state = strategy.STRATEGY_STATE.BUILD_FAILED
self.build_phase.result = strategy.STRATEGY_PHASE_RESULT.FAILED
@ -2118,6 +2255,12 @@ class SwUpgradeStrategy(
self.sw_update_obj.strategy_build_complete(
False, self.build_phase.result_reason)
self.save()
return
# Unlike with normal deployments we will defer skip logic to the steps
self._add_rollback_start_stage()
self._add_rollback_hosts_stages()
self._add_rollback_complete_stage()
def build_complete(self, result, result_reason):
"""

View File

@ -28,10 +28,12 @@ class StrategyStageNames(Constants):
# upgrade stages
SW_UPGRADE_QUERY = Constant('sw-upgrade-query')
SW_UPGRADE_START = Constant('sw-upgrade-start')
SW_UPGRADE_ROLLBACK_START = Constant('sw-upgrade-rollback-start')
SW_UPGRADE_CONTROLLERS = Constant('sw-upgrade-controllers')
SW_UPGRADE_STORAGE_HOSTS = Constant('sw-upgrade-storage-hosts')
SW_UPGRADE_WORKER_HOSTS = Constant('sw-upgrade-worker-hosts')
SW_UPGRADE_COMPLETE = Constant('sw-upgrade-complete')
SW_UPGRADE_ROLLBACK_COMPLETE = Constant('sw-upgrade-rollback-complete')
# firmware update stages
FW_UPDATE_QUERY = Constant('fw-update-query')
FW_UPDATE_HOSTS_QUERY = Constant('fw-update-hosts-query')

View File

@ -41,6 +41,8 @@ class StrategyStepNames(Constants):
SW_DEPLOY_PRECHECK = Constant('sw-deploy-precheck')
START_UPGRADE = Constant('start-upgrade')
ACTIVATE_UPGRADE = Constant('activate-upgrade')
SW_DEPLOY_ABORT = Constant('sw-deploy-abort')
SW_DEPLOY_ACTIVATE_ROLLBACK = Constant('sw-deploy-activate-rollback')
COMPLETE_UPGRADE = Constant('complete-upgrade')
SWACT_HOSTS = Constant('swact-hosts')
SW_PATCH_HOSTS = Constant('sw-patch-hosts')
@ -1096,7 +1098,7 @@ class UpgradeHostsStep(strategy.StrategyStep):
DLOG.info("Step (%s) apply for hosts %s." % (self._name,
self._host_names))
host_director = directors.get_host_director()
operation = host_director.upgrade_hosts(self._host_names)
operation = host_director.upgrade_hosts(self._host_names, self.strategy._rollback)
if operation.is_inprogress():
return strategy.STRATEGY_STEP_RESULT.WAIT, ""
elif operation.is_failed():
@ -1516,6 +1518,212 @@ class UpgradeCompleteStep(strategy.StrategyStep):
return data
class SwDeployAbortStep(strategy.StrategyStep):
    """
    Software Deploy Abort - Strategy Step
    """
    def __init__(self):
        super(SwDeployAbortStep, self).__init__(
            STRATEGY_STEP_NAME.SW_DEPLOY_ABORT, timeout_in_secs=60)

    @coroutine
    def _sw_deploy_abort_callback(self):
        """
        Handle the software deploy abort callback

        On completion, stores the returned deployment object on the strategy
        and completes the step successfully; otherwise fails the step with
        the reported (or a default) error message.
        """
        response = (yield)
        DLOG.debug("Handle SW-Deploy-Abort callback: response=%s." % response)

        if not response['completed']:
            reason = response.get(
                "error-message",
                "Unknown error while trying software deploy abort, "
                "check /var/log/nfv-vim.log or /var/log/software.log for more information."
            )
            failure = strategy.STRATEGY_STEP_RESULT.FAILED
            # The full response is recorded on the phase as the detailed reason
            self.phase.result_complete_response(str(response))
            self.stage.step_complete(failure, reason)
            return

        DLOG.debug("sw-deploy abort completed")
        self.strategy.nfvi_upgrade = response['result-data']
        self.stage.step_complete(strategy.STRATEGY_STEP_RESULT.SUCCESS, '')

    def apply(self):
        """
        Software deploy abort

        Skips the abort request (returning success) when the deployment is
        already in a rollback state; otherwise issues the abort and waits
        for the callback.
        """
        from nfv_vim import nfvi

        DLOG.info("Step (%s) apply." % self._name)

        if not self.strategy.nfvi_upgrade.is_rollback:
            nfvi.nfvi_sw_deploy_abort(self._sw_deploy_abort_callback())
            return strategy.STRATEGY_STEP_RESULT.WAIT, ""

        skip_reason = "Rollback already in progress, skipping abort call"
        outcome = strategy.STRATEGY_STEP_RESULT.SUCCESS
        DLOG.info(skip_reason)
        return outcome, skip_reason

    def from_dict(self, data):
        """
        Returns the software deploy abort step object initialized using the
        given dictionary
        """
        super(SwDeployAbortStep, self).from_dict(data)
        return self

    def as_dict(self):
        """
        Represent the software deploy abort step as a dictionary
        """
        step_data = super(SwDeployAbortStep, self).as_dict()
        # This step targets no specific entities
        step_data['entity_type'] = ''
        step_data['entity_names'] = []
        step_data['entity_uuids'] = []
        return step_data
class SwDeployActivateRollbackStep(strategy.StrategyStep):
    """
    Software Deploy Activate-Rollback - Strategy Step

    Requests an activate-rollback when needed, then re-queries the upgrade
    object on every host audit event until the activate-rollback is reported
    done or failed.
    """

    def __init__(self):
        super(SwDeployActivateRollbackStep, self).__init__(
            STRATEGY_STEP_NAME.SW_DEPLOY_ACTIVATE_ROLLBACK, timeout_in_secs=1830)
        # True while an upgrade query issued from handle_event is outstanding.
        self._query_inprogress = False

    @coroutine
    def _activate_rollback_callback(self):
        """
        Activate-Rollback Callback

        Only a response that did not complete is acted upon here (the step
        is failed); a completed response leaves the step waiting for the
        host audit polling to observe the final state.
        """
        response = (yield)
        DLOG.debug("Activate-Rollback callback response=%s." % response)

        if response['completed']:
            return

        reason = response.get(
            "error-message",
            "Unknown error while trying software deploy activate-rollback, "
            "check /var/log/nfv-vim.log or /var/log/software.log for more information."
        )
        # The whole response is recorded as the detailed failure reason.
        self.phase.result_complete_response(str(response))
        self.stage.step_complete(strategy.STRATEGY_STEP_RESULT.FAILED, reason)

    @coroutine
    def _handle_activate_rollback_callback(self):
        """
        Handle the upgrade query callback issued on host audit

        Stores the refreshed upgrade object on the strategy and completes
        the step with SUCCESS when activate-rollback is done, or FAILED when
        it failed or is unexpectedly no longer in the activate-rollback
        state. While it is still in progress the step is left waiting.
        """
        response = (yield)
        DLOG.debug("Handle Activate-Rollback callback response=%s." % response)

        # Let the next host audit event issue a fresh query.
        self._query_inprogress = False

        if not response['completed']:
            # Something went wrong while collecting state info
            return

        self.strategy.nfvi_upgrade = response['result-data']
        upgrade = self.strategy.nfvi_upgrade

        if upgrade.is_activate_rollback_done:
            DLOG.debug("Handle Activate-Rollback callback, deploy activate is done")
            self.stage.step_complete(strategy.STRATEGY_STEP_RESULT.SUCCESS, "")
            return

        if upgrade.is_activate_rollback_failed:
            reason = (
                "Failed software deploy activate-rollback, "
                "check /var/log/nfv-vim.log or /var/log/software.log for more information."
            )
        elif not upgrade.is_activate_rollback:
            reason = (
                "Unknown error while doing software deploy activate-rollback, "
                "check /var/log/nfv-vim.log or /var/log/software.log for more information."
            )
        else:
            DLOG.debug("Handle Activate-Rollback callback, deploy activate in progress...")
            return

        # Shared failure path for both failure reasons above.
        self.phase.result_complete_response(str(response))
        self.stage.step_complete(strategy.STRATEGY_STEP_RESULT.FAILED, reason)

    def apply(self):
        """
        Software Deploy Activate-Rollback

        Returns a (result, reason) tuple with an empty reason. The result is
        WAIT when an activate-rollback is already running or has just been
        requested, and SUCCESS when it is already done or not required.
        """
        # NOTE(review): imported locally, presumably to avoid an import
        # cycle with nfv_vim.nfvi - confirm before hoisting.
        from nfv_vim import nfvi

        DLOG.info("Step (%s) apply." % self._name)

        waiting = strategy.STRATEGY_STEP_RESULT.WAIT
        upgrade = self.strategy.nfvi_upgrade

        if upgrade.is_activate_rollback:
            DLOG.info("Deployment already activating, skipping activate call")
            return waiting, ""

        if upgrade.is_activate_rollback_done:
            DLOG.info("Deployment already activated, skipping activate call")
            return strategy.STRATEGY_STEP_RESULT.SUCCESS, ""

        if (upgrade.is_activate_rollback_pending or
                upgrade.is_activate_rollback_failed):
            # A previously failed activate-rollback is requested again.
            nfvi.nfvi_sw_deploy_activate_rollback(self._activate_rollback_callback())
            return waiting, ""

        DLOG.info("software deploy activate-rollback not required, skipping call")
        return strategy.STRATEGY_STEP_RESULT.SUCCESS, ""

    def handle_event(self, event, event_data=None):
        """
        Handle Host events

        On a host audit event, queries the current upgrade object unless a
        query is already outstanding. Returns True when the event was a host
        audit event, False otherwise.
        """
        from nfv_vim import nfvi

        DLOG.debug("Step (%s) handle event (%s)." % (self._name, event))

        if event != STRATEGY_EVENT.HOST_AUDIT:
            return False

        if not self._query_inprogress:
            self._query_inprogress = True
            nfvi.nfvi_get_upgrade(None, self._handle_activate_rollback_callback())
        return True

    def from_dict(self, data):
        """
        Returns the activate-rollback step object initialized using the given
        dictionary
        """
        super(SwDeployActivateRollbackStep, self).from_dict(data)
        # A rebuilt step never has a query outstanding.
        self._query_inprogress = False
        return self

    def as_dict(self):
        """
        Represent the activate-rollback step as a dictionary
        """
        data = super(SwDeployActivateRollbackStep, self).as_dict()
        # This step is not tied to any particular entity.
        data.update(entity_type='', entity_names=[], entity_uuids=[])
        return data


class MigrateInstancesFromHostStep(strategy.StrategyStep):
"""
Migrate Instances From Host - Strategy Step
@ -5302,6 +5510,12 @@ def strategy_step_rebuild_from_dict(data):
elif STRATEGY_STEP_NAME.COMPLETE_UPGRADE == data['name']:
step_obj = object.__new__(UpgradeCompleteStep)
elif STRATEGY_STEP_NAME.SW_DEPLOY_ABORT == data['name']:
step_obj = object.__new__(SwDeployAbortStep)
elif STRATEGY_STEP_NAME.SW_DEPLOY_ACTIVATE_ROLLBACK == data['name']:
step_obj = object.__new__(SwDeployActivateRollbackStep)
elif STRATEGY_STEP_NAME.SW_PATCH_HOSTS == data['name']:
step_obj = object.__new__(SwPatchHostsStep)