Merge "Rework sw-deploy-host logic and error messaging"
This commit is contained in:
commit
719621fe6e
@ -2367,8 +2367,9 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
|
||||
elif not error_msg:
|
||||
error_msg = f"Caught exception while trying to query deployment info, error={e}"
|
||||
|
||||
response["error-message"] = error_msg
|
||||
DLOG.exception(error_msg)
|
||||
if error_msg:
|
||||
response["error-message"] = error_msg.strip()
|
||||
DLOG.exception(error_msg)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Caught exception while trying to query deployment info, error={e}"
|
||||
@ -2429,12 +2430,15 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
|
||||
"Unknown error while trying software deploy precheck, "
|
||||
"check /var/log/nfv-vim.log or /var/log/software.log for more information."
|
||||
)
|
||||
else:
|
||||
error_msg = f"Software deploy precheck was rejected: {error_msg}"
|
||||
|
||||
elif not error_msg:
|
||||
error_msg = f"Caught exception while trying software deploy precheck, error={e}"
|
||||
|
||||
response["error-message"] = error_msg.strip()
|
||||
DLOG.exception(error_msg)
|
||||
if error_msg:
|
||||
response["error-message"] = error_msg.strip()
|
||||
DLOG.exception(error_msg)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Caught exception while trying software deploy precheck, error={e}"
|
||||
@ -2504,12 +2508,15 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
|
||||
"Unknown error while trying software deploy start, "
|
||||
"check /var/log/nfv-vim.log or /var/log/software.log for more information."
|
||||
)
|
||||
else:
|
||||
error_msg = f"Software deploy start was rejected: {error_msg}"
|
||||
|
||||
elif not error_msg:
|
||||
error_msg = f"Caught exception while trying software deploy start, error={e}"
|
||||
|
||||
response["error-message"] = error_msg.strip()
|
||||
DLOG.exception(error_msg)
|
||||
if error_msg:
|
||||
response["error-message"] = error_msg.strip()
|
||||
DLOG.exception(error_msg)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Caught exception while trying software deploy start, error={e}"
|
||||
@ -2581,12 +2588,15 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
|
||||
"Unknown error while trying software deploy activate, "
|
||||
"check /var/log/nfv-vim.log or /var/log/software.log for more information."
|
||||
)
|
||||
else:
|
||||
error_msg = f"Software deploy activate was rejected: {error_msg}"
|
||||
|
||||
elif not error_msg:
|
||||
error_msg = f"Caught exception while trying software deploy activate, error={e}"
|
||||
|
||||
response["error-message"] = error_msg.strip()
|
||||
DLOG.exception(error_msg)
|
||||
if error_msg:
|
||||
response["error-message"] = error_msg.strip()
|
||||
DLOG.exception(error_msg)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Caught exception while trying software deploy activate, error={e}"
|
||||
@ -2657,12 +2667,15 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
|
||||
"Unknown error while trying software deploy complete, "
|
||||
"check /var/log/nfv-vim.log or /var/log/software.log for more information."
|
||||
)
|
||||
else:
|
||||
error_msg = f"Software deploy complete was rejected: {error_msg}"
|
||||
|
||||
elif not error_msg:
|
||||
error_msg = f"Caught exception while trying software deploy complete, error={e}"
|
||||
|
||||
response["error-message"] = error_msg.strip()
|
||||
DLOG.exception(error_msg)
|
||||
if error_msg:
|
||||
response["error-message"] = error_msg.strip()
|
||||
DLOG.exception(error_msg)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Caught exception while trying software deploy complete, error={e}"
|
||||
@ -3841,22 +3854,23 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
|
||||
|
||||
except exceptions.OpenStackRestAPIException as e:
|
||||
x = json.loads(e.http_response_body)
|
||||
msg = x.get("error", x.get("info"))
|
||||
response["error-message"] = msg.strip()
|
||||
error_msg = x.get("error", x.get("info"))
|
||||
if httplib.UNAUTHORIZED == e.http_status_code:
|
||||
response['error-code'] = nfvi.NFVI_ERROR_CODE.TOKEN_EXPIRED
|
||||
if self._platform_token is not None:
|
||||
self._platform_token.set_expired()
|
||||
|
||||
else:
|
||||
DLOG.exception("Caught exception while trying to upgrade "
|
||||
"a host %s, error=%s." % (host_name, e))
|
||||
response['reason'] = e.http_response_reason
|
||||
response["error-message"] = msg.strip()
|
||||
elif not error_msg:
|
||||
error_msg = f"Caught exception while trying software deploy host {host_name}, error={e}"
|
||||
|
||||
if error_msg:
|
||||
response["error-message"] = error_msg.strip()
|
||||
DLOG.exception(error_msg)
|
||||
|
||||
except Exception as e:
|
||||
DLOG.exception("Caught exception while trying to upgrade a "
|
||||
"host %s, error=%s." % (host_name, e))
|
||||
error_msg = f"Caught exception while trying software deploy host {host_name}, error={e}"
|
||||
response["error-message"] = error_msg
|
||||
DLOG.exception(error_msg)
|
||||
|
||||
finally:
|
||||
callback.send(response)
|
||||
|
@ -162,29 +162,45 @@ def sw_deploy_get_upgrade_obj(token, release):
|
||||
release_data = sw_deploy_get_releases(token).result_data
|
||||
deploy_data = sw_deploy_show(token).result_data
|
||||
hosts_info_data = sw_deploy_host_list(token).result_data
|
||||
error_template = "{}, check /var/log/nfv-vim.log or /var/log/software.log for more information."
|
||||
|
||||
# Parse responses
|
||||
for rel in release_data:
|
||||
if release and rel['release_id'] == release:
|
||||
release_info = rel
|
||||
break
|
||||
elif not release and rel['state'] == usm_states.DEPLOYING:
|
||||
release = rel['release_id']
|
||||
release_info = rel
|
||||
break
|
||||
try:
|
||||
for rel in release_data:
|
||||
if release and rel['release_id'] == release:
|
||||
release_info = rel
|
||||
break
|
||||
elif not release and rel['state'] == usm_states.DEPLOYING:
|
||||
release = rel['release_id']
|
||||
release_info = rel
|
||||
break
|
||||
except Exception as e:
|
||||
error = "Failed to parse 'software list'"
|
||||
DLOG.exception(f"{error}: {release_data}")
|
||||
raise ValueError(error_template.format(error)) from e
|
||||
|
||||
if not release_info:
|
||||
if release:
|
||||
error_msg = f"Software release not found: {release}"
|
||||
error = f"Software release not found: {release}"
|
||||
else:
|
||||
error_msg = "Software release not found"
|
||||
raise EnvironmentError(error_msg)
|
||||
error = "Software release not found"
|
||||
raise EnvironmentError(error)
|
||||
|
||||
if deploy_data:
|
||||
deploy_info = deploy_data[0]
|
||||
try:
|
||||
if deploy_data:
|
||||
deploy_info = deploy_data[0]
|
||||
except Exception as e:
|
||||
error = "Failed to parse 'software deploy show'"
|
||||
DLOG.exception(f"{error}: {deploy_data}")
|
||||
raise ValueError(error_template.format(error)) from e
|
||||
|
||||
if hosts_info_data:
|
||||
hosts_info = hosts_info_data
|
||||
try:
|
||||
if hosts_info_data:
|
||||
hosts_info = hosts_info_data
|
||||
except Exception as e:
|
||||
error = "Failed to parse 'software deploy host-list'"
|
||||
DLOG.exception(f"{error}: {hosts_info_data}")
|
||||
raise ValueError(error_template.format(error)) from e
|
||||
|
||||
upgrade_obj = nfvi.objects.v1.Upgrade(
|
||||
release,
|
||||
|
@ -26,6 +26,8 @@ PATCH_RELEASE_UPGRADE = "3.2.2"
|
||||
MINOR_RELEASE_UPGRADE = "4.0.1"
|
||||
MAJOR_RELEASE_UPGRADE = "4.0.1"
|
||||
|
||||
DEPLOY_START_DELAY = 120
|
||||
|
||||
|
||||
# utility method for the formatting of unlock-hosts stage as dict
|
||||
# workers default to 5 retries with 120 second delay between attempts
|
||||
@ -278,7 +280,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
{'name': 'start-upgrade',
|
||||
'release': release},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 15},
|
||||
'timeout': DEPLOY_START_DELAY},
|
||||
]
|
||||
}
|
||||
]
|
||||
@ -326,7 +328,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
{'name': 'start-upgrade',
|
||||
'release': release},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 15},
|
||||
'timeout': DEPLOY_START_DELAY},
|
||||
]
|
||||
}
|
||||
]
|
||||
@ -373,7 +375,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
{'name': 'start-upgrade',
|
||||
'release': release},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 15},
|
||||
'timeout': DEPLOY_START_DELAY},
|
||||
]
|
||||
}
|
||||
]
|
||||
@ -423,7 +425,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
{'name': 'start-upgrade',
|
||||
'release': release},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 15},
|
||||
'timeout': DEPLOY_START_DELAY},
|
||||
]
|
||||
}
|
||||
]
|
||||
@ -1224,7 +1226,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
'steps': [
|
||||
{'name': 'query-alarms'},
|
||||
{'name': 'start-upgrade', 'release': release},
|
||||
{'name': 'system-stabilize', 'timeout': 15},
|
||||
{'name': 'system-stabilize', 'timeout': DEPLOY_START_DELAY},
|
||||
],
|
||||
},
|
||||
{
|
||||
@ -1291,7 +1293,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
'steps': [
|
||||
{'name': 'query-alarms'},
|
||||
{'name': 'start-upgrade', 'release': release},
|
||||
{'name': 'system-stabilize', 'timeout': 15},
|
||||
{'name': 'system-stabilize', 'timeout': DEPLOY_START_DELAY},
|
||||
],
|
||||
},
|
||||
{
|
||||
@ -1362,7 +1364,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
'steps': [
|
||||
{'name': 'query-alarms'},
|
||||
{'name': 'start-upgrade', 'release': release},
|
||||
{'name': 'system-stabilize', 'timeout': 15},
|
||||
{'name': 'system-stabilize', 'timeout': DEPLOY_START_DELAY},
|
||||
],
|
||||
},
|
||||
{
|
||||
@ -1438,7 +1440,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
'steps': [
|
||||
{'name': 'query-alarms'},
|
||||
{'name': 'start-upgrade', 'release': release},
|
||||
{'name': 'system-stabilize', 'timeout': 15},
|
||||
{'name': 'system-stabilize', 'timeout': DEPLOY_START_DELAY},
|
||||
],
|
||||
},
|
||||
{
|
||||
@ -1526,7 +1528,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
'steps': [
|
||||
{'name': 'query-alarms'},
|
||||
{'name': 'start-upgrade', 'release': release},
|
||||
{'name': 'system-stabilize', 'timeout': 15},
|
||||
{'name': 'system-stabilize', 'timeout': DEPLOY_START_DELAY},
|
||||
],
|
||||
},
|
||||
{
|
||||
@ -1624,7 +1626,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
'steps': [
|
||||
{'name': 'query-alarms'},
|
||||
{'name': 'start-upgrade', 'release': release},
|
||||
{'name': 'system-stabilize', 'timeout': 15},
|
||||
{'name': 'system-stabilize', 'timeout': DEPLOY_START_DELAY},
|
||||
],
|
||||
},
|
||||
{
|
||||
@ -1746,7 +1748,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
{'name': 'swact-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'start-upgrade', 'release': release},
|
||||
{'name': 'system-stabilize', 'timeout': 15},
|
||||
{'name': 'system-stabilize', 'timeout': DEPLOY_START_DELAY},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -36,6 +36,8 @@ IGNORE_ALARMS_LIST = [
|
||||
"900.231",
|
||||
]
|
||||
|
||||
DEPLOY_START_DELAY = 120
|
||||
|
||||
|
||||
# TODO(jkraitbe): Update this when retry count is decided.
|
||||
# utility method for the formatting of unlock-hosts stage as dict
|
||||
@ -1432,7 +1434,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
{'name': 'start-upgrade',
|
||||
'release': strategy.nfvi_upgrade.release},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 15},
|
||||
'timeout': DEPLOY_START_DELAY},
|
||||
]
|
||||
},
|
||||
{'name': 'sw-upgrade-controllers',
|
||||
@ -1602,7 +1604,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
||||
{'name': 'start-upgrade',
|
||||
'release': strategy.nfvi_upgrade.release},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 15},
|
||||
'timeout': DEPLOY_START_DELAY},
|
||||
]
|
||||
},
|
||||
{'name': 'sw-upgrade-controllers',
|
||||
|
@ -273,27 +273,37 @@ class HostDirector(object):
|
||||
|
||||
response = (yield)
|
||||
DLOG.verbose("NFVI Upgrade Host callback response=%s." % response)
|
||||
|
||||
host_table = tables.tables_get_host_table()
|
||||
host = host_table.get(response['host_name'], None)
|
||||
if host is None:
|
||||
DLOG.verbose("Host %s does not exist." % response['host_name'])
|
||||
return
|
||||
|
||||
if self._host_operation is None:
|
||||
DLOG.verbose("No host %s operation in progress." % host.name)
|
||||
return
|
||||
|
||||
if OPERATION_TYPE.UPGRADE_HOSTS != self._host_operation.operation_type:
|
||||
DLOG.verbose("Unexpected host %s operation %s, ignoring."
|
||||
% (host.name, self._host_operation.operation_type))
|
||||
return
|
||||
|
||||
if not response['completed']:
|
||||
DLOG.info("Upgrade of host %s failed, reason=%s."
|
||||
% (response['host_name'], response.get('error-message', response['reason'])))
|
||||
|
||||
result = {"host": host, "error-message": response["error-message"]}
|
||||
sw_mgmt_director = directors.get_sw_mgmt_director()
|
||||
sw_mgmt_director.host_upgrade_failed(result)
|
||||
|
||||
else:
|
||||
DLOG.info("Upgrade of host %s succeeded, reason=%s."
|
||||
% (response['host_name'], response['reason']))
|
||||
|
||||
host_table = tables.tables_get_host_table()
|
||||
host = host_table.get(response['host_name'], None)
|
||||
if host is None:
|
||||
DLOG.verbose("Host %s does not exist." % response['host_name'])
|
||||
return
|
||||
|
||||
if self._host_operation is None:
|
||||
DLOG.verbose("No host %s operation in progress." % host.name)
|
||||
return
|
||||
|
||||
if OPERATION_TYPE.UPGRADE_HOSTS != self._host_operation.operation_type:
|
||||
DLOG.verbose("Unexpected host %s operation %s, ignoring."
|
||||
% (host.name, self._host_operation.operation_type))
|
||||
return
|
||||
|
||||
result = {"host": host}
|
||||
sw_mgmt_director = directors.get_sw_mgmt_director()
|
||||
sw_mgmt_director.host_upgrade_failed(host)
|
||||
sw_mgmt_director.host_upgrade_changed(result)
|
||||
|
||||
def _nfvi_upgrade_host(self, host_uuid, host_name):
|
||||
"""
|
||||
|
@ -351,13 +351,21 @@ class SwMgmtDirector(object):
|
||||
self._sw_update.handle_event(
|
||||
strategy.STRATEGY_EVENT.HOST_SWACT_FAILED, host)
|
||||
|
||||
def host_upgrade_failed(self, host):
|
||||
def host_upgrade_failed(self, result):
|
||||
"""
|
||||
Called when an upgrade of a host failed
|
||||
"""
|
||||
if self._sw_update is not None:
|
||||
self._sw_update.handle_event(
|
||||
strategy.STRATEGY_EVENT.HOST_UPGRADE_FAILED, host)
|
||||
strategy.STRATEGY_EVENT.HOST_UPGRADE_FAILED, result)
|
||||
|
||||
def host_upgrade_changed(self, result):
    """
    Called when an upgrade of a host succeeded

    Forwards result unchanged as the event data of a HOST_UPGRADE_CHANGED
    strategy event. Nothing happens when no software update is active.
    """
    sw_update = self._sw_update
    if sw_update is None:
        # No software update in progress, so there is nobody to notify.
        return

    sw_update.handle_event(
        strategy.STRATEGY_EVENT.HOST_UPGRADE_CHANGED, result)
|
||||
|
||||
def host_fw_update_abort_failed(self, host):
|
||||
"""
|
||||
|
@ -141,6 +141,13 @@ class Upgrade(ObjectData):
|
||||
def is_deploy_completed(self):
|
||||
return self.deploy_state == usm_states.DEPLOY_STATES.COMPLETED.value
|
||||
|
||||
@property
def host_states(self):
    """
    Map each host name to its software deploy host state.

    Returns a dict keyed by the 'hostname' of every entry in hosts_info,
    with that entry's 'host_state' as the value. An empty dict is
    returned when no host information is available.
    """
    # hosts_info can be empty or unset (is_host_deployed guards against the
    # same case), so treat a falsy value as "no hosts" instead of raising
    # a TypeError while iterating.
    hosts_info = self.hosts_info or ()
    return {
        entry["hostname"]: entry["host_state"]
        for entry in hosts_info
    }
|
||||
|
||||
def is_host_deployed(self, hostname):
|
||||
if not self.hosts_info:
|
||||
return None
|
||||
|
@ -47,6 +47,8 @@ STRATEGY_NAME = StrategyNames()
|
||||
MTCE_DELAY = 15
|
||||
# a no-reboot patch can stabilize in 30 seconds
|
||||
NO_REBOOT_DELAY = 30
|
||||
# How long to wait after deploy-start-done
|
||||
DEPLOY_START_DONE_DELAY = 120
|
||||
|
||||
# constants used by the patching API for state and repo state
|
||||
PATCH_REPO_STATE_APPLIED = 'Applied'
|
||||
@ -1902,7 +1904,8 @@ class SwUpgradeStrategy(
|
||||
# sw-deploy start for major releases must be done on controller-0
|
||||
self._swact_fix(stage, HOST_NAME.CONTROLLER_1)
|
||||
stage.add_step(strategy.UpgradeStartStep(release=self._release))
|
||||
stage.add_step(strategy.SystemStabilizeStep(timeout_in_secs=MTCE_DELAY))
|
||||
# There can be alarms related to CPU/memory/disk usage after start
|
||||
stage.add_step(strategy.SystemStabilizeStep(DEPLOY_START_DONE_DELAY))
|
||||
self.apply_phase.add_stage(stage)
|
||||
|
||||
def _add_upgrade_hosts_stages(self):
|
||||
@ -1918,7 +1921,7 @@ class SwUpgradeStrategy(
|
||||
|
||||
for host in host_table.values():
|
||||
if self.nfvi_upgrade.is_host_deployed(host.name):
|
||||
DLOG.info("Skipping deploy-host for already deployed host: {host.name}")
|
||||
DLOG.info(f"Skipping deploy-host for already deployed host: {host.name}")
|
||||
continue
|
||||
|
||||
if HOST_PERSONALITY.CONTROLLER in host.personality:
|
||||
|
@ -18,6 +18,7 @@ class EventNames(object):
|
||||
HOST_UNLOCK_FAILED = Constant('host-unlock-failed')
|
||||
HOST_REBOOT_FAILED = Constant('host-reboot-failed')
|
||||
HOST_UPGRADE_FAILED = Constant('host-upgrade-failed')
|
||||
HOST_UPGRADE_CHANGED = Constant('host-upgrade-changed')
|
||||
HOST_FW_UPDATE_FAILED = Constant('host-fw-update-failed')
|
||||
HOST_FW_UPDATE_ABORT_FAILED = Constant('host-fw-update-abort-failed')
|
||||
HOST_SWACT_FAILED = Constant('host-swact-failed')
|
||||
|
@ -3,6 +3,7 @@
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
import json
|
||||
import six
|
||||
|
||||
from nfv_common import debug
|
||||
@ -16,6 +17,7 @@ from nfv_vim import objects
|
||||
from nfv_vim.strategy._strategy_defs import FW_UPDATE_LABEL
|
||||
from nfv_vim.strategy._strategy_defs import STRATEGY_EVENT
|
||||
from nfv_vim import tables
|
||||
import software.states as usm_states
|
||||
|
||||
DLOG = debug.debug_get_logger('nfv_vim.strategy.step')
|
||||
|
||||
@ -24,11 +26,6 @@ KUBE_CERT_UPDATE_TRUSTBOTHCAS = "trust-both-cas"
|
||||
KUBE_CERT_UPDATE_TRUSTNEWCA = "trust-new-ca"
|
||||
KUBE_CERT_UPDATE_UPDATECERTS = "update-certs"
|
||||
|
||||
# sw-deploy strategy constants
|
||||
SW_DEPLOY_START = 'start-done'
|
||||
SW_HOST_DEPLOYED = 'deployed'
|
||||
SW_DEPLOY_ACTIVATE_DONE = 'activate-done'
|
||||
|
||||
|
||||
@six.add_metaclass(Singleton)
|
||||
class StrategyStepNames(Constants):
|
||||
@ -1008,38 +1005,84 @@ class UpgradeHostsStep(strategy.StrategyStep):
|
||||
for host in hosts:
|
||||
self._host_names.append(host.name)
|
||||
self._query_inprogress = False
|
||||
self._step_complete = False
|
||||
self._deployed_hosts = {}
|
||||
self._failed_hosts = {}
|
||||
self._unknown_hosts = 0
|
||||
|
||||
def _get_upgrade_callback_inner(self, response):
|
||||
"""
|
||||
Get Upgrade Callback
|
||||
"""
|
||||
|
||||
if not response['completed']:
|
||||
return False
|
||||
|
||||
self.strategy.nfvi_upgrade = response['result-data']
|
||||
hosts_states = self.strategy.nfvi_upgrade.host_states
|
||||
# This information is already in the response, but this adds an easy view.
|
||||
response['hosts-states'] = hosts_states
|
||||
|
||||
completed_hosts = self._deployed_hosts.keys() | self._failed_hosts.keys()
|
||||
missing_hosts = set(self._host_names) - completed_hosts
|
||||
|
||||
if len(missing_hosts) - self._unknown_hosts > 0:
|
||||
# TODO(jkraitbe): Allow reason to be updated during STRATEGY_STEP_RESULT.WAIT
|
||||
reason = f"Deploy hosts still in progress, waiting for: {missing_hosts}"
|
||||
DLOG.error(reason)
|
||||
return False
|
||||
|
||||
# Determine if any hosts failed and why
|
||||
failed_hosts = {}
|
||||
for v in self._host_names:
|
||||
if v in self._deployed_hosts:
|
||||
continue
|
||||
|
||||
fail_reason = None
|
||||
if v not in hosts_states:
|
||||
fail_reason = self._failed_hosts.get(v, "Missing host from software deploy host-list")
|
||||
elif usm_states.DEPLOY_HOST_STATES.PENDING.value in hosts_states[v]:
|
||||
fail_reason = self._failed_hosts.get(v, "Host was detected in pending state")
|
||||
elif usm_states.DEPLOY_HOST_STATES.FAILED.value in hosts_states[v]:
|
||||
fail_reason = self._failed_hosts.get(v, "Host was detected in failed state")
|
||||
elif usm_states.DEPLOY_HOST_STATES.DEPLOYING.value in hosts_states[v]:
|
||||
fail_reason = self._failed_hosts.get(v, "Host was still deploying when it was expected to be done")
|
||||
elif usm_states.DEPLOY_HOST_STATES.DEPLOYED.value not in hosts_states[v]:
|
||||
fail_reason = self._failed_hosts.get(v, f"Host was detected in invalid state: {hosts_states[v]}")
|
||||
|
||||
if fail_reason:
|
||||
failed_hosts[v] = fail_reason
|
||||
DLOG.error(f"{v}: {fail_reason}")
|
||||
|
||||
# # Wait for all hosts to be done transitioning before declaring pass/fail
|
||||
if failed_hosts:
|
||||
response['failed-hosts'] = failed_hosts
|
||||
reason = f"Deploy hosts failed for some hosts: {json.dumps(failed_hosts, indent=2)}"
|
||||
result = strategy.STRATEGY_STEP_RESULT.FAILED
|
||||
detailed_reason = str(response)
|
||||
self.phase.result_complete_response(detailed_reason)
|
||||
self.stage.step_complete(result, reason)
|
||||
return True
|
||||
|
||||
reason = "Deploy hosts succeeded for all hosts"
|
||||
result = strategy.STRATEGY_STEP_RESULT.SUCCESS
|
||||
DLOG.info(reason)
|
||||
self.stage.step_complete(result, reason)
|
||||
return True
|
||||
|
||||
@coroutine
|
||||
def _get_upgrade_callback(self):
|
||||
"""
|
||||
Get Upgrade Callback
|
||||
"""
|
||||
|
||||
response = (yield)
|
||||
DLOG.debug("Query-Upgrade callback response=%s." % response)
|
||||
self._query_inprogress = False
|
||||
if response['completed']:
|
||||
self.strategy.nfvi_upgrade = response['result-data']
|
||||
host_count = 0
|
||||
match_count = 0
|
||||
host_info_list = self.strategy.nfvi_upgrade['hosts_info']
|
||||
for host_name in self._host_names:
|
||||
for host in host_info_list:
|
||||
if (host_name == host['hostname']) and (host['host_state'] == SW_HOST_DEPLOYED):
|
||||
match_count += 1
|
||||
host_count += 1
|
||||
if match_count == len(self._host_names):
|
||||
result = strategy.STRATEGY_STEP_RESULT.SUCCESS
|
||||
DLOG.info("Upgrade Hosts completed")
|
||||
self.stage.step_complete(result, "")
|
||||
else:
|
||||
# keep waiting for Upgrade host state to change
|
||||
pass
|
||||
else:
|
||||
result = strategy.STRATEGY_STEP_RESULT.FAILED
|
||||
DLOG.info("Host Upgrade failed")
|
||||
detailed_reason = str(response)
|
||||
self.phase.result_complete_response(detailed_reason)
|
||||
self.stage.step_complete(result, response['reason'])
|
||||
|
||||
try:
|
||||
self._step_complete = self._get_upgrade_callback_inner(response)
|
||||
finally:
|
||||
self._query_inprogress = False
|
||||
|
||||
def apply(self):
|
||||
"""
|
||||
@ -1066,14 +1109,36 @@ class UpgradeHostsStep(strategy.StrategyStep):
|
||||
|
||||
DLOG.debug("Step (%s) handle event (%s)." % (self._name, event))
|
||||
|
||||
update = False
|
||||
|
||||
if event == STRATEGY_EVENT.HOST_UPGRADE_FAILED:
|
||||
host = event_data
|
||||
if host is not None and host.name in self._host_names:
|
||||
result = strategy.STRATEGY_STEP_RESULT.FAILED
|
||||
self.stage.step_complete(result, "host upgrade failed")
|
||||
return True
|
||||
host = event_data["host"]
|
||||
if host and host.name in self._host_names:
|
||||
error = f"Failed software deploy host {host.name}: {event_data['error-message']}"
|
||||
self._failed_hosts[host.name] = error
|
||||
DLOG.error(error)
|
||||
else:
|
||||
DLOG.error(f"Unknown software deploy host failed: {event_data}")
|
||||
self._unknown_hosts += 1
|
||||
update = True
|
||||
|
||||
elif event == STRATEGY_EVENT.HOST_UPGRADE_CHANGED:
|
||||
host = event_data["host"]
|
||||
if host and host.name in self._host_names:
|
||||
self._deployed_hosts[host.name] = None
|
||||
DLOG.info(f"Completed software deploy host {host.name}")
|
||||
else:
|
||||
DLOG.error(f"Unknown software deploy host completed: {event_data}")
|
||||
self._unknown_hosts += 1
|
||||
update = True
|
||||
|
||||
elif event in [STRATEGY_EVENT.HOST_AUDIT]:
|
||||
update = True
|
||||
|
||||
if self._query_inprogress or self._step_complete:
|
||||
return True
|
||||
|
||||
if update:
|
||||
self._query_inprogress = True
|
||||
release = self.strategy.nfvi_upgrade['release']
|
||||
nfvi.nfvi_get_upgrade(release, self._get_upgrade_callback())
|
||||
@ -1090,6 +1155,10 @@ class UpgradeHostsStep(strategy.StrategyStep):
|
||||
self._host_uuids = list()
|
||||
self._host_names = data['entity_names']
|
||||
self._query_inprogress = False
|
||||
self._step_complete = False
|
||||
self._failed_hosts = data["failed_hosts"]
|
||||
self._deployed_hosts = data["deployed_hosts"]
|
||||
self._unknown_hosts = data["unknown_hosts"]
|
||||
return self
|
||||
|
||||
def as_dict(self):
|
||||
@ -1100,6 +1169,9 @@ class UpgradeHostsStep(strategy.StrategyStep):
|
||||
data['entity_type'] = 'hosts'
|
||||
data['entity_names'] = self._host_names
|
||||
data['entity_uuids'] = self._host_uuids
|
||||
data['failed_hosts'] = self._failed_hosts
|
||||
data['deployed_hosts'] = self._deployed_hosts
|
||||
data['unknown_hosts'] = self._unknown_hosts
|
||||
return data
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user