Improved sw-manager logs and output
Made several changes: * Added host names to `sw-manager *-strategy show` output * Improved logging for alarms * Improved how alarms are displayed in `sw-manager *-strategy show` * Fixed a silent bug that caused duplicate alarms in DB * Fixed a bug in `sw-manager *-strategy show` that showed an incorrect step on timeout * Fixed a bug that caused misalignment in logs when PID became large * Added unified phase_reason on apply success TEST PLAN PASS: AIO-SX minor upgrade RR PASS: AIO-SX minor upgrade NRR PASS: AIO-DX minor upgrade RR Story: 2011045 Bug: 51566 Change-Id: Ic9c989c191892f16891c3ca5303d9780a1afdb2c Signed-off-by: Joshua Kraitberg <joshua.kraitberg@windriver.com>
This commit is contained in:
parent
a8d6da428e
commit
2ef5f9290d
@ -1,5 +1,5 @@
|
|||||||
#
|
#
|
||||||
# Copyright (c) 2016-2024 Wind River Systems, Inc.
|
# Copyright (c) 2016-2025 Wind River Systems, Inc.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
@ -104,7 +104,10 @@ def _get_current_stage_and_step(strategy):
|
|||||||
if stage.inprogress or stage == current_phase.stages[-1]:
|
if stage.inprogress or stage == current_phase.stages[-1]:
|
||||||
for step in current_stage.steps:
|
for step in current_stage.steps:
|
||||||
current_step = step
|
current_step = step
|
||||||
if step.result not in ['initial', 'success'] or step == current_stage.steps[-1]:
|
if (
|
||||||
|
step.result not in ['initial', 'timed-out', 'success'] or
|
||||||
|
step == current_stage.steps[-1]
|
||||||
|
):
|
||||||
break
|
break
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -205,6 +208,8 @@ def _display_strategy(strategy, details=False, active=False, error_details=False
|
|||||||
_print(2, "current-stage", current_stage.stage_name)
|
_print(2, "current-stage", current_stage.stage_name)
|
||||||
if current_step:
|
if current_step:
|
||||||
_print(2, "current-step", current_step.step_name)
|
_print(2, "current-step", current_step.step_name)
|
||||||
|
if current_step.entity_type == "hosts" and current_step.entity_names:
|
||||||
|
_print(2, "entity-names", current_step.entity_names)
|
||||||
_print(2, "current-phase-completion",
|
_print(2, "current-phase-completion",
|
||||||
("%s%%" % strategy.current_phase_completion_percentage))
|
("%s%%" % strategy.current_phase_completion_percentage))
|
||||||
_print(2, "state", strategy.state)
|
_print(2, "state", strategy.state)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
#
|
#
|
||||||
# Copyright (c) 2015-2016 Wind River Systems, Inc.
|
# Copyright (c) 2015-2016,2025 Wind River Systems, Inc.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
@ -8,6 +8,7 @@ import functools
|
|||||||
import inspect
|
import inspect
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
import six
|
import six
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
@ -40,10 +41,17 @@ class DebugLogHandler(logging.Handler):
|
|||||||
self.process_name = None
|
self.process_name = None
|
||||||
self.thread_name = None
|
self.thread_name = None
|
||||||
|
|
||||||
|
pid_size = 7
|
||||||
|
try:
|
||||||
|
max_pid = Path("/proc/sys/kernel/pid_max").read_text()
|
||||||
|
pid_size = len(str(int(max_pid)))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# To keep syslog-ng happy, we need to add the who field twice. Newer
|
# To keep syslog-ng happy, we need to add the who field twice. Newer
|
||||||
# syslog-ng removes the header formatting
|
# syslog-ng removes the header formatting
|
||||||
fmt = ("%(asctime)s %(who)36s[%(process)d]: %(who)36s[%(process)d] "
|
fmt = (f"%(asctime)s %(who)36s[%(process)d]: %(who)36s[%(process){pid_size}d] "
|
||||||
"%(levelname)8s %(message)s")
|
"%(levelname)8s %(message)s")
|
||||||
formatter = DebugLogFormatter(fmt)
|
formatter = DebugLogFormatter(fmt)
|
||||||
self.setFormatter(formatter)
|
self.setFormatter(formatter)
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
#
|
#
|
||||||
# Copyright (c) 2015-2024 Wind River Systems, Inc.
|
# Copyright (c) 2015-2025 Wind River Systems, Inc.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
@ -4263,6 +4263,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
|
|||||||
alarm_data['reason_text'], alarm_data['timestamp'],
|
alarm_data['reason_text'], alarm_data['timestamp'],
|
||||||
alarm_data['mgmt_affecting'])
|
alarm_data['mgmt_affecting'])
|
||||||
alarms.append(alarm)
|
alarms.append(alarm)
|
||||||
|
alarms.sort(key=lambda x: x.alarm_id)
|
||||||
|
|
||||||
response['result-data'] = alarms
|
response['result-data'] = alarms
|
||||||
response['completed'] = True
|
response['completed'] = True
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
#
|
#
|
||||||
# Copyright (c) 2015-2024 Wind River Systems, Inc.
|
# Copyright (c) 2015-2025 Wind River Systems, Inc.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
@ -2356,6 +2356,20 @@ class SwUpgradeStrategy(
|
|||||||
|
|
||||||
return self._build_complete_normal(result, result_reason)
|
return self._build_complete_normal(result, result_reason)
|
||||||
|
|
||||||
|
def apply_complete(self, result, result_reason):
|
||||||
|
"""
|
||||||
|
Strategy Apply Complete
|
||||||
|
"""
|
||||||
|
|
||||||
|
# On success we change the reason
|
||||||
|
if result == strategy.STRATEGY_RESULT.SUCCESS:
|
||||||
|
if self._rollback:
|
||||||
|
result_reason = f"Rollback to release={self._release} was successful"
|
||||||
|
else:
|
||||||
|
result_reason = f"Upgrade to release={self._release} was successful"
|
||||||
|
|
||||||
|
super(SwUpgradeStrategy, self).apply_complete(result, result_reason)
|
||||||
|
|
||||||
def from_dict(self, data, build_phase=None, apply_phase=None, abort_phase=None):
|
def from_dict(self, data, build_phase=None, apply_phase=None, abort_phase=None):
|
||||||
"""
|
"""
|
||||||
Initializes a software upgrade strategy object using the given dictionary
|
Initializes a software upgrade strategy object using the given dictionary
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
#
|
#
|
||||||
# Copyright (c) 2015-2024 Wind River Systems, Inc.
|
# Copyright (c) 2015-2025 Wind River Systems, Inc.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
@ -2531,35 +2531,39 @@ class QueryAlarmsStep(strategy.StrategyStep):
|
|||||||
if self.strategy is not None:
|
if self.strategy is not None:
|
||||||
nfvi_alarms = self.strategy.nfvi_alarms
|
nfvi_alarms = self.strategy.nfvi_alarms
|
||||||
for nfvi_alarm in response['result-data']:
|
for nfvi_alarm in response['result-data']:
|
||||||
if (self.strategy._alarm_restrictions ==
|
if (nfvi_alarm.alarm_id in self._ignore_alarms or
|
||||||
|
nfvi_alarm.alarm_id in self._ignore_alarms_conditional):
|
||||||
|
DLOG.info("Strategy ignores alarm: id=%s, uuid=%s" %
|
||||||
|
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
||||||
|
elif (self.strategy._alarm_restrictions ==
|
||||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.RELAXED and
|
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.RELAXED and
|
||||||
nfvi_alarm.mgmt_affecting == 'False'):
|
nfvi_alarm.mgmt_affecting == 'False'):
|
||||||
DLOG.warn("Ignoring non-management affecting alarm "
|
DLOG.warn("Relaxed mode ignores alarm: "
|
||||||
"%s - uuid %s due to relaxed alarm "
|
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||||
"strictness" % (nfvi_alarm.alarm_id,
|
nfvi_alarm.alarm_uuid))
|
||||||
nfvi_alarm.alarm_uuid))
|
|
||||||
elif (self.strategy._alarm_restrictions ==
|
elif (self.strategy._alarm_restrictions ==
|
||||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.PERMISSIVE):
|
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.PERMISSIVE):
|
||||||
DLOG.warn("Ignoring alarm "
|
DLOG.warn("Permissive mode ignores alarm: "
|
||||||
"%s - uuid %s due to permissive alarm "
|
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||||
"strictness" % (nfvi_alarm.alarm_id,
|
nfvi_alarm.alarm_uuid))
|
||||||
nfvi_alarm.alarm_uuid))
|
|
||||||
elif (nfvi_alarm.alarm_id not in self._ignore_alarms and
|
|
||||||
nfvi_alarm.alarm_id not in self._ignore_alarms_conditional):
|
|
||||||
DLOG.warn("Alarm: %s" % nfvi_alarm.alarm_id)
|
|
||||||
nfvi_alarms.append(nfvi_alarm)
|
|
||||||
else:
|
else:
|
||||||
DLOG.warn("Ignoring alarm %s - uuid %s" %
|
DLOG.warn("Detected alarm: %s" % nfvi_alarm.alarm_id)
|
||||||
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
nfvi_alarms.append(nfvi_alarm)
|
||||||
self.strategy.nfvi_alarms = nfvi_alarms
|
self.strategy.nfvi_alarms = nfvi_alarms
|
||||||
|
|
||||||
if self._fail_on_alarms and self.strategy.nfvi_alarms:
|
if self.strategy.nfvi_alarms:
|
||||||
result = strategy.STRATEGY_STEP_RESULT.FAILED
|
result = (
|
||||||
alarm_ids = [str(alarm.get('alarm_id')) for alarm in self.strategy.nfvi_alarms]
|
strategy.STRATEGY_STEP_RESULT.FAILED
|
||||||
reason = "alarms %s from %s are present" % (alarm_ids, fm_service)
|
if self._fail_on_alarms
|
||||||
|
else strategy.STRATEGY_STEP_RESULT.SUCCESS
|
||||||
|
)
|
||||||
|
reason = (
|
||||||
|
f"Unignored {fm_service} alarms are present: "
|
||||||
|
f"{json.dumps([v.as_dict() for v in self.strategy.nfvi_alarms], indent=2)}"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
result = strategy.STRATEGY_STEP_RESULT.SUCCESS
|
result = strategy.STRATEGY_STEP_RESULT.SUCCESS
|
||||||
reason = ""
|
reason = "No unignored alarms present"
|
||||||
|
|
||||||
self.stage.step_complete(result, reason)
|
self.stage.step_complete(result, reason)
|
||||||
else:
|
else:
|
||||||
@ -2631,24 +2635,22 @@ class WaitDataSyncStep(strategy.StrategyStep):
|
|||||||
if self.strategy is not None:
|
if self.strategy is not None:
|
||||||
nfvi_alarms = list()
|
nfvi_alarms = list()
|
||||||
for nfvi_alarm in response['result-data']:
|
for nfvi_alarm in response['result-data']:
|
||||||
if (self.strategy._alarm_restrictions ==
|
if nfvi_alarm.alarm_id in self._ignore_alarms:
|
||||||
|
DLOG.info("Strategy ignores alarm: id=%s, uuid=%s" %
|
||||||
|
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
||||||
|
elif (self.strategy._alarm_restrictions ==
|
||||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.RELAXED and
|
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.RELAXED and
|
||||||
nfvi_alarm.mgmt_affecting == 'False'):
|
nfvi_alarm.mgmt_affecting == 'False'):
|
||||||
DLOG.warn("Ignoring non-management affecting alarm "
|
DLOG.warn("Relaxed mode ignores alarm: "
|
||||||
"%s - uuid %s due to relaxed alarm "
|
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||||
"strictness" % (nfvi_alarm.alarm_id,
|
nfvi_alarm.alarm_uuid))
|
||||||
nfvi_alarm.alarm_uuid))
|
|
||||||
elif (self.strategy._alarm_restrictions ==
|
elif (self.strategy._alarm_restrictions ==
|
||||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.PERMISSIVE):
|
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.PERMISSIVE):
|
||||||
DLOG.warn("Ignoring alarm "
|
DLOG.warn("Permissive mode ignores alarm: "
|
||||||
"%s - uuid %s due to permissive alarm "
|
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||||
"strictness" % (nfvi_alarm.alarm_id,
|
nfvi_alarm.alarm_uuid))
|
||||||
nfvi_alarm.alarm_uuid))
|
|
||||||
elif nfvi_alarm.alarm_id not in self._ignore_alarms:
|
|
||||||
nfvi_alarms.append(nfvi_alarm)
|
|
||||||
else:
|
else:
|
||||||
DLOG.debug("Ignoring alarm %s - uuid %s" %
|
nfvi_alarms.append(nfvi_alarm)
|
||||||
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
|
||||||
self.strategy.nfvi_alarms = nfvi_alarms
|
self.strategy.nfvi_alarms = nfvi_alarms
|
||||||
|
|
||||||
if self.strategy.nfvi_alarms:
|
if self.strategy.nfvi_alarms:
|
||||||
@ -2749,20 +2751,21 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
|||||||
if self.strategy is not None:
|
if self.strategy is not None:
|
||||||
nfvi_alarms = list()
|
nfvi_alarms = list()
|
||||||
for nfvi_alarm in response['result-data']:
|
for nfvi_alarm in response['result-data']:
|
||||||
if (self.strategy._alarm_restrictions ==
|
if nfvi_alarm.alarm_id in self._ignore_alarms:
|
||||||
|
DLOG.info("Strategy ignores alarm: id=%s, uuid=%s" %
|
||||||
|
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
||||||
|
elif (self.strategy._alarm_restrictions ==
|
||||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.RELAXED and
|
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.RELAXED and
|
||||||
nfvi_alarm.mgmt_affecting == 'False'):
|
nfvi_alarm.mgmt_affecting == 'False'):
|
||||||
DLOG.warn("Ignoring non-management affecting alarm "
|
DLOG.warn("Relaxed mode ignores alarm: "
|
||||||
"%s - uuid %s due to relaxed alarm "
|
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||||
"strictness" % (nfvi_alarm.alarm_id,
|
nfvi_alarm.alarm_uuid))
|
||||||
nfvi_alarm.alarm_uuid))
|
|
||||||
elif (self.strategy._alarm_restrictions ==
|
elif (self.strategy._alarm_restrictions ==
|
||||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.PERMISSIVE):
|
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.PERMISSIVE):
|
||||||
DLOG.warn("Ignoring alarm "
|
DLOG.warn("Permissive mode ignores alarm: "
|
||||||
"%s - uuid %s due to permissive alarm "
|
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||||
"strictness" % (nfvi_alarm.alarm_id,
|
nfvi_alarm.alarm_uuid))
|
||||||
nfvi_alarm.alarm_uuid))
|
else:
|
||||||
elif nfvi_alarm.alarm_id not in self._ignore_alarms:
|
|
||||||
# For ignoring stale alarm(currently 750.006)
|
# For ignoring stale alarm(currently 750.006)
|
||||||
if nfvi_alarm.alarm_id in self._ignore_alarms_conditional:
|
if nfvi_alarm.alarm_id in self._ignore_alarms_conditional:
|
||||||
format_string = "%Y-%m-%dT%H:%M:%S.%f"
|
format_string = "%Y-%m-%dT%H:%M:%S.%f"
|
||||||
@ -2784,10 +2787,6 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
|||||||
else:
|
else:
|
||||||
nfvi_alarms.append(nfvi_alarm)
|
nfvi_alarms.append(nfvi_alarm)
|
||||||
|
|
||||||
nfvi_alarms.append(nfvi_alarm)
|
|
||||||
else:
|
|
||||||
DLOG.debug("Ignoring alarm %s - uuid %s" %
|
|
||||||
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
|
||||||
self.strategy.nfvi_alarms = nfvi_alarms
|
self.strategy.nfvi_alarms = nfvi_alarms
|
||||||
|
|
||||||
if self.strategy.nfvi_alarms:
|
if self.strategy.nfvi_alarms:
|
||||||
@ -2798,9 +2797,15 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
|||||||
# Removes only the alarm which has
|
# Removes only the alarm which has
|
||||||
# not yet reached specified timeout.
|
# not yet reached specified timeout.
|
||||||
self.strategy.nfvi_alarms.remove(alarm)
|
self.strategy.nfvi_alarms.remove(alarm)
|
||||||
# Keep waiting for alarms to clear
|
|
||||||
pass
|
for v in self.strategy.nfvi_alarms:
|
||||||
else:
|
DLOG.info(
|
||||||
|
"Waiting for unignored alarm to clear "
|
||||||
|
f"id={v.alarm_id}, uuid={v.alarm_uuid}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Do not use elif, nfvi_alarms can be modified during previous block
|
||||||
|
if not self.strategy.nfvi_alarms:
|
||||||
# Alarms have all cleared
|
# Alarms have all cleared
|
||||||
result = strategy.STRATEGY_STEP_RESULT.SUCCESS
|
result = strategy.STRATEGY_STEP_RESULT.SUCCESS
|
||||||
self.stage.step_complete(result, "")
|
self.stage.step_complete(result, "")
|
||||||
@ -2864,6 +2869,18 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
|||||||
data['ignore_alarms_conditional'] = self._ignore_alarms_conditional
|
data['ignore_alarms_conditional'] = self._ignore_alarms_conditional
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
def timeout(self):
|
||||||
|
"""
|
||||||
|
Strategy Step Timeout Override
|
||||||
|
"""
|
||||||
|
|
||||||
|
result, _ = super(WaitAlarmsClearStep, self).timeout()
|
||||||
|
reason = (
|
||||||
|
"Unignored alarms did not clear before timeout: "
|
||||||
|
f"{json.dumps([v.as_dict() for v in self.strategy.nfvi_alarms], indent=2)}"
|
||||||
|
)
|
||||||
|
return result, reason
|
||||||
|
|
||||||
|
|
||||||
class QuerySwPatchesStep(strategy.StrategyStep):
|
class QuerySwPatchesStep(strategy.StrategyStep):
|
||||||
"""
|
"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user