Improved sw-manager logs and output
Made several changes:
* Added host names to `sw-manager *-strategy show` output
* Improved logging for alarms
* Improved how alarms are displayed in `sw-manager *-strategy show`
* Fixed a silent bug that caused duplicate alarms in the DB
* Fixed a bug in `sw-manager *-strategy show` that showed the incorrect step on timeout
* Fixed a bug that caused misalignment in logs when the PID became large
* Added a unified phase_reason on apply success

TEST PLAN
PASS: AIO-SX minor upgrade RR
PASS: AIO-SX minor upgrade NRR
PASS: AIO-DX minor upgrade RR

Story: 2011045
Bug: 51566
Change-Id: Ic9c989c191892f16891c3ca5303d9780a1afdb2c
Signed-off-by: Joshua Kraitberg <joshua.kraitberg@windriver.com>
This commit is contained in:
parent
a8d6da428e
commit
2ef5f9290d
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2016-2024 Wind River Systems, Inc.
|
||||
# Copyright (c) 2016-2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -104,7 +104,10 @@ def _get_current_stage_and_step(strategy):
|
||||
if stage.inprogress or stage == current_phase.stages[-1]:
|
||||
for step in current_stage.steps:
|
||||
current_step = step
|
||||
if step.result not in ['initial', 'success'] or step == current_stage.steps[-1]:
|
||||
if (
|
||||
step.result not in ['initial', 'timed-out', 'success'] or
|
||||
step == current_stage.steps[-1]
|
||||
):
|
||||
break
|
||||
break
|
||||
|
||||
@ -205,6 +208,8 @@ def _display_strategy(strategy, details=False, active=False, error_details=False
|
||||
_print(2, "current-stage", current_stage.stage_name)
|
||||
if current_step:
|
||||
_print(2, "current-step", current_step.step_name)
|
||||
if current_step.entity_type == "hosts" and current_step.entity_names:
|
||||
_print(2, "entity-names", current_step.entity_names)
|
||||
_print(2, "current-phase-completion",
|
||||
("%s%%" % strategy.current_phase_completion_percentage))
|
||||
_print(2, "state", strategy.state)
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2015-2016 Wind River Systems, Inc.
|
||||
# Copyright (c) 2015-2016,2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -8,6 +8,7 @@ import functools
|
||||
import inspect
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
import six
|
||||
import sys
|
||||
|
||||
@ -40,10 +41,17 @@ class DebugLogHandler(logging.Handler):
|
||||
self.process_name = None
|
||||
self.thread_name = None
|
||||
|
||||
pid_size = 7
|
||||
try:
|
||||
max_pid = Path("/proc/sys/kernel/pid_max").read_text()
|
||||
pid_size = len(str(int(max_pid)))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# To keep syslog-ng happy, we need to add the who field twice. Newer
|
||||
# syslog-ng removes the header formatting
|
||||
fmt = ("%(asctime)s %(who)36s[%(process)d]: %(who)36s[%(process)d] "
|
||||
"%(levelname)8s %(message)s")
|
||||
fmt = (f"%(asctime)s %(who)36s[%(process)d]: %(who)36s[%(process){pid_size}d] "
|
||||
"%(levelname)8s %(message)s")
|
||||
formatter = DebugLogFormatter(fmt)
|
||||
self.setFormatter(formatter)
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2015-2024 Wind River Systems, Inc.
|
||||
# Copyright (c) 2015-2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -4263,6 +4263,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
|
||||
alarm_data['reason_text'], alarm_data['timestamp'],
|
||||
alarm_data['mgmt_affecting'])
|
||||
alarms.append(alarm)
|
||||
alarms.sort(key=lambda x: x.alarm_id)
|
||||
|
||||
response['result-data'] = alarms
|
||||
response['completed'] = True
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2015-2024 Wind River Systems, Inc.
|
||||
# Copyright (c) 2015-2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -2356,6 +2356,20 @@ class SwUpgradeStrategy(
|
||||
|
||||
return self._build_complete_normal(result, result_reason)
|
||||
|
||||
def apply_complete(self, result, result_reason):
|
||||
"""
|
||||
Strategy Apply Complete
|
||||
"""
|
||||
|
||||
# On success we change the reason
|
||||
if result == strategy.STRATEGY_RESULT.SUCCESS:
|
||||
if self._rollback:
|
||||
result_reason = f"Rollback to release={self._release} was successful"
|
||||
else:
|
||||
result_reason = f"Upgrade to release={self._release} was successful"
|
||||
|
||||
super(SwUpgradeStrategy, self).apply_complete(result, result_reason)
|
||||
|
||||
def from_dict(self, data, build_phase=None, apply_phase=None, abort_phase=None):
|
||||
"""
|
||||
Initializes a software upgrade strategy object using the given dictionary
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2015-2024 Wind River Systems, Inc.
|
||||
# Copyright (c) 2015-2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -2531,35 +2531,39 @@ class QueryAlarmsStep(strategy.StrategyStep):
|
||||
if self.strategy is not None:
|
||||
nfvi_alarms = self.strategy.nfvi_alarms
|
||||
for nfvi_alarm in response['result-data']:
|
||||
if (self.strategy._alarm_restrictions ==
|
||||
if (nfvi_alarm.alarm_id in self._ignore_alarms or
|
||||
nfvi_alarm.alarm_id in self._ignore_alarms_conditional):
|
||||
DLOG.info("Strategy ignores alarm: id=%s, uuid=%s" %
|
||||
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
||||
elif (self.strategy._alarm_restrictions ==
|
||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.RELAXED and
|
||||
nfvi_alarm.mgmt_affecting == 'False'):
|
||||
DLOG.warn("Ignoring non-management affecting alarm "
|
||||
"%s - uuid %s due to relaxed alarm "
|
||||
"strictness" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
DLOG.warn("Relaxed mode ignores alarm: "
|
||||
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
elif (self.strategy._alarm_restrictions ==
|
||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.PERMISSIVE):
|
||||
DLOG.warn("Ignoring alarm "
|
||||
"%s - uuid %s due to permissive alarm "
|
||||
"strictness" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
elif (nfvi_alarm.alarm_id not in self._ignore_alarms and
|
||||
nfvi_alarm.alarm_id not in self._ignore_alarms_conditional):
|
||||
DLOG.warn("Alarm: %s" % nfvi_alarm.alarm_id)
|
||||
nfvi_alarms.append(nfvi_alarm)
|
||||
DLOG.warn("Permissive mode ignores alarm: "
|
||||
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
else:
|
||||
DLOG.warn("Ignoring alarm %s - uuid %s" %
|
||||
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
||||
DLOG.warn("Detected alarm: %s" % nfvi_alarm.alarm_id)
|
||||
nfvi_alarms.append(nfvi_alarm)
|
||||
self.strategy.nfvi_alarms = nfvi_alarms
|
||||
|
||||
if self._fail_on_alarms and self.strategy.nfvi_alarms:
|
||||
result = strategy.STRATEGY_STEP_RESULT.FAILED
|
||||
alarm_ids = [str(alarm.get('alarm_id')) for alarm in self.strategy.nfvi_alarms]
|
||||
reason = "alarms %s from %s are present" % (alarm_ids, fm_service)
|
||||
if self.strategy.nfvi_alarms:
|
||||
result = (
|
||||
strategy.STRATEGY_STEP_RESULT.FAILED
|
||||
if self._fail_on_alarms
|
||||
else strategy.STRATEGY_STEP_RESULT.SUCCESS
|
||||
)
|
||||
reason = (
|
||||
f"Unignored {fm_service} alarms are present: "
|
||||
f"{json.dumps([v.as_dict() for v in self.strategy.nfvi_alarms], indent=2)}"
|
||||
)
|
||||
else:
|
||||
result = strategy.STRATEGY_STEP_RESULT.SUCCESS
|
||||
reason = ""
|
||||
reason = "No unignored alarms present"
|
||||
|
||||
self.stage.step_complete(result, reason)
|
||||
else:
|
||||
@ -2631,24 +2635,22 @@ class WaitDataSyncStep(strategy.StrategyStep):
|
||||
if self.strategy is not None:
|
||||
nfvi_alarms = list()
|
||||
for nfvi_alarm in response['result-data']:
|
||||
if (self.strategy._alarm_restrictions ==
|
||||
if nfvi_alarm.alarm_id in self._ignore_alarms:
|
||||
DLOG.info("Strategy ignores alarm: id=%s, uuid=%s" %
|
||||
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
||||
elif (self.strategy._alarm_restrictions ==
|
||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.RELAXED and
|
||||
nfvi_alarm.mgmt_affecting == 'False'):
|
||||
DLOG.warn("Ignoring non-management affecting alarm "
|
||||
"%s - uuid %s due to relaxed alarm "
|
||||
"strictness" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
DLOG.warn("Relaxed mode ignores alarm: "
|
||||
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
elif (self.strategy._alarm_restrictions ==
|
||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.PERMISSIVE):
|
||||
DLOG.warn("Ignoring alarm "
|
||||
"%s - uuid %s due to permissive alarm "
|
||||
"strictness" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
elif nfvi_alarm.alarm_id not in self._ignore_alarms:
|
||||
nfvi_alarms.append(nfvi_alarm)
|
||||
DLOG.warn("Permissive mode ignores alarm: "
|
||||
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
else:
|
||||
DLOG.debug("Ignoring alarm %s - uuid %s" %
|
||||
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
||||
nfvi_alarms.append(nfvi_alarm)
|
||||
self.strategy.nfvi_alarms = nfvi_alarms
|
||||
|
||||
if self.strategy.nfvi_alarms:
|
||||
@ -2749,20 +2751,21 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
||||
if self.strategy is not None:
|
||||
nfvi_alarms = list()
|
||||
for nfvi_alarm in response['result-data']:
|
||||
if (self.strategy._alarm_restrictions ==
|
||||
if nfvi_alarm.alarm_id in self._ignore_alarms:
|
||||
DLOG.info("Strategy ignores alarm: id=%s, uuid=%s" %
|
||||
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
||||
elif (self.strategy._alarm_restrictions ==
|
||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.RELAXED and
|
||||
nfvi_alarm.mgmt_affecting == 'False'):
|
||||
DLOG.warn("Ignoring non-management affecting alarm "
|
||||
"%s - uuid %s due to relaxed alarm "
|
||||
"strictness" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
DLOG.warn("Relaxed mode ignores alarm: "
|
||||
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
elif (self.strategy._alarm_restrictions ==
|
||||
strategy.STRATEGY_ALARM_RESTRICTION_TYPES.PERMISSIVE):
|
||||
DLOG.warn("Ignoring alarm "
|
||||
"%s - uuid %s due to permissive alarm "
|
||||
"strictness" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
elif nfvi_alarm.alarm_id not in self._ignore_alarms:
|
||||
DLOG.warn("Permissive mode ignores alarm: "
|
||||
"id=%s, uuid=%s" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
else:
|
||||
# For ignoring stale alarm(currently 750.006)
|
||||
if nfvi_alarm.alarm_id in self._ignore_alarms_conditional:
|
||||
format_string = "%Y-%m-%dT%H:%M:%S.%f"
|
||||
@ -2784,10 +2787,6 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
||||
else:
|
||||
nfvi_alarms.append(nfvi_alarm)
|
||||
|
||||
nfvi_alarms.append(nfvi_alarm)
|
||||
else:
|
||||
DLOG.debug("Ignoring alarm %s - uuid %s" %
|
||||
(nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
|
||||
self.strategy.nfvi_alarms = nfvi_alarms
|
||||
|
||||
if self.strategy.nfvi_alarms:
|
||||
@ -2798,9 +2797,15 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
||||
# Removes only the alarm which has
|
||||
# not yet reached specified timeout.
|
||||
self.strategy.nfvi_alarms.remove(alarm)
|
||||
# Keep waiting for alarms to clear
|
||||
pass
|
||||
else:
|
||||
|
||||
for v in self.strategy.nfvi_alarms:
|
||||
DLOG.info(
|
||||
"Waiting for unignored alarm to clear "
|
||||
f"id={v.alarm_id}, uuid={v.alarm_uuid}"
|
||||
)
|
||||
|
||||
# Do not use elif, nfvi_alarms can be modified during previous block
|
||||
if not self.strategy.nfvi_alarms:
|
||||
# Alarms have all cleared
|
||||
result = strategy.STRATEGY_STEP_RESULT.SUCCESS
|
||||
self.stage.step_complete(result, "")
|
||||
@ -2864,6 +2869,18 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
||||
data['ignore_alarms_conditional'] = self._ignore_alarms_conditional
|
||||
return data
|
||||
|
||||
def timeout(self):
|
||||
"""
|
||||
Strategy Step Timeout Override
|
||||
"""
|
||||
|
||||
result, _ = super(WaitAlarmsClearStep, self).timeout()
|
||||
reason = (
|
||||
"Unignored alarms did not clear before timeout: "
|
||||
f"{json.dumps([v.as_dict() for v in self.strategy.nfvi_alarms], indent=2)}"
|
||||
)
|
||||
return result, reason
|
||||
|
||||
|
||||
class QuerySwPatchesStep(strategy.StrategyStep):
|
||||
"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user