---

#
# Copyright (c) 2013-2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

############################################################################
#
# ALARM & CUSTOMER LOG DOCUMENTATION
#
############################################################################

############################################################################
#
# Record Format ... for documentation
#
# 100.001:
#     Type: < Alarm | Log >
#     Description: < yaml string >
#           OR
#         [ < yaml string >,   // list of yaml strings
#           < yaml string > ]
#           OR
#         critical: < yaml string >   // i.e. dictionary of yaml strings indexed by severity
#         major: < yaml string >
#         minor: < yaml string >
#         warning: < yaml string >
#     Entity_Instance_ID: < yaml string ... e.g. host=<hostname>.interface=<ifname> >
#           OR
#         [ < yaml string >,   // list of yaml strings
#           < yaml string > ]
#     Severity: < critical | major | minor | warning >
#           OR
#         [ critical, major ]   // list of severity values
#     Proposed_Repair_Action: < yaml string >   // NOTE: ALARM-ONLY FIELD
#           OR
#         critical: < yaml string >   // i.e. dictionary of yaml strings indexed by severity
#         major: < yaml string >
#         minor: < yaml string >
#         warning: < yaml string >
#     Maintenance_Action: < yaml string >   // NOTE: ALARM-ONLY FIELD
#           OR
#         critical: < yaml string >   // i.e. dictionary of yaml strings indexed by severity
#         major: < yaml string >
#         minor: < yaml string >
#         warning: < yaml string >
#     Inhibit_Alarms: < True | False >   // NOTE: ALARM-ONLY FIELD
#     Alarm_Type: < operational-violation | ... >
#     Probable_Cause: < timing-problem | ... >
#           OR
#         [ < timing-problem | ... >,   // list of probable-causes
#           < timing-problem | ... > ]
#     Service_Affecting: < True | False >
#     Suppression: < True | False >   // NOTE: ALARM-ONLY FIELD
#     Management_Affecting_Severity: < none | critical | major | minor | warning >
#         // lowest alarm level of this type that will block forced upgrades & orchestration actions
#     Degrade_Affecting_Severity: < none | critical | major | minor >
#         // lowest alarm level of this type that sets a host to 'degraded'
#
#
# Other Notes:
#   - use the general record format above
#   - the only dictionaries allowed are ones indexed by severity
#   - if there are multiple lists in a record, they should all have the same
#     number of items, and corresponding list items represent one instance of the alarm
#   - if you can't describe the alarm/log with the above rules,
#     you can use a multi-line string format
#   - DELETING alarms from events.yaml: alarms should only be deleted when going
#     to a new Titanium Cloud release
#   - if all possible alarm severities are mgmt affecting, the convention is to
#     use 'warning' as the Management_Affecting_Severity, even if warning is not
#     a possible severity for that alarm
#
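# Example (illustration only; 100.999 is not a real alarm ID): a hypothetical
# record following the format above, showing the severity-indexed dictionary
# and list forms:
#
# 100.999:
#     Type: Alarm
#     Description:
#         major: "Example resource threshold exceeded; threshold x%, actual y%."
#         minor: "Example resource approaching threshold; threshold x%, actual y%."
#     Entity_Instance_ID: host=<hostname>.resource=<resource-name>
#     Severity: [major, minor]
#     Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
#     Maintenance_Action:
#         major: degrade
#     Inhibit_Alarms: False
#     Alarm_Type: operational-violation
#     Probable_Cause: threshold-crossed
#     Service_Affecting: False
#     Suppression: True
#     Management_Affecting_Severity: none
#     Degrade_Affecting_Severity: none
#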
# Testing:
#   - Testing of events.yaml can be done by running the regular make command
#     and specifying fm-doc:
#         nice -n 20 ionice -c Idle make -C build fm-doc.rebuild
#   - When building, events.yaml is parsed for correct format, and also
#     checked to ensure that the Alarm IDs defined in constants.py and
#     fmAlarm.h are listed in events.yaml
#
############################################################################
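# A quick local syntax check can also be run before a full build; this is only
# a sketch (it assumes PyYAML is installed), and the fm-doc build step above
# remains the authoritative format check:
#
#     python -c "import yaml; yaml.safe_load(open('events.yaml'))"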

#---------------------------------------------------------------------------
# Monitored Resource Alarms
#---------------------------------------------------------------------------

100.101:
    Type: Alarm
    Description: |-
        Platform CPU threshold exceeded; threshold x%, actual y%.
        CRITICAL @ 95%
        MAJOR @ 90%
        MINOR @ 80%
    Entity_Instance_ID: host=<hostname>
    Severity: [critical, major, minor]
    Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
    Maintenance_Action:
        critical: degrade
        major: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: threshold-crossed
    Service_Affecting: False
    Suppression: True
    Management_Affecting_Severity: major
    Degrade_Affecting_Severity: critical

100.102:
    Type: Alarm
    Description: |-
        VSwitch CPU threshold exceeded; threshold x%, actual y%.
        CRITICAL @ 95%
        MAJOR @ 90%
        MINOR @ 80%
    Entity_Instance_ID: host=<hostname>
    Severity: [critical, major, minor]
    Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
    Maintenance_Action:
        critical: degrade
        major: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: threshold-crossed
    Service_Affecting: False
    Suppression: True
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

100.103:
    Type: Alarm
    Description: |-
        Memory threshold exceeded; threshold x%, actual y%.
        CRITICAL @ 90%
        MAJOR @ 80%
        MINOR @ 70%
    Entity_Instance_ID: host=<hostname>
    Severity: [critical, major, minor]
    Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support; may require additional memory on Host."
    Maintenance_Action:
        critical: degrade
        major: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: threshold-crossed
    Service_Affecting: False
    Suppression: True
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: critical

100.104: # NOTE This should really be split into two different Alarms.
    Type: Alarm
    Description: |-
        host=<hostname>.filesystem=<mount-dir>
        File System threshold exceeded; threshold x%, actual y%.
        CRITICAL @ 90%
        MAJOR @ 80%
        MINOR @ 70%
        OR
        host=<hostname>.volumegroup=<volumegroup-name>
        Monitor and if condition persists, consider adding additional physical volumes to the volume group.
    Entity_Instance_ID: |-
        host=<hostname>.filesystem=<mount-dir>
        OR
        host=<hostname>.volumegroup=<volumegroup-name>
    Severity: [critical, major, minor]
    Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
    Maintenance_Action:
        critical: degrade
        major: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: threshold-crossed
    Service_Affecting: False
    Suppression: True
    Management_Affecting_Severity: critical
    Degrade_Affecting_Severity: critical

#--------
# 100.105: Retired (with R2 release): previously monitored /etc/nova/instances
#          NFS mount from controller to computes
#--------

100.106:
    Type: Alarm
    Description: "'OAM' Port failed."
    Entity_Instance_ID: host=<hostname>.port=<port-name>
    Severity: major
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unknown
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: major

100.107:
    Type: Alarm
    Description: |-
        'OAM' Interface degraded.
        OR
        'OAM' Interface failed.
    Entity_Instance_ID: host=<hostname>.interface=<if-name>
    Severity: [critical, major]
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action:
        critical: degrade
        major: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unknown
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: major

100.108:
    Type: Alarm
    Description: "'MGMT' Port failed."
    Entity_Instance_ID: host=<hostname>.port=<port-name>
    Severity: major
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unknown
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: major

100.109:
    Type: Alarm
    Description: |-
        'MGMT' Interface degraded.
        OR
        'MGMT' Interface failed.
    Entity_Instance_ID: host=<hostname>.interface=<if-name>
    Severity: [critical, major]
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action:
        critical: degrade
        major: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unknown
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: major

100.110:
    Type: Alarm
    Description: "'CLUSTER-HOST' Port failed."
    Entity_Instance_ID: host=<hostname>.port=<port-name>
    Severity: major
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unknown
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: major

100.111:
    Type: Alarm
    Description: |-
        'CLUSTER-HOST' Interface degraded.
        OR
        'CLUSTER-HOST' Interface failed.
    Entity_Instance_ID: host=<hostname>.interface=<if-name>
    Severity: [critical, major]
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action:
        critical: degrade
        major: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unknown
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: major

100.112:
    Type: Alarm
    Description: "'DATA-VRS' Port down."
    Entity_Instance_ID: host=<hostname>.port=<port-name>
    Severity: major
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unknown
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: major

100.113:
    Type: Alarm
    Description: |-
        'DATA-VRS' Interface degraded.
        OR
        'DATA-VRS' Interface down.
    Entity_Instance_ID: host=<hostname>.interface=<if-name>
    Severity: [critical, major]
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action:
        major: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unknown
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: major

100.114:
    Type: Alarm
    Description:
        major: "NTP configuration does not contain any valid or reachable NTP servers."
        minor: "NTP address <IP address> is not a valid or a reachable NTP server."
    Entity_Instance_ID:
        major: host=<hostname>.ntp
        minor: host=<hostname>.ntp=<IP address>
    Severity: [major, minor]
    Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
    Maintenance_Action: none
    Inhibit_Alarms:
    Alarm_Type: communication
    Probable_Cause: unknown
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

100.115:
    Type: Alarm
    Description: "VSwitch Memory Usage, processor <processor> threshold exceeded; threshold x%, actual y%."
    Entity_Instance_ID: host=<hostname>.processor=<processor>
    Severity: [critical, major, minor]
    Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
    Maintenance_Action:
        critical: degrade
        major: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: threshold-crossed
    Service_Affecting: False
    Suppression: True
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: critical

100.116:
    Type: Alarm
    Description: "Cinder LVM Thinpool Usage threshold exceeded; threshold x%, actual y%."
    Entity_Instance_ID: host=<hostname>
    Severity: [critical, major, minor]
    Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
    Maintenance_Action:
        critical: degrade
        major: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: threshold-crossed
    Service_Affecting: False
    Suppression: True
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: critical

100.117:
    Type: Alarm
    Description: "Nova LVM Thinpool Usage threshold exceeded; threshold x%, actual y%."
    Entity_Instance_ID: host=<hostname>
    Severity: [critical, major, minor]
    Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
    Maintenance_Action:
        critical: degrade
        major: degrade
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: threshold-crossed
    Service_Affecting: False
    Suppression: True
    Management_Affecting_Severity: major
    Degrade_Affecting_Severity: critical

100.118:
    Type: Alarm
    Description: Controller cannot establish connection with remote logging server.
    Entity_Instance_ID: host=<hostname>
    Severity: minor
    Proposed_Repair_Action: "Ensure Remote Log Server IP is reachable from Controller through OAM interface; otherwise contact next level of support."
    Maintenance_Action: none
    Inhibit_Alarms: False
    Alarm_Type: communication
    Probable_Cause: communication-subsystem-failure
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

100.119:
    Type: Alarm
    Description:
        major: "PTP configuration or out-of-tolerance timestamping conditions"
        minor: "PTP out-of-tolerance timestamping condition"
    Entity_Instance_ID: |-
        host=<hostname>.ptp
        OR
        host=<hostname>.ptp=no-lock
        OR
        host=<hostname>.ptp=<interface>.unsupported=hardware-timestamping
        OR
        host=<hostname>.ptp=<interface>.unsupported=software-timestamping
        OR
        host=<hostname>.ptp=<interface>.unsupported=legacy-timestamping
        OR
        host=<hostname>.ptp=out-of-tolerance
    Severity: [major, minor]
    Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
    Maintenance_Action: none
    Inhibit_Alarms:
    Alarm_Type: communication
    Probable_Cause: unknown
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

#---------------------------------------------------------------------------
# MAINTENANCE
#---------------------------------------------------------------------------

200.001:
    Type: Alarm
    Description: <hostname> was administratively locked to take it out-of-service.
    Entity_Instance_ID: host=<hostname>
    Severity: warning
    Proposed_Repair_Action: Administratively unlock Host to bring it back in-service.
    Maintenance_Action: none
    Inhibit_Alarms: True
    Alarm_Type: operational-violation
    Probable_Cause: out-of-service
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

200.004:
    Type: Alarm
    Description: |-
        <hostname> experienced a service-affecting failure.
        Host is being auto recovered by Reboot.
    Entity_Instance_ID: host=<hostname>
    Severity: critical
    Proposed_Repair_Action: If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.
    Maintenance_Action: auto recover
    Inhibit_Alarms: False
    Alarm_Type: operational-violation
    Probable_Cause: application-subsystem-failure
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

200.011:
    Type: Alarm
    Description: <hostname> experienced a configuration failure during initialization. Host is being re-configured by Reboot.
    Entity_Instance_ID: host=<hostname>
    Severity: critical
    Proposed_Repair_Action: If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.
    Maintenance_Action: auto-recover
    Inhibit_Alarms: False
    Alarm_Type: operational-violation
    Probable_Cause: configuration-or-customization-error
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

200.010:
    Type: Alarm
    Description: <hostname> access to board management module has failed.
    Entity_Instance_ID: host=<hostname>
    Severity: warning
    Proposed_Repair_Action: Check Host's board management configuration and connectivity.
    Maintenance_Action: auto recover
    Inhibit_Alarms: False
    Alarm_Type: operational-violation
    Probable_Cause: communication-subsystem-failure
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

200.012:
    Type: Alarm
    Description: <hostname> controller function has an in-service failure while compute services remain healthy.
    Entity_Instance_ID: host=<hostname>
    Severity: major
    Proposed_Repair_Action: Lock and then Unlock host to recover. Avoid using 'Force Lock' action as that will impact compute services running on this host. If lock action fails then contact next level of support to investigate and recover.
    Maintenance_Action: "degrade - requires manual action"
    Inhibit_Alarms: False
    Alarm_Type: operational-violation
    Probable_Cause: communication-subsystem-failure
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: major

200.013:
    Type: Alarm
    Description: <hostname> compute service of the only available controller is not operational. Auto-recovery is disabled. Degrading host instead.
    Entity_Instance_ID: host=<hostname>
    Severity: major
    Proposed_Repair_Action: Enable second controller and Switch Activity (Swact) over to it as soon as possible. Then Lock and Unlock host to recover its local compute service.
    Maintenance_Action: "degrade - requires manual action"
    Inhibit_Alarms: False
    Alarm_Type: operational-violation
    Probable_Cause: communication-subsystem-failure
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: major

200.005:
    Type: Alarm
    Description: |-
        Degrade:
        <hostname> is experiencing intermittent 'Management Network' communication failures that have exceeded its lower alarming threshold.

        Failure:
        <hostname> is experiencing a persistent critical 'Management Network' communication failure.
    Entity_Instance_ID: host=<hostname>
    Severity: [critical, major]
    Proposed_Repair_Action: "Check 'Management Network' connectivity and support for multicast messaging. If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host."
    Maintenance_Action: auto recover
    Inhibit_Alarms: False
    Alarm_Type: communication
    Probable_Cause: unknown
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

200.009:
    Type: Alarm
    Description: |-
        Degrade:
        <hostname> is experiencing intermittent 'Cluster-host Network' communication failures that have exceeded its lower alarming threshold.

        Failure:
        <hostname> is experiencing a persistent critical 'Cluster-host Network' communication failure.
    Entity_Instance_ID: host=<hostname>
    Severity: [critical, major]
    Proposed_Repair_Action: "Check 'Cluster-host Network' connectivity and support for multicast messaging. If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host."
    Maintenance_Action: auto recover
    Inhibit_Alarms: False
    Alarm_Type: communication
    Probable_Cause: unknown
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

200.006:
    Type: Alarm
    Description: |-
        Main Process Monitor Daemon Failure (major):
        <hostname> 'Process Monitor' (pmond) process is not running or functioning properly. The system is trying to recover this process.

        Monitored Process Failure (critical/major/minor):
        Critical: <hostname> critical '<processname>' process has failed and could not be auto-recovered gracefully.
        Auto-recovery progression by host reboot is required and in progress.
        Major: <hostname> is degraded due to the failure of its '<processname>' process. Auto recovery of this major process is in progress.
        Minor: <hostname> '<processname>' process has failed. Auto recovery of this minor process is in progress.
        OR
        <hostname> '<processname>' process has failed. Manual recovery is required.
    Entity_Instance_ID: host=<hostname>.process=<processname>
    Severity: [critical, major, minor]
    Proposed_Repair_Action: |-
        If this alarm does not automatically clear after some time and continues to be asserted after Host is locked and unlocked then contact next level of support for root cause analysis and recovery.

        If problem consistently occurs after Host is locked and unlocked then contact next level of support for root cause analysis and recovery.
    Maintenance_Action:
        critical: auto-recover
        major: degrade
        minor:
    Inhibit_Alarms: False
    Alarm_Type: operational-violation
    Probable_Cause: unknown
    Service_Affecting:
        critical: True
        major: True
        minor: False
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: major

# 200.006:    // NOTE using duplicate ID of a completely analogous Alarm for this
#     Type: Log
#     Description: |-
#         Main Process Monitor Daemon Failure (major)
#         <hostname> 'Process Monitor' (pmond) process is not running or functioning properly.
#         The system is trying to recover this process.
#
#         Monitored Process Failure (critical/major/minor)
#         critical: <hostname> critical '<processname>' process has failed and could not be auto-recovered gracefully.
#         Auto-recovery progression by host reboot is required and in progress.
#         major: <hostname> is degraded due to the failure of its '<processname>' process. Auto recovery of this major process is in progress.
#         minor: <hostname> '<processname>' process has failed. Auto recovery of this minor process is in progress.
#         OR
#         <hostname> '<processname>' process has failed. Manual recovery is required.
#     Entity_Instance_ID: host=<hostname>.process=<process-name>
#     Severity: minor
#     Alarm_Type: other
#     Probable_Cause: unspecified-reason
#     Service_Affecting: True

200.007:
    Type: Alarm
    Description:
        critical: "Host is degraded due to a 'critical' out-of-tolerance reading from the '<sensorname>' sensor"
        major: "Host is degraded due to a 'major' out-of-tolerance reading from the '<sensorname>' sensor"
        minor: "Host is reporting a 'minor' out-of-tolerance reading from the '<sensorname>' sensor"
    Entity_Instance_ID: host=<hostname>.sensor=<sensorname>
    Severity: [critical, major, minor]
    Proposed_Repair_Action: "If problem consistently occurs after Host is power cycled and or reset, contact next level of support or lock and replace failing host."
    Maintenance_Action:
        critical: degrade
        major: degrade
        minor: auto-recover (polling)
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unspecified-reason
    Service_Affecting:
        critical: True
        major: False
        minor: False
    Suppression: True
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: critical

200.014:
    Type: Alarm
    Description: "The Hardware Monitor was unable to load, configure and monitor one or more hardware sensors."
    Entity_Instance_ID: host=<hostname>
    Severity: minor
    Proposed_Repair_Action: Check Board Management Controller provisioning. Try reprovisioning the BMC. If problem persists try power cycling the host and then the entire server including the BMC power. If problem persists then contact next level of support.
    Maintenance_Action: None
    Inhibit_Alarms: False
    Alarm_Type: operational-violation
    Probable_Cause: unknown
    Service_Affecting: False
    Suppression: True
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

200.015:
    Type: Alarm
    Description: Unable to read one or more sensor groups from this host's board management controller
    Entity_Instance_ID: host=<hostname>
    Severity: major
    Proposed_Repair_Action: Check board management connectivity and try rebooting the board management controller. If problem persists contact next level of support or lock and replace failing host.
    Maintenance_Action: None
    Inhibit_Alarms: False
    Alarm_Type: operational-violation
    Probable_Cause: unknown
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

200.020:
    Type: Log
    Description: ["<hostname> has been 'discovered' on the network",
                  "<hostname> has been 'added' to the system",
                  "<hostname> has 'entered' multi-node failure avoidance",
                  "<hostname> has 'exited' multi-node failure avoidance"]
    Entity_Instance_ID: [host=<hostname>.event=discovered,
                         host=<hostname>.event=add,
                         host=<hostname>.event=mnfa_enter,
                         host=<hostname>.event=mnfa_exit]
    Severity: warning
    Alarm_Type: other
    Probable_Cause: unspecified-reason
    Service_Affecting: True

200.021:
    Type: Log
    Description: ["<hostname> board management controller has been 'provisioned'",
                  "<hostname> board management controller has been 're-provisioned'",
                  "<hostname> board management controller has been 'de-provisioned'",
                  "<hostname> manual 'unlock' request",
                  "<hostname> manual 'reboot' request",
                  "<hostname> manual 'reset' request",
                  "<hostname> manual 'power-off' request",
                  "<hostname> manual 'power-on' request",
                  "<hostname> manual 'reinstall' request",
                  "<hostname> manual 'force-lock' request",
                  "<hostname> manual 'delete' request",
                  "<hostname> manual 'controller switchover' request"]
    Entity_Instance_ID: [host=<hostname>.command=provision,
                         host=<hostname>.command=reprovision,
                         host=<hostname>.command=deprovision,
                         host=<hostname>.command=unlock,
                         host=<hostname>.command=reboot,
                         host=<hostname>.command=reset,
                         host=<hostname>.command=power-off,
                         host=<hostname>.command=power-on,
                         host=<hostname>.command=reinstall,
                         host=<hostname>.command=force-lock,
                         host=<hostname>.command=delete,
                         host=<hostname>.command=swact]
    Severity: warning
    Alarm_Type: other
    Probable_Cause: unspecified-reason
    Service_Affecting: False

200.022:
    Type: Log
    Description: ["<hostname> is now 'disabled'",
                  "<hostname> is now 'enabled'",
                  "<hostname> is now 'online'",
                  "<hostname> is now 'offline'",
                  "<hostname> is 'disabled-failed' to the system",
                  "<hostname> reinstall failed",
                  "<hostname> reinstall completed successfully"]
    Entity_Instance_ID: [host=<hostname>.state=disabled,
                         host=<hostname>.state=enabled,
                         host=<hostname>.status=online,
                         host=<hostname>.status=offline,
                         host=<hostname>.status=failed,
                         host=<hostname>.status=reinstall-failed,
                         host=<hostname>.status=reinstall-complete]
    Severity: warning
    Alarm_Type: other
    Probable_Cause: unspecified-reason
    Service_Affecting: True

#---------------------------------------------------------------------------
# BACKUP AND RESTORE
#---------------------------------------------------------------------------

210.001:
    Type: Alarm
    Description: System Backup in progress.
    Entity_Instance_ID: host=controller
    Severity: minor
    Proposed_Repair_Action: No action required.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unspecified-reason
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

#---------------------------------------------------------------------------
# SYSTEM CONFIGURATION
#---------------------------------------------------------------------------

250.001:
    Type: Alarm
    Description: <hostname> Configuration is out-of-date.
    Entity_Instance_ID: host=<hostname>
    Severity: major
    Proposed_Repair_Action: Administratively lock and unlock <hostname> to update config.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unspecified-reason
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

250.002:
    Type: Alarm
    Description: <hostname> Ceph cache tiering configuration is out-of-date.
    Entity_Instance_ID: cluster=<dist-fs-uuid>
    Severity: major
    Proposed_Repair_Action: Apply Ceph service parameter settings.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unspecified-reason
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

#---------------------------------------------------------------------------
# VM Compute Services
#---------------------------------------------------------------------------

270.001:
    Type: Alarm
    Description: "Host <host_name> compute services failure[, reason = <reason_text>]"
    Entity_Instance_ID: host=<host_name>.services=compute
    Severity: critical
    Proposed_Repair_Action: Wait for host services recovery to complete; if problem persists contact next level of support
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: processing-error
    Probable_Cause: unspecified-reason
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

270.101:
    Type: Log
    Description: "Host <host_name> compute services failure[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

270.102:
    Type: Log
    Description: Host <host_name> compute services enabled
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

270.103:
    Type: Log
    Description: Host <host_name> compute services disabled
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

275.001:
    Type: Log
    Description: Host <host_name> hypervisor is now <administrative_state>-<operational_state>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

#---------------------------------------------------------------------------
# DISTRIBUTED CLOUD
#---------------------------------------------------------------------------

280.001:
    Type: Alarm
    Description: <subcloud> is offline
    Entity_Instance_ID: subcloud=<subcloud>
    Severity: critical
    Proposed_Repair_Action: Wait for subcloud to become online; if problem persists contact next level of support
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: communication
    Probable_Cause: loss-of-signal
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

280.002:
    Type: Alarm
    Description: <subcloud> <resource> sync_status is out-of-sync
    Entity_Instance_ID: [subcloud=<subcloud>.resource=<compute | network | platform | volumev2>]
    Severity: major
    Proposed_Repair_Action: If problem persists contact next level of support
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: other
    Probable_Cause: application-subsystem-failure
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

#---------------------------------------------------------------------------
# NETWORK
#---------------------------------------------------------------------------

300.001:
    Type: Alarm
    Description: "'Data' Port failed."
    Entity_Instance_ID: host=<hostname>.port=<port-uuid>
    Severity: major
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: loss-of-signal
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

300.002:
    Type: Alarm
    Description: |-
        'Data' Interface degraded.
        OR
        'Data' Interface failed.
    Entity_Instance_ID: host=<hostname>.interface=<if-uuid>
    Severity: [critical, major]
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: loss-of-signal
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: critical

300.003:
    Type: Alarm
    Description: Networking Agent not responding.
    Entity_Instance_ID: host=<hostname>.agent=<agent-uuid>
    Severity: major
    Proposed_Repair_Action: "If condition persists, attempt to clear issue by administratively locking and unlocking the Host."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: underlying-resource-unavailable
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

300.004:
    Type: Alarm
    Description: No enabled compute host with connectivity to provider network.
    Entity_Instance_ID: service=networking.providernet=<pnet-uuid>
    Severity: major
    Proposed_Repair_Action: Enable compute hosts with required provider network connectivity.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: underlying-resource-unavailable
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

300.005:
    Type: Alarm
    Description: |-
        Communication failure detected over provider network x% for ranges y% on host z%.
        OR
        Communication failure detected over provider network x% on host z%.
    Entity_Instance_ID: host=<hostname>.service=networking.providernet=<pnet-uuid>
    Severity: major
    Proposed_Repair_Action: Check neighbour switch port VLAN assignments.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: underlying-resource-unavailable
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

300.010:
    Type: Alarm
    Description: |-
        ML2 Driver Agent non-reachable
        OR
        ML2 Driver Agent reachable but non-responsive
        OR
        ML2 Driver Agent authentication failure
        OR
        ML2 Driver Agent is unable to sync Neutron database
    Entity_Instance_ID: host=<hostname>.ml2driver=<driver>
    Severity: major
    Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: processing-error
    Probable_Cause: underlying-resource-unavailable
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

300.012:
    Type: Alarm
    Description: "Openflow Controller connection failed."
    Entity_Instance_ID: host=<hostname>.openflow-controller=<uri>
    Severity: major
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: loss-of-signal
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: critical

300.013:
    Type: Alarm
    Description: |-
        No active Openflow controller connections found for this network.
        OR
        One or more Openflow controller connections in disconnected state for this network.
    Entity_Instance_ID: host=<hostname>.openflow-network=<name>
    Severity: [critical, major]
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: loss-of-signal
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: critical

300.014:
    Type: Alarm
    Description: "OVSDB Manager connection failed."
    Entity_Instance_ID: host=<hostname>.sdn-controller=<uuid>
    Severity: major
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: loss-of-signal
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: critical

300.015:
    Type: Alarm
    Description: "No active OVSDB connections found."
    Entity_Instance_ID: host=<hostname>
    Severity: critical
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: loss-of-signal
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: critical

300.016:
    Type: Alarm
    Description: "Dynamic routing agent x% lost connectivity to peer y%."
    Entity_Instance_ID: host=<hostname>,agent=<agent-uuid>,bgp-peer=<bgp-peer>
    Severity: major
    Proposed_Repair_Action: If condition persists, fix connectivity to peer.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: loss-of-signal
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

#---------------------------------------------------------------------------
# HIGH AVAILABILITY
#---------------------------------------------------------------------------

400.001:
    Type: Alarm
    Description: |-
        Service group failure; <list of affected services>.
        OR
        Service group degraded; <list of affected services>.
        OR
        Service group warning; <list of affected services>.
    Entity_Instance_ID: service_domain=<domain_name>.service_group=<group_name>.host=<hostname>
    Severity: [critical, major, minor]
    Proposed_Repair_Action: Contact next level of support.
    Maintenance_Action:
    Inhibit_Alarms: False
    Alarm_Type: processing-error
    Probable_Cause: underlying-resource-unavailable
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: major

400.002:
    Type: Alarm
    Description: |-
        Service group loss of redundancy; expected <num> standby member<s> but only <num> standby member<s> available.
        OR
        Service group loss of redundancy; expected <num> standby member<s> but no standby member<s> available.
        OR
        Service group loss of redundancy; expected <num> active member<s> but no active members available.
        OR
        Service group loss of redundancy; expected <num> active member<s> but only <num> active member<s> available.
    Entity_Instance_ID: service_domain=<domain_name>.service_group=<group_name>
    Severity: major
    Proposed_Repair_Action: "Bring a controller node back in to service, otherwise contact next level of support."
    Maintenance_Action:
    Inhibit_Alarms: False
    Alarm_Type: processing-error
    Probable_Cause: underlying-resource-unavailable
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

400.003:
    Type: Alarm
    Description: |-
        License key is not installed; a valid license key is required for operation.
        OR
        License key has expired or is invalid; a valid license key is required for operation.
        OR
        Evaluation license key will expire on <date>; there are <num_days> days remaining in this evaluation.
        OR
        Evaluation license key will expire on <date>; there is only 1 day remaining in this evaluation.
    Entity_Instance_ID: host=<hostname>
    Severity: critical
    Proposed_Repair_Action: Contact next level of support to obtain a new license key.
    Maintenance_Action:
    Inhibit_Alarms: False
    Alarm_Type: processing-error
    Probable_Cause: key-expired
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: critical
    Degrade_Affecting_Severity: none

# 400.004:    // NOTE Removed
#     Type: Alarm
#     Description: Service group software modification detected; <list of affected files>.
#     Entity_Instance_ID: host=<hostname>
#     Severity: major
#     Proposed_Repair_Action: Contact next level of support.
#     Maintenance_Action:
#     Inhibit_Alarms: False
#     Alarm_Type: processing-error
#     Probable_Cause: software-program-error
#     Service_Affecting: True
#     Suppression: False

400.005:
    Type: Alarm
    Description: |-
        Communication failure detected with peer over port <linux-ifname>.
        OR
        Communication failure detected with peer over port <linux-ifname> within the last 30 seconds.
    Entity_Instance_ID: host=<hostname>.network=<mgmt | oam | cluster-host>
    Severity: major
    Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
    Maintenance_Action:
    Inhibit_Alarms: False
    Alarm_Type: communication
    Probable_Cause: underlying-resource-unavailable
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

#---------------------------------------------------------------------------
# SM
#---------------------------------------------------------------------------

401.001:
    Type: Log
    Description: Service group <group> state change from <state> to <state> on host <host_name>
    Entity_Instance_ID: service_domain=<domain>.service_group=<group>.host=<host_name>
    Severity: critical
    Alarm_Type: processing-error
    Probable_Cause: unspecified-reason
    Service_Affecting: True

401.002:
    Type: Log
    Description: |-
        Service group <group> loss of redundancy; expected <X> standby member but no standby members available
        or
        Service group <group> loss of redundancy; expected <X> standby member but only <Y> standby member(s) available
        or
        Service group <group> has no active members available; expected <X> active member(s)
        or
        Service group <group> loss of redundancy; expected <X> active member(s) but only <Y> active member(s) available
    Entity_Instance_ID: service_domain=<domain>.service_group=<group>
    Severity: critical
    Alarm_Type: processing-error
    Probable_Cause: unspecified-reason
    Service_Affecting: True

401.003:
    Type: Log
    Description: |-
        License key has expired or is invalid
        or
        Evaluation license key will expire on <date>
        or
        License key is valid
    Entity_Instance_ID: host=<host_name>
    Severity: critical
    Alarm_Type: processing-error
    Probable_Cause: unspecified-reason
    Service_Affecting: True

401.005:
    Type: Log
    Description: |-
        Communication failure detected with peer over port <port> on host <host name>
        or
        Communication failure detected with peer over port <port> on host <host name> within the last <X> seconds
        or
        Communication established with peer over port <port> on host <host name>
    Entity_Instance_ID: host=<host_name>.network=<network>
    Severity: critical
    Alarm_Type: processing-error
    Probable_Cause: unspecified-reason
    Service_Affecting: True

401.007:
    Type: Log
    Description: Swact or swact-force
    Entity_Instance_ID: host=<host_name>
    Severity: critical
    Alarm_Type: processing-error
    Probable_Cause: unspecified-reason
    Service_Affecting: True

#---------------------------------------------------------------------------
# SECURITY
#---------------------------------------------------------------------------

500.100:
    Type: Alarm
    Description: TPM initialization failed on host.
    Entity_Instance_ID: host=<hostname>
    Severity: major
    Proposed_Repair_Action: Reinstall HTTPS certificate; if problem persists contact next level of support.
    Maintenance_Action: degrade
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: procedural-error
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

500.101:
    Type: Alarm
    Description: Developer patch certificate enabled.
    Entity_Instance_ID: host=controller
    Severity: critical
    Proposed_Repair_Action: Reinstall system to disable developer certificate and remove untrusted patches.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unspecified-reason
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

500.500:
    Type: Log
    Description: "Host <host_name> has IMA Appraisal failure for service <service> when executing <file>[, reason = <reason_text>]"
    Entity_Instance_ID: host=<hostname>.service=<service>
    Severity: major
    Alarm_Type: integrity-violation
    Probable_Cause: information-modification-detected
    Service_Affecting: False

#---------------------------------------------------------------------------
|
|
# VM
|
|
#---------------------------------------------------------------------------
|
|
|
|
700.001:
|
|
Type: Alarm
|
|
Description: |-
|
|
Instance <instance_name> owned by <tenant_name> has failed on host <host_name>
|
|
Instance <instance_name> owned by <tenant_name> has failed to schedule
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: The system will attempt recovery; no repair action required
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: software-error
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.002:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is paused on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Unpause the instance
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: procedural-error
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.003:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is suspended on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Resume the instance
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: procedural-error
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.004:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is stopped on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Start the instance
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: procedural-error
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.005:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is rebooting on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Wait for reboot to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.006:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is rebuilding on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Wait for rebuild to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: underlying-resource-unavailable
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.007:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is evacuating from host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Wait for evacuate to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: underlying-resource-unavailable
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.008:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is live migrating from host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: warning
|
|
Proposed_Repair_Action: Wait for live migration to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.009:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is cold migrating from host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Wait for cold migration to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.010:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> has been cold-migrated to host <host_name> waiting for confirmation
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Confirm or revert cold-migrate of instance
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.011:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is reverting cold migrate to host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: "Wait for cold migration revert to complete; if problem persists contact next level of support"
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: other
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.012:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is resizing on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Wait for resize to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.013:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> has been resized on host <host_name> waiting for confirmation
|
|
Entity_Instance_ID: itenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Confirm or revert resize of instance
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: True
|
|
Suppression: True
|
|
Management_Affecting_Severity: warning
|
|
Degrade_Affecting_Severity: none
|
|
|
|
700.014:
    Type: Alarm
    Description: Instance <instance_name> owned by <tenant_name> is reverting resize on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Proposed_Repair_Action: "Wait for resize revert to complete; if problem persists contact next level of support"
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: other
    Probable_Cause: unspecified-reason
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

700.015:
    Type: Alarm
    Description: Guest Heartbeat not established for instance <instance_name> owned by <tenant_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: major
    Proposed_Repair_Action: "Verify that the instance is running the Guest-Client daemon, or disable Guest Heartbeat for the instance if no longer needed, otherwise contact next level of support"
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: communication
    Probable_Cause: procedural-error
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

700.016:
    Type: Alarm
    Description: Multi-Node Recovery Mode
    Entity_Instance_ID: subsystem=vim
    Severity: minor
    Proposed_Repair_Action: "Wait for the system to exit out of this mode"
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

700.017:
    Type: Alarm
    Description: Server group <server_group_name> <policy> policy was not satisfied
    Entity_Instance_ID: server-group=<server-group-uuid>
    Severity: minor
    Proposed_Repair_Action: "Migrate instances in an attempt to satisfy the policy; if problem persists contact next level of support"
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: processing-error
    Probable_Cause: procedural-error
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

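# The 700.1xx records below are customer logs emitted as each VIM action
# against an instance (create, delete, pause, live-migrate, resize, ...) is
# issued, progresses, and then completes, is rejected, cancelled or fails.
# As an illustration only (the UUIDs are hypothetical, not taken from any
# system), a rendered entity instance id for one of these logs would look like:
#
#   tenant=8a19f6c2-4a9c-4c2a-9e2b-0d5f3a6b7c1d.instance=5e2d9b4f-6c3a-4f8e-b1a7-2c9d0e1f3a4b
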
700.101:
    Type: Log
    Description: Instance <instance_name> is enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.102:
    Type: Log
    Description: |-
        Instance <instance_name> owned by <tenant_name> has failed[, reason = <reason_text>]
        Instance <instance_name> owned by <tenant_name> has failed to schedule[, reason = <reason_text>]
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.103:
    Type: Log
    Description: Create issued <by <tenant_name>|by the system> against <instance_name> owned by <tenant_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.104:
    Type: Log
    Description: Creating instance <instance_name> owned by <tenant_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.105:
    Type: Log
    Description: "Create rejected for instance <instance_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.106:
    Type: Log
    Description: "Create cancelled for instance <instance_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.107:
    Type: Log
    Description: "Create failed for instance <instance_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.108:
    Type: Log
    Description: Instance <instance_name> owned by <tenant_name> has been created
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.109:
    Type: Log
    Description: "Delete issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.110:
    Type: Log
    Description: Deleting instance <instance_name> owned by <tenant_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.111:
    Type: Log
    Description: "Delete rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.112:
    Type: Log
    Description: "Delete cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.113:
    Type: Log
    Description: "Delete failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.114:
    Type: Log
    Description: Deleted instance <instance_name> owned by <tenant_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.115:
    Type: Log
    Description: "Pause issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.116:
    Type: Log
    Description: Pause inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.117:
    Type: Log
    Description: "Pause rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.118:
    Type: Log
    Description: "Pause cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.119:
    Type: Log
    Description: "Pause failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.120:
    Type: Log
    Description: Pause complete for instance <instance_name> now paused on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.121:
    Type: Log
    Description: "Unpause issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.122:
    Type: Log
    Description: Unpause inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.123:
    Type: Log
    Description: "Unpause rejected for instance <instance_name> paused on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.124:
    Type: Log
    Description: "Unpause cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.125:
    Type: Log
    Description: "Unpause failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.126:
    Type: Log
    Description: Unpause complete for instance <instance_name> now enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.127:
    Type: Log
    Description: "Suspend issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.128:
    Type: Log
    Description: Suspend inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.129:
    Type: Log
    Description: "Suspend rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.130:
    Type: Log
    Description: "Suspend cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.131:
    Type: Log
    Description: "Suspend failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.132:
    Type: Log
    Description: Suspend complete for instance <instance_name> now suspended on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.133:
    Type: Log
    Description: "Resume issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.134:
    Type: Log
    Description: Resume inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.135:
    Type: Log
    Description: "Resume rejected for instance <instance_name> suspended on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.136:
    Type: Log
    Description: "Resume cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.137:
    Type: Log
    Description: "Resume failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.138:
    Type: Log
    Description: Resume complete for instance <instance_name> now enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.139:
    Type: Log
    Description: "Start issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.140:
    Type: Log
    Description: Start inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.141:
    Type: Log
    Description: "Start rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.142:
    Type: Log
    Description: "Start cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.143:
    Type: Log
    Description: "Start failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.144:
    Type: Log
    Description: Start complete for instance <instance_name> now enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.145:
    Type: Log
    Description: "Stop issued <by <tenant_name>|by the system|by the instance> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.146:
    Type: Log
    Description: Stop inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.147:
    Type: Log
    Description: "Stop rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.148:
    Type: Log
    Description: "Stop cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.149:
    Type: Log
    Description: "Stop failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.150:
    Type: Log
    Description: Stop complete for instance <instance_name> now disabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.151:
    Type: Log
    Description: "Live-Migrate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> from host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.152:
    Type: Log
    Description: Live-Migrate inprogress for instance <instance_name> from host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.153:
    Type: Log
    Description: "Live-Migrate rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.154:
    Type: Log
    Description: "Live-Migrate cancelled for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.155:
    Type: Log
    Description: "Live-Migrate failed for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.156:
    Type: Log
    Description: Live-Migrate complete for instance <instance_name> now enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.157:
    Type: Log
    Description: "Cold-Migrate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> from host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.158:
    Type: Log
    Description: Cold-Migrate inprogress for instance <instance_name> from host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.159:
    Type: Log
    Description: "Cold-Migrate rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.160:
    Type: Log
    Description: "Cold-Migrate cancelled for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.161:
    Type: Log
    Description: "Cold-Migrate failed for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.162:
    Type: Log
    Description: Cold-Migrate complete for instance <instance_name> now enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.163:
    Type: Log
    Description: "Cold-Migrate-Confirm issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.164:
    Type: Log
    Description: Cold-Migrate-Confirm inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.165:
    Type: Log
    Description: "Cold-Migrate-Confirm rejected for instance <instance_name> now enabled on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.166:
    Type: Log
    Description: "Cold-Migrate-Confirm cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.167:
    Type: Log
    Description: "Cold-Migrate-Confirm failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.168:
    Type: Log
    Description: Cold-Migrate-Confirm complete for instance <instance_name> enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.169:
    Type: Log
    Description: "Cold-Migrate-Revert issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.170:
    Type: Log
    Description: Cold-Migrate-Revert inprogress for instance <instance_name> from host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.171:
    Type: Log
    Description: "Cold-Migrate-Revert rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.172:
    Type: Log
    Description: "Cold-Migrate-Revert cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.173:
    Type: Log
    Description: "Cold-Migrate-Revert failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.174:
    Type: Log
    Description: Cold-Migrate-Revert complete for instance <instance_name> now enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.175:
    Type: Log
    Description: "Evacuate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.176:
    Type: Log
    Description: Evacuating instance <instance_name> owned by <tenant_name> from host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.177:
    Type: Log
    Description: "Evacuate rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.178:
    Type: Log
    Description: "Evacuate cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.179:
    Type: Log
    Description: "Evacuate failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.180:
    Type: Log
    Description: Evacuate complete for instance <instance_name> now enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.181:
    Type: Log
    Description: "Reboot <(soft-reboot)|(hard-reboot)> issued <by <tenant_name>|by the system|by the instance> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.182:
    Type: Log
    Description: Reboot inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.183:
    Type: Log
    Description: "Reboot rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.184:
    Type: Log
    Description: "Reboot cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.185:
    Type: Log
    Description: "Reboot failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.186:
    Type: Log
    Description: Reboot complete for instance <instance_name> now enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.187:
    Type: Log
    Description: "Rebuild issued <by <tenant_name>|by the system> against instance <instance_name> using image <image_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.188:
    Type: Log
    Description: Rebuild inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.189:
    Type: Log
    Description: "Rebuild rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.190:
    Type: Log
    Description: "Rebuild cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.191:
    Type: Log
    Description: "Rebuild failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.192:
    Type: Log
    Description: Rebuild complete for instance <instance_name> now enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.193:
    Type: Log
    Description: "Resize issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.194:
    Type: Log
    Description: Resize inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.195:
    Type: Log
    Description: "Resize rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.196:
    Type: Log
    Description: "Resize cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.197:
    Type: Log
    Description: "Resize failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.198:
    Type: Log
    Description: Resize complete for instance <instance_name> enabled on host <host_name> waiting for confirmation
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.199:
    Type: Log
    Description: "Resize-Confirm issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.200:
    Type: Log
    Description: Resize-Confirm inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.201:
    Type: Log
    Description: "Resize-Confirm rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.202:
    Type: Log
    Description: "Resize-Confirm cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.203:
    Type: Log
    Description: "Resize-Confirm failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.204:
    Type: Log
    Description: Resize-Confirm complete for instance <instance_name> enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.205:
    Type: Log
    Description: "Resize-Revert issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.206:
    Type: Log
    Description: Resize-Revert inprogress for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.207:
    Type: Log
    Description: "Resize-Revert rejected for instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.208:
    Type: Log
    Description: "Resize-Revert cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.209:
    Type: Log
    Description: "Resize-Revert failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.210:
    Type: Log
    Description: Resize-Revert complete for instance <instance_name> enabled on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.211:
    Type: Log
    Description: Guest Heartbeat established for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: major
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.212:
    Type: Log
    Description: Guest Heartbeat disconnected for instance <instance_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: major
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.213:
    Type: Log
    Description: "Guest Heartbeat failed for instance <instance_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.214:
    Type: Log
    Description: Instance <instance_name> has been renamed to <new_instance_name> owned by <tenant_name> on host <host_name>
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.215:
    Type: Log
    Description: "Guest Health Check failed for instance <instance_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.216:
    Type: Log
    Description: "Entered Multi-Node Recovery Mode"
    Entity_Instance_ID: subsystem=vim
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

700.217:
    Type: Log
    Description: "Exited Multi-Node Recovery Mode"
    Entity_Instance_ID: subsystem=vim
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

#---------------------------------------------------------------------------
# STORAGE
#---------------------------------------------------------------------------

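# The storage alarms below are scoped by the distributed filesystem cluster
# UUID, optionally narrowed to a peer group or storage tier. As an
# illustration only (hypothetical UUID, not taken from any system), a
# rendered entity instance id would look like:
#
#   cluster=4d0c1e2f-7b8a-49c3-9d2e-5f6a7b8c9d0e.peergroup=group-0
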
800.001:
    Type: Alarm
    Description: |-
        Storage Alarm Condition:
        1 mons down, quorum 1,2 controller-1,storage-0
    Entity_Instance_ID: cluster=<dist-fs-uuid>
    Severity: [critical, major]
    Proposed_Repair_Action: "If problem persists, contact next level of support."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: equipment-malfunction
    Service_Affecting:
        critical: True
        major: False
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

800.010:
    Type: Alarm
    Description: |-
        Potential data loss. No available OSDs in storage replication group.
    Entity_Instance_ID: cluster=<dist-fs-uuid>.peergroup=<group-x>
    Severity: [critical]
    Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available.
        Check if OSDs of each storage host are up and running.
        If problem persists contact next level of support."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: equipment-malfunction
    Service_Affecting:
        critical: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

800.011:
    Type: Alarm
    Description: |-
        Loss of replication in peergroup.
    Entity_Instance_ID: cluster=<dist-fs-uuid>.peergroup=<group-x>
    Severity: [major]
    Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available.
        Check if OSDs of each storage host are up and running.
        If problem persists contact next level of support."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: equipment-malfunction
    Service_Affecting:
        major: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

800.002:
    Type: Alarm
    Description: ["Image storage media is full: There is not enough disk space on the image storage media.",
                  "Instance <instance name> snapshot failed: There is not enough disk space on the image storage media.",
                  "Supplied <attrs> (<supplied>) and <attrs> generated from uploaded image (<actual>) did not match. Setting image status to 'killed'.",
                  "Error in store configuration. Adding images to store is disabled.",
                  "Forbidden upload attempt: <exception>",
                  "Insufficient permissions on image storage media: <exception>",
                  "Denying attempt to upload image larger than <size> bytes.",
                  "Denying attempt to upload image because it exceeds the quota: <exception>",
                  "Received HTTP error while uploading image <image_id>",
                  "Client disconnected before sending all data to backend",
                  "Failed to upload image <image_id>"]
    Entity_Instance_ID: ["image=<image-uuid>, instance=<instance-uuid>",
                         "tenant=<tenant-uuid>, instance=<instance-uuid>",
                         "image=<image-uuid>, instance=<instance-uuid>",
                         "image=<image-uuid>, instance=<instance-uuid>",
                         "image=<image-uuid>, instance=<instance-uuid>",
                         "image=<image-uuid>, instance=<instance-uuid>",
                         "image=<image-uuid>, instance=<instance-uuid>",
                         "image=<image-uuid>, instance=<instance-uuid>",
                         "image=<image-uuid>, instance=<instance-uuid>",
                         "image=<image-uuid>, instance=<instance-uuid>",
                         "image=<image-uuid>, instance=<instance-uuid>"]
    Alarm_Type: [physical-violation,
                 physical-violation,
                 integrity-violation,
                 integrity-violation,
                 security-service-or-mechanism-violation,
                 security-service-or-mechanism-violation,
                 security-service-or-mechanism-violation,
                 security-service-or-mechanism-violation,
                 communication,
                 communication,
                 operational-violation]
    Severity: warning
    Proposed_Repair_Action:
    Maintenance_Action:
    Inhibit_Alarms:
    Probable_Cause: unspecified-reason
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

800.003:
    Type: Alarm
    Description: |-
        Storage Alarm Condition:
        Quota/Space mismatch for the <tiername> tier. The sum of Ceph pool quotas does not match the tier size.
    Entity_Instance_ID: cluster=<dist-fs-uuid>.tier=<tiername>
    Severity: minor
    Proposed_Repair_Action: "Update ceph storage pool quotas to use all available tier space."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: configuration-out-of-date
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

800.100:
    Type: Alarm
    Description: |-
        Storage Alarm Condition:
        Cinder I/O Congestion is above normal range and is building
    Entity_Instance_ID: cinder_io_monitor
    Severity: major
    Proposed_Repair_Action: "Reduce the I/O load on the Cinder LVM backend. Use
        Cinder QoS mechanisms on high usage volumes."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: qos
    Probable_Cause: congestion
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none

800.101:
    Type: Alarm
    Description: |-
        Storage Alarm Condition:
        Cinder I/O Congestion is high and impacting guest performance
    Entity_Instance_ID: cinder_io_monitor
    Severity: critical
    Proposed_Repair_Action: "Reduce the I/O load on the Cinder LVM backend.
        Cinder actions may fail until congestion is reduced.
        Use Cinder QoS mechanisms on high usage volumes."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: qos
    Probable_Cause: congestion
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

800.102:
    Type: Alarm
    Description: |-
        Storage Alarm Condition:
        PV configuration <error/failed to apply> on <hostname>. Reason: <detailed reason>.
    Entity_Instance_ID: pv=<pv_uuid>
    Severity: [critical, major]
    Proposed_Repair_Action: "Remove failed PV and associated Storage Device then recreate them."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: configuration-or-customization-error
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: major
    Degrade_Affecting_Severity: none

800.103:
    Type: Alarm
    Description: |-
        Storage Alarm Condition:
        [ Metadata usage for LVM thin pool <VG name>/<Pool name> exceeded threshold and automatic extension failed,
        Metadata usage for LVM thin pool <VG name>/<Pool name> exceeded threshold ]; threshold x%, actual y%.
    Entity_Instance_ID: <hostname>.lvmthinpool=<VG name>/<Pool name>
    Severity: critical
    Proposed_Repair_Action: "Increase Storage Space Allotment for Cinder on the 'lvm' backend.
        Consult the System Administration Manual for more details.
        If problem persists, contact next level of support."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: threshold-crossed
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: major
    Degrade_Affecting_Severity: none

800.104:
    Type: Alarm
    Description: |-
        Storage Alarm Condition:
        <storage-backend-name> configuration failed to apply on host: <host-uuid>.
    Entity_Instance_ID: storage_backend=<storage-backend-name>
    Severity: critical
    Proposed_Repair_Action: "Update backend setting to reapply configuration.
        Consult the System Administration Manual for more details.
        If problem persists, contact next level of support."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: configuration-or-customization-error
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: major
    Degrade_Affecting_Severity: none

#---------------------------------------------------------------------------
# SOFTWARE
#---------------------------------------------------------------------------

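# The software orchestration records pair each alarm set with its step logs:
# 900.101-900.103 (alarms) and 900.111-900.121 (logs) are raised against
# orchestration=sw-patch, while 900.201-900.203 and 900.211-900.221 mirror
# them for orchestration=sw-upgrade.
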
900.001:
    Type: Alarm
    Description: Patching operation in progress.
    Entity_Instance_ID: host=controller
    Severity: minor
    Proposed_Repair_Action: Complete reboots of affected hosts.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: environmental
    Probable_Cause: unspecified-reason
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

900.002:
    Type: Alarm
    Description: Obsolete patch in system.
    Entity_Instance_ID: host=controller
    Severity: warning
    Proposed_Repair_Action: Remove and delete obsolete patches.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: environmental
    Probable_Cause: unspecified-reason
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

900.003:
    Type: Alarm
    Description: Patch host install failure.
    Entity_Instance_ID: host=<hostname>
    Severity: major
    Proposed_Repair_Action: Undo patching operation.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: environmental
    Probable_Cause: unspecified-reason
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

900.004:
    Type: Alarm
    Description: Host version mismatch.
    Entity_Instance_ID: host=<hostname>
    Severity: major
    Proposed_Repair_Action: Reinstall host to update applied load.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unspecified-reason
    Service_Affecting: True
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

900.005:
    Type: Alarm
    Description: System Upgrade in progress.
    Entity_Instance_ID: host=controller
    Severity: minor
    Proposed_Repair_Action: No action required.
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: operational-violation
    Probable_Cause: unspecified-reason
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

900.101:
    Type: Alarm
    Description: Software patch auto-apply inprogress
    Entity_Instance_ID: orchestration=sw-patch
    Severity: major
    Proposed_Repair_Action: Wait for software patch auto-apply to complete; if problem persists contact next level of support
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

900.102:
    Type: Alarm
    Description: Software patch auto-apply aborting
    Entity_Instance_ID: orchestration=sw-patch
    Severity: major
    Proposed_Repair_Action: Wait for software patch auto-apply abort to complete; if problem persists contact next level of support
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

900.103:
    Type: Alarm
    Description: Software patch auto-apply failed
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Proposed_Repair_Action: Attempt to apply software patches manually; if problem persists contact next level of support
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: underlying-resource-unavailable
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

900.111:
    Type: Log
    Description: Software patch auto-apply start
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.112:
    Type: Log
    Description: Software patch auto-apply inprogress
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.113:
    Type: Log
    Description: Software patch auto-apply rejected
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.114:
    Type: Log
    Description: Software patch auto-apply cancelled
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.115:
    Type: Log
    Description: Software patch auto-apply failed
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.116:
    Type: Log
    Description: Software patch auto-apply completed
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.117:
    Type: Log
    Description: Software patch auto-apply abort
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.118:
    Type: Log
    Description: Software patch auto-apply aborting
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.119:
    Type: Log
    Description: Software patch auto-apply abort rejected
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.120:
    Type: Log
    Description: Software patch auto-apply abort failed
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.121:
    Type: Log
    Description: Software patch auto-apply aborted
    Entity_Instance_ID: orchestration=sw-patch
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.201:
    Type: Alarm
    Description: Software upgrade auto-apply inprogress
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: major
    Proposed_Repair_Action: Wait for software upgrade auto-apply to complete; if problem persists contact next level of support
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

900.202:
    Type: Alarm
    Description: Software upgrade auto-apply aborting
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: major
    Proposed_Repair_Action: Wait for software upgrade auto-apply abort to complete; if problem persists contact next level of support
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

900.203:
    Type: Alarm
    Description: Software upgrade auto-apply failed
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Proposed_Repair_Action: Attempt to apply software upgrade manually; if problem persists contact next level of support
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: equipment
    Probable_Cause: underlying-resource-unavailable
    Service_Affecting: True
    Suppression: True
    Management_Affecting_Severity: warning
    Degrade_Affecting_Severity: none

900.211:
    Type: Log
    Description: Software upgrade auto-apply start
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.212:
    Type: Log
    Description: Software upgrade auto-apply inprogress
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.213:
    Type: Log
    Description: Software upgrade auto-apply rejected
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.214:
    Type: Log
    Description: Software upgrade auto-apply cancelled
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.215:
    Type: Log
    Description: Software upgrade auto-apply failed
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.216:
    Type: Log
    Description: Software upgrade auto-apply completed
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.217:
    Type: Log
    Description: Software upgrade auto-apply abort
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.218:
    Type: Log
    Description: Software upgrade auto-apply aborting
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.219:
    Type: Log
    Description: Software upgrade auto-apply abort rejected
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.220:
    Type: Log
    Description: Software upgrade auto-apply abort failed
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False

900.221:
    Type: Log
    Description: Software upgrade auto-apply aborted
    Entity_Instance_ID: orchestration=sw-upgrade
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: False
...