From 3b341c6e5ef20d12bfe3044e5232b533bc42aecf Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Mon, 16 Nov 2020 12:16:02 -0500 Subject: [PATCH] Add collectd memory plugin entity IDs to fm-doc This update adds the following collectd memory plugin instance-based alarm entity IDs to fm-doc's events.yaml file: host=.memory=platform, host=.memory=total, host=.numa=node. It also removes the obsoleted MINOR threshold level from the CPU, memory and filesystem alarm definitions. Change-Id: I259ba5c84ff90c3e1acd82fc7e72ba63a2fab50a Partial-Bug: 1903731 Depends-On: https://review.opendev.org/c/starlingx/monitoring/+/764388 Signed-off-by: Eric MacDonald --- fm-doc/fm_doc/events.yaml | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fm-doc/fm_doc/events.yaml b/fm-doc/fm_doc/events.yaml index 0cf01cc1..f1e6c62f 100755 --- a/fm-doc/fm_doc/events.yaml +++ b/fm-doc/fm_doc/events.yaml @@ -93,9 +93,8 @@ Platform CPU threshold exceeded; threshold x%, actual y% . CRITICAL @ 95% MAJOR @ 90% - MINOR @ 80% Entity_Instance_ID: host= - Severity: [critical, major, minor] + Severity: [critical, major] Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support." Maintenance_Action: critical: degrade @@ -135,9 +134,15 @@ Memory threshold exceeded; threshold x%, actual y% . CRITICAL @ 90% MAJOR @ 80% - MINOR @ 70% - Entity_Instance_ID: host= - Severity: [critical, major, minor] + Entity_Instance_ID: |- + host= + OR + host=.memory=total + OR + host=.memory=platform + OR + host=.numa=node + Severity: [critical, major] Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support; may require additional memory on Host." Maintenance_Action: critical: degrade @@ -157,7 +162,6 @@ File System threshold exceeded; threshold x%, actual y% . CRITICAL @ 90% MAJOR @ 80% - MINOR @ 70% OR host=.volumegroup= Monitor and if condition persists, consider adding additional physical volumes to the volume group. 
@@ -165,7 +169,7 @@ host=.filesystem= OR host=.volumegroup= - Severity: [critical, major, minor] + Severity: [critical, major] Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support." Maintenance_Action: critical: degrade