From b29fb32f60d251c36c5515c3ebd9b3143e80f532 Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Mon, 29 Jul 2024 22:14:45 +0000 Subject: [PATCH] Clear 200.014 sensor=profile alarm over model relearn and deprovision The 200.014 Sensor Config sensor=profile alarm does not get cleared over Sensor Profile Relearn or BMC Deprovision actions. This can then lead to a stuck alarm if the sensor read / groups create issue never resolves. Sensor alarms against a host must get deleted if the BMC for that host is deprovisioned. This update removes the long-obsolete sensor=sensors alarm references and adds a clear of the sensor config "profile" alarm to the 'sensor group profile relearn' and 'bmc deprovisioning' code paths. Test Plan: PASS: Verify sensor config profile alarm is deleted when PASS: - sensor model is relearned PASS: - bmc deprovisioned PASS: - sensor model is properly created (FIT tested) PASS: Verify raised 200.014 alarm persists over a hwmond restart Regression: PASS: Verify basic hardware monitoring and alarming PASS: Verify sensor deprovisioning PASS: Verify sensor model relearn operation PASS: Verify sensor alarming and clear function Closes-Bug: 2074760 Change-Id: I3165105e9e4e933ab7b723bd0b6241a6a2b046ae Signed-off-by: Eric MacDonald --- mtce/src/hwmon/hwmonClass.cpp | 14 ++++++++++---- mtce/src/hwmon/hwmonHdlr.cpp | 23 ++++++++--------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/mtce/src/hwmon/hwmonClass.cpp b/mtce/src/hwmon/hwmonClass.cpp index 7ccef7b5..cee6ac61 100644 --- a/mtce/src/hwmon/hwmonClass.cpp +++ b/mtce/src/hwmon/hwmonClass.cpp @@ -494,7 +494,7 @@ void hwmonHostClass::clear_bm_assertions ( struct hwmonHostClass::hwmon_host * h /* Bug Fix: This was outside the if bm_provisioned clause causing it * to be called even if the bmc was not already provisioned */ - hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "sensors", REASON_DEPROVISIONED ); + hwmonAlarm_clear ( host_ptr->hostname, 
HWMON_ALARM_ID__SENSORCFG, "profile", REASON_DEPROVISIONED ); } int hwmonHostClass::set_bm_prov ( struct hwmonHostClass::hwmon_host * host_ptr, bool state ) @@ -672,10 +672,16 @@ int hwmonHostClass::mod_host ( node_inv_type & inv ) need_relearn = false ; } - if (( need_relearn == true ) && ( host_ptr->groups )) + if ( need_relearn == true ) { - ilog ("%s sensor model will be deleted and relearned", inv.name.c_str()); - bmc_learn_sensor_model (hostBase.get_uuid( inv.name )); + if ( host_ptr->alarmed_config == true ) + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "profile", REASON_OK ); + + if ( host_ptr->groups ) + { + ilog ("%s sensor model will be deleted and relearned", inv.name.c_str()); + bmc_learn_sensor_model (hostBase.get_uuid( inv.name )); + } } } else diff --git a/mtce/src/hwmon/hwmonHdlr.cpp b/mtce/src/hwmon/hwmonHdlr.cpp index 957c394d..9e915c83 100644 --- a/mtce/src/hwmon/hwmonHdlr.cpp +++ b/mtce/src/hwmon/hwmonHdlr.cpp @@ -460,6 +460,10 @@ int hwmonHostClass::add_host_handler ( struct hwmonHostClass::hwmon_host * host_ case HWMON_ADD__DONE: { ilog ("%s add complete ; %d sensors %d groups\n", host_ptr->hostname.c_str(), host_ptr->sensors, host_ptr->groups ); + + if (( host_ptr->sensors ) && ( host_ptr->groups ) && ( host_ptr->alarmed_config == true )) + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "profile", REASON_OK ); + break ; } default: @@ -2064,28 +2068,17 @@ bool hwmonHostClass::manage_startup_states ( struct hwmonHostClass::hwmon_host * int rc = PASS ; if ( host_ptr ) { - + string profile="profile"; std::list::iterator _iter_ptr ; std::list alarm_list ; alarm_list.clear(); - /********************** Manage Profile Alarms ***********************/ + /********************** Manage Profile Alarm ***********************/ - /* clear this config alarm as it is not used anymore - handles patchback case. 
- * Its cheaper to send a clear than it is to query for it first */ - hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "sensor", REASON_OK ); - -#ifdef WANT_QUERY_SENSOR_CONFIG_ALARM /* We don't degrade for sensor config error - this is similar to a * BMC access error in mtcAgent where we only raise a minor alarm */ - if ( hwmon_alarm_query ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "profile" ) != FM_ALARM_SEVERITY_CLEAR ) - host_ptr->alarmed_config = true ; -#endif - if ( host_ptr->alarmed_config == false ) - { - hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "profile", REASON_OK ); - host_ptr->alarmed_config = false ; - } + if ( hwmon_alarm_query ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, profile ) != FM_ALARM_SEVERITY_CLEAR ) + host_ptr->alarmed_config = true ; /********************** Manage Group Alarms ***********************/ string entity = "host=" + host_ptr->hostname + ".sensorgroup=" ;