diff --git a/mtce-common/src/common/nodeBase.cpp b/mtce-common/src/common/nodeBase.cpp index 7ba6197d..1d067210 100755 --- a/mtce-common/src/common/nodeBase.cpp +++ b/mtce-common/src/common/nodeBase.cpp @@ -326,19 +326,19 @@ void print_mtc_message ( string hostname, } /* Graceful recovery stages strings and string get'er */ -static std::string recoveryStages_str [MTC_RECOVERY__STAGES +1] ; -static std::string disableStages_str [MTC_DISABLE__STAGES +1] ; -static std::string enableStages_str [MTC_ENABLE__STAGES +1] ; -static std::string sensorStages_str [MTC_SENSOR__STAGES +1] ; -static std::string powerStages_str [MTC_POWER__STAGES +1] ; -static std::string powercycleStages_str [MTC_POWERCYCLE__STAGES +1] ; +static std::string recoveryStages_str [MTC_RECOVERY__STAGES +1] ; +static std::string disableStages_str [MTC_DISABLE__STAGES +1] ; +static std::string enableStages_str [MTC_ENABLE__STAGES +1] ; +static std::string sensorStages_str [MTC_SENSOR__STAGES +1] ; +static std::string powerStages_str [MTC_POWER__STAGES +1] ; +static std::string powercycleStages_str [MTC_POWERCYCLE__STAGES +1] ; static std::string resetStages_str [MTC_RESET__STAGES +1] ; static std::string reinstallStages_str [MTC_REINSTALL__STAGES +1] ; -static std::string oosTestStages_str [MTC_OOS_TEST__STAGES +1] ; -static std::string insvTestStages_str [MTC_INSV_TEST__STAGES +1] ; -static std::string configStages_str [MTC_CONFIG__STAGES +1] ; -static std::string addStages_str [MTC_ADD__STAGES +1] ; -static std::string delStages_str [MTC_DEL__STAGES +1] ; +static std::string oosTestStages_str [MTC_OOS_TEST__STAGES +1] ; +static std::string insvTestStages_str [MTC_INSV_TEST__STAGES +1] ; +static std::string configStages_str [MTC_CONFIG__STAGES +1] ; +static std::string addStages_str [MTC_ADD__STAGES +1] ; +static std::string delStages_str [MTC_DEL__STAGES +1] ; static std::string subStages_str [MTC_SUBSTAGE__STAGES +1] ; void mtc_stages_init ( void ) @@ -461,12 +461,16 @@ void mtc_stages_init ( void ) reinstallStages_str [MTC_REINSTALL__START ] = "Reinstall-Start"; reinstallStages_str [MTC_REINSTALL__START_WAIT ] = "Reinstall-Start-Wait"; + reinstallStages_str [MTC_REINSTALL__POWERQRY ] = "Reinstall-Power-State-Query"; + reinstallStages_str [MTC_REINSTALL__POWERQRY_WAIT ] = "Reinstall-Power-State-Query-Wait"; reinstallStages_str [MTC_REINSTALL__RESTART ] = "Reinstall-ReStart"; reinstallStages_str [MTC_REINSTALL__RESTART_WAIT ] = "Reinstall-ReStart-Wait"; - reinstallStages_str [MTC_REINSTALL__POWERON ] = "Reinstall-PowerOn"; - reinstallStages_str [MTC_REINSTALL__POWERON_WAIT ] = "Reinstall-PowerOn-Wait"; + reinstallStages_str [MTC_REINSTALL__POWEROFF ] = "Reinstall-PowerOff"; + reinstallStages_str [MTC_REINSTALL__POWEROFF_WAIT ] = "Reinstall-PowerOff-Wait"; reinstallStages_str [MTC_REINSTALL__NETBOOT ] = "Reinstall-Netboot"; reinstallStages_str [MTC_REINSTALL__NETBOOT_WAIT ] = "Reinstall-Netboot-Wait"; + reinstallStages_str [MTC_REINSTALL__POWERON ] = "Reinstall-PowerOn"; + reinstallStages_str [MTC_REINSTALL__POWERON_WAIT ] = "Reinstall-PowerOn-Wait"; reinstallStages_str [MTC_REINSTALL__RESET ] = "Reinstall-Reset"; reinstallStages_str [MTC_REINSTALL__RESET_WAIT ] = "Reinstall-Reset-Wait"; reinstallStages_str [MTC_REINSTALL__WIPEDISK ] = "Reinstall-Wipedisk"; diff --git a/mtce-common/src/common/nodeBase.h b/mtce-common/src/common/nodeBase.h index d9d5b409..36f62b0a 100755 --- a/mtce-common/src/common/nodeBase.h +++ b/mtce-common/src/common/nodeBase.h @@ -245,9 +245,11 @@ typedef enum #define MTC_TASK_REINSTALL_FAIL_OL "Reinstall Failed ; timeout waiting for offline" #define MTC_TASK_REINSTALL_FAIL_TO "Reinstall Failed ; timeout waiting for online" #define MTC_TASK_REINSTALL_FAIL_BA "Reinstall Failed ; timeout waiting BMC access" -#define MTC_TASK_REINSTALL_FAIL_PO "Reinstall Failed ; could not power on host" +#define MTC_TASK_REINSTALL_FAIL_PO "Reinstall Failed ; could not power off host" +#define MTC_TASK_REINSTALL_FAIL_PU "Reinstall Failed ; could not power on host" #define MTC_TASK_REINSTALL_FAIL_NB "Reinstall Failed ; netboot request" #define MTC_TASK_REINSTALL_FAIL_PR "Reinstall Failed ; power reset request" +#define MTC_TASK_REINSTALL_FAIL_PQ "Reinstall Failed ; could not query power state" #define MTC_TASK_REINSTALL_FAIL "Reinstall Failed" #define MTC_TASK_REINSTALL_SUCCESS "Reinstall Succeeded" @@ -1046,7 +1048,7 @@ typedef enum MTC_RESETPROG__REBOOT, MTC_RESETPROG__WAIT, MTC_RESETPROG__FAIL, - MTC_RESETPROG__STAGES + MTC_RESETPROG__STAGES } mtc_resetProgStages_enum ; /** Return the string representing the specified 'reset' stage */ @@ -1059,10 +1061,14 @@ typedef enum MTC_REINSTALL__START_WAIT, MTC_REINSTALL__RESTART, MTC_REINSTALL__RESTART_WAIT, - MTC_REINSTALL__POWERON, - MTC_REINSTALL__POWERON_WAIT, + MTC_REINSTALL__POWERQRY, + MTC_REINSTALL__POWERQRY_WAIT, + MTC_REINSTALL__POWEROFF, + MTC_REINSTALL__POWEROFF_WAIT, MTC_REINSTALL__NETBOOT, MTC_REINSTALL__NETBOOT_WAIT, + MTC_REINSTALL__POWERON, + MTC_REINSTALL__POWERON_WAIT, MTC_REINSTALL__RESET, MTC_REINSTALL__RESET_WAIT, MTC_REINSTALL__WIPEDISK, diff --git a/mtce/centos/build_srpm.data b/mtce/centos/build_srpm.data index 5bc6d613..675f1683 100644 --- a/mtce/centos/build_srpm.data +++ b/mtce/centos/build_srpm.data @@ -1,3 +1,3 @@ SRC_DIR="src" -TIS_PATCH_VER=157 +TIS_PATCH_VER=158 BUILD_IS_SLOW=5 diff --git a/mtce/src/maintenance/mtcBmcUtil.cpp b/mtce/src/maintenance/mtcBmcUtil.cpp index 7ad40b8c..9e97ff28 100644 --- a/mtce/src/maintenance/mtcBmcUtil.cpp +++ b/mtce/src/maintenance/mtcBmcUtil.cpp @@ -193,7 +193,8 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr ) { /* handle the redfishtool root query as a special case because * it is likely to fail and we don't want un-necessary error logs */ - if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_QUERY ) && + if ((( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_QUERY ) || + ( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_INFO )) && (( rc == FAIL_SYSTEM_CALL ) || ( rc == FAIL_NOT_ACTIVE ))) { blog ("%s bmc redfish %s failed", @@ -201,14 +202,6 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr ) bmcUtil_getCmd_str( node_ptr->bmc_thread_info.command).c_str()); } - else if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_INFO ) && - (( rc == FAIL_SYSTEM_CALL ) || ( rc == FAIL_NOT_ACTIVE ))) - { - wlog ("%s bmc redfish %s failed", - node_ptr->hostname.c_str(), - bmcUtil_getCmd_str( - node_ptr->bmc_thread_info.command).c_str()); - } else { elog ("%s bmc redfish %s command failed (%s) (data:%s) (rc:%d:%d:%s)\n", @@ -220,6 +213,7 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr ) node_ptr->bmc_thread_info.status, node_ptr->bmc_thread_info.status_string.c_str()); } + goto bmc_command_recv_cleanup; } else { @@ -382,6 +376,8 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr ) } } +bmc_command_recv_cleanup: + if ( rc != RETRY ) { node_ptr->bmc_thread_ctrl.done = true ; diff --git a/mtce/src/maintenance/mtcNodeHdlrs.cpp b/mtce/src/maintenance/mtcNodeHdlrs.cpp index 16951d11..c4dd5164 100755 --- a/mtce/src/maintenance/mtcNodeHdlrs.cpp +++ b/mtce/src/maintenance/mtcNodeHdlrs.cpp @@ -4036,14 +4036,15 @@ int nodeLinkClass::reset_handler ( struct nodeLinkClass::node * node_ptr ) * Description: This FSM handles node (re)install with and without * a provisioned Board Management Controller (BMC). * - * BMC provisioned case: using IPMI commands to BMC ... + * BMC provisioned case: board management commands to BMC ... * - * - ensure host power is on + * - power off host * - force network boot on next reset - * - issue node reset + * - power on host * - * BMC not provisioned case: using mtce messaging to node ... + * BMC not provisioned case: mtce messaging to node ... * + * - host must be online * - send mtcClient wipedisk command * fail reinstall if no ACK * - send mtcClient reboot command @@ -4120,17 +4121,9 @@ int nodeLinkClass::reinstall_handler ( struct nodeLinkClass::node * node_ptr ) mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_REINSTALL_TIMEOUT_BMC_ACC ); reinstallStageChange ( node_ptr, MTC_REINSTALL__START_WAIT ); } - else if ( node_ptr->power_on == false ) - { - /* need to power on node */ - wlog ("%s Reinstall power-on required", node_ptr->hostname.c_str()); - reinstallStageChange ( node_ptr, MTC_REINSTALL__POWERON ); - } else { - /* power is on so issue net boot command */ - ilog ("%s Reinstall power is on", node_ptr->hostname.c_str()); - reinstallStageChange ( node_ptr , MTC_REINSTALL__NETBOOT ); + reinstallStageChange ( node_ptr , MTC_REINSTALL__POWERQRY ); } } else @@ -4211,18 +4204,107 @@ int nodeLinkClass::reinstall_handler ( struct nodeLinkClass::node * node_ptr ) } break ; } - case MTC_REINSTALL__POWERON: + case MTC_REINSTALL__POWERQRY: { - powerStageChange ( node_ptr , MTC_POWERON__REQ_SEND ); - reinstallStageChange ( node_ptr , MTC_REINSTALL__POWERON_WAIT ); + if ( node_ptr->bmc_thread_ctrl.done ) + { + /* Query Host Power Status */ + if ( bmc_command_send ( node_ptr, BMC_THREAD_CMD__POWER_STATUS ) != PASS ) + { + elog ("%s '%s' send failed\n", + node_ptr->hostname.c_str(), + bmcUtil_getCmd_str( + node_ptr->bmc_thread_info.command).c_str()); + pingUtil_restart ( node_ptr->bm_ping_info ); + } + else + { + reinstallStageChange ( node_ptr , MTC_REINSTALL__POWERQRY_WAIT ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT ); + } + } + else + { + thread_kill ( node_ptr->bmc_thread_ctrl , node_ptr->bmc_thread_info ) ; + } break ; } - case MTC_REINSTALL__POWERON_WAIT: + case MTC_REINSTALL__POWERQRY_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + int rc = bmc_command_recv ( node_ptr ) ; + if ( rc == RETRY ) + { + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT ); + break ; + } + else if ( rc != PASS ) + { + wlog ("%s '%s' failed receive (rc:%d)", + node_ptr->hostname.c_str(), + bmcUtil_getCmd_str( + node_ptr->bmc_thread_info.command).c_str(), + rc ); + } + else if ( node_ptr->bmc_thread_info.data.empty() ) + { + wlog ("%s '%s' request yielded no response data", + node_ptr->hostname.c_str(), + bmcUtil_getCmd_str( + node_ptr->bmc_thread_info.command).c_str()); + } + else + { + int rc = + bmcUtil_is_power_on ( node_ptr->hostname, + node_ptr->bmc_protocol, + node_ptr->bmc_thread_info.data, + node_ptr->power_on); + if ( rc == PASS ) + { + if ( node_ptr->power_on == true ) + { + ilog ("%s Reinstall power-off required", + node_ptr->hostname.c_str()); + reinstallStageChange ( node_ptr , MTC_REINSTALL__POWEROFF ); + } + else + { + ilog ("%s Reinstall power-off already", + node_ptr->hostname.c_str()); + reinstallStageChange ( node_ptr , MTC_REINSTALL__NETBOOT ); + } + break ; + } + else + { + elog ("%s Reinstall power query failed (rc:%d)", + node_ptr->hostname.c_str(), rc ); + } + } + mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL_PQ ); + reinstallStageChange ( node_ptr, MTC_REINSTALL__FAIL ); + } + else + { + ; /* wait longer */ + } + break ; + } + + case MTC_REINSTALL__POWEROFF: + { + powerStageChange ( node_ptr , MTC_POWEROFF__REQ_SEND ); + reinstallStageChange ( node_ptr , MTC_REINSTALL__POWEROFF_WAIT ); + break ; + } + case MTC_REINSTALL__POWEROFF_WAIT: { /* The power handler manages timeout */ if ( node_ptr->powerStage == MTC_POWER__DONE ) { - if ( node_ptr->power_on == true ) + if ( node_ptr->power_on == false ) { if ( node_ptr->task != MTC_TASK_REINSTALL ) mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL ); @@ -4276,7 +4358,7 @@ int nodeLinkClass::reinstall_handler ( struct nodeLinkClass::node * node_ptr ) if ( rc == PASS ) { ilog ("%s Reinstall netboot request completed", node_ptr->hostname.c_str()); - reinstallStageChange ( node_ptr, MTC_REINSTALL__RESET); + reinstallStageChange ( node_ptr, MTC_REINSTALL__POWERON); } else if ( rc == RETRY ) { @@ -4293,6 +4375,41 @@ int nodeLinkClass::reinstall_handler ( struct nodeLinkClass::node * node_ptr ) } break ; } + case MTC_REINSTALL__POWERON: + { + powerStageChange ( node_ptr , MTC_POWERON__REQ_SEND ); + reinstallStageChange ( node_ptr , MTC_REINSTALL__POWERON_WAIT ); + break ; + } + case MTC_REINSTALL__POWERON_WAIT: + { + /* The power handler manages timeout */ + if ( node_ptr->powerStage == MTC_POWER__DONE ) + { + if ( node_ptr->power_on == true ) + { + if ( node_ptr->task != MTC_TASK_REINSTALL ) + mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL ); + + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_SECS_5 ); + reinstallStageChange ( node_ptr , MTC_REINSTALL__OFFLINE_WAIT ); + } + else + { + elog ("%s %s", node_ptr->hostname.c_str(), MTC_TASK_REINSTALL_FAIL_PU); + + mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL_PU ); + reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL ); + } + } + else + { + /* run the power handler till the host's power is on or + * the power-on handler times out */ + power_handler ( node_ptr ); + } + break ; + } case MTC_REINSTALL__RESET: { int rc = bmc_command_send ( node_ptr, BMC_THREAD_CMD__POWER_RESET ); @@ -4736,7 +4853,7 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) else { - rc = bmc_command_send ( node_ptr, BMC_THREAD_CMD__POWER_OFF ); + rc = bmc_command_send ( node_ptr, BMC_THREAD_CMD__POWER_OFF ); if ( rc ) { wlog ("%s Power-Off request failed (%d)\n", node_ptr->hostname.c_str(), rc ); @@ -4744,7 +4861,7 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) } else { - blog ("%s Power-Off requested\n", node_ptr->hostname.c_str()); + ilog ("%s Power-Off requested\n", node_ptr->hostname.c_str()); powerStageChange ( node_ptr , MTC_POWEROFF__RESP_WAIT ); } mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); @@ -4756,14 +4873,13 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) { if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) { - rc = bmc_command_recv ( node_ptr ); - if ( rc == RETRY ) - { - mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT ); - break ; - } - - if ( rc ) + rc = bmc_command_recv ( node_ptr ); + if ( rc == RETRY ) + { + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT ); + break ; + } + else if ( rc ) { elog ("%s Power-Off command failed\n", node_ptr->hostname.c_str()); mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); @@ -4772,10 +4888,12 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) else { ilog ("%s is Powering Off\n", node_ptr->hostname.c_str() ); - mtcInvApi_update_task ( node_ptr, "Powering Off" ); + if ( node_ptr->adminAction != MTC_ADMIN_ACTION__REINSTALL ) + { + mtcInvApi_update_task ( node_ptr, "Powering Off" ); + } mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); powerStageChange ( node_ptr , MTC_POWEROFF__DONE ); - node_ptr->power_on = false ; } } break ; @@ -4822,6 +4940,7 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) availStatusChange ( node_ptr, MTC_AVAIL_STATUS__POWERED_OFF ); powerStageChange ( node_ptr , MTC_POWER__DONE ); + node_ptr->power_on = false ; } break ; } @@ -5021,10 +5140,12 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) else { ilog ("%s is Powering On\n", node_ptr->hostname.c_str() ); - mtcInvApi_update_task ( node_ptr, "Powering On" ); + if ( node_ptr->adminAction != MTC_ADMIN_ACTION__REINSTALL ) + { + mtcInvApi_update_task ( node_ptr, "Powering On" ); + } mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); powerStageChange ( node_ptr , MTC_POWERON__DONE ); - node_ptr->power_on = true ; } } break ; @@ -5067,6 +5188,7 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) availStatusChange ( node_ptr, MTC_AVAIL_STATUS__OFFLINE ); powerStageChange ( node_ptr , MTC_POWER__DONE ); + node_ptr->power_on = true ; } break ; } @@ -5083,7 +5205,10 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) ar_enable ( node_ptr ); - mtcInvApi_force_task ( node_ptr, "" ); + if ( node_ptr->adminAction != MTC_ADMIN_ACTION__REINSTALL ) + { + mtcInvApi_force_task ( node_ptr, "" ); + } break ; } }