Merge "Add network boot support to mtce reinstall handling"
This commit is contained in:
commit
dd9982a902
@ -43,6 +43,9 @@
|
||||
|
||||
#define IPMITOOL_RESTART_CAUSE_CMD ((const char *)("chassis restart_cause"))
|
||||
|
||||
#define IPMITOOL_BOOTDEV_PXE_CMD ((const char *)("chassis bootdev pxe"))
|
||||
#define IPMITOOL_BOOTDEV_PXE_RESP ((const char *)("Set Boot Device to pxe"))
|
||||
|
||||
#define IPMITOOL_MC_INFO_CMD ((const char *)("mc info"))
|
||||
|
||||
#define IPMITOOL_CMD_FILE_SUFFIX ((const char *)("_power_cmd_result"))
|
||||
@ -65,6 +68,7 @@ typedef enum
|
||||
IPMITOOL_THREAD_CMD__MC_INFO,
|
||||
IPMITOOL_THREAD_CMD__POWER_STATUS,
|
||||
IPMITOOL_THREAD_CMD__RESTART_CAUSE,
|
||||
IPMITOOL_THREAD_CMD__BOOTDEV_PXE,
|
||||
|
||||
IPMITOOL_THREAD_CMD__READ_SENSORS,
|
||||
|
||||
|
@ -325,7 +325,7 @@ static std::string sensorStages_str [MTC_SENSOR__STAGES +1] ;
|
||||
static std::string powerStages_str [MTC_POWER__STAGES +1] ;
|
||||
static std::string powercycleStages_str [MTC_POWERCYCLE__STAGES +1] ;
|
||||
static std::string resetStages_str [MTC_RESET__STAGES +1] ;
|
||||
static std::string reinstallStages_str [MTC_RESET__STAGES +1] ;
|
||||
static std::string reinstallStages_str [MTC_REINSTALL__STAGES +1] ;
|
||||
static std::string oosTestStages_str [MTC_OOS_TEST__STAGES +1] ;
|
||||
static std::string insvTestStages_str [MTC_INSV_TEST__STAGES +1] ;
|
||||
static std::string configStages_str [MTC_CONFIG__STAGES +1] ;
|
||||
@ -451,7 +451,17 @@ void mtc_stages_init ( void )
|
||||
resetStages_str [MTC_RESET__STAGES ] = "Reset-Unknown";
|
||||
|
||||
reinstallStages_str [MTC_REINSTALL__START ] = "Reinstall-Start";
|
||||
reinstallStages_str [MTC_REINSTALL__RESP_WAIT ] = "Reinstall-Response-Wait";
|
||||
reinstallStages_str [MTC_REINSTALL__START_WAIT ] = "Reinstall-Start-Wait";
|
||||
reinstallStages_str [MTC_REINSTALL__RESTART ] = "Reinstall-ReStart";
|
||||
reinstallStages_str [MTC_REINSTALL__RESTART_WAIT ] = "Reinstall-ReStart-Wait";
|
||||
reinstallStages_str [MTC_REINSTALL__POWERON ] = "Reinstall-PowerOn";
|
||||
reinstallStages_str [MTC_REINSTALL__POWERON_WAIT ] = "Reinstall-PowerOn-Wait";
|
||||
reinstallStages_str [MTC_REINSTALL__NETBOOT ] = "Reinstall-Netboot";
|
||||
reinstallStages_str [MTC_REINSTALL__NETBOOT_WAIT ] = "Reinstall-Netboot-Wait";
|
||||
reinstallStages_str [MTC_REINSTALL__RESET ] = "Reinstall-Reset";
|
||||
reinstallStages_str [MTC_REINSTALL__RESET_WAIT ] = "Reinstall-Reset-Wait";
|
||||
reinstallStages_str [MTC_REINSTALL__WIPEDISK ] = "Reinstall-Wipedisk";
|
||||
reinstallStages_str [MTC_REINSTALL__WIPEDISK_WAIT ] = "Reinstall-Wipedisk-Wait";
|
||||
reinstallStages_str [MTC_REINSTALL__OFFLINE_WAIT ] = "Reinstall-Offline-Wait";
|
||||
reinstallStages_str [MTC_REINSTALL__ONLINE_WAIT ] = "Reinstall-Online-Wait";
|
||||
reinstallStages_str [MTC_REINSTALL__FAIL ] = "Reinstall-Failure";
|
||||
|
@ -220,7 +220,17 @@ void daemon_exit ( void );
|
||||
#define MTC_TASK_REBOOT_FAIL_RETRY "Reboot Failed, retrying (%d of %d)"
|
||||
#define MTC_TASK_REBOOT_ABORT "Reboot Failed, try again when host is 'online'"
|
||||
#define MTC_TASK_RESET_PROG "Rebooting/Resetting Host"
|
||||
#define MTC_TASK_REINSTALL "Reinstalling Host"
|
||||
#define MTC_TASK_REINSTALL "Reinstalling"
|
||||
#define MTC_TASK_REINSTALL_WAIT_NA "Reinstall Wait ; BMC not accessible"
|
||||
#define MTC_TASK_REINSTALL_RTRY_PC "Reinstall Retry ; BMC provisioned change during install"
|
||||
#define MTC_TASK_REINSTALL_FAIL_CL "Reinstall Failed ; BMC connectivity lost"
|
||||
#define MTC_TASK_REINSTALL_FAIL_OL "Reinstall Failed ; timeout waiting for offline"
|
||||
#define MTC_TASK_REINSTALL_FAIL_TO "Reinstall Failed ; timeout waiting for online"
|
||||
#define MTC_TASK_REINSTALL_FAIL_BA "Reinstall Failed ; timeout waiting BMC access"
|
||||
#define MTC_TASK_REINSTALL_FAIL_PO "Reinstall Failed ; could not power on host"
|
||||
#define MTC_TASK_REINSTALL_FAIL_NB "Reinstall Failed ; netboot request"
|
||||
#define MTC_TASK_REINSTALL_FAIL_PR "Reinstall Failed ; power reset request"
|
||||
|
||||
#define MTC_TASK_REINSTALL_FAIL "Reinstall Failed"
|
||||
#define MTC_TASK_REINSTALL_SUCCESS "Reinstall Succeeded"
|
||||
#define MTC_TASK_BOOTING "Booting"
|
||||
@ -1008,7 +1018,17 @@ string get_resetStages_str ( mtc_resetStages_enum stage );
|
||||
typedef enum
|
||||
{
|
||||
MTC_REINSTALL__START = 0,
|
||||
MTC_REINSTALL__RESP_WAIT,
|
||||
MTC_REINSTALL__START_WAIT,
|
||||
MTC_REINSTALL__RESTART,
|
||||
MTC_REINSTALL__RESTART_WAIT,
|
||||
MTC_REINSTALL__POWERON,
|
||||
MTC_REINSTALL__POWERON_WAIT,
|
||||
MTC_REINSTALL__NETBOOT,
|
||||
MTC_REINSTALL__NETBOOT_WAIT,
|
||||
MTC_REINSTALL__RESET,
|
||||
MTC_REINSTALL__RESET_WAIT,
|
||||
MTC_REINSTALL__WIPEDISK,
|
||||
MTC_REINSTALL__WIPEDISK_WAIT,
|
||||
MTC_REINSTALL__OFFLINE_WAIT,
|
||||
MTC_REINSTALL__ONLINE_WAIT,
|
||||
MTC_REINSTALL__FAIL,
|
||||
|
@ -234,7 +234,7 @@ int _timer_stop ( struct mtc_timer * mtcTimer_ptr , bool int_safe)
|
||||
}
|
||||
else if ( int_safe == false )
|
||||
{
|
||||
elog ("%s (%s) called with null TID (count:%d)\n",
|
||||
wlog ("%s (%s) called with null TID (count:%d)\n",
|
||||
mtcTimer_ptr->hostname.c_str(),
|
||||
mtcTimer_ptr->service.c_str(),
|
||||
timer_count);
|
||||
|
@ -64,7 +64,7 @@
|
||||
#define MTC_ALIVE_TIMER (5)
|
||||
#define MTC_POWEROFF_DELAY (5)
|
||||
#define MTC_SWACT_POLL_TIMER (10)
|
||||
#define MTC_TASK_UPDATE_DELAY (10)
|
||||
#define MTC_TASK_UPDATE_DELAY (30)
|
||||
#define MTC_BM_PING_TIMEOUT (30)
|
||||
#define MTC_BM_POWEROFF_TIMEOUT (30)
|
||||
#define MTC_BM_POWERON_TIMEOUT (30)
|
||||
@ -80,6 +80,7 @@
|
||||
#define MTC_POWERCYCLE_BACK2BACK_DELAY (MTC_MINS_5)
|
||||
#define MTC_HEARTBEAT_SOAK_BEFORE_ENABLE (11)
|
||||
#define MTC_REINSTALL_TIMEOUT_DEFAULT (MTC_MINS_40)
|
||||
#define MTC_REINSTALL_TIMEOUT_BMC_ACC (MTC_MINS_10)
|
||||
#define MTC_REINSTALL_TIMEOUT_MIN (MTC_MINS_1)
|
||||
#define MTC_REINSTALL_TIMEOUT_MAX (MTC_HRS_4)
|
||||
#define MTC_REINSTALL_WAIT_TIMER (10)
|
||||
|
@ -2181,6 +2181,25 @@ int nodeLinkClass::mod_host ( node_inv_type & inv )
|
||||
else if ( !inv.action.compare ( "reinstall" ) )
|
||||
{
|
||||
plog ("%s Reinstall Action\n", node_ptr->hostname.c_str());
|
||||
if ( node_ptr->adminAction == MTC_ADMIN_ACTION__REINSTALL )
|
||||
{
|
||||
/* Allow user to restart the re-install if
|
||||
* - its in progress,
|
||||
* - there is a BMC provisioned and
|
||||
* - are waiting while the actual install is in progress */
|
||||
if (( node_ptr->bm_provisioned == true ) &&
|
||||
( node_ptr->reinstallStage == MTC_REINSTALL__ONLINE_WAIT))
|
||||
{
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__START );
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Otherwise allow the current install to continue
|
||||
* remind the user that there is a reinstall
|
||||
* in progress */
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL);
|
||||
}
|
||||
}
|
||||
adminActionChange ( node_ptr , MTC_ADMIN_ACTION__REINSTALL );
|
||||
|
||||
/* generate command=reinstall log */
|
||||
|
@ -130,6 +130,11 @@ int nodeLinkClass::ipmi_command_send ( struct nodeLinkClass::node * node_ptr, in
|
||||
{
|
||||
want_fit = true ;
|
||||
}
|
||||
else if (( command == IPMITOOL_THREAD_CMD__BOOTDEV_PXE ) &&
|
||||
( daemon_want_fit ( fit, node_ptr->hostname, "netboot_pxe" ) == true ))
|
||||
{
|
||||
want_fit = true ;
|
||||
}
|
||||
|
||||
if ( want_fit == true )
|
||||
{
|
||||
|
@ -1615,31 +1615,6 @@ extern int mtcJsonInv_testhead ( void );
|
||||
int daemon_run_testhead ( void )
|
||||
{
|
||||
int rc = PASS;
|
||||
|
||||
mtc_config.testmode = true ;
|
||||
|
||||
nodeLinkClass * mtcInv_testhead_ptr = new nodeLinkClass ;
|
||||
|
||||
printf ("\n\n");
|
||||
printf (TESTHEAD_BAR);
|
||||
|
||||
printf ("| Node Class Test Head - Private and Public Member Functions\n");
|
||||
printf (TESTHEAD_BAR);
|
||||
for ( int i = 0 ; i < 11 ; i++ )
|
||||
{
|
||||
if ( mtcInv_testhead_ptr->testhead ( i+1 ) )
|
||||
{
|
||||
FAILED_STR ;
|
||||
rc = FAIL ;
|
||||
}
|
||||
else
|
||||
PASSED ;
|
||||
}
|
||||
free(mtcInv_testhead_ptr);
|
||||
|
||||
printf (TESTHEAD_BAR);
|
||||
printf ("| Maintenance Timer Test Head\n");
|
||||
printf (TESTHEAD_BAR);
|
||||
return (rc);
|
||||
}
|
||||
|
||||
|
@ -294,37 +294,16 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr )
|
||||
{
|
||||
flog ("%s -> OOS Action Check\n", node_ptr->hostname.c_str());
|
||||
|
||||
/* TEMPORARY: To allow reset of unlocked host for fault insertion. */
|
||||
if ( node_ptr->adminAction == MTC_ADMIN_ACTION__RESET )
|
||||
{
|
||||
wlog ("%s Allowing Reset of unlocked host for FIT\n", node_ptr->hostname.c_str());
|
||||
elog ("%s Administrative '%s' Operation Rejected\n",
|
||||
node_ptr->hostname.c_str(),
|
||||
get_adminAction_str (node_ptr->adminAction) );
|
||||
|
||||
if ( node_ptr->hostname.compare(nodeLinkClass::my_hostname))
|
||||
{
|
||||
nodeLinkClass::reset_handler ( node_ptr );
|
||||
}
|
||||
else
|
||||
{
|
||||
wlog ("%s Cowardly avoiding reset of self\n", node_ptr->hostname.c_str());
|
||||
adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE );
|
||||
elog ("%s Cannot perform out-of-service action against in-service host\n",
|
||||
node_ptr->hostname.c_str());
|
||||
adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE );
|
||||
|
||||
/* Clear the UI task since we are not really resetting */
|
||||
mtcInvApi_update_task ( node_ptr, "" );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
elog ("%s Administrative '%s' Operation Rejected\n",
|
||||
node_ptr->hostname.c_str(),
|
||||
get_adminAction_str (node_ptr->adminAction) );
|
||||
|
||||
elog ("%s Cannot perform out-of-service action against in-service host\n",
|
||||
node_ptr->hostname.c_str());
|
||||
adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE );
|
||||
|
||||
/* Clear the UI task since we are not really resetting */
|
||||
mtcInvApi_update_task ( node_ptr, "" );
|
||||
}
|
||||
/* Clear the UI task since we are not really taking this action */
|
||||
mtcInvApi_update_task ( node_ptr, "" );
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
|
@ -1971,12 +1971,12 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
{
|
||||
if ( mtcTimer_expired ( node_ptr->mtcTimer ))
|
||||
{
|
||||
rc = ipmi_command_recv ( node_ptr );
|
||||
if ( rc == RETRY )
|
||||
{
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT );
|
||||
break ;
|
||||
}
|
||||
rc = ipmi_command_recv ( node_ptr );
|
||||
if ( rc == RETRY )
|
||||
{
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT );
|
||||
break ;
|
||||
}
|
||||
|
||||
if ( rc )
|
||||
{
|
||||
@ -4011,63 +4011,364 @@ int nodeLinkClass::reset_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
return (PASS);
|
||||
}
|
||||
|
||||
/* Reinstall handler
|
||||
* --------------
|
||||
/****************************************************************************
|
||||
*
|
||||
* Name : reinstall_handler
|
||||
*
|
||||
* Purpose : Perform actions that result in a network boot so that a new
|
||||
* image is installed on the specified node's boot partition.
|
||||
*
|
||||
* Description: This FSM handles node (re)install with and without
|
||||
* a provisioned Board Management Controller (BMC).
|
||||
*
|
||||
* BMC provisioned case: using IPMI commands to BMC ...
|
||||
*
|
||||
* - ensure host power is on
|
||||
* - force network boot on next reset
|
||||
* - issue node reset
|
||||
*
|
||||
* BMC not provisioned case: using mtce messaging to node ...
|
||||
*
|
||||
* - send mtcClient wipedisk command
|
||||
* fail reinstall if no ACK
|
||||
* - send mtcClient reboot command
|
||||
*
|
||||
* Both cases:
|
||||
*
|
||||
* - wait for offline
|
||||
* - wait for online
|
||||
* - install complete
|
||||
*
|
||||
* Failure Handling:
|
||||
*
|
||||
* BMC provisioned cases:
|
||||
*
|
||||
* BMC won't power on
|
||||
* BMC ipmi command failure
|
||||
* BMC connectivity lost mid-FSM.
|
||||
* BMC access timeout
|
||||
*
|
||||
* BMC not provisioned cases:
|
||||
*
|
||||
* no wipedisk ACK
|
||||
*
|
||||
* failure to go offline after reset/reboot
|
||||
* timeout waiting for online after reset/reboot
|
||||
*
|
||||
* Manage reinstall operations for a locked-disabled host */
|
||||
int nodeLinkClass::reinstall_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
{
|
||||
/* Handle 'lost BMC connectivity during the install' case */
|
||||
if (( node_ptr->bm_provisioned == true ) &&
|
||||
( node_ptr->bm_accessible == false ))
|
||||
{
|
||||
if (( node_ptr->reinstallStage != MTC_REINSTALL__START ) &&
|
||||
( node_ptr->reinstallStage != MTC_REINSTALL__START_WAIT ) &&
|
||||
( node_ptr->reinstallStage != MTC_REINSTALL__FAIL ) &&
|
||||
( node_ptr->reinstallStage != MTC_REINSTALL__MSG_DISPLAY ) &&
|
||||
( node_ptr->reinstallStage != MTC_REINSTALL__DONE ))
|
||||
{
|
||||
mtcTimer_reset ( node_ptr->mtcTimer );
|
||||
|
||||
elog ("%s Reinstall lost bmc connection",
|
||||
node_ptr->hostname.c_str());
|
||||
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL_CL );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL );
|
||||
}
|
||||
|
||||
/* fall into switch to ...
|
||||
* - handle failure
|
||||
* - finish the FSM
|
||||
*/
|
||||
}
|
||||
switch ( node_ptr->reinstallStage )
|
||||
{
|
||||
case MTC_REINSTALL__START:
|
||||
{
|
||||
int host_reinstall_wait_timer = node_ptr->mtcalive_timeout + node_reinstall_timeout ;
|
||||
node_ptr->retries = host_reinstall_wait_timer / MTC_REINSTALL_WAIT_TIMER ;
|
||||
LOAD_NODETYPE_TIMERS ;
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL );
|
||||
node_ptr->retries = ( node_ptr->mtcalive_timeout +
|
||||
this->node_reinstall_timeout) /
|
||||
MTC_REINSTALL_WAIT_TIMER ;
|
||||
mtcTimer_reset ( node_ptr->mtcTimer );
|
||||
if ( node_ptr->bm_provisioned == true )
|
||||
{
|
||||
if ( node_ptr->bm_accessible == false )
|
||||
{
|
||||
/* Handle 'lost BMC connectivity during the install' case */
|
||||
wlog ("%s Reinstall wait for BMC access ; %d second timeout",
|
||||
node_ptr->hostname.c_str(),
|
||||
MTC_REINSTALL_TIMEOUT_BMC_ACC);
|
||||
|
||||
start_offline_handler ( node_ptr );
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_WAIT_NA );
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_REINSTALL_TIMEOUT_BMC_ACC );
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__START_WAIT );
|
||||
}
|
||||
else if ( node_ptr->power_on == false )
|
||||
{
|
||||
/* need to power on node */
|
||||
wlog ("%s Reinstall power-on required", node_ptr->hostname.c_str());
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__POWERON );
|
||||
}
|
||||
else
|
||||
{
|
||||
/* power is on so issue net boot command */
|
||||
ilog ("%s Reinstall power is on", node_ptr->hostname.c_str());
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__NETBOOT );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If the BMC is not provisioned coming into this handler
|
||||
* then service the install by mtce commands by starting
|
||||
* the install by wipedisk. */
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__WIPEDISK );
|
||||
}
|
||||
break ;
|
||||
}
|
||||
/* BMC provisioned but bm_handler has not reported accessibility yet.
|
||||
* Need to wait ... */
|
||||
case MTC_REINSTALL__START_WAIT:
|
||||
{
|
||||
if ( node_ptr->bm_provisioned == true )
|
||||
{
|
||||
if ( node_ptr->bm_accessible == false )
|
||||
{
|
||||
if ( mtcTimer_expired ( node_ptr->mtcTimer ) )
|
||||
{
|
||||
/* wait period has timed out ; fail the install */
|
||||
elog ("%s %s", node_ptr->hostname.c_str(), MTC_TASK_REINSTALL_FAIL_BA);
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL_BA );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL );
|
||||
}
|
||||
else
|
||||
{
|
||||
; /* ... wait longer */
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* the BMC is now accessible ; start the install over */
|
||||
plog ("%s BMC access established ; starting install",
|
||||
node_ptr->hostname.c_str());
|
||||
mtcTimer_reset ( node_ptr->mtcTimer );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__START );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Handle case where BMC gets deprovisioned
|
||||
* while waiting for accessibility.
|
||||
*
|
||||
* Restart the install in that case after a monitored
|
||||
* wait period for reprovision.
|
||||
*
|
||||
* Has the side effect of allowing the admin to
|
||||
* reprovision the BMC during a re-install.
|
||||
*/
|
||||
|
||||
mtcTimer_reset ( node_ptr->mtcTimer );
|
||||
wlog ("%s %s", node_ptr->hostname.c_str(), MTC_TASK_REINSTALL_RTRY_PC );
|
||||
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_RTRY_PC );
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_5 );
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__RESTART_WAIT );
|
||||
}
|
||||
break ;
|
||||
}
|
||||
case MTC_REINSTALL__RESTART_WAIT:
|
||||
{
|
||||
if ( mtcTimer_expired ( node_ptr->mtcTimer ) )
|
||||
{
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__START );
|
||||
}
|
||||
else if ( node_ptr->bm_provisioned == true )
|
||||
{
|
||||
mtcTimer_reset ( node_ptr->mtcTimer );
|
||||
wlog ("%s %s", node_ptr->hostname.c_str(), MTC_TASK_REINSTALL_RTRY_PC );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__START );
|
||||
}
|
||||
else
|
||||
{
|
||||
; /* ... wait longer */
|
||||
}
|
||||
break ;
|
||||
}
|
||||
case MTC_REINSTALL__POWERON:
|
||||
{
|
||||
powerStageChange ( node_ptr , MTC_POWERON__REQ_SEND );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__POWERON_WAIT );
|
||||
break ;
|
||||
}
|
||||
case MTC_REINSTALL__POWERON_WAIT:
|
||||
{
|
||||
/* The power handler manages timeout */
|
||||
if ( node_ptr->powerStage == MTC_POWER__DONE )
|
||||
{
|
||||
if ( node_ptr->power_on == true )
|
||||
{
|
||||
if ( node_ptr->task != MTC_TASK_REINSTALL )
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL );
|
||||
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_SECS_5 );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__NETBOOT );
|
||||
}
|
||||
else
|
||||
{
|
||||
elog ("%s %s", node_ptr->hostname.c_str(), MTC_TASK_REINSTALL_FAIL_PO);
|
||||
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL_PO );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* run the power handler till the host's power is on or
|
||||
* the power-on handler times out */
|
||||
power_handler ( node_ptr );
|
||||
}
|
||||
break ;
|
||||
}
|
||||
case MTC_REINSTALL__NETBOOT:
|
||||
{
|
||||
/* Issue netboot command after timed delay */
|
||||
if ( mtcTimer_expired ( node_ptr->mtcTimer ) )
|
||||
{
|
||||
int rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__BOOTDEV_PXE );
|
||||
if ( rc )
|
||||
{
|
||||
elog ("%s Reinstall netboot request failed (rc:%d)",
|
||||
node_ptr->hostname.c_str(), rc );
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL_NB );
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__FAIL );
|
||||
}
|
||||
else
|
||||
{
|
||||
ilog ("%s Reinstall netboot request sent", node_ptr->hostname.c_str() );
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_SECS_5 );
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__NETBOOT_WAIT );
|
||||
}
|
||||
}
|
||||
break ;
|
||||
}
|
||||
case MTC_REINSTALL__NETBOOT_WAIT:
|
||||
{
|
||||
if ( mtcTimer_expired ( node_ptr->mtcTimer ) )
|
||||
{
|
||||
int rc = ipmi_command_recv ( node_ptr );
|
||||
if ( rc == PASS )
|
||||
{
|
||||
ilog ("%s Reinstall netboot request completed", node_ptr->hostname.c_str());
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__RESET);
|
||||
}
|
||||
else if ( rc == RETRY )
|
||||
{
|
||||
wlog ("%s Reinstall netboot receive retry", node_ptr->hostname.c_str());
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_SECS_5 );
|
||||
}
|
||||
else
|
||||
{
|
||||
elog ("%s Reinstall netboot receive failed (rc:%d)",
|
||||
node_ptr->hostname.c_str(), rc );
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL_NB );
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__FAIL );
|
||||
}
|
||||
}
|
||||
break ;
|
||||
}
|
||||
case MTC_REINSTALL__RESET:
|
||||
{
|
||||
int rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_RESET );
|
||||
if ( rc )
|
||||
{
|
||||
elog ("%s Reinstall reset request failed (rc:%d)",
|
||||
node_ptr->hostname.c_str(), rc );
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL_PR );
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__FAIL );
|
||||
}
|
||||
else
|
||||
{
|
||||
ilog ("%s Reinstall reset request sent", node_ptr->hostname.c_str());
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY );
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__RESET_WAIT );
|
||||
}
|
||||
break ;
|
||||
}
|
||||
case MTC_REINSTALL__RESET_WAIT:
|
||||
{
|
||||
if ( mtcTimer_expired ( node_ptr->mtcTimer ) )
|
||||
{
|
||||
int rc = ipmi_command_recv ( node_ptr );
|
||||
if ( rc == PASS )
|
||||
{
|
||||
ilog ("%s Reinstall reset request completed", node_ptr->hostname.c_str());
|
||||
|
||||
start_offline_handler ( node_ptr );
|
||||
|
||||
/* Wait for the host to go offline */
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RESET_TO_OFFLINE_TIMEOUT );
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__OFFLINE_WAIT);
|
||||
}
|
||||
else if ( rc == RETRY )
|
||||
{
|
||||
wlog ("%s Reinstall reset receive retry", node_ptr->hostname.c_str());
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_SECS_5 );
|
||||
}
|
||||
else
|
||||
{
|
||||
elog ("%s Reinstall reset receive failed ; rc:%d",
|
||||
node_ptr->hostname.c_str(), rc );
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL_PR );
|
||||
reinstallStageChange ( node_ptr, MTC_REINSTALL__FAIL );
|
||||
}
|
||||
}
|
||||
break ;
|
||||
}
|
||||
/* BMC not provisioned case */
|
||||
case MTC_REINSTALL__WIPEDISK:
|
||||
{
|
||||
node_ptr->cmdReq = MTC_CMD_WIPEDISK ;
|
||||
|
||||
plog ("%s Administrative Reinstall Requested\n", node_ptr->hostname.c_str());
|
||||
plog ("%s Reinstall wipedisk requested", node_ptr->hostname.c_str());
|
||||
if ( send_mtc_cmd ( node_ptr->hostname, MTC_CMD_WIPEDISK, MGMNT_INTERFACE ) != PASS )
|
||||
{
|
||||
elog ("Failed to send 'reinstall' request to %s\n", node_ptr->hostname.c_str());
|
||||
elog ("%s Reinstall request send failed", node_ptr->hostname.c_str());
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL );
|
||||
}
|
||||
else
|
||||
{
|
||||
node_ptr->cmdRsp = MTC_CMD_NONE ;
|
||||
|
||||
if ( node_ptr->mtcTimer.tid )
|
||||
{
|
||||
mtcTimer_stop ( node_ptr->mtcTimer );
|
||||
}
|
||||
|
||||
mtcTimer_reset ( node_ptr->mtcTimer );
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_CMD_RSP_TIMEOUT );
|
||||
|
||||
ilog ("%s waiting for REINSTALL ACK \n", node_ptr->hostname.c_str() );
|
||||
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__RESP_WAIT );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__WIPEDISK_WAIT );
|
||||
}
|
||||
break ;
|
||||
}
|
||||
case MTC_REINSTALL__RESP_WAIT:
|
||||
case MTC_REINSTALL__WIPEDISK_WAIT:
|
||||
{
|
||||
if ( node_ptr->cmdRsp != MTC_CMD_WIPEDISK )
|
||||
{
|
||||
if ( node_ptr->mtcTimer.ring == true )
|
||||
{
|
||||
elog ("%s REINSTALL ACK Timeout\n",
|
||||
elog ("%s Reinstall wipedisk ACK timeout",
|
||||
node_ptr->hostname.c_str());
|
||||
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* declare successful reinstall request */
|
||||
plog ("%s REINSTALL Request Succeeded\n", node_ptr->hostname.c_str());
|
||||
plog ("%s Reinstall request succeeded", node_ptr->hostname.c_str());
|
||||
|
||||
mtcTimer_stop ( node_ptr->mtcTimer );
|
||||
mtcTimer_reset ( node_ptr->mtcTimer );
|
||||
|
||||
start_offline_handler ( node_ptr );
|
||||
|
||||
/* We need to wait for the host to go offline */
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RESET_TO_OFFLINE_TIMEOUT );
|
||||
@ -4085,49 +4386,57 @@ int nodeLinkClass::reinstall_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
|
||||
clear_service_readies ( node_ptr );
|
||||
|
||||
ilog ("%s Reinstall Progress: host is offline ; waiting for host to come back\n", node_ptr->hostname.c_str());
|
||||
ilog ("%s Reinstall in-progress ; waiting for 'online' state",
|
||||
node_ptr->hostname.c_str());
|
||||
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_REINSTALL_WAIT_TIMER );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__ONLINE_WAIT );
|
||||
}
|
||||
else if ( node_ptr->mtcTimer.ring == true )
|
||||
else if ( mtcTimer_expired ( node_ptr->mtcTimer ) )
|
||||
{
|
||||
elog ("%s offline timeout - reinstall failed\n", node_ptr->hostname.c_str());
|
||||
elog ("%s failed to go offline ; timeout", node_ptr->hostname.c_str());
|
||||
stop_offline_handler ( node_ptr );
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL_OL );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL );
|
||||
}
|
||||
else
|
||||
{
|
||||
; // wait longer ...
|
||||
}
|
||||
break ;
|
||||
}
|
||||
case MTC_REINSTALL__ONLINE_WAIT:
|
||||
{
|
||||
if ( node_ptr->mtcTimer.ring == true )
|
||||
if ( mtcTimer_expired ( node_ptr->mtcTimer ) )
|
||||
{
|
||||
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE )
|
||||
if ( --node_ptr->retries < 0 )
|
||||
{
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_SUCCESS);
|
||||
mtcTimer_stop ( node_ptr->mtcTimer );
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__MSG_DISPLAY );
|
||||
mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__STATUSCHANGE_REINSTALL_COMPLETE );
|
||||
elog ("%s %s", node_ptr->hostname.c_str(), MTC_TASK_REINSTALL_FAIL_TO);
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL_TO );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL );
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( --node_ptr->retries < 0 )
|
||||
{
|
||||
elog ("%s online timeout - reinstall failed\n", node_ptr->hostname.c_str());
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL );
|
||||
}
|
||||
else
|
||||
{
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_REINSTALL_WAIT_TIMER );
|
||||
}
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_REINSTALL_WAIT_TIMER );
|
||||
}
|
||||
}
|
||||
else if ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE )
|
||||
{
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_SUCCESS);
|
||||
mtcTimer_reset ( node_ptr->mtcTimer );
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__MSG_DISPLAY );
|
||||
mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__STATUSCHANGE_REINSTALL_COMPLETE );
|
||||
}
|
||||
else
|
||||
{
|
||||
; // wait longer ...
|
||||
}
|
||||
break;
|
||||
}
|
||||
case MTC_REINSTALL__FAIL:
|
||||
{
|
||||
mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL);
|
||||
mtcTimer_stop ( node_ptr->mtcTimer );
|
||||
mtcTimer_reset ( node_ptr->mtcTimer );
|
||||
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY );
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__MSG_DISPLAY );
|
||||
mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__STATUSCHANGE_REINSTALL_FAILED );
|
||||
@ -4135,23 +4444,33 @@ int nodeLinkClass::reinstall_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
}
|
||||
case MTC_REINSTALL__MSG_DISPLAY:
|
||||
{
|
||||
if ( node_ptr->mtcTimer.ring == true )
|
||||
if ( mtcTimer_expired ( node_ptr->mtcTimer ) )
|
||||
{
|
||||
node_ptr->mtcTimer.ring = false ;
|
||||
reinstallStageChange ( node_ptr , MTC_REINSTALL__DONE );
|
||||
}
|
||||
else
|
||||
{
|
||||
; // wait longer ...
|
||||
}
|
||||
break ;
|
||||
}
|
||||
case MTC_REINSTALL__DONE:
|
||||
default:
|
||||
{
|
||||
plog ("%s Reinstall Completed\n", node_ptr->hostname.c_str());
|
||||
if ( node_ptr->task == MTC_TASK_REINSTALL_SUCCESS )
|
||||
{
|
||||
plog ("%s Reinstall completed successfully",
|
||||
node_ptr->hostname.c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
plog ("%s Reinstall complete ; operation failure",
|
||||
node_ptr->hostname.c_str());
|
||||
}
|
||||
|
||||
/* Default timeout values */
|
||||
LOAD_NODETYPE_TIMERS ;
|
||||
|
||||
mtcTimer_stop ( node_ptr->mtcTimer );
|
||||
|
||||
adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE );
|
||||
|
||||
recovery_ctrl_init ( node_ptr->hwmon_reset );
|
||||
@ -4583,6 +4902,7 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
}
|
||||
else
|
||||
{
|
||||
node_ptr->power_on = false ;
|
||||
ilog ("%s power is off ; powering on ...\n", node_ptr->hostname.c_str() );
|
||||
powerStageChange ( node_ptr , MTC_POWERON__REQ_SEND );
|
||||
}
|
||||
@ -4623,7 +4943,7 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
}
|
||||
else
|
||||
{
|
||||
rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_ON );
|
||||
rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_ON );
|
||||
if ( rc )
|
||||
{
|
||||
wlog ("%s Power-On request failed (%d)\n",
|
||||
@ -4918,14 +5238,14 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
{
|
||||
bool on = false ;
|
||||
|
||||
ilog ("%s Power Status: %s\n",
|
||||
node_ptr->hostname.c_str(),
|
||||
node_ptr->ipmitool_thread_info.data.c_str());
|
||||
ilog ("%s Power Status: %s\n",
|
||||
node_ptr->hostname.c_str(),
|
||||
node_ptr->ipmitool_thread_info.data.c_str());
|
||||
|
||||
if ( node_ptr->ipmitool_thread_info.data.find ( IPMITOOL_POWER_ON_STATUS ) != std::string::npos )
|
||||
{
|
||||
on = true ;
|
||||
}
|
||||
if ( node_ptr->ipmitool_thread_info.data.find ( IPMITOOL_POWER_ON_STATUS ) != std::string::npos )
|
||||
{
|
||||
on = true ;
|
||||
}
|
||||
if ( rc == PASS )
|
||||
{
|
||||
/* maintain current power state */
|
||||
@ -6033,6 +6353,12 @@ int nodeLinkClass::bm_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
node_ptr->ipmitool_thread_info.data.c_str());
|
||||
plog ("%s bmc is accessible\n", node_ptr->hostname.c_str());
|
||||
|
||||
/* set host power state ; on or off */
|
||||
if ( node_ptr->ipmitool_thread_info.data.find (IPMITOOL_POWER_ON_STATUS) != std::string::npos )
|
||||
node_ptr->power_on = true ;
|
||||
else
|
||||
node_ptr->power_on = false ;
|
||||
|
||||
if ( node_ptr->ipmitool_thread_info.data.find (IPMITOOL_POWER_OFF_STATUS) != std::string::npos )
|
||||
{
|
||||
if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED )
|
||||
@ -6333,7 +6659,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
}
|
||||
node_ptr->ipmitool_thread_ctrl.done = true ;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Audits for this controller host only */
|
||||
|
@ -79,6 +79,7 @@ void * mtcThread_ipmitool ( void * arg )
|
||||
|
||||
switch ( info_ptr->command )
|
||||
{
|
||||
/* control commands */
|
||||
case IPMITOOL_THREAD_CMD__POWER_RESET:
|
||||
{
|
||||
command = IPMITOOL_POWER_RESET_CMD ;
|
||||
@ -103,6 +104,14 @@ void * mtcThread_ipmitool ( void * arg )
|
||||
response = IPMITOOL_POWER_CYCLE_RESP ;
|
||||
break ;
|
||||
}
|
||||
case IPMITOOL_THREAD_CMD__BOOTDEV_PXE:
|
||||
{
|
||||
command = IPMITOOL_BOOTDEV_PXE_CMD ;
|
||||
response = IPMITOOL_BOOTDEV_PXE_RESP ;
|
||||
break ;
|
||||
}
|
||||
|
||||
/* Status commands */
|
||||
case IPMITOOL_THREAD_CMD__POWER_STATUS:
|
||||
{
|
||||
command = IPMITOOL_POWER_STATUS_CMD ;
|
||||
@ -118,6 +127,7 @@ void * mtcThread_ipmitool ( void * arg )
|
||||
command = IPMITOOL_MC_INFO_CMD ;
|
||||
break ;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
rc = info_ptr->status = FAIL_BAD_CASE ;
|
||||
@ -222,9 +232,11 @@ void * mtcThread_ipmitool ( void * arg )
|
||||
else if ((( command == IPMITOOL_POWER_RESET_CMD ) ||
|
||||
( command == IPMITOOL_POWER_OFF_CMD ) ||
|
||||
( command == IPMITOOL_POWER_ON_CMD ) ||
|
||||
( command == IPMITOOL_POWER_CYCLE_CMD )) &&
|
||||
( command == IPMITOOL_POWER_CYCLE_CMD ) ||
|
||||
( command == IPMITOOL_BOOTDEV_PXE_CMD)) &&
|
||||
( daemon_is_file_present ( MTC_CMD_FIT__POWER_CMD )))
|
||||
{
|
||||
slog("%s FIT Bypass power or bootdev command", info_ptr->hostname.c_str());
|
||||
bypass_ipmitool_request = true ;
|
||||
rc = PASS ;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user