Merge "Fix BMC access loss handling"
This commit is contained in:
commit
46c6b245df
@ -1041,19 +1041,25 @@ int msgClassRx::initSocket(bool allow_any, bool is_multicast)
|
||||
* @param IP protocol
|
||||
* @param name of interface to bind to, or null otherwise
|
||||
*/
|
||||
msgClassTx::msgClassTx(const char* address, int port, int proto, const char* interface)
|
||||
msgClassTx::msgClassTx(const char* address, int port, int proto, const char* interface, bool quiet)
|
||||
{
|
||||
this->dst_addr = new msgClassAddr(address, port, proto);
|
||||
this->src_addr = new msgClassAddr(address, port, proto);
|
||||
if(interface)
|
||||
{
|
||||
ilog ("Creating %s socket on port %d with address: %s\n", interface, port, address);
|
||||
if ( !quiet)
|
||||
{
|
||||
ilog ("Creating %s socket on port %d with address: %s\n", interface, port, address);
|
||||
}
|
||||
this->interface = new char[strlen(interface)+1];
|
||||
snprintf(this->interface, strlen(interface)+1, "%s", interface);
|
||||
}
|
||||
else
|
||||
{
|
||||
ilog ("Creating socket on port %d with address: %s\n", port, address);
|
||||
if ( !quiet)
|
||||
{
|
||||
ilog ("Creating socket on port %d with address: %s\n", port, address);
|
||||
}
|
||||
this->interface = NULL;
|
||||
}
|
||||
this->return_status = initSocket();
|
||||
|
@ -207,7 +207,7 @@ private:
|
||||
class msgClassTx : public msgClassSock
|
||||
{
|
||||
public:
|
||||
msgClassTx(const char* address, int port, int proto, const char* interface=NULL);
|
||||
msgClassTx(const char* address, int port, int proto, const char* interface=NULL, bool quiet=false);
|
||||
msgClassTx(const msgClassAddr& addr, const char* interface=NULL);
|
||||
private:
|
||||
int initSocket();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017 Wind River Systems, Inc.
|
||||
* Copyright (c) 2015-2017,2020 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
@ -89,6 +89,7 @@ int pingUtil_init ( string hostname, ping_info_type & ping_info, const char * ip
|
||||
ping_info.sequence = getpid() ;
|
||||
ping_info.recv_retries = 0 ;
|
||||
ping_info.send_retries = 0 ;
|
||||
ping_info.fail_debounce = 0 ;
|
||||
ping_info.requested= false ;
|
||||
ping_info.received = false ;
|
||||
ping_info.recv_flush_highwater=2;
|
||||
@ -97,7 +98,7 @@ int pingUtil_init ( string hostname, ping_info_type & ping_info, const char * ip
|
||||
ping_info.monitoring = false ;
|
||||
ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ;
|
||||
|
||||
ping_info.sock = new msgClassTx(ip_address, 0, IPPROTO_RAW, NULL );
|
||||
ping_info.sock = new msgClassTx(ip_address, 0, IPPROTO_RAW, NULL, true);
|
||||
|
||||
/* Validate the socket setup */
|
||||
if ( ping_info.sock == NULL )
|
||||
@ -194,7 +195,6 @@ int pingUtil_send ( ping_info_type & ping_info )
|
||||
}
|
||||
|
||||
ping_info.sequence++ ;
|
||||
ping_info.recv_retries = 0;
|
||||
|
||||
if ( ping_info.ipv6_mode == false )
|
||||
{
|
||||
@ -238,8 +238,6 @@ int pingUtil_send ( ping_info_type & ping_info )
|
||||
bytes = ping_info.sock->write( (const char*)&ping6_tx, sizeof(ping6_tx_message_type));
|
||||
}
|
||||
|
||||
ping_info.recv_retries = 0;
|
||||
|
||||
if ( bytes <= 0 )
|
||||
{
|
||||
wlog ("%s ping %s send failed (rc:%d) (%d:%m)\n", ping_info.hostname.c_str(), ping_info.ip.c_str(), bytes, errno );
|
||||
@ -508,10 +506,19 @@ int pingUtil_recv ( ping_info_type & ping_info,
|
||||
if ( ping_info.monitoring == false )
|
||||
{
|
||||
/* ... only want the log when we are first connecting */
|
||||
ilog ("%s ping to %s ok ; (try %d)",
|
||||
if ( ping_info.recv_retries )
|
||||
{
|
||||
ilog ("%s ping %s ok ; (try %d)",
|
||||
ping_info.hostname.c_str(),
|
||||
ping_info.ip.c_str(),
|
||||
ping_info.recv_retries+1);
|
||||
}
|
||||
else
|
||||
{
|
||||
ilog ("%s ping %s ok",
|
||||
ping_info.hostname.c_str(),
|
||||
ping_info.ip.c_str(),
|
||||
ping_info.recv_retries+1);
|
||||
ping_info.ip.c_str());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -575,6 +582,7 @@ void pingUtil_fini ( ping_info_type & ping_info )
|
||||
|
||||
ping_info.recv_retries = 0;
|
||||
ping_info.send_retries = 0;
|
||||
ping_info.fail_debounce = 0;
|
||||
ping_info.sequence = 0;
|
||||
ping_info.identity = 0;
|
||||
|
||||
@ -585,10 +593,7 @@ void pingUtil_fini ( ping_info_type & ping_info )
|
||||
|
||||
void pingUtil_restart ( ping_info_type & ping_info )
|
||||
{
|
||||
ilog ("%s ping monitor restart", ping_info.hostname.c_str());
|
||||
ping_info.ok = false ;
|
||||
ping_info.send_retries = 0 ;
|
||||
ping_info.monitoring = false ;
|
||||
dlog ("%s ping monitor restart", ping_info.hostname.c_str());
|
||||
pingUtil_fini (ping_info);
|
||||
pingUtil_init (ping_info.hostname, ping_info, ping_info.ip.data());
|
||||
|
||||
@ -668,7 +673,7 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info )
|
||||
|
||||
if ( ++ping_info.send_retries > PING_MAX_SEND_RETRIES )
|
||||
{
|
||||
elog ("%s ping to %s failed\n",
|
||||
dlog ("%s ping %s failed ; max send retries\n",
|
||||
ping_info.hostname.c_str(),
|
||||
ping_info.ip.c_str());
|
||||
|
||||
@ -676,7 +681,9 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info )
|
||||
}
|
||||
else if ( pingUtil_send ( ping_info ) )
|
||||
{
|
||||
elog ("%s failed to send bmc ping\n", ping_info.hostname.c_str());
|
||||
wlog ("%s ping %s send failed\n",
|
||||
ping_info.hostname.c_str(),
|
||||
ping_info.ip.c_str());
|
||||
ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
|
||||
}
|
||||
else
|
||||
@ -694,6 +701,10 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info )
|
||||
mtcTimer_reset ( ping_info.timer );
|
||||
}
|
||||
mtcTimer_start_msec ( ping_info.timer, ping_info.timer_handler, PING_WAIT_TIMER_MSEC );
|
||||
|
||||
/* send was ok so clear its retry counter */
|
||||
ping_info.send_retries = 0 ;
|
||||
|
||||
ping_info.stage = PINGUTIL_MONITOR_STAGE__RECV ;
|
||||
}
|
||||
}
|
||||
@ -711,25 +722,17 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info )
|
||||
{
|
||||
if ( ++ping_info.recv_retries > (PING_MAX_RECV_RETRIES) )
|
||||
{
|
||||
/* only print this log once on the resend attempt */
|
||||
if ( ping_info.send_retries >= PING_MAX_SEND_RETRIES )
|
||||
{
|
||||
mlog ("%s ping recv from %s missed ; identity:%04x sequence:%04x (try %d of %d)\n",
|
||||
ping_info.hostname.c_str(),
|
||||
ping_info.ip.c_str(),
|
||||
ping_info.identity,
|
||||
ping_info.sequence,
|
||||
ping_info.recv_retries-1,
|
||||
PING_MAX_RECV_RETRIES);
|
||||
}
|
||||
ping_info.stage = PINGUTIL_MONITOR_STAGE__SEND ;
|
||||
break ;
|
||||
ping_info.recv_retries = 0;
|
||||
dlog ("%s ping %s failed ; max recv retries\n",
|
||||
ping_info.hostname.c_str(),
|
||||
ping_info.ip.c_str());
|
||||
ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
|
||||
}
|
||||
else
|
||||
{
|
||||
blog1 ("%s retrying ping\n", ping_info.hostname.c_str());
|
||||
blog1 ("%s retrying ping\n", ping_info.hostname.c_str());
|
||||
mtcTimer_start_msec ( ping_info.timer, ping_info.timer_handler, PING_RETRY_DELAY_MSECS );
|
||||
}
|
||||
mtcTimer_start_msec ( ping_info.timer, ping_info.timer_handler, PING_RETRY_DELAY_MSECS );
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -746,13 +749,7 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info )
|
||||
|
||||
ping_info.send_retries = 0 ;
|
||||
ping_info.recv_retries = 0 ;
|
||||
|
||||
#ifdef WANT_FIT_TESTING
|
||||
if ( daemon_want_fit ( FIT_CODE__FAST_PING_AUDIT_HOST, ping_info.hostname ) == true )
|
||||
interval = 3 ;
|
||||
if ( daemon_want_fit ( FIT_CODE__FAST_PING_AUDIT_ALL ) == true )
|
||||
interval = 3 ;
|
||||
#endif
|
||||
ping_info.fail_debounce = 0 ;
|
||||
mtcTimer_start ( ping_info.timer, ping_info.timer_handler, interval );
|
||||
ping_info.stage = PINGUTIL_MONITOR_STAGE__WAIT ;
|
||||
}
|
||||
@ -766,14 +763,30 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info )
|
||||
}
|
||||
case PINGUTIL_MONITOR_STAGE__FAIL:
|
||||
{
|
||||
ping_info.ok = false ;
|
||||
ping_info.send_retries = 0 ;
|
||||
ping_info.monitoring = false ;
|
||||
pingUtil_fini (ping_info);
|
||||
pingUtil_init (ping_info.hostname, ping_info, ping_info.ip.data());
|
||||
// Assume socket reinit unless thresholding */
|
||||
bool reinit = true ;
|
||||
if ( ping_info.ok == true )
|
||||
{
|
||||
if ( ++ping_info.fail_debounce < PING_FAIL_DEBOUNCE_THLD )
|
||||
{
|
||||
reinit = false ;
|
||||
wlog("%s ping %s miss (%d of %d)",
|
||||
ping_info.hostname.c_str(),
|
||||
ping_info.ip.c_str(),
|
||||
ping_info.fail_debounce,
|
||||
PING_FAIL_DEBOUNCE_THLD );
|
||||
}
|
||||
}
|
||||
if ( reinit == true )
|
||||
{
|
||||
elog("%s ping %s fail", ping_info.hostname.c_str(),
|
||||
ping_info.ip.c_str());
|
||||
|
||||
pingUtil_fini (ping_info);
|
||||
pingUtil_init (ping_info.hostname, ping_info, ping_info.ip.data());
|
||||
}
|
||||
mtcTimer_reset ( ping_info.timer );
|
||||
mtcTimer_start ( ping_info.timer, ping_info.timer_handler, PING_MONITOR_INTERVAL );
|
||||
mtcTimer_start ( ping_info.timer, ping_info.timer_handler, PING_FAIL_RETRY_DELAY );
|
||||
ping_info.stage = PINGUTIL_MONITOR_STAGE__WAIT;
|
||||
break ;
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
#define __INCLUDE_PINGUTIL_H__
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015-2017 Wind River Systems, Inc.
|
||||
* Copyright (c) 2015-2017,2020 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
@ -47,8 +47,8 @@ using namespace std;
|
||||
#define PING_WAIT_TIMER_MSEC (200)
|
||||
#define PING_RETRY_DELAY_MSECS (200)
|
||||
#define PING_MONITOR_INTERVAL (60)
|
||||
#define PING_MISS_RETRY_DELAY (5)
|
||||
|
||||
#define PING_FAIL_RETRY_DELAY (30)
|
||||
#define PING_FAIL_DEBOUNCE_THLD (3)
|
||||
#define PING_MESSAGE_LEN (80)
|
||||
|
||||
typedef enum
|
||||
@ -73,6 +73,7 @@ typedef struct
|
||||
unsigned short sequence ;
|
||||
int send_retries ;
|
||||
int recv_retries ;
|
||||
int fail_debounce ;
|
||||
bool ipv6_mode ;
|
||||
bool received ;
|
||||
bool requested ;
|
||||
|
@ -1,3 +1,3 @@
|
||||
SRC_DIR="src"
|
||||
TIS_PATCH_VER=156
|
||||
TIS_PATCH_VER=157
|
||||
BUILD_IS_SLOW=5
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2016 Wind River Systems, Inc.
|
||||
* Copyright (c) 2013-2020 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
@ -4322,7 +4322,9 @@ void nodeLinkClass::bmc_access_data_init ( struct nodeLinkClass::node * node_ptr
|
||||
node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ;
|
||||
mtcTimer_reset ( node_ptr->bm_ping_info.timer );
|
||||
node_ptr->bm_ping_info.timer_handler = &mtcTimer_handler ;
|
||||
|
||||
node_ptr->bm_ping_info.ip = node_ptr->bm_ip ;
|
||||
node_ptr->bm_ping_info.hostname = node_ptr->hostname ;
|
||||
|
||||
node_ptr->bmc_protocol_learning = false ;
|
||||
|
||||
|
@ -85,6 +85,9 @@ void hwmonHostClass::bmc_data_init ( struct hwmonHostClass::hwmon_host * host_pt
|
||||
host_ptr->accessible = false;
|
||||
host_ptr->degraded = false ;
|
||||
|
||||
host_ptr->ping_info.ip = host_ptr->bm_ip ;
|
||||
host_ptr->ping_info.hostname = host_ptr->hostname ;
|
||||
|
||||
hwmon_del_groups ( host_ptr );
|
||||
hwmon_del_sensors ( host_ptr );
|
||||
|
||||
|
@ -96,25 +96,9 @@ void hwmonHostClass::hwmon_fsm ( void )
|
||||
|
||||
pingUtil_acc_monitor ( host_ptr->ping_info );
|
||||
|
||||
if ( host_ptr->ping_info.ok == false )
|
||||
{
|
||||
/* Auto correct key ping information ; should never occur but if it does ... */
|
||||
if (( host_ptr->ping_info.hostname.empty()) ||
|
||||
( hostUtil_is_valid_ip_addr(host_ptr->ping_info.ip ) == false ))
|
||||
{
|
||||
slog ("%s host ping info missing ; (%d:%d)\n",
|
||||
host_ptr->hostname.c_str(),
|
||||
host_ptr->ping_info.hostname.empty(),
|
||||
host_ptr->ping_info.ip.empty());
|
||||
|
||||
host_ptr->ping_info.hostname = host_ptr->hostname ;
|
||||
host_ptr->ping_info.ip = host_ptr->bm_ip ;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check to see if sensor monitoring for this host is
|
||||
* disabled or the bm password has not yet been learned */
|
||||
else if (( host_ptr->monitor == false ) || ( host_ptr->bm_pw.empty()))
|
||||
if (( host_ptr->monitor == false ) || ( host_ptr->bm_pw.empty()))
|
||||
{
|
||||
/* ... make sure the thread sits in the
|
||||
* idle state while disabled or there
|
||||
|
@ -1165,12 +1165,12 @@ int hwmonHostClass::bmc_sensor_monitor ( struct hwmonHostClass::hwmon_host * hos
|
||||
{
|
||||
if ( host_ptr->bmc_thread_info.status == FAIL_SYSTEM_CALL )
|
||||
{
|
||||
elog ("%s '%s' system call failed (retry %d of %d) (%s)",
|
||||
elog ("%s '%s' system call failed (retry %d of %d)",
|
||||
host_ptr->bmc_thread_info.log_prefix,
|
||||
bmcUtil_getCmd_str(
|
||||
host_ptr->bmc_thread_info.command).c_str(),
|
||||
host_ptr->bmc_thread_ctrl.retries,
|
||||
MAX_THREAD_RETRIES, host_ptr->thread_extra_info.bm_pw.c_str());
|
||||
MAX_THREAD_RETRIES);
|
||||
}
|
||||
else if (( host_ptr->bmc_thread_ctrl.retries == (MAX_THREAD_RETRIES-1)) ||
|
||||
( host_ptr->bmc_thread_ctrl.retries == 1 ))
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2018 Wind River Systems, Inc.
|
||||
* Copyright (c) 2013-2020 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
@ -3901,7 +3901,7 @@ int nodeLinkClass::reset_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
|
||||
if ( node_ptr->bmc_accessible == false )
|
||||
{
|
||||
wlog ("%s Power Off request rejected ; BMC not accessible ; retry in %d seconds \n",
|
||||
wlog ("%s Reset request rejected ; BMC not accessible ; retry in %d seconds \n",
|
||||
node_ptr->hostname.c_str(),
|
||||
MTC_POWER_ACTION_RETRY_DELAY);
|
||||
|
||||
@ -6185,29 +6185,10 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
pingUtil_acc_monitor ( node_ptr->bm_ping_info );
|
||||
}
|
||||
|
||||
/*****************************************************************
|
||||
* Manage bmc creds refresh
|
||||
****************************************************************/
|
||||
if ( node_ptr->bm_ping_info.ok == false )
|
||||
{
|
||||
/* Auto correct key ping information ;
|
||||
* should never occur but if it does ... */
|
||||
if (( node_ptr->bm_ping_info.hostname.empty()) ||
|
||||
( node_ptr->bm_ping_info.ip.empty()))
|
||||
{
|
||||
node_ptr->bm_ping_info.hostname = node_ptr->hostname ;
|
||||
node_ptr->bm_ping_info.ip = node_ptr->bm_ip ;
|
||||
}
|
||||
if ( ! node_ptr->bm_pw.empty() )
|
||||
{
|
||||
node_ptr->bm_pw.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************
|
||||
* Manage getting the bm password but only when ping is ok
|
||||
****************************************************************/
|
||||
else if ( node_ptr->bm_pw.empty() )
|
||||
if ( node_ptr->bm_pw.empty() )
|
||||
{
|
||||
barbicanSecret_type * secret = secretUtil_manage_secret( node_ptr->secretEvent,
|
||||
node_ptr->hostname,
|
||||
|
Loading…
Reference in New Issue
Block a user