Merge "Add a wait time between http request retries"
This commit is contained in:
commit
739c508e92
@ -131,6 +131,11 @@
|
||||
#define FIT_CODE__STOP_HOST_SERVICES (71)
|
||||
|
||||
#define FIT_CODE__SOCKET_SETUP (72)
|
||||
#define FIT_CODE__READ_JSON_FROM_FILE (73)
|
||||
|
||||
#define FIT_CODE__HTTP_WORKQUEUE_OPERATION_FAILED (75)
|
||||
#define FIT_CODE__HTTP_WORKQUEUE_REQUEST_TIMEOUT (76)
|
||||
#define FIT_CODE__HTTP_WORKQUEUE_CONNECTION_LOSS (77)
|
||||
|
||||
/***************** Process Fit Codes ********************************/
|
||||
|
||||
|
@ -2,10 +2,10 @@
|
||||
#define __INCLUDE_HTTPUTIL_H__
|
||||
|
||||
/*
|
||||
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
#include <iostream> /* for ... string */
|
||||
@ -93,12 +93,14 @@ typedef enum {
|
||||
HTTP__RECEIVE_WAIT = 1,
|
||||
HTTP__RECEIVE = 2,
|
||||
HTTP__FAILURE = 3,
|
||||
HTTP__DONE_FAIL = 4,
|
||||
HTTP__DONE_PASS = 5,
|
||||
HTTP__STAGES = 6
|
||||
HTTP__RETRY_WAIT = 4,
|
||||
HTTP__DONE_FAIL = 5,
|
||||
HTTP__DONE_PASS = 6,
|
||||
HTTP__STAGES = 7
|
||||
} httpStages_enum ;
|
||||
|
||||
#define HTTP_RECEIVE_WAIT_MSEC (10)
|
||||
#define HTTP_RETRY_WAIT_SECS (10)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -142,7 +144,7 @@ typedef enum {
|
||||
|
||||
SYSINV_CONFIG_SHOW,
|
||||
SYSINV_CONFIG_MODIFY,
|
||||
|
||||
|
||||
SYSINV_SENSOR_LOAD,
|
||||
SYSINV_SENSOR_LOAD_GROUPS,
|
||||
SYSINV_SENSOR_LOAD_GROUP,
|
||||
|
@ -1,10 +1,10 @@
|
||||
#ifndef __INCLUDE_NODELOG_HH__
|
||||
#define __INCLUDE_NODELOG_HH__
|
||||
/*
|
||||
* Copyright (c) 2013-2017,2023 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Copyright (c) 2013-2017, 2023-2024 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
@ -116,6 +116,7 @@ typedef struct
|
||||
int start_delay ; /**< startup delay, added for pmon */
|
||||
int api_retries ; /**< api retries before failure */
|
||||
int bmc_reset_delay ; /**< secs delay before bmc reset */
|
||||
int http_retry_wait ; /**< secs to wait between http req retries */
|
||||
int hostwd_failure_threshold ; /**< allowed # of missed pmon/hostwd messages */
|
||||
bool hostwd_reboot_on_err ; /**< should hostwd reboot on fault detected */
|
||||
bool hostwd_kdump_on_stall ; /**< sysrq crash dump on quorum msg'ing stall */
|
||||
|
@ -1,8 +1,8 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2020, 2023 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
@ -314,6 +314,7 @@ nodeLinkClass::nodeLinkClass()
|
||||
sysinv_timeout = HTTP_SYSINV_CRIT_TIMEOUT ;
|
||||
sysinv_noncrit_timeout = HTTP_SYSINV_NONC_TIMEOUT ;
|
||||
work_queue_timeout = MTC_WORKQUEUE_TIMEOUT ;
|
||||
http_retry_wait = HTTP_RETRY_WAIT_SECS ;
|
||||
|
||||
/* Init the auto recovery threshold and intervals to zero until
|
||||
* modified by daemon config */
|
||||
|
@ -1866,6 +1866,9 @@ public:
|
||||
* time for crashdumps to complete. */
|
||||
int bmc_reset_delay ;
|
||||
|
||||
/** seconds to wait between http request retries */
|
||||
int http_retry_wait ;
|
||||
|
||||
/* collectd event handler */
|
||||
int collectd_notify_handler ( string & hostname,
|
||||
string & resource,
|
||||
|
@ -376,6 +376,11 @@ static int mtc_config_handler ( void * user,
|
||||
config_ptr->bmc_reset_delay = atoi(value);
|
||||
mtcInv.bmc_reset_delay = config_ptr->bmc_reset_delay ;
|
||||
}
|
||||
else if (MATCH("agent", "http_retry_wait"))
|
||||
{
|
||||
config_ptr->http_retry_wait = atoi(value);
|
||||
mtcInv.http_retry_wait = config_ptr->http_retry_wait ;
|
||||
}
|
||||
else if (MATCH("timeouts", "failsafe_shutdown_delay"))
|
||||
{
|
||||
config_ptr->failsafe_shutdown_delay = atoi(value);
|
||||
@ -692,6 +697,7 @@ int daemon_configure ( void )
|
||||
ilog ("TokenRefresh: %3d secs\n" , mtcInv.token_refresh_rate);
|
||||
ilog ("API Retries : %3d secs\n" , mtcInv.api_retries);
|
||||
ilog ("Reset Delay : %3d secs\n" , mtcInv.bmc_reset_delay);
|
||||
ilog ("HTTP Retry : %3d secs\n" , mtcInv.http_retry_wait);
|
||||
|
||||
/* Verify loaded config against an expected mask
|
||||
* as an ini file fault detection method */
|
||||
|
@ -69,6 +69,23 @@ string nodeLinkClass::mtcVimApi_state_get ( string hostname, int & http_status_c
|
||||
http_status_code = HTTP_NOTFOUND ;
|
||||
return ( payload );
|
||||
}
|
||||
#ifdef WANT_FIT_TESTING
|
||||
static const char * fit_file = "/var/run/fit/mtcVimApi_state_get";
|
||||
if ( daemon_want_fit ( FIT_CODE__READ_JSON_FROM_FILE, hostname, "mtcVimApi_state_get"))
|
||||
{
|
||||
if ( daemon_is_file_present (fit_file) )
|
||||
{
|
||||
payload = daemon_read_file(fit_file);
|
||||
ilog("%s FIT Json: %s", hostname.c_str(), payload.c_str());
|
||||
return (payload);
|
||||
}
|
||||
else
|
||||
{
|
||||
slog("%s FIT file %s not found ; aborting fit", hostname.c_str(), fit_file);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
payload = ("{\"") ;
|
||||
payload.append (MTC_JSON_INV_ADMIN);
|
||||
payload.append ("\":\"");
|
||||
@ -246,6 +263,22 @@ int nodeLinkClass::mtcVimApi_state_change ( struct nodeLinkClass::node * node_pt
|
||||
node_ptr->httpReq.payload = "{\"state-change\": " ;
|
||||
node_ptr->httpReq.payload.append (mtcVimApi_state_get ( node_ptr->hostname , http_status_code ));
|
||||
|
||||
#ifdef WANT_FIT_TESTING
|
||||
static const char * fit_file = "/var/run/fit/mtcVimApi_state_change";
|
||||
if ( daemon_want_fit ( FIT_CODE__READ_JSON_FROM_FILE, node_ptr->hostname, "mtcVimApi_state_change" ))
|
||||
{
|
||||
if ( daemon_is_file_present (fit_file) )
|
||||
{
|
||||
node_ptr->httpReq.payload = daemon_read_file(fit_file);
|
||||
ilog("%s FIT Json: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
slog("%s FIT file %s not found ; aborting fit", node_ptr->hostname.c_str(), fit_file);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (( request == VIM_HOST_FAILED ) || ( request == VIM_DPORT_FAILED ))
|
||||
{
|
||||
wlog ("%s %s\n", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str());
|
||||
|
@ -1,8 +1,8 @@
|
||||
/*
|
||||
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Copyright (c) 2013, 2016, 2023-2024 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
@ -38,7 +38,7 @@ string _get_work_state_str ( httpStages_enum state )
|
||||
else if ( state == HTTP__RECEIVE ) return (" Rx");
|
||||
else if ( state == HTTP__FAILURE ) return (" Er ");
|
||||
else if ( state == HTTP__RECEIVE_WAIT ) return ("Wait");
|
||||
else
|
||||
else
|
||||
{
|
||||
elog ("Invalid Http Work Queue State: %d\n", state );
|
||||
return ("----");
|
||||
@ -58,7 +58,7 @@ void nodeLinkClass::workQueue_dump ( struct nodeLinkClass::node * node_ptr )
|
||||
node_ptr->libEvent_work_fifo_ptr != node_ptr->libEvent_work_fifo.end();
|
||||
node_ptr->libEvent_work_fifo_ptr ++ )
|
||||
{
|
||||
syslog ( LOG_INFO, "| %-4s | %5d | %-12s | %-7s | %-13s | %15s:%d | %s\n",
|
||||
syslog ( LOG_INFO, "| %-4s | %5d | %-12s | %-7s | %-13s | %15s:%d | %s\n",
|
||||
_get_work_state_str(node_ptr->libEvent_work_fifo_ptr->state).c_str(),
|
||||
node_ptr->libEvent_work_fifo_ptr->sequence,
|
||||
node_ptr->libEvent_work_fifo_ptr->hostname.c_str(),
|
||||
@ -101,11 +101,11 @@ void nodeLinkClass::doneQueue_dump ( struct nodeLinkClass::node * node_ptr )
|
||||
node_ptr->libEvent_done_fifo_ptr != node_ptr->libEvent_done_fifo.end();
|
||||
node_ptr->libEvent_done_fifo_ptr ++ )
|
||||
{
|
||||
syslog ( LOG_INFO, "%15s httpReq doneQueue:%5d - %s '%s' -> Status:%d\n",
|
||||
node_ptr->libEvent_done_fifo_ptr->hostname.c_str(),
|
||||
node_ptr->libEvent_done_fifo_ptr->sequence,
|
||||
node_ptr->libEvent_done_fifo_ptr->service.c_str(),
|
||||
node_ptr->libEvent_done_fifo_ptr->operation.c_str(),
|
||||
syslog ( LOG_INFO, "%15s httpReq doneQueue:%5d - %s '%s' -> Status:%d\n",
|
||||
node_ptr->libEvent_done_fifo_ptr->hostname.c_str(),
|
||||
node_ptr->libEvent_done_fifo_ptr->sequence,
|
||||
node_ptr->libEvent_done_fifo_ptr->service.c_str(),
|
||||
node_ptr->libEvent_done_fifo_ptr->operation.c_str(),
|
||||
node_ptr->libEvent_done_fifo_ptr->status );
|
||||
}
|
||||
}
|
||||
@ -240,9 +240,9 @@ int nodeLinkClass::doneQueue_dequeue ( libEvent & event )
|
||||
* Description: This is a Per Host Finite State Machine (FSM) that
|
||||
* processes the work queue for the supplied host's
|
||||
* node pointer.
|
||||
*
|
||||
*
|
||||
* Constructs:
|
||||
*
|
||||
*
|
||||
* node_ptr->libEvent_work_fifo - the current work queue/fifo
|
||||
* node_ptr->libEvent_done_fifo - queue/fifo of completed requests
|
||||
*
|
||||
@ -255,17 +255,17 @@ int nodeLinkClass::doneQueue_dequeue ( libEvent & event )
|
||||
*
|
||||
* In process libEvents are copied from the callers work queue to
|
||||
* its thisReq.
|
||||
*
|
||||
*
|
||||
* Completed events including execution status are copied to the host's
|
||||
* done fifo.
|
||||
*
|
||||
* Failed events may be retried up to max_retries as specified by
|
||||
*
|
||||
* Failed events may be retried up to max_retries as specified by
|
||||
* the callers libEvent.
|
||||
*
|
||||
* @param event is a reference to the callers libEvent.
|
||||
*
|
||||
* @return an integer with values of PASS, FAIL, RETRY
|
||||
*
|
||||
*
|
||||
* ************************************************************************/
|
||||
|
||||
int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
@ -280,18 +280,18 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
* responses */
|
||||
if ( node_ptr->libEvent_done_fifo.size() > 10 )
|
||||
{
|
||||
qlog ("%s Done Queue has %ld elements\n",
|
||||
qlog ("%s Done Queue has %ld elements\n",
|
||||
node_ptr->hostname.c_str(),
|
||||
node_ptr->libEvent_done_fifo.size());
|
||||
|
||||
/* TODO: look at the status of the commands and print a log of those that failed */
|
||||
|
||||
|
||||
/* Remove the first 8 - it's a fifo, so the ones at the front are the oldest */
|
||||
for ( int i=0 ; i < 8 ; i++ )
|
||||
{
|
||||
node_ptr->libEvent_done_fifo.pop_front();
|
||||
}
|
||||
qlog ("%s Done Queue has %ld elements remaining\n",
|
||||
qlog ("%s Done Queue has %ld elements remaining\n",
|
||||
node_ptr->hostname.c_str(),
|
||||
node_ptr->libEvent_done_fifo.size());
|
||||
}
|
||||
@ -299,8 +299,8 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
|
||||
if ( node_ptr->libEvent_work_fifo.empty() )
|
||||
{
|
||||
// qlog_throttled ( node_ptr->no_work_log_throttle, 300,
|
||||
// "%s Idle ... \n",
|
||||
// qlog_throttled ( node_ptr->no_work_log_throttle, 300,
|
||||
// "%s Idle ... \n",
|
||||
// node_ptr->hostname.c_str());
|
||||
node_ptr->no_work_log_throttle = 0 ;
|
||||
return (PASS);
|
||||
@ -317,7 +317,7 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
node_ptr->libEvent_work_fifo_ptr != node_ptr->libEvent_work_fifo.end();
|
||||
node_ptr->libEvent_work_fifo_ptr ++ )
|
||||
{
|
||||
syslog ( LOG_INFO, "| %-4s | %5d | %-12s | %-7s | %-13s | %3d | %15s:%d | %s\n",
|
||||
syslog ( LOG_INFO, "| %-4s | %5d | %-12s | %-7s | %-13s | %3d | %15s:%d | %s\n",
|
||||
_get_work_state_str(node_ptr->libEvent_work_fifo_ptr->state).c_str(),
|
||||
node_ptr->libEvent_work_fifo_ptr->sequence,
|
||||
node_ptr->libEvent_work_fifo_ptr->hostname.c_str(),
|
||||
@ -331,7 +331,6 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
syslog ( LOG_INFO, "+------+-------+--------------+---------+--------------+-----+----------------------+\n");
|
||||
}
|
||||
|
||||
|
||||
int size = node_ptr->libEvent_work_fifo.size() ;
|
||||
if ( size > QUEUE_OVERLOAD )
|
||||
{
|
||||
@ -354,18 +353,18 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
case HTTP__TRANSMIT:
|
||||
{
|
||||
node_ptr->thisReq = node_ptr->libEvent_work_fifo.front();
|
||||
|
||||
|
||||
qlog ("%s Transmitted\n", node_ptr->thisReq.log_prefix.c_str() );
|
||||
|
||||
rc = mtcHttpUtil_api_request ( node_ptr->thisReq ) ;
|
||||
if ( rc )
|
||||
{
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->thisReq.state = HTTP__FAILURE ;
|
||||
}
|
||||
else
|
||||
{
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->thisReq.state = HTTP__RECEIVE_WAIT ;
|
||||
|
||||
if ( node_ptr->http_timer.tid )
|
||||
@ -374,7 +373,7 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
if ( rc != PASS )
|
||||
{
|
||||
elog ("%s failed to start http command timer ; failing command\n", node_ptr->thisReq.log_prefix.c_str());
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->thisReq.state = HTTP__FAILURE ;
|
||||
}
|
||||
}
|
||||
@ -408,20 +407,20 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
slog ("%s has unexpected null HTTP request base pointer\n",
|
||||
node_ptr->thisReq.log_prefix.c_str());
|
||||
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->thisReq.state = HTTP__FAILURE ;
|
||||
break ;
|
||||
}
|
||||
|
||||
|
||||
int msec_timeout = (node_ptr->thisReq.timeout*1000);
|
||||
int wait_time = (++node_ptr->thisReq.rx_retry_cnt)*HTTP_RECEIVE_WAIT_MSEC ;
|
||||
|
||||
rc = mtcHttpUtil_receive ( node_ptr->thisReq );
|
||||
if ( rc == RETRY )
|
||||
{
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->thisReq.state = HTTP__RECEIVE_WAIT ;
|
||||
mtcTimer_start_msec ( node_ptr->http_timer, mtcTimer_handler, HTTP_RECEIVE_WAIT_MSEC );
|
||||
mtcTimer_start_msec ( node_ptr->http_timer, mtcTimer_handler, HTTP_RECEIVE_WAIT_MSEC );
|
||||
|
||||
if ((wait_time > (msec_timeout/4)) && ( node_ptr->thisReq.low_wm == false ) )
|
||||
{
|
||||
@ -449,48 +448,66 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
/* Only print every 16 starting with 2 */
|
||||
if ( (node_ptr->thisReq.rx_retry_cnt & 0xF) == 2 )
|
||||
{
|
||||
qlog ("%s rx_retry_cnt:%d\n",
|
||||
qlog ("%s rx_retry_cnt:%d\n",
|
||||
node_ptr->thisReq.log_prefix.c_str(),
|
||||
node_ptr->thisReq.rx_retry_cnt );
|
||||
}
|
||||
break ;
|
||||
}
|
||||
}
|
||||
#ifdef WANT_FIT_TESTING
|
||||
if ( daemon_want_fit ( FIT_CODE__HTTP_WORKQUEUE_OPERATION_FAILED, node_ptr->hostname, "" ))
|
||||
{
|
||||
ilog("%s FIT Operation Failed: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str());
|
||||
node_ptr->thisReq.status = FAIL_AUTHENTICATION ;
|
||||
rc = FAIL_OPERATION ;
|
||||
}
|
||||
else if ( daemon_want_fit ( FIT_CODE__HTTP_WORKQUEUE_REQUEST_TIMEOUT, node_ptr->hostname, "" ))
|
||||
{
|
||||
ilog("%s FIT Request Timeout Failed: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str());
|
||||
rc = FAIL_TIMEOUT ;
|
||||
}
|
||||
else if ( daemon_want_fit ( FIT_CODE__HTTP_WORKQUEUE_CONNECTION_LOSS, node_ptr->hostname, "" ))
|
||||
{
|
||||
ilog("%s FIT Connection Loss: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str());
|
||||
node_ptr->thisReq.status = rc = FAIL_HTTP_ZERO_STATUS ;
|
||||
}
|
||||
#endif
|
||||
if ( rc != PASS )
|
||||
{
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->thisReq.state = HTTP__FAILURE ;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( node_ptr->thisReq.cur_retries )
|
||||
{
|
||||
ilog ("%s Completed (after %d retries) (took %d of %d msecs)\n",
|
||||
node_ptr->thisReq.log_prefix.c_str(),
|
||||
node_ptr->thisReq.cur_retries, wait_time,
|
||||
ilog ("%s Completed (after %d retries) (took %d of %d msecs)\n",
|
||||
node_ptr->thisReq.log_prefix.c_str(),
|
||||
node_ptr->thisReq.cur_retries, wait_time,
|
||||
node_ptr->thisReq.timeout*1000);
|
||||
}
|
||||
else
|
||||
{
|
||||
qlog ("%s Completed (took %d of %d msecs)\n",
|
||||
node_ptr->thisReq.log_prefix.c_str(),
|
||||
wait_time,
|
||||
qlog ("%s Completed (took %d of %d msecs)\n",
|
||||
node_ptr->thisReq.log_prefix.c_str(),
|
||||
wait_time,
|
||||
node_ptr->thisReq.timeout*1000);
|
||||
}
|
||||
node_ptr->thisReq.exec_time_msec = wait_time ;
|
||||
|
||||
node_ptr->thisReq.rx_retry_cnt = 0 ;
|
||||
|
||||
|
||||
mtcHttpUtil_free_conn ( node_ptr->thisReq );
|
||||
mtcHttpUtil_free_base ( node_ptr->thisReq );
|
||||
|
||||
/* Don't add success responses to non-critical commands like
|
||||
/* Don't add success responses to non-critical commands like
|
||||
* "update uptime" and "update task" to the done queue */
|
||||
if ( !node_ptr->thisReq.noncritical )
|
||||
{
|
||||
/* Copy done event to the done queue */
|
||||
node_ptr->libEvent_done_fifo.push_back(node_ptr->thisReq);
|
||||
|
||||
|
||||
}
|
||||
/* Pop that done event off the work queue */
|
||||
node_ptr->libEvent_work_fifo.pop_front();
|
||||
@ -503,21 +520,21 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
|
||||
mtcHttpUtil_free_conn ( node_ptr->thisReq );
|
||||
mtcHttpUtil_free_base ( node_ptr->thisReq );
|
||||
|
||||
|
||||
node_ptr->http_retries_cur++ ;
|
||||
node_ptr->thisReq.cur_retries++ ;
|
||||
|
||||
if ( node_ptr->thisReq.noncritical == true )
|
||||
{
|
||||
if ( node_ptr->thisReq.noncritical == true )
|
||||
{
|
||||
if ( node_ptr->thisReq.cur_retries > node_ptr->thisReq.max_retries )
|
||||
{
|
||||
node_ptr->oper_failures++ ;
|
||||
|
||||
wlog ("%s retry conjestion abort of non-critical command (%d:%d)\n",
|
||||
node_ptr->thisReq.log_prefix.c_str(),
|
||||
node_ptr->thisReq.log_prefix.c_str(),
|
||||
node_ptr->thisReq.cur_retries,
|
||||
node_ptr->thisReq.max_retries );
|
||||
|
||||
|
||||
/* Pop this aborted event off the work queue */
|
||||
node_ptr->libEvent_work_fifo.pop_front();
|
||||
}
|
||||
@ -561,7 +578,7 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
node_ptr->thisReq.max_retries,
|
||||
node_ptr->thisReq.timeout,
|
||||
node_ptr->thisReq.noncritical ? "No" : "Yes" );
|
||||
|
||||
|
||||
node_ptr->thisReq.response.clear();
|
||||
|
||||
node_ptr->thisReq.status = PASS ;
|
||||
@ -569,10 +586,10 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
node_ptr->thisReq.active = false ;
|
||||
node_ptr->thisReq.response_len= 0 ;
|
||||
|
||||
/*
|
||||
/*
|
||||
* If this is an inventory request ...
|
||||
*
|
||||
* 1. Init the inv struct
|
||||
* 1. Init the inv struct
|
||||
* 2. increase the timeout if it is a critical command
|
||||
*
|
||||
* */
|
||||
@ -583,30 +600,42 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
{
|
||||
int temp = node_ptr->libEvent_work_fifo_ptr->timeout ;
|
||||
|
||||
/*
|
||||
/*
|
||||
* Increase and update the timeout value for critical commands
|
||||
* in hope that it will succeed on the next go around.
|
||||
*/
|
||||
node_ptr->libEvent_work_fifo_ptr->timeout += get_mtcInv_ptr()->sysinv_timeout ;
|
||||
dlog ("%s timeout extended from %d to %d secs\n",
|
||||
dlog ("%s timeout extended from %d to %d secs\n",
|
||||
node_ptr->thisReq.log_prefix.c_str(), temp,
|
||||
node_ptr->libEvent_work_fifo_ptr->timeout );
|
||||
}
|
||||
}
|
||||
|
||||
/* Save the retry count */
|
||||
node_ptr->libEvent_work_fifo_ptr->cur_retries =
|
||||
node_ptr->libEvent_work_fifo_ptr->cur_retries =
|
||||
node_ptr->thisReq.cur_retries ;
|
||||
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
mtcTimer_start ( node_ptr->http_timer, mtcTimer_handler, HTTP_RETRY_WAIT_SECS );
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->thisReq.state = HTTP__RETRY_WAIT ;
|
||||
dlog ("%s %d sec retry wait started", node_ptr->thisReq.log_prefix.c_str(), HTTP_RETRY_WAIT_SECS);
|
||||
}
|
||||
break ;
|
||||
}
|
||||
case HTTP__RETRY_WAIT:
|
||||
{
|
||||
if ( node_ptr->http_timer.ring == true )
|
||||
{
|
||||
dlog ("%s %d sec retry wait expired", node_ptr->thisReq.log_prefix.c_str(), HTTP_RETRY_WAIT_SECS);
|
||||
node_ptr->libEvent_work_fifo_ptr->state =
|
||||
node_ptr->thisReq.state = HTTP__TRANSMIT ;
|
||||
}
|
||||
break ;
|
||||
}
|
||||
default:
|
||||
{
|
||||
slog ("%s Bad libEvent work state (%d) ; clearing work/done queue\n",
|
||||
node_ptr->hostname.c_str(),
|
||||
slog ("%s Bad libEvent work state (%d) ; clearing work/done queue\n",
|
||||
node_ptr->hostname.c_str(),
|
||||
node_ptr->libEvent_work_fifo_ptr->state );
|
||||
node_ptr->libEvent_work_fifo.clear();
|
||||
node_ptr->libEvent_done_fifo.clear();
|
||||
@ -623,7 +652,7 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
|
||||
* Description: To handle the pathological case where an event seems to
|
||||
* have timed out at the callers level then this interface
|
||||
* can be called to delete it from the work queue.
|
||||
*
|
||||
*
|
||||
* @param node_ptr so that the hosts work queue can be found
|
||||
* @param sequence to specify the specific sequence number to remove
|
||||
* @return always PASS since there is nothing the caller can or needs
|
||||
@ -660,7 +689,7 @@ int nodeLinkClass::workQueue_del_cmd ( struct nodeLinkClass::node * node_ptr, in
|
||||
*
|
||||
* Description: Removes all items from the done queue.
|
||||
*
|
||||
* Returns a failure, the sequence number of the first command
|
||||
* Returns a failure, the sequence number of the first command
|
||||
* in the done queue that did not PASS.
|
||||
*
|
||||
*/
|
||||
@ -717,7 +746,7 @@ int nodeLinkClass::doneQueue_purge ( struct nodeLinkClass::node * node_ptr )
|
||||
{
|
||||
qlog ("%s all (%d) priority queued operations passed (qlog)\n", node_ptr->hostname.c_str(), size );
|
||||
}
|
||||
|
||||
|
||||
qlog ("%s purging %d items from doneQueue\n", node_ptr->hostname.c_str(), size );
|
||||
node_ptr->libEvent_done_fifo.clear();
|
||||
}
|
||||
@ -738,7 +767,7 @@ int nodeLinkClass::workQueue_purge ( struct nodeLinkClass::node * node_ptr )
|
||||
{
|
||||
/* TODO: find out how to force close a connection.
|
||||
* Don't free the connection if it is in the receiving state or
|
||||
* we might get a segfault
|
||||
* we might get a segfault
|
||||
* There is only ever one connection open at a time for a specific host
|
||||
* so its only 'thisReq' we need to worry about. */
|
||||
if ( node_ptr->libEvent_work_fifo_ptr->state != HTTP__RECEIVE )
|
||||
@ -754,12 +783,12 @@ int nodeLinkClass::workQueue_purge ( struct nodeLinkClass::node * node_ptr )
|
||||
{
|
||||
if ( node_ptr->libEvent_work_fifo_ptr->state == HTTP__TRANSMIT )
|
||||
{
|
||||
wlog ("%s ... was not executed\n",
|
||||
wlog ("%s ... was not executed\n",
|
||||
node_ptr->libEvent_work_fifo_ptr->log_prefix.c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
wlog ("%s ... did not complete (%s)\n",
|
||||
wlog ("%s ... did not complete (%s)\n",
|
||||
node_ptr->libEvent_work_fifo_ptr->log_prefix.c_str(),
|
||||
_get_work_state_str(node_ptr->libEvent_work_fifo_ptr->state).c_str());
|
||||
}
|
||||
@ -771,7 +800,7 @@ int nodeLinkClass::workQueue_purge ( struct nodeLinkClass::node * node_ptr )
|
||||
{
|
||||
qlog ("%s all work done\n", node_ptr->hostname.c_str());
|
||||
}
|
||||
|
||||
|
||||
// node_ptr->libEvent_work_fifo_ptr->state = HTTP__TRANSMIT ;
|
||||
return (PASS);
|
||||
}
|
||||
@ -793,7 +822,7 @@ int nodeLinkClass::workQueue_done ( struct nodeLinkClass::node * node_ptr )
|
||||
node_ptr->libEvent_work_fifo_ptr++ )
|
||||
{
|
||||
/* Don't report work queue timeout if there are only noncritical
|
||||
* commands left in the work queue. Such commands might be
|
||||
* commands left in the work queue. Such commands might be
|
||||
* "update uptime" and "update task" */
|
||||
if ( !node_ptr->libEvent_work_fifo_ptr->noncritical )
|
||||
{
|
||||
@ -862,7 +891,6 @@ bool nodeLinkClass::workQueue_present ( libEvent & event )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wlog ("%s ... not found in work queue\n", event.log_prefix.c_str());
|
||||
return (false);
|
||||
}
|
||||
|
@ -78,6 +78,8 @@ bmc_reset_delay = 300 ; seconds to wait before issuing a bmc
|
||||
; ACK reboot requests. The delay gives
|
||||
; time for crashdumps to complete.
|
||||
|
||||
http_retry_wait = 10 ; secs to wait between http request retries
|
||||
|
||||
[client] ; Client Configuration
|
||||
|
||||
scheduling_priority = 45 ; realtime scheduling; range of 1 .. 99
|
||||
|
Loading…
Reference in New Issue
Block a user