Fix Start Host Services race condition
The following update, merged in early June, introduced a change to the mtcClient to auto-run the Start Host Services command on process startup, as it already does for the goenable tests. https://opendev.org/starlingx/metal/commit/1335bc484df331771e995ae822df3af84cc5739d This change introduced the potential for a race condition that did not occur during the testing of that update, likely due to the low reproduction rate. With that update in place it is possible for maintenance to receive the acknowledgement of a "Start Host Services" request followed immediately by the "Start Host Services Result" message. Receiving these messages back to back in a batch does not give maintenance enough time to update its command handler with the next expected message. The command handler is a separate time-sliced FSM that needs to run at least once following the start request's message ack. Otherwise, the result message is dropped, which leads to a Start Host Services timeout. The fix is to accept a "Start Host Services Result" response any time it arrives while a "Start Host Services" request is outstanding. Test Plan: PASS: Verify issue occurs at a rate greater than 75% and then apply this change and verify there are no failures in a lock/unlock soak of 100 iterations. Closes-Bug: 2073802 Change-Id: I657e5fd917073f6c7a37dc13517559a9740a62e9 Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
fb36d3b810
commit
fd66519339
@ -4097,7 +4097,7 @@ void nodeLinkClass::set_cmd_resp ( string & hostname, mtc_message_type & msg, in
|
||||
* Host Services Request's Response Handling
|
||||
*****************************************************/
|
||||
node_ptr->host_services_req.status = msg.parm[0] ;
|
||||
if ( msg.cmd == node_ptr->host_services_req.cmd )
|
||||
if (( msg.cmd == node_ptr->host_services_req.cmd ) || ( msg.cmd == MTC_CMD_HOST_SVCS_RESULT ))
|
||||
{
|
||||
// print_mtc_message ( &msg, true );
|
||||
|
||||
@ -4122,7 +4122,7 @@ void nodeLinkClass::set_cmd_resp ( string & hostname, mtc_message_type & msg, in
|
||||
* services extension. */
|
||||
else if (( msg.num > 1 ) && ( msg.parm[1] == MTC_ENHANCED_HOST_SERVICES ))
|
||||
{
|
||||
dlog ("%s %s request ack\n",
|
||||
ilog ("%s %s request ack",
|
||||
hostname.c_str(),
|
||||
node_ptr->host_services_req.name.c_str());
|
||||
node_ptr->host_services_req.ack = true ;
|
||||
|
@ -294,11 +294,8 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
|
||||
obj_ptr->set_cmd_resp ( hostname , msg, iface ) ;
|
||||
if ( msg.num > 0 )
|
||||
{
|
||||
/* log if not locked message, not start host services result
|
||||
* message and there is an error */
|
||||
if (( msg.cmd != MTC_MSG_LOCKED ) &&
|
||||
( msg.cmd != MTC_CMD_HOST_SVCS_RESULT ) &&
|
||||
( msg.parm[0] ))
|
||||
/* log if not locked message */
|
||||
if ( msg.cmd != MTC_MSG_LOCKED )
|
||||
{
|
||||
ilog ("%s '%s' ACK (rc:%d) (%s)",
|
||||
hostname.c_str(),
|
||||
|
Loading…
x
Reference in New Issue
Block a user