fault/fm-common/sources/fmSocket.h
Eric MacDonald 54f9fed7c3 Set 5 second socket read timeout
FM messaging socket reads that are triggered by FM API calls from
client services have been seen, on rare occasions, to block/stall
the fmManager process. This fmManager stall can then lead to other
client service process stalls, which in the case of mtcAgent has been
seen to lead to an uncontrolled switch of activity (aka Swact).

This update adds a 5 second socket read timeout to FM's client services
socket setup to avoid the prolonged blocking cases that lead to Swact
or adversely affect (block) other client service process execution.

Setting a read timeout on Linux sockets is a good programming practice.
Doing so helps ensure that applications, such as FM and its client
services, do not hang indefinitely if a network operation like a socket
read becomes unresponsive.

Configuring a timeout helps manage network communication reliability
and efficiency, especially in applications where responsiveness is
critical, such as server-client applications like FM.

Test Plan:

PASS: Verify AIO DX system install.
PASS: Verify blocked socket timeout and error log after 5 seconds.
PASS: Verify unblocked socket reads complete successfully.
PASS: Verify alarm assert/clear functions operate normally.
PASS: Verify set socket timeout failure handling.
PASS: Verify fmManager is not leaking files or memory.
PASS: Verify rook-ceph apply remove 100 loop soak
      - no stall or swact
      - AIO DX
      - with 2 OSDs on each controller

Closes-Bug: 2088025
Change-Id: I1d947bccf9faeedcc2b96c7bc398fbab77b7ae09
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
2024-11-14 11:20:17 +00:00

113 lines
2.4 KiB
C++

//
// Copyright (c) 2014,2023-2024 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#ifndef FMSOCKET_H_
#define FMSOCKET_H_
#include "fmMutex.h"
#include "fmDb.h"
#include <vector>
#include <unistd.h>
#include <string>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <map>
// Default socket timeout value.
// NOTE(review): presumably expressed in seconds and applied as the socket
// read timeout when FM client sockets are set up - confirm against the
// implementation file.
#define SOCKET_TIMEOUT_DEFAULT (5)
// Socket address holder: a single union provides storage for an IPv4,
// IPv6 or UNIX-domain address, alongside an integer 'type' tag.
typedef struct CFmSockAddr_s {
    // Tag describing the stored address.
    // NOTE(review): presumably an AF_* address family value - confirm.
    int type;

    // Storage shared by every supported address layout.
    union {
        struct sockaddr_in  ipv4;
        struct sockaddr_in6 ipv6;
        struct sockaddr_un  un;
    } address;

    // Returns the address storage viewed as a generic sockaddr pointer,
    // the form in which socket calls take an address.
    sockaddr *get_sockaddr() {
        return reinterpret_cast<sockaddr *>(&address);
    }

    // Declared here, defined elsewhere.
    // NOTE(review): presumably sets 'type' based on the address length
    // 'len' - confirm against the implementation.
    void set_type(socklen_t len);
} CFmSockAddr;
// Socket helper class. It holds one descriptor (m_fd) and offers two
// flavours of I/O helpers: static ones that operate on a descriptor passed
// in by the caller, and member ones that operate on the descriptor held by
// this object.
//
// NOTE(review): the object holds m_fd yet is implicitly copyable; if the
// destructor closes the descriptor, a copied object would close it a second
// time - confirm and consider disabling copying.
class CFmSocket {
protected:
    int m_fd;            // descriptor used by the member read/write helpers
    int address_family;  // NOTE(review): presumably the AF_* family given to connect() - confirm

    void close();
    bool create_socket();

public:
    CFmSocket();

    // Virtual: this class is the public base of FmSocketServer, so an
    // object destroyed through a CFmSocket pointer must run the derived
    // destructor as well (otherwise the delete is undefined behaviour).
    virtual ~CFmSocket();

    bool connect(const char *host, int port, int address_family);

    // ---- helpers operating on a caller-supplied descriptor 'fd' ----
    static bool write(int fd, const void *data, long len);
    static bool write_packet(int fd, const std::vector<char> &data);
    static bool write_packet(int fd, const void *data, long len);
    static bool read(int fd, void *data, long &len);
    static bool read_packet(int fd, std::vector<char> &data);

    // ---- helpers operating on this object's descriptor (m_fd) ----

    // read length and then size of exact packet
    bool read_packet(std::vector<char> &data);

    // Forwards to the static write_packet() overload using m_fd.
    bool write_packet(const std::vector<char> &data) {
        return write_packet(m_fd, data);
    }

    // Forwards to the static write_packet() overload using m_fd.
    bool write_packet(const void *data, long len) {
        return write_packet(m_fd, data, len);
    }

    bool read(void *data, long &len);
    bool write(const void *data, long len);

    // Returns the descriptor held by this object; does not modify it.
    int get_fd() const { return m_fd; }

    bool fd_valid();
    bool recvfrom(void *data, long &len, CFmSockAddr &addr);

    // NOTE(review): 'timedout' is presumably set when the wait expires
    // without activity; units of 'timeout'/'timeoutusec' look like seconds
    // and microseconds - confirm against the implementation.
    static int select(int *rfd, int rlen, int *wfds, int wlen,
                      int timeout, int timeoutusec, bool &timedout);
    static int select_read(int fd, int timeout, bool &timedout);
};
// Socket server layered on CFmSocket. It records a bind address and port
// and keeps a table of client connections keyed by an int.
// NOTE(review): the key is presumably the client socket descriptor - confirm.
class FmSocketServer : public CFmSocket {
protected:
    // One entry of the connection table: a peer address and a socket.
    struct client_conn {
        CFmSockAddr addr;
        int sock;
    };
    typedef std::map<int, client_conn> conn_map_t;

    std::string server_addr;
    int server_port;
    conn_map_t connections;

    void to_sock_array(std::vector<int> &socks);
    bool accept();

    // Overridable hook taking a descriptor, a data buffer and a DB session.
    // NOTE(review): presumably invoked for each packet received from a
    // client - confirm against run().
    virtual void handle_socket_data(int fd, std::vector<char> &data,
                                    CFmDBSession &sess);

    // Connection bookkeeping helpers (defined elsewhere).
    void add_socket(int sock);
    void rm_socket(int sock);
    bool good_socket(int sock);
    void find_bad_fd();

public:
    bool server_sock(const char *bindaddr, int port, int address_family);
    bool server_reset();
    bool run();

    virtual ~FmSocketServer();
};
#endif /* FMSOCKET_H_ */