diff --git a/centos_iso_image.inc b/centos_iso_image.inc index 018e0631..808bee3d 100644 --- a/centos_iso_image.inc +++ b/centos_iso_image.inc @@ -5,7 +5,6 @@ # mtce mtce mtce-pmon -mtce-rmon mtce-hwmon mtce-hostw mtce-lmon diff --git a/devstack/lib/stx-metal b/devstack/lib/stx-metal index ec8ca11f..afd2b3ed 100644 --- a/devstack/lib/stx-metal +++ b/devstack/lib/stx-metal @@ -273,7 +273,6 @@ function install_mtce { local unit_dir=${SYSCONFDIR}/systemd/system local local_etc_pmond=${sysconf_dir}/pmon.d - local local_etc_rmond=${sysconf_dir}/rmon.d local local_etc_goenabledd=${sysconf_dir}/goenabled.d local local_etc_servicesd=${sysconf_dir}/services.d local local_etc_logrotated=${sysconf_dir}/logrotate.d @@ -309,7 +308,6 @@ function install_mtce { sudo install -m 644 -p -D fsmon/scripts/fsmond.conf ${sysconf_dir}/mtc/fsmond.conf sudo install -m 644 -p -D hwmon/scripts/hwmond.conf ${sysconf_dir}/mtc/hwmond.conf sudo install -m 644 -p -D pmon/scripts/pmond.conf ${sysconf_dir}/mtc/pmond.conf - sudo install -m 644 -p -D rmon/scripts/rmond.conf ${sysconf_dir}/mtc/rmond.conf sudo install -m 644 -p -D hostw/scripts/hostwd.conf ${sysconf_dir}/mtc/hostwd.conf sudo install -m 755 -d ${sysconf_dir}/bmc/server_profiles.d @@ -324,12 +322,10 @@ function install_mtce { sudo install -m 755 -p -D heartbeat/hbsClient ${bin_dir}/hbsClient sudo install -m 755 -p -D pmon/pmond ${bin_dir}/pmond sudo install -m 755 -p -D hostw/hostwd ${bin_dir}/hostwd - sudo install -m 755 -p -D rmon/rmond ${bin_dir}/rmond sudo install -m 755 -p -D fsmon/fsmond ${bin_dir}/fsmond sudo install -m 755 -p -D hwmon/hwmond ${bin_dir}/hwmond sudo install -m 755 -p -D mtclog/mtclogd ${bin_dir}/mtclogd sudo install -m 755 -p -D alarm/mtcalarmd ${bin_dir}/mtcalarmd - sudo install -m 755 -p -D rmon/rmon_resource_notify/rmon_resource_notify ${bin_dir}/rmon_resource_notify sudo install -m 755 -p -D scripts/wipedisk ${bin_dir}/wipedisk $STX_SUDO install -m 755 -p -D fsync/fsync ${STX_INST_DIR}/sbin/fsync sudo install -m 700 -p -D pmon/scripts/pmon-restart ${sbin_dir}/pmon-restart @@ -343,7 +339,6 @@ function install_mtce { sudo install -m 755 -p -D fsmon/scripts/fsmon ${sysconf_dir}/init.d/fsmon sudo install -m 755 -p -D scripts/mtclog ${sysconf_dir}/init.d/mtclog sudo install -m 755 -p -D pmon/scripts/pmon ${sysconf_dir}/init.d/pmon - sudo install -m 755 -p -D rmon/scripts/rmon ${sysconf_dir}/init.d/rmon sudo install -m 755 -p -D hostw/scripts/hostw ${sysconf_dir}/init.d/hostw sudo install -m 755 -p -D alarm/scripts/mtcalarm.init ${sysconf_dir}/init.d/mtcalarm @@ -354,7 +349,6 @@ function install_mtce { # systemd service files sudo install -m 644 -p -D fsmon/scripts/fsmon.service ${unit_dir}/devstack@fsmon.service sudo install -m 644 -p -D hwmon/scripts/hwmon.service ${unit_dir}/devstack@hwmon.service - sudo install -m 644 -p -D rmon/scripts/rmon.service ${unit_dir}/devstack@rmon.service sudo install -m 644 -p -D pmon/scripts/pmon.service ${unit_dir}/devstack@pmon.service sudo install -m 644 -p -D hostw/scripts/hostw.service ${unit_dir}/devstack@hostw.service sudo install -m 644 -p -D scripts/mtcClient.service ${unit_dir}/devstack@mtcClient.service @@ -388,31 +382,15 @@ function install_mtce { sudo install -m 644 -p -D pmon/scripts/sshd.conf ${local_etc_pmond}/sshd.conf sudo install -m 644 -p -D pmon/scripts/syslog-ng.conf ${local_etc_pmond}/syslog-ng.conf sudo install -m 644 -p -D pmon/scripts/nslcd.conf ${local_etc_pmond}/nslcd.conf - sudo install -m 644 -p -D rmon/scripts/rmon.conf ${local_etc_pmond}/rmon.conf sudo install -m 644 -p -D fsmon/scripts/fsmon.conf ${local_etc_pmond}/fsmon.conf sudo install -m 644 -p -D scripts/mtclogd.conf ${local_etc_pmond}/mtclogd.conf sudo install -m 644 -p -D alarm/scripts/mtcalarm.pmon.conf ${local_etc_pmond}/mtcalarm.conf - # resource monitor config files - sudo install -m 755 -d ${local_etc_rmond} - sudo install -m 755 -d ${sysconf_dir}/rmonapi.d - sudo install -m 755 -d ${sysconf_dir}/rmonfiles.d - sudo install -m 755 -d ${sysconf_dir}/rmon_interfaces.d - sudo install -m 644 -p -D rmon/scripts/remotelogging_resource.conf ${local_etc_rmond}/remotelogging_resource.conf - sudo install -m 644 -p -D rmon/scripts/cinder_virtual_resource.conf ${local_etc_rmond}/cinder_virtual_resource.conf - sudo install -m 644 -p -D rmon/scripts/nova_virtual_resource.conf ${local_etc_rmond}/nova_virtual_resource.conf - sudo install -m 644 -p -D rmon/scripts/oam_resource.conf ${sysconf_dir}/rmon_interfaces.d/oam_resource.conf - sudo install -m 644 -p -D rmon/scripts/management_resource.conf ${sysconf_dir}/rmon_interfaces.d/management_resource.conf - sudo install -m 644 -p -D rmon/scripts/infrastructure_resource.conf ${sysconf_dir}/rmon_interfaces.d/infrastructure_resource.conf - # sudo install -m 755 -p -D rmon/scripts/query_ntp_servers.sh ${sysconf_dir}/rmonfiles.d/query_ntp_servers.sh - sudo install -m 755 -p -D rmon/scripts/rmon_reload_on_cpe.sh ${local_etc_goenabledd}/rmon_reload_on_cpe.sh - # log rotation sudo install -m 755 -d ${local_etc_logrotated} sudo install -m 644 -p -D scripts/mtce.logrotate ${local_etc_logrotated}/mtce.logrotate sudo install -m 644 -p -D hostw/scripts/hostw.logrotate ${local_etc_logrotated}/hostw.logrotate sudo install -m 644 -p -D pmon/scripts/pmon.logrotate ${local_etc_logrotated}/pmon.logrotate - sudo install -m 644 -p -D rmon/scripts/rmon.logrotate ${local_etc_logrotated}/rmon.logrotate sudo install -m 644 -p -D fsmon/scripts/fsmon.logrotate ${local_etc_logrotated}/fsmon.logrotate sudo install -m 644 -p -D hwmon/scripts/hwmon.logrotate ${local_etc_logrotated}/hwmon.logrotate sudo install -m 644 -p -D alarm/scripts/mtcalarm.logrotate ${local_etc_logrotated}/mtcalarm.logrotate @@ -420,15 +398,11 @@ function install_mtce { # software development files $STX_SUDO install -m 644 -p -D heartbeat/mtceHbsCluster.h ${inc_dir}/mtceHbsCluster.h $STX_SUDO install -m 755 -p -D public/libamon.so.${major} ${lib64_dir}/libamon.so.${major} - $STX_SUDO install -m 755 -p -D rmon/rmonApi/librmonapi.so.${major} ${lib64_dir}/librmonapi.so.${major} popd pushd ${lib64_dir} $STX_SUDO ln -sf libamon.so.${major} libamon.so.${major}.${minor} $STX_SUDO ln -sf libamon.so.${major} libamon.so - - $STX_SUDO ln -sf librmonapi.so.${major} librmonapi.so.${major}.${minor} - $STX_SUDO ln -sf librmonapi.so.${major} librmonapi.so popd } @@ -572,9 +546,6 @@ function start_maintenance { if is_service_enabled mtce; then start_mtcClient fi - if is_service_enabled rmon; then - start_rmon - fi if is_service_enabled mtclog; then start_mtclog fi @@ -604,10 +575,6 @@ function start_pmon { run_process pmon "${SYSCONFDIR}/rc.d/init.d/pmon start" root root } -function start_rmon { - run_process rmon "${SYSCONFDIR}/rc.d/init.d/rmon start" root root -} - function start_mtclog { run_process mtclog "${SYSCONFDIR}/rc.d/init.d/mtclog start" root root } @@ -678,10 +645,6 @@ function stop_hbsClient { stop_process hbsClient } -function stop_rmon { - stop_process rmon -} - function stop_mtclog { stop_process mtclog } @@ -710,9 +673,6 @@ function stop_maintenance { if is_service_enabled hbs; then stop_hbsClient fi - if is_service_enabled rmon; then - stop_rmon - fi if is_service_enabled mtclog; then stop_mtclog fi @@ -756,7 +716,6 @@ function cleanup_metal { local unit_dir=${SYSCONFDIR}/systemd/system local local_etc_pmond=${sysconf_dir}/pmon.d - local local_etc_rmond=${sysconf_dir}/rmon.d local local_etc_goenabledd=${sysconf_dir}/goenabled.d local local_etc_servicesd=${sysconf_dir}/services.d local local_etc_logrotated=${sysconf_dir}/logrotate.d @@ -769,7 +728,6 @@ function cleanup_metal { sudo rm -rf ${sysconf_dir}/mtc/fsmond.conf sudo rm -rf ${sysconf_dir}/mtc/hwmond.conf sudo rm -rf ${sysconf_dir}/mtc/pmond.conf - sudo rm -rf ${sysconf_dir}/mtc/rmond.conf sudo rm -rf ${sysconf_dir}/mtc/hostwd.conf sudo rm -rf ${sysconf_dir}/bmc/server_profiles.d/sensor_hp360_v1_ilo_v4.profile @@ -783,12 +741,10 @@ function cleanup_metal { sudo rm -rf ${bin_dir}/hbsClient sudo rm -rf ${bin_dir}/pmond sudo rm -rf ${bin_dir}/hostwd - sudo rm -rf ${bin_dir}/rmond sudo rm -rf ${bin_dir}/fsmond sudo rm -rf ${bin_dir}/hwmond sudo rm -rf ${bin_dir}/mtclogd sudo rm -rf ${bin_dir}/mtcalarmd - sudo rm -rf ${bin_dir}/rmon_resource_notify sudo rm -rf ${bin_dir}/wipedisk rm -rf ${STX_INST_DIR}/sbin/fsync sudo rm -rf ${sbin_dir}/pmon-restart @@ -802,7 +758,6 @@ function cleanup_metal { sudo rm -rf ${sysconf_dir}/init.d/fsmon sudo rm -rf ${sysconf_dir}/init.d/mtclog sudo rm -rf ${sysconf_dir}/init.d/pmon - sudo rm -rf ${sysconf_dir}/init.d/rmon sudo rm -rf ${sysconf_dir}/init.d/hostw sudo rm -rf ${sysconf_dir}/init.d/mtcalarm @@ -813,7 +768,6 @@ function cleanup_metal { # systemd service files sudo rm -rf ${unit_dir}/devstack@fsmon.service sudo rm -rf ${unit_dir}/devstack@hwmon.service - sudo rm -rf ${unit_dir}/devstack@rmon.service sudo rm -rf ${unit_dir}/devstack@pmon.service sudo rm -rf ${unit_dir}/devstack@hostw.service sudo rm -rf ${unit_dir}/devstack@mtcClient.service @@ -842,26 +796,14 @@ function cleanup_metal { sudo rm -rf ${local_etc_pmond}/sshd.conf sudo rm -rf ${local_etc_pmond}/syslog-ng.conf sudo rm -rf ${local_etc_pmond}/nslcd.conf - sudo rm -rf ${local_etc_pmond}/rmon.conf sudo rm -rf ${local_etc_pmond}/fsmon.conf sudo rm -rf ${local_etc_pmond}/mtclogd.conf sudo rm -rf ${local_etc_pmond}/mtcalarm.conf - # resource monitor config files - sudo rm -rf ${local_etc_rmond}/remotelogging_resource.conf - sudo rm -rf ${local_etc_rmond}/cinder_virtual_resource.conf - sudo rm -rf ${local_etc_rmond}/nova_virtual_resource.conf - sudo rm -rf ${sysconf_dir}/rmon_interfaces.d/oam_resource.conf - sudo rm -rf ${sysconf_dir}/rmon_interfaces.d/management_resource.conf - sudo rm -rf ${sysconf_dir}/rmon_interfaces.d/infrastructure_resource.conf - sudo rm -rf ${sysconf_dir}/rmonfiles.d/query_ntp_servers.sh - sudo rm -rf ${local_etc_goenabledd}/rmon_reload_on_cpe.sh - # log rotation sudo rm -rf ${local_etc_logrotated}/mtce.logrotate sudo rm -rf ${local_etc_logrotated}/hostw.logrotate sudo rm -rf ${local_etc_logrotated}/pmon.logrotate - sudo rm -rf ${local_etc_logrotated}/rmon.logrotate sudo rm -rf ${local_etc_logrotated}/fsmon.logrotate sudo rm -rf ${local_etc_logrotated}/hwmon.logrotate sudo rm -rf ${local_etc_logrotated}/mtcalarm.logrotate @@ -869,14 +811,10 @@ function cleanup_metal { # software development files $STX_SUDO rm -rf ${inc_dir}/mtceHbsCluster.h $STX_SUDO rm -rf ${lib64_dir}/libamon.so.${major} - $STX_SUDO rm -rf ${lib64_dir}/librmonapi.so.${major} $STX_SUDO rm -rf ${lib64_dir}/libamon.so.${major}.${minor} $STX_SUDO rm -rf ${lib64_dir}/libamon.so - $STX_SUDO rm -rf ${lib64_dir}/librmonapi.so.${major}.${minor} - $STX_SUDO rm -rf ${lib64_dir}/librmonapi.so - #remove mtce_common local inc_dir_common=${STX_INST_DIR}/include/mtce-common local inc_dir_daemon=${STX_INST_DIR}/include/mtce-daemon diff --git a/devstack/settings b/devstack/settings index 42fb19fc..150e6eff 100644 --- a/devstack/settings +++ b/devstack/settings @@ -27,7 +27,6 @@ # mtcalarm # mtclog # pmon -# rmon STX_METAL_NAME=stx-metal @@ -41,7 +40,7 @@ define_plugin stx-metal plugin_requires stx-metal stx-fault if is_service_enabled mtce-components; then - enable_service fsmon hbs hwmon mtce mtcalarm mtclog pmon rmon + enable_service fsmon hbs hwmon mtce mtcalarm mtclog pmon fi # Be careful to enable hostw, it will restart your host diff --git a/mtce-common/src/common/fitCodes.h b/mtce-common/src/common/fitCodes.h index 308f1d2a..1643de97 100644 --- a/mtce-common/src/common/fitCodes.h +++ b/mtce-common/src/common/fitCodes.h @@ -47,7 +47,6 @@ #define MTC_CMD_FIT__MGMNT_TXSOCK ("/var/run/fit/mgmnt_txsock") /* mtcClient */ #define MTC_CMD_FIT__INFRA_RXSOCK ("/var/run/fit/infra_rxsock") /* mtcClient */ #define MTC_CMD_FIT__INFRA_TXSOCK ("/var/run/fit/infra_txsock") /* mtcClient */ -#define MTC_CMD_FIT__RMON_SOCK ("/var/run/fit/rmon_sock") /* mtcClient */ #define MTC_CMD_FIT__AMON_SOCK ("/var/run/fit/amon_sock") /* mtcClient */ #define MTC_CMD_FIT__NO_INFRA_RSP ("/var/run/fit/no_infra_rsp") /* hbsClient */ #define MTC_CMD_FIT__NO_MGMNT_RSP ("/var/run/fit/no_mgmnt_rsp") /* hbsClient */ diff --git a/mtce-common/src/common/logMacros.h b/mtce-common/src/common/logMacros.h index b0412088..e83ab5f9 100644 --- a/mtce-common/src/common/logMacros.h +++ b/mtce-common/src/common/logMacros.h @@ -109,10 +109,6 @@ typedef struct int pmon_event_port ; /**< process monitor tx event port */ int pmon_pulse_port ; /**< process Monitor I'm Alive pulse port */ int pmon_cmd_port ; /**< process Monitor command receive port */ - int rmon_api_tx_port ; /**< resource monitor api tx port */ - int rmon_event_port ; /**< resource monitor api event port */ - int rmon_critical_thr ; /**< resmon critical threshold in use */ - int rmon_tx_port ; /**< resource monitor tx event port */ int log_step ; /**< used to throttle logging at step rate */ int event_port ; /**< daemon specific event tx port */ int cmd_port ; /**< daemon specific command rx port */ diff --git a/mtce-common/src/common/nodeBase.cpp b/mtce-common/src/common/nodeBase.cpp index 1baee961..bb15979d 100755 --- a/mtce-common/src/common/nodeBase.cpp +++ b/mtce-common/src/common/nodeBase.cpp @@ -201,14 +201,6 @@ const char * get_mtcNodeCommand_str ( int cmd ) case MTC_EVENT_PMON_LOG: return("pmon log"); case MTC_EVENT_PMOND_RAISE: return("pmon raise"); - /* rmon events */ - case MTC_EVENT_RMON_READY: return("rmon ready event"); - case MTC_EVENT_RMON_CLEAR: return("rmon clear"); - case MTC_EVENT_RMON_CRIT: return("rmon critical event"); - case MTC_EVENT_RMON_MAJOR: return("rmon major event"); - case MTC_EVENT_RMON_MINOR: return("rmon minor event"); - case MTC_EVENT_RMON_LOG: return("rmon log"); - /* data port events */ case MTC_EVENT_AVS_CLEAR: return("AVS clear"); case MTC_EVENT_AVS_MAJOR: return("AVS major"); @@ -246,7 +238,6 @@ const char * get_mtcNodeCommand_str ( int cmd ) /* service events */ case MTC_SERVICE_PMOND: return ("pmond service"); - case MTC_SERVICE_RMOND: return ("rmond service"); case MTC_SERVICE_HWMOND: return ("hwmond service"); case MTC_SERVICE_HEARTBEAT: return ("heartbeat service"); default: diff --git a/mtce-common/src/common/nodeBase.h b/mtce-common/src/common/nodeBase.h index c154653b..1d4730d0 100755 --- a/mtce-common/src/common/nodeBase.h +++ b/mtce-common/src/common/nodeBase.h @@ -396,12 +396,6 @@ void daemon_exit ( void ); #define MAX_MTCE_EVENT_NAME_LEN 64 #define MAX_RESOURCE_NAME_LEN 64 -/** RMON message codes **/ -#define RMON_CRITICAL (3) -#define RMON_MAJOR (2) -#define RMON_MINOR (1) -#define RMON_CLEAR (0) - /** Interface Codes **/ #define MGMNT_INTERFACE (0) #define INFRA_INTERFACE (1) @@ -545,7 +539,6 @@ typedef struct #define MTC_CMD_NOTIFY_INST (0x11110025) /* Notify Inst */ #define MTC_SERVICE_PMOND (0xB00BF00D) -#define MTC_SERVICE_RMOND (0xFAABF00D) #define MTC_SERVICE_HWMOND (0xF00BF00D) #define MTC_SERVICE_HEARTBEAT (0xBABEF00D) @@ -564,9 +557,6 @@ typedef struct /* Generic Monitor Service ready event */ #define MTC_EVENT_MONITOR_READY (0xf0f0f0f0) -/* TODO: Obsolete code */ -#define MTC_EVENT_RMON_READY (0x0f0f0f0f) - /** Process Monitor Event codes */ #define MTC_EVENT_PMON_CLEAR (0x02020202) /**< Clear Action */ #define MTC_EVENT_PMON_CRIT (0x04040404) /**< Crit Failed Action */ @@ -574,13 +564,6 @@ typedef struct #define MTC_EVENT_PMON_MINOR (0x08080808) /**< Minor Log action */ #define MTC_EVENT_PMON_LOG (0x03030303) /**< Minor Log action */ -/** Process Monitor Event codes */ -#define MTC_EVENT_RMON_CLEAR (0x10101010) /**< Clear Action */ -#define MTC_EVENT_RMON_CRIT (0x20202020) /**< Crit Failed Action */ -#define MTC_EVENT_RMON_MAJOR (0x30303030) /**< Major Degrade Action */ -#define MTC_EVENT_RMON_MINOR (0x40404040) /**< Minor Log action */ -#define MTC_EVENT_RMON_LOG (0x50505050) /**< Minor Log action */ - /** Process Monitor Daemon Running - Event Raise / Clear Codes */ #define MTC_EVENT_PMOND_CLEAR (0x06060606) #define MTC_EVENT_PMOND_RAISE (0x07070707) diff --git a/mtce-common/src/common/nodeUtil.cpp b/mtce-common/src/common/nodeUtil.cpp index a7aaed24..cc0cd504 100755 --- a/mtce-common/src/common/nodeUtil.cpp +++ b/mtce-common/src/common/nodeUtil.cpp @@ -1368,26 +1368,21 @@ string get_event_str ( int event_code ) { switch ( event_code ) { - case MTC_EVENT_RMON_READY: case MTC_EVENT_MONITOR_READY: return "ready" ; case MTC_EVENT_PMOND_CLEAR: case MTC_EVENT_PMON_CLEAR: - case MTC_EVENT_RMON_CLEAR: case MTC_EVENT_HWMON_CLEAR: return "clear" ; case MTC_EVENT_PMON_CRIT: - case MTC_EVENT_RMON_CRIT: case MTC_EVENT_HWMON_CRIT: return "critical" ; case MTC_EVENT_PMON_LOG: return "log" ; case MTC_EVENT_PMON_MAJOR: - case MTC_EVENT_RMON_MAJOR: case MTC_EVENT_HWMON_MAJOR: return "major" ; case MTC_EVENT_PMON_MINOR: - case MTC_EVENT_RMON_MINOR: case MTC_EVENT_HWMON_MINOR: return "minor" ; case MTC_EVENT_HWMON_CONFIG: diff --git a/mtce-common/src/daemon/daemon_common.h b/mtce-common/src/daemon/daemon_common.h index efd48d3f..743fd2dd 100755 --- a/mtce-common/src/daemon/daemon_common.h +++ b/mtce-common/src/daemon/daemon_common.h @@ -232,7 +232,6 @@ int daemon_run_testhead ( void ); #define CONFIG_CLIENT_PULSE_PORT 0x10000000 /**< Pmon pulse port */ #define CONFIG_AGENT_SECRET_PORT 0x20000000 /**< Barbican HTTP port */ #define CONFIG_AGENT_VIM_EVENT_PORT 0x40000000 /**< VIM Event Port Mask */ -#define CONFIG_CLIENT_RMON_PORT 0x80000000 /**< Rmon client port */ #define CONFIG_AGENT_PORT CONFIG_AGENT_MTC_MGMNT_PORT #define CONFIG_CLIENT_PORT CONFIG_CLIENT_MTC_MGMNT_PORT diff --git a/mtce-common/src/daemon/daemon_config.cpp b/mtce-common/src/daemon/daemon_config.cpp index 0fb71094..ee6bda3b 100644 --- a/mtce-common/src/daemon/daemon_config.cpp +++ b/mtce-common/src/daemon/daemon_config.cpp @@ -342,7 +342,6 @@ void daemon_dump_cfg ( void ) if ( ptr->hbs_to_mtc_event_port) { ilog ("hbs_to_mtc_event_port = %d\n", ptr->hbs_to_mtc_event_port);} if ( ptr->inv_event_port ) { ilog ("inv_event_port = %d\n", ptr->inv_event_port );} - /* rmond */ if ( ptr->per_node ) { ilog ("per_node = %d\n", ptr->per_node );} if ( ptr->audit_period ) { ilog ("audit_period = %d\n", ptr->audit_period );} if ( ptr->pm_period ) { ilog ("pm_period = %d\n", ptr->pm_period );} @@ -350,10 +349,6 @@ void daemon_dump_cfg ( void ) if ( ptr->pmon_amon_port ) { ilog ("pmon_amon_port = %d\n", ptr->pmon_amon_port );} if ( ptr->pmon_event_port ) { ilog ("pmon_event_port = %d\n", ptr->pmon_event_port );} if ( ptr->pmon_pulse_port ) { ilog ("pmon_pulse_port = %d\n", ptr->pmon_pulse_port );} - if ( ptr->rmon_api_tx_port ) { ilog ("rmon_api_tx_port = %d\n", ptr->rmon_api_tx_port );} - if ( ptr->rmon_event_port ) { ilog ("rmon_event_port = %d\n", ptr->rmon_event_port );} - if ( ptr->rmon_critical_thr ) { ilog ("rmon_critical_thr = %d\n", ptr->rmon_critical_thr );} - if ( ptr->rmon_tx_port ) { ilog ("rmon_tx_port = %d\n", ptr->rmon_tx_port );} if ( ptr->event_port ) { ilog ("event_port = %d\n", ptr->event_port );} if ( ptr->cmd_port ) { ilog ("cmd_port = %d\n", ptr->cmd_port );} if ( ptr->sensor_port ) { ilog ("sensor_port = %d\n", ptr->sensor_port );} diff --git a/mtce/centos/mtce.spec b/mtce/centos/mtce.spec index bf79a421..e85a7b14 100644 --- a/mtce/centos/mtce.spec +++ b/mtce/centos/mtce.spec @@ -32,7 +32,6 @@ Requires: /bin/bash Requires: /bin/systemctl Requires: dpkg Requires: time -Requires: mtce-rmon >= 1.0 Requires: libevent-2.0.so.5()(64bit) Requires: expect Requires: libfmcommon.so.1()(64bit) @@ -49,7 +48,6 @@ Requires: libc.so.6(GLIBC_2.3)(64bit) Requires: libc.so.6(GLIBC_2.14)(64bit) Requires: libjson-c.so.2()(64bit) Requires: libpthread.so.0(GLIBC_2.2.5)(64bit) -Requires: librmonapi.so.1()(64bit) Requires: librt.so.1(GLIBC_2.3.3)(64bit) Requires: libgcc_s.so.1(GCC_3.0)(64bit) Requires: libstdc++.so.6(CXXABI_1.3)(64bit) @@ -80,9 +78,7 @@ Monitor service (pmond) add both passive and active process monitoring and automatic recovery of stopped or killed processes. The File System Monitor Service (fsmond) adds detection and reporting of local file system problems. The Hardware Monitor Service (hwmond) adds present and predictive -hardware failure detection, reporting and recovery. The Resource Monitor -Service (rmond) adds resource monitoring with present and predictive -failure and overload detection and reporting. +hardware failure detection, reporting and recovery. The Host Watchdog (hostwd) daemon watches for errors in pmond and logs system information on error. All of these maintenance services improve MTTD of node failures as well as resource overload and out @@ -138,48 +134,6 @@ Titanium Cloud Maintenance Process Monitor service (pmond) with passive (pid), active (msg) and status (qry) process monitoring with automatic recovery and failure reporting of registered failed processes. -%package -n mtce-rmon -Summary: Titanuim Server Maintenance Resource Monitor Package -Group: base -Requires: /bin/bash -Requires: util-linux -Requires: /bin/systemctl -Requires: dpkg -Requires: time -Requires: libjson-c.so.2()(64bit) -Requires: libstdc++.so.6(CXXABI_1.3)(64bit) -Requires: libevent-2.0.so.5()(64bit) -Requires: libfmcommon.so.1()(64bit) -Requires: librmonapi.so.1()(64bit) -Requires: fm-common >= 1.0 -Requires: libc.so.6(GLIBC_2.2.5)(64bit) -Requires: libstdc++.so.6(GLIBCXX_3.4.11)(64bit) -Requires: /bin/sh -Requires: librt.so.1()(64bit) -Requires: libc.so.6(GLIBC_2.3)(64bit) -Requires: libc.so.6(GLIBC_2.14)(64bit) -Requires: libpthread.so.0(GLIBC_2.2.5)(64bit) -Requires: librt.so.1(GLIBC_2.3.3)(64bit) -Requires: libgcc_s.so.1(GCC_3.0)(64bit) -Requires: libevent >= 2.0.21 -Requires: librt.so.1(GLIBC_2.2.5)(64bit) -Requires: libuuid.so.1()(64bit) -Requires: libm.so.6()(64bit) -Requires: rtld(GNU_HASH) -Requires: libstdc++.so.6()(64bit) -Requires: libc.so.6()(64bit) -Requires: libgcc_s.so.1()(64bit) -Requires: libstdc++.so.6(GLIBCXX_3.4)(64bit) -Requires: libstdc++.so.6(GLIBCXX_3.4.15)(64bit) -Requires: libpthread.so.0()(64bit) -Provides: librmonapi.so.1()(64bit) - -%description -n mtce-rmon -Titanium Cloud Host Maintenance Resource Monitor Service (rmond) adds -threshold based monitoring with predictive severity level alarming for -out of tolerance utilization of critical resourses such as memory, cpu -file system, etc. - %package -n mtce-hwmon Summary: Titanuim Server Maintenance Hardware Monitor Package Group: base @@ -255,9 +209,7 @@ Monitor service (pmond) add both passive and active process monitoring and automatic recovery of stopped or killed processes. The File System Monitor Service (fsmond) adds detection and reporting of local file system problems. The Hardware Monitor Service (hwmond) adds present and predictive -hardware failure detection, reporting and recovery. The Resource Monitor -Service (rmond) adds resource monitoring with present and predictive -failure and overload detection and reporting. The Guest Services +hardware failure detection, reporting and recovery. The Guest Services (guestAgent/guestServer) daemons control access into and heartbeat of guest VMs on the worker. The Host Watchdog (hostwd) daemon watches for errors in pmond and logs system information on error. All of these maintenance @@ -303,7 +255,6 @@ netlink monitoring for provisioned oam, mgmt and infra interfaces. %define local_bindir %{local_dir}/bin %define local_sbindir %{local_dir}/sbin %define local_etc_pmond %{_sysconfdir}/pmon.d -%define local_etc_rmond %{_sysconfdir}/rmon.d %define local_etc_goenabledd %{_sysconfdir}/goenabled.d %define local_etc_servicesd %{_sysconfdir}/services.d %define local_etc_logrotated %{_sysconfdir}/logrotate.d @@ -355,7 +306,6 @@ install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmond.conf %{buildroot}%{_sy install -m 644 -p -D %{_buildsubdir}/hwmon/scripts/hwmond.conf %{buildroot}%{_sysconfdir}/mtc/hwmond.conf install -m 644 -p -D %{_buildsubdir}/pmon/scripts/pmond.conf %{buildroot}%{_sysconfdir}/mtc/pmond.conf install -m 644 -p -D %{_buildsubdir}/lmon/scripts/lmond.conf %{buildroot}%{_sysconfdir}/mtc/lmond.conf -install -m 644 -p -D %{_buildsubdir}/rmon/scripts/rmond.conf %{buildroot}%{_sysconfdir}/mtc/rmond.conf install -m 644 -p -D %{_buildsubdir}/hostw/scripts/hostwd.conf %{buildroot}%{_sysconfdir}/mtc/hostwd.conf install -m 755 -d %{buildroot}/%{_sysconfdir}/etc/bmc/server_profiles.d @@ -371,12 +321,10 @@ install -m 755 -p -D %{_buildsubdir}/heartbeat/hbsClient %{buildroot}/%{local_bi install -m 755 -p -D %{_buildsubdir}/pmon/pmond %{buildroot}/%{local_bindir}/pmond install -m 755 -p -D %{_buildsubdir}/lmon/lmond %{buildroot}/%{local_bindir}/lmond install -m 755 -p -D %{_buildsubdir}/hostw/hostwd %{buildroot}/%{local_bindir}/hostwd -install -m 755 -p -D %{_buildsubdir}/rmon/rmond %{buildroot}/%{local_bindir}/rmond install -m 755 -p -D %{_buildsubdir}/fsmon/fsmond %{buildroot}/%{local_bindir}/fsmond install -m 755 -p -D %{_buildsubdir}/hwmon/hwmond %{buildroot}/%{local_bindir}/hwmond install -m 755 -p -D %{_buildsubdir}/mtclog/mtclogd %{buildroot}/%{local_bindir}/mtclogd install -m 755 -p -D %{_buildsubdir}/alarm/mtcalarmd %{buildroot}/%{local_bindir}/mtcalarmd -install -m 755 -p -D %{_buildsubdir}/rmon/rmon_resource_notify/rmon_resource_notify %{buildroot}/%{local_bindir}/rmon_resource_notify install -m 755 -p -D %{_buildsubdir}/scripts/wipedisk %{buildroot}/%{local_bindir}/wipedisk install -m 755 -p -D %{_buildsubdir}/fsync/fsync %{buildroot}/%{_sbindir}/fsync install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-restart %{buildroot}/%{local_sbindir}/pmon-restart @@ -391,7 +339,6 @@ install -m 755 -p -D %{_buildsubdir}/fsmon/scripts/fsmon %{buildroot}%{_sysconfd install -m 755 -p -D %{_buildsubdir}/scripts/mtclog %{buildroot}%{_sysconfdir}/init.d/mtclog install -m 755 -p -D %{_buildsubdir}/pmon/scripts/pmon %{buildroot}%{_sysconfdir}/init.d/pmon install -m 755 -p -D %{_buildsubdir}/lmon/scripts/lmon %{buildroot}%{_sysconfdir}/init.d/lmon -install -m 755 -p -D %{_buildsubdir}/rmon/scripts/rmon %{buildroot}%{_sysconfdir}/init.d/rmon install -m 755 -p -D %{_buildsubdir}/hostw/scripts/hostw %{buildroot}%{_sysconfdir}/init.d/hostw install -m 755 -p -D %{_buildsubdir}/alarm/scripts/mtcalarm.init %{buildroot}%{_sysconfdir}/init.d/mtcalarm @@ -404,7 +351,6 @@ install -m 644 -p -D %{_buildsubdir}/scripts/hwclock.service %{buildroot}%{_unit # systemd service files install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmon.service %{buildroot}%{_unitdir}/fsmon.service install -m 644 -p -D %{_buildsubdir}/hwmon/scripts/hwmon.service %{buildroot}%{_unitdir}/hwmon.service -install -m 644 -p -D %{_buildsubdir}/rmon/scripts/rmon.service %{buildroot}%{_unitdir}/rmon.service install -m 644 -p -D %{_buildsubdir}/pmon/scripts/pmon.service %{buildroot}%{_unitdir}/pmon.service install -m 644 -p -D %{_buildsubdir}/hostw/scripts/hostw.service %{buildroot}%{_unitdir}/hostw.service install -m 644 -p -D %{_buildsubdir}/scripts/mtcClient.service %{buildroot}%{_unitdir}/mtcClient.service @@ -440,28 +386,17 @@ install -m 644 -p -D %{_buildsubdir}/pmon/scripts/acpid.conf %{buildroot}%{local install -m 644 -p -D %{_buildsubdir}/pmon/scripts/sshd.conf %{buildroot}%{local_etc_pmond}/sshd.conf install -m 644 -p -D %{_buildsubdir}/pmon/scripts/syslog-ng.conf %{buildroot}%{local_etc_pmond}/syslog-ng.conf install -m 644 -p -D %{_buildsubdir}/pmon/scripts/nslcd.conf %{buildroot}%{local_etc_pmond}/nslcd.conf -install -m 644 -p -D %{_buildsubdir}/rmon/scripts/rmon.conf %{buildroot}%{local_etc_pmond}/rmon.conf install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmon.conf %{buildroot}%{local_etc_pmond}/fsmon.conf install -m 644 -p -D %{_buildsubdir}/scripts/mtclogd.conf %{buildroot}%{local_etc_pmond}/mtclogd.conf install -m 644 -p -D %{_buildsubdir}/alarm/scripts/mtcalarm.pmon.conf %{buildroot}%{local_etc_pmond}/mtcalarm.conf install -m 644 -p -D %{_buildsubdir}/lmon/scripts/lmon.pmon.conf %{buildroot}%{local_etc_pmond}/lmon.conf -# resource monitor config files -install -m 755 -d %{buildroot}%{local_etc_rmond} -install -m 755 -d %{buildroot}%{_sysconfdir}/rmonapi.d -install -m 755 -d %{buildroot}%{_sysconfdir}/rmonfiles.d -install -m 644 -p -D %{_buildsubdir}/rmon/scripts/remotelogging_resource.conf %{buildroot}%{local_etc_rmond}/remotelogging_resource.conf -install -m 644 -p -D %{_buildsubdir}/rmon/scripts/cinder_virtual_resource.conf %{buildroot}%{local_etc_rmond}/cinder_virtual_resource.conf -install -m 644 -p -D %{_buildsubdir}/rmon/scripts/nova_virtual_resource.conf %{buildroot}%{local_etc_rmond}/nova_virtual_resource.conf -install -m 755 -p -D %{_buildsubdir}/rmon/scripts/rmon_reload_on_cpe.sh %{buildroot}%{local_etc_goenabledd}/rmon_reload_on_cpe.sh - # log rotation install -m 755 -d %{buildroot}%{_sysconfdir}/logrotate.d install -m 644 -p -D %{_buildsubdir}/scripts/mtce.logrotate %{buildroot}%{local_etc_logrotated}/mtce.logrotate install -m 644 -p -D %{_buildsubdir}/hostw/scripts/hostw.logrotate %{buildroot}%{local_etc_logrotated}/hostw.logrotate install -m 644 -p -D %{_buildsubdir}/pmon/scripts/pmon.logrotate %{buildroot}%{local_etc_logrotated}/pmon.logrotate install -m 644 -p -D %{_buildsubdir}/lmon/scripts/lmon.logrotate %{buildroot}%{local_etc_logrotated}/lmon.logrotate -install -m 644 -p -D %{_buildsubdir}/rmon/scripts/rmon.logrotate %{buildroot}%{local_etc_logrotated}/rmon.logrotate install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmon.logrotate %{buildroot}%{local_etc_logrotated}/fsmon.logrotate install -m 644 -p -D %{_buildsubdir}/hwmon/scripts/hwmon.logrotate %{buildroot}%{local_etc_logrotated}/hwmon.logrotate install -m 644 -p -D %{_buildsubdir}/alarm/scripts/mtcalarm.logrotate %{buildroot}%{local_etc_logrotated}/mtcalarm.logrotate @@ -473,10 +408,6 @@ install -m 755 -p -D %{_buildsubdir}/public/libamon.so.$MAJOR %{buildroot}%{_lib cd %{buildroot}%{_libdir} ; ln -s libamon.so.$MAJOR libamon.so.$MAJOR.$MINOR cd %{buildroot}%{_libdir} ; ln -s libamon.so.$MAJOR libamon.so -install -m 755 -p -D %{_buildsubdir}/rmon/rmonApi/librmonapi.so.$MAJOR %{buildroot}%{_libdir}/librmonapi.so.$MAJOR -cd %{buildroot}%{_libdir} ; ln -s librmonapi.so.$MAJOR librmonapi.so.$MAJOR.$MINOR -cd %{buildroot}%{_libdir} ; ln -s librmonapi.so.$MAJOR librmonapi.so - # volatile directories install -m 755 -d %{buildroot}/var install -m 755 -d %{buildroot}/var/run @@ -498,9 +429,6 @@ install -m 755 -d %{buildroot}/var/run %post -n mtce-pmon /bin/systemctl enable pmon.service -%post -n mtce-rmon -/bin/systemctl enable rmon.service - %post -n mtce-lmon /bin/systemctl enable lmon.service @@ -604,34 +532,6 @@ install -m 755 -d %{buildroot}/var/run %{_sysconfdir}/init.d/pmon %{local_bindir}/pmond -############################### -# Resource Monitor RPM Files -############################### -%files -n mtce-rmon -%defattr(-,root,root,-) - -# Config files - Non-Modifiable -%{_sysconfdir}/mtc/rmond.conf - -%{local_etc_pmond}/rmon.conf -%{local_etc_logrotated}/rmon.logrotate -%{_unitdir}/rmon.service - -%{local_etc_rmond}/remotelogging_resource.conf -%{local_etc_rmond}/cinder_virtual_resource.conf -%{local_etc_rmond}/nova_virtual_resource.conf - -%{_libdir}/librmonapi.so.1.0 -%{_libdir}/librmonapi.so.1 -%{_libdir}/librmonapi.so - -%dir %{_sysconfdir}/rmonapi.d - -%{_sysconfdir}/init.d/rmon -%{local_bindir}/rmond -%{local_bindir}/rmon_resource_notify -%{local_etc_goenabledd}/rmon_reload_on_cpe.sh - ############################### # Hardware Monitor RPM Files ############################### diff --git a/mtce/src/Makefile b/mtce/src/Makefile index 68db5444..a9ec0e22 100755 --- a/mtce/src/Makefile +++ b/mtce/src/Makefile @@ -9,7 +9,6 @@ VER_MJR=1 build: (cd public ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) - (cd rmon/rmonApi ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) (cd common ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) (cd alarm ; make build VER=$(VER) VER_MJR=$(VER_MJR)) (cd heartbeat ; make build VER=$(VER) VER_MJR=$(VER_MJR)) @@ -19,15 +18,12 @@ build: (cd lmon ; make build VER=$(VER) VER_MJR=$(VER_MJR)) (cd pmon ; make build VER=$(VER) VER_MJR=$(VER_MJR)) (cd fsmon ; make build VER=$(VER) VER_MJR=$(VER_MJR)) - (cd rmon ; make build VER=$(VER) VER_MJR=$(VER_MJR)) - (cd rmon/rmon_resource_notify ; make build VER=$(VER) VER_MJR=$(VER_MJR)) (cd hostw ; make build VER=$(VER) VER_MJR=$(VER_MJR)) (cd fsync ; make build VER=$(VER) VER_MJR=$(VER_MJR)) clean: @( cd common ; make clean ) @( cd public ; make clean ) - @( cd rmon/rmonApi ; make clean ) @( cd alarm ; make clean ) @( cd mtclog ; make clean ) @( cd hwmon ; make clean ) @@ -36,8 +32,6 @@ clean: @( cd fsmon ; make clean ) @( cd heartbeat ; make clean ) @( cd maintenance ; make clean ) - @( cd rmon ; make clean ) - @( cd rmon/rmon_resource_notify ; make clean ) @( cd hostw ; make clean ) @( cd fsync ; make clean ) @( rm -rf release ) diff --git a/mtce/src/common/nodeClass.cpp b/mtce/src/common/nodeClass.cpp index 49c73d21..96749319 100755 --- a/mtce/src/common/nodeClass.cpp +++ b/mtce/src/common/nodeClass.cpp @@ -691,9 +691,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) ptr->degrade_mask = ptr->degrade_mask_save = DEGRADE_MASK_NONE ; - ptr->degraded_resources_list.clear () ; ptr->pmond_ready = false ; - ptr->rmond_ready = false ; ptr->hwmond_ready = false ; ptr->hbsClient_ready = false ; @@ -4766,12 +4764,6 @@ int nodeLinkClass::declare_service_ready ( string & hostname, } return (PASS); } - else if ( service == MTC_SERVICE_RMOND ) - { - node_ptr->rmond_ready = true ; - plog ("%s got rmond ready event\n", hostname.c_str()); - return (PASS); - } else if ( service == MTC_SERVICE_HEARTBEAT ) { if ( node_ptr->hbsClient_ready == false ) @@ -4857,73 +4849,6 @@ int nodeLinkClass::collectd_notify_handler ( string & hostname, return (rc); } -/** Resource Monitor 'Clear' Event handler. - * - * The resource specified will be removed from the - * 'degraded_resources_list' for specified host. - * if there are no other degraded resources or other - * degraded services/reasons against that host then - * this handler will clear the degrade state for the - * specified host all together. */ -int nodeLinkClass::degrade_resource_clear ( string & hostname, - string & resource ) -{ - /* lr - Log Prefix Rmon */ - string lr = hostname ; - lr.append (" rmond:"); - - nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); - if ( node_ptr == NULL ) - { - wlog ("%s Unknown Host\n", lr.c_str()); - return FAIL_UNKNOWN_HOSTNAME ; - } - else if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) - { - /* Clear all resource degrade conditions if there is no resource specified */ - /* this is used as a cleanup audit just in case things get stuck */ - if ( resource.empty() ) - { - node_ptr->degrade_mask &= ~DEGRADE_MASK_RESMON ; - node_ptr->degraded_resources_list.clear () ; - } - else if (( node_ptr->degraded_resources_list.empty()) || - ( node_ptr->degrade_mask == DEGRADE_MASK_NONE )) - { - dlog ("%s '%s' Non-Degraded Clear\n", - lr.c_str(), resource.c_str()); - } - else - { - if (is_string_in_string_list (node_ptr->degraded_resources_list, resource)) - { - node_ptr->degraded_resources_list.remove(resource); - ilog ("%s '%s' Degrade Clear\n", - lr.c_str(), resource.c_str()); - } - else - { - wlog ("%s '%s' Unexpected Degrade Clear\n", - lr.c_str(), resource.c_str()); - } - - if ( node_ptr->degraded_resources_list.empty() ) - { - node_ptr->degrade_mask &= ~DEGRADE_MASK_RESMON ; ; - } - else - { - string degraded_resources = - get_strings_in_string_list ( node_ptr->degraded_resources_list ); - wlog ("%s Degraded Resource List: %s\n", - lr.c_str(), degraded_resources.c_str()); - } - } - - } - return (PASS); -} - /********************************************************************************* * * Name : node_degrade_control @@ -4940,9 +4865,6 @@ int nodeLinkClass::degrade_resource_clear ( string & hostname, * * "hwmon" - The Hardware Monitor process * - * - * Future services might be rmon and pmon - * **********************************************************************************/ int nodeLinkClass::node_degrade_control ( string & hostname, int state, string service ) { @@ -5266,28 +5188,6 @@ int nodeLinkClass::alarm_process_failure ( string & hostname, string & process return (PASS); } -/* Generate a log for the reported failed resource if that host is - * unlocked */ -int nodeLinkClass::log_resource_failure ( string & hostname, string & resource ) -{ - /* lr - Log Prefix Rmond */ - string lr = hostname ; - lr.append (" rmond:"); - nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); - if ( node_ptr == NULL ) - { - wlog ("%s Unknown Host ; '%s' failed (minor)\n", - lr.c_str(), resource.c_str()); - return FAIL_UNKNOWN_HOSTNAME ; - } - else if ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) - { - ilog ("%s '%s' failed (minor)\n", - lr.c_str(), resource.c_str()); - } - return (PASS); -} - /** Process Monitor Degrade Event handler. * * The host will enter degrade state due to the specified process @@ -5443,54 +5343,6 @@ int nodeLinkClass::update_dport_states ( struct nodeLinkClass::node * node_ptr, return (rc); } -/** Resource Monitor 'Raise' Event handler. - * - * The host will enter degrade state due to the specified resource - * threshold being surpased. The resource name is recorded in the - * 'degraded_resources_list' for specified host. - * Clearing degrade against this resource requires that host to - * send a clear event against that resource or for that host to - * fully re-enable */ -int nodeLinkClass::degrade_resource_raise ( string & hostname, - string & resource ) -{ - /* lr - Log Prefix Rmond */ - string lr = hostname ; - lr.append (" rmond:"); - - nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); - if ( node_ptr == NULL ) - { - wlog ("%s Unknown Host\n", lr.c_str()); - return FAIL_UNKNOWN_HOSTNAME ; - } - else if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) - { - if ( is_string_in_string_list ( node_ptr->degraded_resources_list, resource ) == false ) - { - string degraded_resources = ""; - - ilog ("%s '%s' Degraded\n", lr.c_str(), resource.c_str()); - node_ptr->degraded_resources_list.push_back (resource); - node_ptr->degrade_mask |= DEGRADE_MASK_RESMON ; - - /* Cleanup the list */ - node_ptr->degraded_resources_list.sort (); - node_ptr->degraded_resources_list.unique (); - - degraded_resources = - get_strings_in_string_list ( node_ptr->degraded_resources_list ); - wlog ("%s Failing Resources: %s\n", - lr.c_str(), degraded_resources.c_str()); - } - else - { - dlog ("%s '%s' Degraded (again)\n", lr.c_str(), resource.c_str()); - } - } - return (PASS); -} - /** Process Monitor 'Critical Process Failed' Event handler. * * This utility handles critical process failure event notifications. @@ -5557,36 +5409,6 @@ int nodeLinkClass::critical_process_failed( string & hostname, return (PASS); } -/** Resource Monitor 'Failed' Event handler. - * - * The host will go out of service, be reset and - * automatically re-enabled. */ -int nodeLinkClass::critical_resource_failed( string & hostname, - string & resource ) -{ - nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); - if ( node_ptr == NULL ) - { - wlog ("%s rmond: Unknown host\n", hostname.c_str()); - return FAIL_UNKNOWN_HOSTNAME ; - } - - if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && - ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) - { - /* Start fresh the next time we enter graceful recovery handler */ - node_ptr->graceful_recovery_counter = 0 ; - - elog ("%s rmond: Critical Resource '%s' Failure\n", hostname.c_str(), resource.c_str()); - - /* Set node as unlocked-enabled */ - allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, - MTC_OPER_STATE__DISABLED, - MTC_AVAIL_STATUS__FAILED ); - } - return (PASS); -} - bool nodeLinkClass::is_active_controller ( string hostname ) { if ( nodeLinkClass::my_hostname.compare(hostname) ) diff --git a/mtce/src/common/nodeClass.h b/mtce/src/common/nodeClass.h index bd38d78c..9b4aaf1e 100755 --- a/mtce/src/common/nodeClass.h +++ b/mtce/src/common/nodeClass.h @@ -602,9 +602,9 @@ private: * @addtogroup private_monitoring_services_variables * @{ * - * A grouping a of flags, mask and degrade resource lists - * used to manage the degrade state of a host for process - * and resource monitoring services. + * A grouping a of flags, mask and lists used to + * manage the degrade state of a host for process + * monitoring services. */ /* Bit mask of degrade reasons */ @@ -631,16 +631,6 @@ private: recovery_ctrl_type hwmon_reset ; recovery_ctrl_type hwmon_powercycle ; - /** Resource Monitor Daemon Flag Missing count */ - int rmond_missing_count ; - - /** Host degraded due to loss of Resource Monitor running flag */ - bool rmond_degraded ; - - /** Resource Monitor Ready flag and degrade list */ - bool rmond_ready ; - std::list degraded_resources_list ; - /** process or resource list string iterator */ std::list::iterator string_iter_ptr ; @@ -1797,16 +1787,6 @@ public: * specified host all together. */ int degrade_pmond_clear ( string & hostname ); - /** Resource Monitor 'Clear' Event handler. - * - * The resource specified will be removed from the - * 'degraded_resources_list' for specified host. - * if there are no other degraded resources or other - * degraded services/reasons against that host then - * this handler will clear the degrade state for the - * specified host all together. */ - int degrade_resource_clear ( string & hostname, string & resource ); - /** * If the pmond degrade flag is not set then do so. * if the host is not degraded then set it to degraded. */ @@ -1818,19 +1798,6 @@ public: /** if host is unlocked-enabled generate a process failure alarm */ int alarm_process_failure ( string & hostname, string & process ); - /** Resource Monitor Raise Event handler. - * - * The host will enter degrade state due to the specified resource - * not running properly. The resource name is recorded in the - * 'degraded_resources_list' for specified host. - * Clearing degrade against this resource requires that host to - * send a clear event against that resource or for that host to - * fully re-enable */ - int degrade_resource_raise ( string & hostname, string & resource ); - - /** Generate a resource failure log if the host is unlocked */ - int log_resource_failure ( string & hostname, string & resource ); - /** Hardware Process Monitor Degrade Event handler. * see implementation for details */ int node_degrade_control ( string & hostname, int state, string service ); @@ -1849,12 +1816,6 @@ public: * automatically re-enabled. */ int critical_process_failed( string & hostname, string & process, unsigned int nodetype ); - /** Resource Monitor Failed Event handler. - * - * The host will go out of service and be reset and - * automatically re-enabled. */ - int critical_resource_failed( string & hostname, string & resource ); - /************************************************************/ /** diff --git a/mtce/src/lmon/lmon.h b/mtce/src/lmon/lmon.h index 4721f0e8..7eca9219 100644 --- a/mtce/src/lmon/lmon.h +++ b/mtce/src/lmon/lmon.h @@ -86,24 +86,6 @@ typedef struct char bond[IF_NAMESIZE] ; /* bonded interface name */ bool lagged ; /* Lagged interface=true or not=false */ -// unsigned int debounce_cnt ; /* running monitor debounce count */ -// unsigned int minorlog_cnt ; /* track minor log count for thresholding */ -// unsigned int count ; /* track the number of times the condition has been occured */ -// bool failed ; /* track if the resource needs to be serviced by the resource handler */ -// int resource_value ; /* 1 if the interface is up and 0 if it is down */ -// int resource_value_lagged ; /* 1 if the interface is up and 0 if it is down for lagged interfaces */ -// int sev ; /* The severity of the failed resource */ -// rmonStage_enum stage ; /* The stage the resource is in within the resource handler fsm */ -// char alarm_id[FM_MAX_BUFFER_LENGTH] ; /* Used by FM API, type of alarm being raised */ -// char alarm_id_port[FM_MAX_BUFFER_LENGTH] ; /* Used by FM API, type of alarm being raised for the ports */ -// char errorMsg[ERR_SIZE]; -// rmon_api_socket_type msg; -// bool link_up_and_running; /* whether the interface is up or down initially */ - -// bool alarm_raised; -// int failed_send; /* The number of times the rmon api failed to send a message */ - - } interface_ctrl_type ; diff --git a/mtce/src/maintenance/Makefile b/mtce/src/maintenance/Makefile index c195dd57..85c2db68 100755 --- a/mtce/src/maintenance/Makefile +++ b/mtce/src/maintenance/Makefile @@ -51,9 +51,9 @@ CONTROL_OBJS += ../common/nodeClass.o OBJS = $(SRCS:.cpp=.o) BINS = mtcAgent mtcClient -LDLIBS += -lstdc++ -ldaemon -lcommon -lthreadUtil -lipmiUtil -lfmcommon -lrmonapi -lalarm -lpthread -lrt -levent -ljson-c -lamon -lcrypto -luuid +LDLIBS += -lstdc++ -ldaemon -lcommon -lthreadUtil -lipmiUtil -lfmcommon -lalarm -lpthread -lrt -levent -ljson-c -lamon -lcrypto -luuid INCLUDES = -I. -I/usr/include/mtce-daemon -I/usr/include/mtce-common -INCLUDES += -I../common -I../alarm -I../heartbeat -I../hwmon -I../public -I../rmon/rmonApi +INCLUDES += -I../common -I../alarm -I../heartbeat -I../hwmon -I../public CCFLAGS += -g -O2 -Wall -Wextra -Werror -Wno-missing-braces STATIC_ANALYSIS_TOOL = cppcheck @@ -77,10 +77,10 @@ common: ( cd ../common ; make clean ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) mtcAgent: $(OBJS) - $(CXX) $(CONTROL_OBJS) -L../public -L../alarm -L../rmon/rmonApi $(LDLIBS) $(EXTRALDFLAGS) -o mtcAgent + $(CXX) $(CONTROL_OBJS) -L../public -L../alarm $(LDLIBS) $(EXTRALDFLAGS) -o mtcAgent mtcClient: $(OBJS) - $(CXX) $(COMPUTE_OBJS) -L../public -L../alarm -L../rmon/rmonApi $(LDLIBS) $(EXTRALDFLAGS) -o mtcClient + $(CXX) $(COMPUTE_OBJS) -L../public -L../alarm $(LDLIBS) $(EXTRALDFLAGS) -o mtcClient remove_bins: @rm -v -f $(BINS) diff --git a/mtce/src/maintenance/mtcCompMsg.cpp b/mtce/src/maintenance/mtcCompMsg.cpp index f1bc938a..455c1e88 100755 --- a/mtce/src/maintenance/mtcCompMsg.cpp +++ b/mtce/src/maintenance/mtcCompMsg.cpp @@ -55,122 +55,6 @@ extern char *program_invocation_short_name; int mtcAlive_mgmnt_sequence = 0 ; int mtcAlive_infra_sequence = 0 ; -/***************************************************************************** - * Also handles: Accelerated Virtual Switch 'events' handler - * for receiving data port state change event - * - * Event strings are:- - * {"type":"port-state", "severity":"critical|major|clear"} - * severity: - * critical - port has failed and is not part of an aggregate - * or is the last port in an aggregate - * (degrade, disable services) - * major - port has failed and is part of an aggregate - * with other inservice-ports (degrade only) - * clear - port has recovered from a failed state and is - * operational (clear degrade, enable services) - * - * NOTE : The port status can transition from any of the above - * states to any other state. - * - * RMON monitors the vswitch ports at a 20 second interval(debounce value). - * If a port changes link state during the polling period, it will - * raise/clear the alarm, but now also calculates the impact of that port - * failure on the provider network data interface. - * - * The overall aggregated state across all provider network interfaces will - * be reported to maintenance when ports enter a link down or up state. - * The agent will also periodically send the current provider network port - * status to maintenance every 20 seconds. - * - * Return : MTC_EVENT_AVS_CLEAR|MAJOR|CRITICAL - *****************************************************************************/ -/** Receive and process event messages from rmon **/ -static int rmon_message_error = 0 ; - -int service_rmon_inbox ( mtc_socket_type * sock_ptr, int & rmon_code, string & resource_name ) -{ - /* Max rmon message length */ - #define RMON_MAX_LEN (100) - /* Max tries to receive rmon message */ - #define MAX_TRIES (3) - - char buf[RMON_MAX_LEN]; - char res_name[RMON_MAX_LEN]; - socklen_t len = sizeof(struct sockaddr_in) ; - char str[RMON_MAX_LEN]; - int rc = FAIL; - - - int sequence = 0; - int bytes = 0 ; - int num_tries = 0; - - do - { - memset ( buf,0,RMON_MAX_LEN); - memset ( str,0,RMON_MAX_LEN); - - bytes = recvfrom( sock_ptr->rmon_socket, buf, RMON_MAX_LEN, 0, - (struct sockaddr *)&sock_ptr->mtc_cmd_addr, &len); - if ( bytes > 0 ) - { - sscanf ( buf, "%99s %99s %d", res_name, str, &sequence ); - string r_name(res_name); - resource_name = r_name; - - if ( str[0] != '\0' ) - { - mlog("%s \n",str); - // check if it is an AVS type message - if (regexUtil_string_startswith(str, "AVS_clear")) { - rmon_code = MTC_EVENT_AVS_CLEAR; - } else if (regexUtil_string_startswith(str, "AVS_major")) { - rmon_code = MTC_EVENT_AVS_MAJOR; - } else if (regexUtil_string_startswith(str, "AVS_critical")) { - rmon_code = MTC_EVENT_AVS_CRITICAL; - // process generic RMON messages - } else if (regexUtil_string_startswith(str, "cleared")) { - rmon_code = RMON_CLEAR; - } else if (regexUtil_string_startswith(str, "minor")) { - rmon_code = RMON_MINOR; - } else if (regexUtil_string_startswith(str, "major")) { - rmon_code = RMON_MAJOR; - } else if (regexUtil_string_startswith(str, "critical")) { - rmon_code = RMON_CRITICAL; - } else { - elog("Invalid rmon string: %s \n", str); - rc = FAIL; - return rc; - } - rmon_message_error = 0 ; - rc = PASS; - return rc; - } - else - { - wlog_throttled (rmon_message_error, 1, "rmond message with no severity specified (%s)\n", - resource_name.empty() ? "no resource" : resource_name.c_str()); - - if (( rmon_message_error == 1 ) && ( buf[0] != '\0' )) - { - ilog ("rmon message: [%s]\n", buf ); - } - rc = FAIL_NULL_POINTER; - return rc; - } - } - else if (( 0 > bytes ) && ( errno != EINTR ) && ( errno != EAGAIN )) - { - elog("rmon event recv error (%d:%s) \n", errno, strerror(errno)); - rc = FAIL; - } - } while (( bytes > 0 ) && ( ++num_tries < MAX_TRIES)) ; - - return rc; -} - - /* Receive and process commands from controller maintenance */ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { @@ -627,26 +511,7 @@ int mtce_send_event ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_na memset (&event, 0 , sizeof(mtc_message_type)); - if (( cmd == MTC_EVENT_RMON_READY) || - ( cmd == MTC_EVENT_RMON_MINOR) || - ( cmd == MTC_EVENT_RMON_MAJOR) || - ( cmd == MTC_EVENT_RMON_CRIT ) || - ( cmd == MTC_EVENT_RMON_CLEAR )) - { - snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_mtce_event_header() ); - if ( mtce_name_ptr ) - { - size_t len = strnlen ( mtce_name_ptr, MAX_MTCE_EVENT_NAME_LEN ); - - /* We don't use the buffer for mtce events to remove it from the size */ - bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len)); - - snprintf ( &event.buf[0], MAX_MTCE_EVENT_NAME_LEN , "%s", mtce_name_ptr ); - } else { - slog ("Internal Error - mtce_name_ptr is null\n"); - } - } - else if ( cmd == MTC_EVENT_LOOPBACK ) + if ( cmd == MTC_EVENT_LOOPBACK ) { snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_loopback_header() ); diff --git a/mtce/src/maintenance/mtcCtrlMsg.cpp b/mtce/src/maintenance/mtcCtrlMsg.cpp index 32177855..0ebf45b0 100755 --- a/mtce/src/maintenance/mtcCtrlMsg.cpp +++ b/mtce/src/maintenance/mtcCtrlMsg.cpp @@ -502,44 +502,6 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, break ; } - /***************************************************************** - * Resource Monitor Events * - *****************************************************************/ - - /* TODO: Remove - Suspecting OBS Command */ - case MTC_EVENT_RMON_READY: - { - mlog ("%s RMON Ready\n", hostname.c_str()); - obj_ptr->declare_service_ready ( hostname, MTC_SERVICE_RMOND ); - break ; - } - case MTC_EVENT_RMON_CLEAR: - { - mlog ("%s rmond: '%s' recovered (clear)\n", hostname.c_str(), event.c_str()); - obj_ptr->degrade_resource_clear ( hostname , event ); - break ; - } - case MTC_EVENT_RMON_CRIT: - { - mlog ("%s rmond: '%s' failed (critical)\n", hostname.c_str(), event.c_str()); - obj_ptr->critical_resource_failed ( hostname, event ); - break ; - } - case MTC_EVENT_RMON_MAJOR: - { - mlog ("%s rmond: '%s' failed (major)\n", hostname.c_str(), event.c_str()); - obj_ptr->degrade_resource_raise ( hostname, event ); - break ; - } - case MTC_EVENT_RMON_MINOR: - { - mlog ("%s rmond: '%s' failed (minor)\n", hostname.c_str(), event.c_str()); - /* Clear the degrade condition if one is present */ - obj_ptr->degrade_resource_clear ( hostname , event ); - obj_ptr->log_resource_failure ( hostname, event ); - break ; - } - case MTC_EVENT_HWMON_CLEAR: case MTC_DEGRADE_CLEAR: { diff --git a/mtce/src/maintenance/mtcNodeComp.cpp b/mtce/src/maintenance/mtcNodeComp.cpp index 62dfd6e6..83bd29aa 100644 --- a/mtce/src/maintenance/mtcNodeComp.cpp +++ b/mtce/src/maintenance/mtcNodeComp.cpp @@ -63,7 +63,6 @@ using namespace std; extern "C" { #include "amon.h" /* for ... active monitoring utilities */ -#include "rmon_api.h" /* for ... resource monitoring utilities */ } @@ -181,15 +180,6 @@ void _close_infra_tx_socket ( void ) } } -void _close_rmon_sock ( void ) -{ - if ( mtc_sock.rmon_socket ) - { - close (mtc_sock.rmon_socket); - mtc_sock.rmon_socket = 0 ; - } -} - void _close_amon_sock ( void ) { if ( mtc_sock.amon_socket ) @@ -207,7 +197,6 @@ void daemon_exit ( void ) _close_infra_rx_socket (); _close_mgmnt_tx_socket (); _close_infra_tx_socket (); - _close_rmon_sock (); _close_amon_sock (); exit (0) ; @@ -232,11 +221,6 @@ static int mtc_config_handler ( void * user, config_ptr->mtc_rx_mgmnt_port = atoi(value); config_ptr->mask |= CONFIG_CLIENT_MTC_MGMNT_PORT ; } - else if (MATCH("client", "rmon_event_port")) - { - config_ptr->rmon_event_port = atoi(value); - config_ptr->mask |= CONFIG_CLIENT_RMON_PORT ; - } else if (MATCH("timeouts", "failsafe_shutdown_delay")) { config_ptr->failsafe_shutdown_delay = atoi(value); @@ -471,33 +455,6 @@ void setup_amon_socket ( void ) } mtc_sock.amon_socket = 0 ; } - -void setup_rmon_socket ( void ) -{ - mtc_sock.rmon_socket = - resource_monitor_initialize ( program_invocation_short_name, mtc_config.rmon_event_port, ALL_USAGE ); - if ( mtc_sock.rmon_socket ) - { - int val = 1; - - /* Make the active monitor socket non-blocking */ - if ( 0 > ioctl(mtc_sock.rmon_socket, FIONBIO, (char *)&val) ) - { - elog ("failed to set rmon event port non-blocking (%d:%s),\n", errno, strerror(errno)); - close ( mtc_sock.rmon_socket ); - } - else - { - ilog ("Resource Monitor Socket %d\n", mtc_sock.rmon_socket ); - return ; - } - } - else - { - elog ("failed to register as client with rmond\n"); - } - mtc_sock.rmon_socket = 0 ; -} /****************************************************************** * @@ -509,7 +466,6 @@ void setup_rmon_socket ( void ) * 4. Unicast transmit socket infra (mtc_client_infra_tx_socket) * * 5. socket for pmond acive monitoring - * 6. socket to receive rmond events (including AVS) * *******************************************************************/ int mtc_socket_init ( void ) @@ -557,11 +513,6 @@ int mtc_socket_init ( void ) /*************************************************************/ setup_amon_socket (); - /*************************************************************/ - /* Setup and Open the resource monitor event socket */ - /*************************************************************/ - setup_rmon_socket (); - return (PASS); } @@ -1059,9 +1010,6 @@ int daemon_init ( string iface, string nodetype_str ) int select_log_count = 0 ; void daemon_service_run ( void ) { - int rmon_code; - string resource_name; - int rc = PASS ; int file_not_present_count = 0 ; @@ -1075,7 +1023,7 @@ void daemon_service_run ( void ) std::list socks ; - /* Run heartbeat service forever or until stop condition */ + /* Run heartbeat service forever or until stop condition */ for ( ; ; ) { /* set the master fd_set */ @@ -1103,13 +1051,6 @@ void daemon_service_run ( void ) FD_SET(mtc_sock.amon_socket, &mtc_sock.readfds); } - mtc_sock.rmon_socket = resource_monitor_get_sel_obj (); - if ( mtc_sock.rmon_socket ) - { - socks.push_front (mtc_sock.rmon_socket); - FD_SET(mtc_sock.rmon_socket, &mtc_sock.readfds); - } - /* Initialize the timeval struct to wait for 50 mSec */ mtc_sock.waitd.tv_sec = 0; mtc_sock.waitd.tv_usec = SOCKET_WAIT; @@ -1118,11 +1059,10 @@ void daemon_service_run ( void ) socks.sort(); #ifdef WANT_SELECTS - ilog_throttled ( select_log_count, 200 , "Selects: mgmnt:%d infra:%d amon:%d rmon:%d - Size:%ld First:%d Last:%d\n", + ilog_throttled ( select_log_count, 200 , "Selects: mgmnt:%d infra:%d amon:%d - Size:%ld First:%d Last:%d\n", mtc_sock.mtc_client_rx_socket, mtc_sock.mtc_client_infra_rx_socket, mtc_sock.amon_socket, - mtc_sock.rmon_socket, socks.size(), socks.front(), socks.back()); #endif @@ -1159,40 +1099,6 @@ void daemon_service_run ( void ) dlog3 ("Active Monitor Select Fired\n"); active_monitor_dispatch (); } - if ( FD_ISSET(mtc_sock.rmon_socket, &mtc_sock.readfds)) - { - dlog3 ("Resource Monitor Select Fired\n"); - rc = service_rmon_inbox( sock_ptr, rmon_code, resource_name ); - - if (rc == PASS) { - - switch ( rmon_code ) { - case RMON_CLEAR: - mtce_send_event ( sock_ptr, MTC_EVENT_RMON_CLEAR, resource_name.c_str() ); - break; - - case RMON_MINOR: - mtce_send_event ( sock_ptr, MTC_EVENT_RMON_MINOR, resource_name.c_str() ); - break; - - case RMON_MAJOR: - mtce_send_event ( sock_ptr, MTC_EVENT_RMON_MAJOR, resource_name.c_str() ); - break; - - case RMON_CRITICAL: - mtce_send_event ( sock_ptr, MTC_EVENT_RMON_CRIT, resource_name.c_str() ); - break; - case MTC_EVENT_AVS_CLEAR: - case MTC_EVENT_AVS_MAJOR: - case MTC_EVENT_AVS_CRITICAL: - mtce_send_event ( sock_ptr, rmon_code, "" ); - break; - default: - break; - } - - } - } } if (( ctrl.active_script_set == GOENABLED_MAIN_SCRIPTS ) || @@ -1353,14 +1259,6 @@ void daemon_service_run ( void ) socket_reinit = true ; } - /* RMON event notifications */ - else if ( mtc_sock.rmon_socket <= 0 ) - { - setup_rmon_socket (); - wlog ("calling setup_rmon_socket (auto-recovery)\n"); - socket_reinit = true ; - } - else if ( mtc_sock.amon_socket <= 0 ) { setup_amon_socket (); @@ -1426,10 +1324,6 @@ void daemon_service_run ( void ) _close_infra_tx_socket (); } } - if ( daemon_is_file_present ( MTC_CMD_FIT__RMON_SOCK )) - { - _close_rmon_sock (); - } if ( daemon_is_file_present ( MTC_CMD_FIT__AMON_SOCK )) { _close_amon_sock (); diff --git a/mtce/src/maintenance/mtcNodeComp.h b/mtce/src/maintenance/mtcNodeComp.h index ddd2571f..5974d9fa 100644 --- a/mtce/src/maintenance/mtcNodeComp.h +++ b/mtce/src/maintenance/mtcNodeComp.h @@ -19,8 +19,7 @@ /** Compute Config mask */ #define CONFIG_CLIENT_MASK (CONFIG_AGENT_PORT |\ - CONFIG_CLIENT_MTC_MGMNT_PORT |\ - CONFIG_CLIENT_RMON_PORT) + CONFIG_CLIENT_MTC_MGMNT_PORT) #define MAX_RUN_SCRIPTS (20) diff --git a/mtce/src/maintenance/mtcNodeHdlrs.cpp b/mtce/src/maintenance/mtcNodeHdlrs.cpp index 35343c3d..0fa5a428 100755 --- a/mtce/src/maintenance/mtcNodeHdlrs.cpp +++ b/mtce/src/maintenance/mtcNodeHdlrs.cpp @@ -725,7 +725,6 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) /* Clear all degrade flags except for the HWMON one */ clear_host_degrade_causes ( node_ptr->degrade_mask ); - node_ptr->degraded_resources_list.clear(); /* Purge this hosts work and done queues */ workQueue_purge ( node_ptr ); @@ -1840,7 +1839,6 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) /* Clear all degrade flags except for the HWMON one */ clear_host_degrade_causes ( node_ptr->degrade_mask ); - node_ptr->degraded_resources_list.clear(); if ( is_controller(node_ptr) ) { @@ -2754,7 +2752,6 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr ) /* Clear all degrade flags except for the HWMON one */ clear_host_degrade_causes ( node_ptr->degrade_mask ); - node_ptr->degraded_resources_list.clear(); if ( is_controller(node_ptr) ) { diff --git a/mtce/src/maintenance/mtcNodeMsg.h b/mtce/src/maintenance/mtcNodeMsg.h index c93afad0..c5bfcb16 100755 --- a/mtce/src/maintenance/mtcNodeMsg.h +++ b/mtce/src/maintenance/mtcNodeMsg.h @@ -109,9 +109,6 @@ typedef struct /** Active Monitor Socket */ int amon_socket ; - /** Resource Monitor Socket */ - int rmon_socket ; - bool main_go_enabled_reply_ack ; bool subf_go_enabled_reply_ack ; @@ -131,7 +128,6 @@ int recv_mtc_reply_noblock ( void ); int send_mtc_cmd ( string & hostname, int cmd, int interface ); int mtc_service_command ( mtc_socket_type * sock_ptr , int interface ); int mtc_set_availStatus ( string & hostname, mtc_nodeAvailStatus_enum status ); -int service_rmon_inbox ( mtc_socket_type * sock_ptr, int & rmon_code, string & resource_name ); int mtce_send_event ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_name_ptr ); int mtc_infra_init ( mtc_socket_type * sock_ptr , char * iface ); string get_who_i_am ( void ); diff --git a/mtce/src/pmon/scripts/pmon-test.sh b/mtce/src/pmon/scripts/pmon-test.sh index 02e74595..4bf4af33 100755 --- a/mtce/src/pmon/scripts/pmon-test.sh +++ b/mtce/src/pmon/scripts/pmon-test.sh @@ -40,7 +40,6 @@ # ntpd does not support restart # ptp4l does not support restart # phc2sys does not support restart -# rmond restart PASSED [13893]:[5129] # sm-api restart PASSED [8896]:[8460] # skipping 'sm' process # sm-eru restart PASSED [8904]:[10993] diff --git a/mtce/src/rmon/Makefile b/mtce/src/rmon/Makefile deleted file mode 100755 index 11efaac1..00000000 --- a/mtce/src/rmon/Makefile +++ /dev/null @@ -1,43 +0,0 @@ -# -# Copyright (c) 2014-2018 Wind River Systems, Inc. -# -# SPDX-License-Identifier: Apache-2.0 -# - -SHELL = /bin/bash - -SRCS = rmonInit.cpp rmonMsg.cpp rmonHdlr.cpp rmonHttp.cpp interfaceHdlr.cpp rmonFM.cpp remoteloggingHdlr.cpp thinmetaHdlr.cpp -OBJS = $(SRCS:.cpp=.o) -LDLIBS = -lstdc++ -ldaemon -lcommon -lfmcommon -lrmonapi -lrt -levent -lcrypto -luuid -lpthread -ljson-c -INCLUDES = -I. -I/usr/include/mtce-daemon -I/usr/include/mtce-common -INCLUDES += -IrmonApi -CCFLAGS = -g -O2 -Wall -Werror -Wextra - -STATIC_ANALYSIS_TOOL = cppcheck -STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) - -all: clean static_analysis common daemon build - -.cpp.o: - $(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@ - -static_analysis: -ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) - $(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h -else - echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." -endif - -build: clean static_analysis $(OBJS) - $(CXX) $(CCFLAGS) $(OBJS) -L../daemon -L../common -LrmonApi/ $(LDLIBS) $(EXTRALDFLAGS) -o rmond - -lib: build $(OBJS) - -common: - ( cd ../common ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) - -daemon: - ( cd ../daemon ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) - -clean: - @rm -v -f $(OBJ) rmond *.o *.a diff --git a/mtce/src/rmon/interfaceHdlr.cpp b/mtce/src/rmon/interfaceHdlr.cpp deleted file mode 100644 index 57f9494d..00000000 --- a/mtce/src/rmon/interfaceHdlr.cpp +++ /dev/null @@ -1,1183 +0,0 @@ -/* - * Copyright (c) 2013-2016 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - -/** - * @file - * Wind River CGCS Platform Resource Monitor Interface Handler - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "rmon.h" /* rmon header file */ -#include -#include "nlEvent.h" /* for ... get_netlink_events */ -#include -#include - -/* Used to set interface alarms through the FM API */ -static SFmAlarmDataT alarmData; - -/* Used to set port alarms through the FM API */ -static SFmAlarmDataT alarmDataPOne; - -/* Used to set port alarms through the FM API */ -static SFmAlarmDataT alarmDataPTwo; - -const char rmonStages_str [RMON_STAGE__STAGES][32] = -{ - "Handler-Init", - "Handler-Start", - "Manage-Restart", - "Monitor-Wait", - "Monitor-Resource", - "Restart-Wait", - "Ignore-Resource", - "Handler-Finish", - "Failed-Resource", - "Failed-Resource-clr", -} ; - -/***************************************************************************** - * - * Name : interfaceResourceStageChange - * - * Purpose : Put an interface resource in the requested stage for use by the - * interface resource handler - * - *******************************************} - **********************************/ -int interfaceResourceStageChange ( interface_resource_config_type * ptr , rmonStage_enum newStage ) -{ - if (( newStage < RMON_STAGE__STAGES ) && - ( ptr->stage < RMON_STAGE__STAGES )) - { - clog ("%s %s -> %s (%d->%d)\n", - ptr->resource, - rmonStages_str[ptr->stage], - rmonStages_str[newStage], - ptr->stage, newStage); - ptr->stage = newStage ; - return (PASS); - } - else - { - slog ("%s Invalid Stage (now:%d new:%d)\n", - ptr->resource, ptr->stage, newStage ); - ptr->stage = RMON_STAGE__FINISH ; - return (FAIL); - } -} - -/************************************************************ - * - * Name : get_iflink_interface - * - * Purpose : get the ifname of the linked parent interface - ***********************************************************/ -string get_iflink_interface (string ifname) -{ - - string iflink_file = INTERFACES_DIR + ifname + "/iflink"; - - ifstream finIflink ( iflink_file.c_str() ); - string iflink_line; - string ret = ""; - char iface_buffer [INTERFACE_NAME_LEN] = ""; - int iflink = -1; - - if (finIflink.is_open()) - { - - while ( getline (finIflink, iflink_line) ) { - iflink = atoi(iflink_line.c_str()); - } - finIflink.close(); - - if_indextoname (iflink, iface_buffer); - - if (iface_buffer[0] != '\0') - ret = iface_buffer; - - } - return ret; -} - - -/***************************************************************************** - * - * Name : init_physical_interfaces - * - * Purpose : Map an interface (mgmt, oam or infra) to a physical port - * - *****************************************************************************/ -void init_physical_interfaces ( interface_resource_config_type * ptr ) -{ - FILE * pFile; - char line_buf[50]; - string str; - string physical_interface = ""; - enum interface_type { single, vlan, bond }; - interface_type ifaceType; // default assumption - - memset(ptr->interface_one, 0, sizeof(ptr->interface_one)); - memset(ptr->interface_two, 0, sizeof(ptr->interface_two)); - memset(ptr->bond, 0, sizeof(ptr->bond)); - - if ( strcmp(ptr->resource, MGMT_INTERFACE_NAME) == 0 ) - { - str = MGMT_INTERFACE_FULLNAME; - } - else if ( strcmp(ptr->resource, INFRA_INTERFACE_NAME) == 0 ) - { - str = INFRA_INTERFACE_FULLNAME; - } - else if ( strcmp(ptr->resource, OAM_INTERFACE_NAME) == 0 ) - { - str = OAM_INTERFACE_FULLNAME; - } - - - pFile = fopen (PLATFORM_DIR , "r"); - /* get the physical interface */ - if (pFile != NULL) - { - ifstream fin( PLATFORM_DIR ); - string line; - while ( getline( fin, line ) ) - { - - if ( line.find(str) != string::npos ) - { - stringstream ss( line ); - getline( ss, physical_interface, '=' ); // token = string before = - getline( ss, physical_interface, '=' ); // token = string after = - - // determine the interface type - string uevent_interface_file = INTERFACES_DIR + - physical_interface + "/uevent"; - ifstream finUevent( uevent_interface_file.c_str() ); - - // if we cannot locate this file then instead of disabling - // Kernel interface monitoring all together, we will use - // use the interface naming convention to do a best effort - // estimate of the interface type... the show must go on! - if (!finUevent) { - elog ("Cannot find uevent interface file (%s) to " - "resolve interface type for resource %s. " - "Disabling monitoring\n" , - uevent_interface_file.c_str(), ptr->resource); - - ptr->interface_used = false; - fclose(pFile); - return; - } - else { // proceed with uevent method - string line; - ifaceType = single; - while( getline( finUevent, line ) ) - { - if ( line.find ("DEVTYPE") == 0 ) - { - if ( line.find ("=vlan") != string::npos ) - ifaceType = vlan; - else if ( line.find ("=bond") != string::npos ) - ifaceType = bond; - break; - } - } - } - - switch (ifaceType) { - case single: - memcpy(ptr->interface_one, - physical_interface.c_str(), - physical_interface.size()); - ilog("Interface : %s : %s \n", - ptr->interface_one, ptr->resource ); - break; - case bond: - memcpy(ptr->bond, - physical_interface.c_str(), - physical_interface.size()); - ilog("Bond Interface : %s : %s \n", - ptr->bond, ptr->resource ); - break; - case vlan: - ilog("VLAN Interface : %s : %s \n", - physical_interface.c_str(), ptr->resource); - // if it is a VLAN interface, we need - // to determine its parent interface, - // which may be a single interface or - // a bonded interface - string parent_interface = get_iflink_interface(physical_interface); - if (!parent_interface.empty()) { - - dlog ("Parent interface for VLAN : %s\n", - parent_interface.c_str()); - - physical_interface = parent_interface; - - string uevent_parent_file = INTERFACES_DIR + - parent_interface + "/uevent"; - - ifstream finUevent2( uevent_parent_file.c_str() ); - string line; - bool bond_configured = false; - while( getline( finUevent2, line ) ) - { - // if this uevent does not have a DEVTYPE - // then its a single interface. If this - // does have a DEVTYPE then check explicity - // for bond. Since we don't allow vlan over - // vlan, for all other DEVTYPEs, assume - // this is a single interface. - if ( (line.find ("DEVTYPE") == 0) && - (line.find ("=bond") != string::npos) ) { - - ilog ("Parent interface of VLAN interface " - "resolved as Bond\n"); - bond_configured = true; - break; - } - } - if (!bond_configured) { - memcpy(ptr->interface_one, - parent_interface.c_str(), - parent_interface.size()); - } - } - break; - } // end of switch - break; - } - } - fclose(pFile); - } - - /* Lagged interface */ - if ((ptr->interface_one[0] == '\0') && (!physical_interface.empty())) - { - - string lagged_interface_file = INTERFACES_DIR + - physical_interface + "/bonding/slaves"; - - ifstream finTwo( lagged_interface_file.c_str() ); - if (!finTwo) { - elog ("Cannot find bond interface file (%s) to " - "resolve slave interfaces\n", lagged_interface_file.c_str()); - } - else { - string line; - while( getline( finTwo, line ) ) - { - strcpy(line_buf, line.c_str()); - // the slave interfaces are listed as enXYYY enXYYY... - // starting with the primary. Read all other slaves - // as interface_two - sscanf(line_buf, "%19s %19s", ptr->interface_one, ptr->interface_two); - ilog("%s interface: %s, interface two: %s \n", ptr->resource, - ptr->interface_one, ptr->interface_two); - break; - } - } - } - - if ( ptr->interface_one[0] == '\0' ) - { - ptr->interface_used = false; - } - else - { - ptr->interface_used = true; - if ( ptr->interface_two[0] == '\0' ) - { - /* this is not a lagged interface */ - ptr->lagged = false; - } else { - /* this is a lagged interface */ - ptr->lagged = true; - } - } -} - -/***************************************************************************** - * - * Name : service_resource_state - * - * Purpose : Set the interface resource in the correct state for the interface - * resource handler - * - *****************************************************************************/ -void service_resource_state ( interface_resource_config_type * ptr ) -{ - - if (ptr->lagged == true) - { - /* the lagged interface is initialized */ - if ((ptr->resource_value == INTERFACE_UP) && (ptr->resource_value_lagged == INTERFACE_UP) && - (ptr->failed == true )) - { - /* If both interfaces are up and there is a fault, it needs to be cleared */ - ptr->sev = SEVERITY_CLEARED; - interfaceResourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - else if ((((ptr->resource_value == INTERFACE_UP) && (ptr->resource_value_lagged == INTERFACE_DOWN)) || - ((ptr->resource_value_lagged == INTERFACE_UP) && (ptr->resource_value == INTERFACE_DOWN))) && - (ptr->sev != SEVERITY_MAJOR)) - { - /* if one interface failed its a major condition */ - - if (ptr->sev == SEVERITY_CRITICAL) - { - /* need to clear port alarm but not interface alarm */ - interfaceResourceStageChange ( ptr, RMON_STAGE__FAILED_CLR ); - } - else - { - interfaceResourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - - ptr->failed = true; - ptr->sev = SEVERITY_MAJOR; - } - else if (((ptr->resource_value == INTERFACE_DOWN) && (ptr->resource_value_lagged == INTERFACE_DOWN)) && - (ptr->sev != SEVERITY_CRITICAL)) - { - /* both lagged interfaces failed, this is a critical condition */ - ptr->failed = true; - ptr->sev = SEVERITY_CRITICAL ; - interfaceResourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - } - else - { - /* interface is not lagged */ - if ( (ptr->resource_value == INTERFACE_DOWN) && (ptr->sev != SEVERITY_CRITICAL) ) - { - /* the interface has failed */ - ptr->failed = true; - ptr->sev = SEVERITY_CRITICAL ; - interfaceResourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - else if ((ptr->resource_value == INTERFACE_UP) && (ptr->failed == true )) - { - /* If the interface is up and there is a fault, it needs to be cleared */ - ptr->sev = SEVERITY_CLEARED; - interfaceResourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - } -} - -/***************************************************************************** - * - * Name : get_link_state - * - * Purpose : Check to see if the current interface link is up or down - * - *****************************************************************************/ -int get_link_state ( int ioctl_socket, char iface[20], bool * running_ptr ) -{ - int get_link_state_throttle = 0 ; - struct ifreq if_data; - int rc = FAIL ; - - if (iface[0] == '\0') - { - elog ("Null interface name\n"); - return ( rc ) ; - } - - memset( &if_data, 0, sizeof(if_data) ); - sprintf( if_data.ifr_name, "%s", iface ); - if( 0 <= ioctl( ioctl_socket, SIOCGIFFLAGS, &if_data ) ) - { - if( if_data.ifr_flags & IFF_RUNNING ) - { - *running_ptr = true; - } - else - { - *running_ptr = false; - } - - /* reset log flood gate counter */ - get_link_state_throttle = 0 ; - - rc = PASS ; - } - else - { - wlog_throttled (get_link_state_throttle, 100, - "Failed to get %s (%s) interface state (%d:%s)\n", - iface, if_data.ifr_name, errno, strerror(errno)); - } - return ( rc ); -} - -/***************************************************************************** - * - * Name : service_interface_events - * - * Purpose : Service state changes for monitored interfaces - * - *****************************************************************************/ -int service_interface_events ( int nl_socket , int ioctl_socket ) -{ - list links_gone_down ; - list links_gone_up ; - list::iterator iter_curr_ptr ; - rmon_ctrl_type * _rmon_ctrl_ptr; - interface_resource_config_type * ptr; - - _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); - if ( get_netlink_events ( nl_socket, links_gone_down, links_gone_up ) ) - { - for (int i=0; i<_rmon_ctrl_ptr->interface_resources; i++) - { - ptr = get_interface_ptr(i); - if ( ptr->interface_used == true ) - { - - bool running = false ; - - if ( !links_gone_down.empty() ) - { - /* Look at the down list */ - for ( iter_curr_ptr = links_gone_down.begin(); - iter_curr_ptr != links_gone_down.end() ; - iter_curr_ptr++ ) - { - if ( strcmp ( ptr->interface_one, iter_curr_ptr->c_str()) == 0 ) - { - wlog ("link %s is down\n", ptr->interface_one ); - - if ( get_link_state ( ioctl_socket, iter_curr_ptr->c_str(), &running ) == PASS ) - { - wlog ("%s is down (oper:%s)\n", - iter_curr_ptr->c_str(), - running ? "up" : "down" ); - if (!running) - { - ptr->resource_value = INTERFACE_DOWN; - } - else - { - ptr->resource_value = INTERFACE_UP; - } - } - else - { - wlog ("%s is down (driver query failed)\n", iter_curr_ptr->c_str() ); - ptr->resource_value = INTERFACE_DOWN; - } - } - - if (ptr->lagged == true) - { - if ( strcmp ( ptr->interface_two, iter_curr_ptr->c_str()) == 0 ) - { - wlog ("link %s is down\n", ptr->interface_two); - - if ( get_link_state ( ioctl_socket, iter_curr_ptr->c_str(), &running ) == PASS ) - { - wlog ("%s is down (oper:%s)\n", - iter_curr_ptr->c_str(), - running ? "up" : "down" ); - if (!running) - { - ptr->resource_value_lagged = INTERFACE_DOWN; - } - else - { - ptr->resource_value_lagged = INTERFACE_UP; - } - } - else - { - wlog ("%s is down (driver query failed)\n", iter_curr_ptr->c_str() ); - ptr->resource_value_lagged = INTERFACE_DOWN; - } - } - if ( strcmp ( ptr->bond, iter_curr_ptr->c_str()) == 0 ) - { - wlog ("bond: %s is down\n", ptr->bond); - //ptr->resource_value_lagged = INTERFACE_DOWN; - //ptr->resource_value = INTERFACE_DOWN; - } - } - } - } - if ( !links_gone_up.empty() ) - { - //wlog ("one or more links have dropped\n"); - /* Look at the down list */ - for ( iter_curr_ptr = links_gone_up.begin(); - iter_curr_ptr != links_gone_up.end() ; - iter_curr_ptr++ ) - { - - if ( strcmp ( ptr->interface_one, iter_curr_ptr->c_str()) == 0 ) - { - wlog ("link %s is up\n", ptr->interface_one ); - - if ( get_link_state ( ioctl_socket, iter_curr_ptr->c_str(), &running ) == PASS ) - { - wlog ("%s is up (oper:%s)\n", - iter_curr_ptr->c_str(), - running ? "up" : "down" ); - if (!running) - { - ptr->resource_value = INTERFACE_DOWN; - } - else - { - ptr->resource_value = INTERFACE_UP; - } - } - else - { - wlog ("%s is down(driver query failed)\n", iter_curr_ptr->c_str() ); - ptr->resource_value = INTERFACE_DOWN; - } - } - if (ptr->lagged == true) - { - if ( strcmp ( ptr->interface_two, iter_curr_ptr->c_str()) == 0 ) - { - wlog ("link %s is up\n", ptr->interface_two ); - - if ( get_link_state ( ioctl_socket, iter_curr_ptr->c_str(), &running ) == PASS ) - { - wlog ("%s is up (oper:%s)\n", - iter_curr_ptr->c_str(), - running ? "up" : "down" ); - if (!running) - { - ptr->resource_value_lagged = INTERFACE_DOWN; - } - else - { - ptr->resource_value_lagged = INTERFACE_UP; - } - } - else - { - wlog ("%s is down (driver query failed)\n", iter_curr_ptr->c_str() ); - ptr->resource_value_lagged = INTERFACE_DOWN; - } - } - if ( strcmp ( ptr->bond, iter_curr_ptr->c_str()) == 0 ) - { - wlog ("bond: %s is up\n", ptr->bond); - //ptr->resource_value_lagged = INTERFACE_UP; - //ptr->resource_value = INTERFACE_UP; - } - } - } - } - /* set the states for the interface handler */ - service_resource_state( ptr ); - } - } - } - - return (PASS); -} - -/***************************************************************************** - * - * Name : interface_alarming_init - * - * Purpose : Initializes any previously raised interface alarms if rmon is restarted - * - *****************************************************************************/ -void interface_alarming_init ( interface_resource_config_type * ptr) -{ - AlarmFilter alarmFilter; - ptr->failed = false; - rmon_ctrl_type * _rmon_ctrl_ptr; - - _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); - - /* handle active alarms for the interface ports */ - SFmAlarmDataT *intf_alarms = NULL; - unsigned int num_intf_alarms = 0; - strcpy(alarmFilter.alarm_id, ptr->alarm_id_port); - strcpy(alarmFilter.entity_instance_id, _rmon_ctrl_ptr->my_hostname); - strcat(alarmFilter.entity_instance_id, ".port="); - - if (rmon_fm_get(&alarmFilter, &intf_alarms, &num_intf_alarms) == FM_ERR_OK) - { - bool intf_one_found = false; - bool intf_two_found = false; - SFmAlarmDataT *a = intf_alarms; - - for( unsigned int i = 0; i < num_intf_alarms; i++, a++ ) - { - /* only handle specific port alarm */ - if (strncmp(a->alarm_id, ptr->alarm_id_port, sizeof(a->alarm_id)) == 0) - { - /* check interface port one alarm */ - if (!intf_one_found && ptr->interface_one[0] != '\0') - { - if (strstr(a->entity_instance_id, ptr->interface_one)) - { - ptr->failed = true; - intf_one_found = true; - } - } - - /* check interface port two alarm */ - if (!intf_two_found && ptr->interface_two[0] != '\0') - { - if (strstr(a->entity_instance_id, ptr->interface_two)) - { - ptr->failed = true; - intf_two_found = true; - } - } - - /* clear this alarm as it is no longer valid as the interface ports have - changed */ - if (!intf_one_found && !intf_two_found) - { - ilog("clearing alarm %s", a->entity_instance_id); - strcpy(alarmFilter.entity_instance_id, a->entity_instance_id); - rmon_fm_clear (&alarmFilter); - } - } - } - - free(intf_alarms); - } - - /* handle interface alarm */ - SFmAlarmDataT *active_alarm = (SFmAlarmDataT*) malloc (sizeof (SFmAlarmDataT)); - strcpy(alarmFilter.alarm_id, ptr->alarm_id); - strcpy(alarmFilter.entity_instance_id, _rmon_ctrl_ptr->my_hostname); - strcat(alarmFilter.entity_instance_id, ".interface="); - strcat(alarmFilter.entity_instance_id, ptr->resource); - - if (fm_get_fault( &alarmFilter, active_alarm) == FM_ERR_OK) - { - if (active_alarm != NULL) - { - ptr->failed = true; - } - } - - free(active_alarm); - - /* - * If the interface is DOWN, and neither a port - * nor an interface alarm is found for that interface, - * then that implies that the interface was DOWN before - * RMON came up. Consider that as a failed case as well - */ - if (ptr->interface_used && !ptr->failed && - (ptr->resource_value == INTERFACE_DOWN || - ptr->resource_value_lagged == INTERFACE_DOWN)) { - ilog("Interface %s has initial state DOWN. Marked as failed\n", - ptr->resource); - ptr->failed = true; - } - - /* service interface resource */ - if (ptr->failed) - { - ptr->alarm_raised = true; - service_resource_state ( ptr ); - } -} - -/***************************************************************************** - * - * Name : _set_alarm_defaults - * - * Purpose : Set the defaults for the interface and port alarms - * *****************************************************************************/ -void _set_alarm_defaults( interface_resource_config_type * ptr, rmon_ctrl_type * _rmon_ctrl_ptr ) -{ - /* common data for all alarm messages */ - - /* Interface alarms */ - strcpy(alarmData.uuid, ""); - strcpy(alarmData.entity_type_id ,"system.host"); - strcpy(alarmData.entity_instance_id, _rmon_ctrl_ptr->my_hostname); - strcat(alarmData.entity_instance_id, ".interface="); - alarmData.alarm_state = FM_ALARM_STATE_SET; - alarmData.alarm_type = FM_ALARM_OPERATIONAL; - alarmData.probable_cause = FM_ALARM_CAUSE_UNKNOWN; - snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), - "Check cabling and far-end port configuration and status on adjacent equipment."); - alarmData.timestamp = 0; - alarmData.service_affecting = FM_TRUE; - alarmData.suppression = FM_TRUE; - strcpy(alarmData.alarm_id, ptr->alarm_id); - - /* Port One alarms */ - strcpy(alarmDataPOne.uuid, ""); - strcpy(alarmDataPOne.entity_type_id ,"system.host"); - strcpy(alarmDataPOne.entity_instance_id, _rmon_ctrl_ptr->my_hostname); - strcat(alarmDataPOne.entity_instance_id, ".port="); - alarmDataPOne.alarm_state = FM_ALARM_STATE_SET; - alarmDataPOne.alarm_type = FM_ALARM_OPERATIONAL; - alarmDataPOne.probable_cause = FM_ALARM_CAUSE_UNKNOWN; - snprintf(alarmDataPOne.proposed_repair_action , sizeof(alarmDataPOne.proposed_repair_action), - "Check cabling and far-end port configuration and status on adjacent equipment."); - alarmDataPOne.timestamp = 0; - alarmDataPOne.service_affecting = FM_TRUE; - alarmDataPOne.suppression = FM_TRUE; - strcpy(alarmDataPOne.alarm_id, ptr->alarm_id_port); - - /* Port Two alarms */ - strcpy(alarmDataPTwo.uuid, ""); - strcpy(alarmDataPTwo.entity_type_id ,"system.host"); - strcpy(alarmDataPTwo.entity_instance_id, _rmon_ctrl_ptr->my_hostname); - strcat(alarmDataPTwo.entity_instance_id, ".port="); - alarmDataPTwo.alarm_state = FM_ALARM_STATE_SET; - alarmDataPTwo.alarm_type = FM_ALARM_OPERATIONAL; - alarmDataPTwo.probable_cause = FM_ALARM_CAUSE_UNKNOWN; - snprintf(alarmDataPTwo.proposed_repair_action , sizeof(alarmDataPTwo.proposed_repair_action), - "Check cabling and far-end port configuration and status on adjacent equipment."); - alarmDataPTwo.timestamp = 0; - alarmDataPTwo.service_affecting = FM_TRUE; - alarmDataPTwo.suppression = FM_TRUE; - strcpy(alarmDataPTwo.alarm_id, ptr->alarm_id_port); -} - -/***************************************************************************** - * - * Name : interface_handler - * - * Purpose : Handle the failed interfaces and raise alarms through - * the FM API as well as sending events to registered clients - *****************************************************************************/ -void interface_handler( interface_resource_config_type * ptr ) -{ - #define MAX_CLEAR_COUNT (10) - AlarmFilter alarmFilter; - bool portOne = false; - bool portTwo = false; - - rmon_ctrl_type * _rmon_ctrl_ptr; - - _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); - - if ( ptr->stage < RMON_STAGE__STAGES ) - { - dlog2 ("%s %s Stage %d\n", ptr->resource, rmonStages_str[ptr->stage], ptr->stage ); - } - else - { - interfaceResourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - - switch ( ptr->stage ) - { - case RMON_STAGE__START: - { - dlog ( "%s failed:%d set_cnt:%d debounce_cnt:%d\n", - ptr->resource, - ptr->failed, - ptr->count, - ptr->debounce_cnt); - break ; - } - case RMON_STAGE__MANAGE: - - - { - /* sets alarms if thresholds are crossed */ - if (ptr->alarm_status == ALARM_ON) - { - - _set_alarm_defaults( ptr, _rmon_ctrl_ptr ); - - /* Interface and Port alarming */ - if (strcmp(ptr->resource, OAM_INTERFACE_NAME) == 0) - { - if ( ptr->sev == SEVERITY_CRITICAL ) - { - alarmData.severity = FM_ALARM_SEVERITY_CRITICAL; - ilog ("'OAM' Interface failed. \n"); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "'OAM' Interface failed."); - } - else if ( ptr->sev == SEVERITY_MAJOR ) - { - alarmData.severity = FM_ALARM_SEVERITY_MAJOR; - ilog ("'OAM' Interface degraded. \n"); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "'OAM' Interface degraded."); - } - - if ((ptr->interface_one[0] != '\0') && (ptr->resource_value == INTERFACE_DOWN)) - { - portOne = true; - ilog ("'OAM' Port failed. \n"); - snprintf(alarmDataPOne.reason_text, sizeof(alarmDataPOne.reason_text), - "'OAM' Port failed."); - /* Set port name in entity instance ID */ - strcat(alarmDataPOne.entity_instance_id, ptr->interface_one); - - } - if ((ptr->interface_two[0] != '\0') && (ptr->resource_value_lagged == INTERFACE_DOWN)) - { - portTwo = true; - ilog ("'OAM' Port failed. \n"); - snprintf(alarmDataPTwo.reason_text, sizeof(alarmDataPTwo.reason_text), - "'OAM' Port failed."); - /* Set port name in entity instance ID */ - strcat(alarmDataPTwo.entity_instance_id, ptr->interface_two); - } - } - else if (strcmp(ptr->resource, MGMT_INTERFACE_NAME) == 0) - { - if ( ptr->sev == SEVERITY_CRITICAL ) - { - alarmData.severity = FM_ALARM_SEVERITY_CRITICAL; - ilog ("'MGMT' Interface failed. \n"); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "'MGMT' Interface failed."); - } - else if ( ptr->sev == SEVERITY_MAJOR ) - { - alarmData.severity = FM_ALARM_SEVERITY_MAJOR; - ilog ("'MGMT' Interface degraded. \n"); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "'MGMT' Interface degraded."); - } - - if ((ptr->interface_one[0] != '\0') && (ptr->resource_value == INTERFACE_DOWN)) - { - portOne = true; - ilog ("'MGMT' Port failed. \n"); - snprintf(alarmDataPOne.reason_text, sizeof(alarmDataPOne.reason_text), - "'MGMT' Port failed."); - /* Set port name in entity instance ID */ - strcat(alarmDataPOne.entity_instance_id, ptr->interface_one); - - } - if ((ptr->interface_two[0] != '\0') && (ptr->resource_value_lagged == INTERFACE_DOWN)) - { - portTwo = true; - ilog ("'MGMT' Port failed. \n"); - snprintf(alarmDataPTwo.reason_text, sizeof(alarmDataPTwo.reason_text), - "'MGMT' Port failed."); - /* Set port name in entity instance ID */ - strcat(alarmDataPTwo.entity_instance_id, ptr->interface_two); - } - } - else if (strcmp(ptr->resource, INFRA_INTERFACE_NAME) == 0) - { - if ( ptr->sev == SEVERITY_CRITICAL ) - { - alarmData.severity = FM_ALARM_SEVERITY_CRITICAL; - ilog ("'INFRA' Interface failed. \n"); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "'INFRA' Interface failed."); - } - else if ( ptr->sev == SEVERITY_MAJOR ) - { - alarmData.severity = FM_ALARM_SEVERITY_MAJOR; - ilog ("'INFRA' Interface degraded. \n"); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "'INFRA' Interface degraded."); - } - - if ((ptr->interface_one[0] != '\0') && (ptr->resource_value == INTERFACE_DOWN)) - { - portOne = true; - ilog ("'INFRA' Port failed. \n"); - snprintf(alarmDataPOne.reason_text, sizeof(alarmDataPOne.reason_text), - "'INFRA' Port failed."); - /* Set port name in entity instance ID */ - strcat(alarmDataPOne.entity_instance_id, ptr->interface_one); - - } - if ((ptr->interface_two[0] != '\0') && (ptr->resource_value_lagged == INTERFACE_DOWN)) - { - portTwo = true; - ilog ("'INFRA' Port failed. \n"); - snprintf(alarmDataPTwo.reason_text, sizeof(alarmDataPTwo.reason_text), - "'INFRA' Port failed."); - /* Set port name in entity instance ID */ - strcat(alarmDataPTwo.entity_instance_id, ptr->interface_two); - } - } - snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), - "%s major_threshold_set",ptr->resource); - - /* Set interface name in entity instance ID */ - strcat(alarmData.entity_instance_id, ptr->resource); - - dlog("Creating Interface Alarm: %s \n", ptr->resource); - if (rmon_fm_set(&alarmData, NULL) == FM_ERR_OK ) - { - ilog("Alarm created for resource: %s \n", ptr->resource); - ptr->alarm_raised = true; - } - else - { - ilog("Alarm create for resource: %s failed \n", ptr->resource); - } - - - if (portOne) - { - alarmDataPOne.severity = FM_ALARM_SEVERITY_MAJOR; - dlog("Creating Port One Alarm: %s \n", ptr->resource); - if (rmon_fm_set(&alarmDataPOne, NULL) == FM_ERR_OK ) - { - ilog("Alarm created for resource: %s port one \n", ptr->resource); - } else - { - ilog("Alarm create for resource: %s port one failed \n", ptr->resource); - } - } - - if (portTwo) - { - alarmDataPTwo.severity = FM_ALARM_SEVERITY_MAJOR; - dlog("Creating Port Two Alarm: %s \n", ptr->resource); - if (rmon_fm_set(&alarmDataPTwo, NULL) == FM_ERR_OK ) - { - ilog("Alarm created for resource: %s port two \n", ptr->resource); - } else - { - ilog("Alarm create for resource: %s port two failed \n", ptr->resource); - } - } - - - if (ptr->alarm_raised) - { - if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND)) - { - if ( send_interface_msg ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) - { - wlog ("%s request send failed \n", ptr->resource); - ptr->failed_send++; - } - else - { - ptr->failed_send = 0; - } - interfaceResourceStageChange ( ptr, RMON_STAGE__MONITOR_WAIT ); - } - else - { - ptr->failed_send = 0; - interfaceResourceStageChange ( ptr, RMON_STAGE__MONITOR_WAIT ); - } - } - } - else - { - interfaceResourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - break; - } - - case RMON_STAGE__MONITOR_WAIT: - { - if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND) && (ptr->failed_send > 0)) - { - if ( send_interface_msg ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) - { - wlog ("%s request send failed \n", ptr->resource); - ptr->failed_send++; - } - else - { - ptr->failed_send = 0; - } - } - break; - } - - case RMON_STAGE__FAILED_CLR: - { - /* clear raised port alarms if one port comes back up */ - if (ptr->alarm_raised) - { - SFmAlarmDataT *active_alarm = (SFmAlarmDataT*) malloc (sizeof (SFmAlarmDataT)); - strcpy(alarmFilter.alarm_id, ptr->alarm_id_port); - - if (ptr->interface_one[0] != '\0') - { - strcpy(alarmFilter.entity_instance_id, _rmon_ctrl_ptr->my_hostname); - strcat(alarmFilter.entity_instance_id, ".port="); - strcat(alarmFilter.entity_instance_id, ptr->interface_one); - - if (fm_get_fault( &alarmFilter, active_alarm) == FM_ERR_OK) - { - if (active_alarm != NULL) - { - if ( ptr->resource_value == INTERFACE_UP ) - { - if (rmon_fm_clear(&alarmFilter) == FM_ERR_OK) - { - ilog ("Cleared alarms for port one, interface: %s \n", ptr->resource); - } - else - { - ilog ("Failed to cleared alarms for port one, interface: %s \n", ptr->resource); - } - } - } - } - } - - if (ptr->interface_two[0] != '\0') - { - strcpy(alarmFilter.entity_instance_id, _rmon_ctrl_ptr->my_hostname); - strcat(alarmFilter.entity_instance_id, ".port="); - strcat(alarmFilter.entity_instance_id, ptr->interface_two); - - if (fm_get_fault( &alarmFilter, active_alarm) == FM_ERR_OK) - { - if (active_alarm != NULL) - { - if ( ptr->resource_value_lagged == INTERFACE_UP ) - { - if (rmon_fm_clear(&alarmFilter) == FM_ERR_OK) - { - ilog ("Cleared alarms for port two, interface: %s \n", ptr->resource); - } - else - { - ilog ("Failed to cleared alarms for port two, interface: %s \n", ptr->resource); - } - } - } - } - } - free(active_alarm); - } - - interfaceResourceStageChange ( ptr, RMON_STAGE__MANAGE); - break; - } - - case RMON_STAGE__FINISH: - { - - if ((ptr->alarm_status == ALARM_ON) && (ptr->alarm_raised)) - { - strcpy(alarmFilter.alarm_id, ptr->alarm_id_port); - - if (ptr->interface_one[0] != '\0') - { - /* clear port one alarm */ - strcpy(alarmFilter.entity_instance_id,_rmon_ctrl_ptr->my_hostname); - strcat(alarmFilter.entity_instance_id, ".port="); - strcat(alarmFilter.entity_instance_id, ptr->interface_one); - - EFmErrorT err = rmon_fm_clear(&alarmFilter); - if ((err == FM_ERR_OK) || (err == FM_ERR_ENTITY_NOT_FOUND)) - { - ilog ("Cleared alarms for port one, interface: %s \n", ptr->resource); - } - else - { - ilog ("Failed to cleared alarm for port one, interface: %s (rc:%d)\n", ptr->resource, err); - } - } - if (ptr->interface_two[0] != '\0') - { - /* clear port two alarm */ - strcpy(alarmFilter.entity_instance_id,_rmon_ctrl_ptr->my_hostname); - strcat(alarmFilter.entity_instance_id, ".port="); - strcat(alarmFilter.entity_instance_id, ptr->interface_two); - - EFmErrorT err = rmon_fm_clear(&alarmFilter); - if ((err == FM_ERR_OK) || (err == FM_ERR_ENTITY_NOT_FOUND)) - { - ilog ("Cleared alarms for port two, interface: %s \n", ptr->resource); - } - else - { - elog ("Failed to cleared alarms for port two, interface: %s (rc:%d)\n", ptr->resource, err ); - } - } - - /* clear interface alarm */ - strcpy(alarmFilter.alarm_id, ptr->alarm_id); - strcpy(alarmFilter.entity_instance_id,_rmon_ctrl_ptr->my_hostname); - strcat(alarmFilter.entity_instance_id, ".interface="); - strcat(alarmFilter.entity_instance_id, ptr->resource); - - EFmErrorT err = rmon_fm_clear(&alarmFilter); - if ((err == FM_ERR_OK) || (err == FM_ERR_ENTITY_NOT_FOUND)) - { - ilog ("Cleared alarms for interface: %s \n", ptr->resource); - snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), - "%s cleared_alarms_for_resource:", ptr->resource); - - if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND)) - { - while (( send_interface_msg ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) && - ( ptr->failed_send < MAX_FAIL_SEND )) - { - wlog ("%s request send failed \n", ptr->resource); - ptr->failed_send++; - } - ptr->alarm_raised = false; - ptr->failed_send = 0; - ptr->failed = false ; - ptr->count = 0; - ptr->sev = SEVERITY_CLEARED ; - ptr->stage = RMON_STAGE__START ; - } - else - { - ptr->alarm_raised = false; - ptr->failed_send = 0; - ptr->failed = false ; - ptr->count = 0 ; - ptr->sev = SEVERITY_CLEARED ; - ptr->stage = RMON_STAGE__START ; - } - } - else - { - wlog ("%s alarm clear failed (rc:%d)\n", ptr->resource, err); - } - } - else - { - ptr->failed_send = 0; - ptr->failed = false ; - ptr->count = 0 ; - ptr->sev = SEVERITY_CLEARED ; - ptr->stage = RMON_STAGE__START ; - } - break ; - } - - default: - { - slog ("%s Invalid stage (%d)\n", ptr->resource, ptr->stage ); - - /* Default to finish for invalid case. - * If there is an issue then it will be detected */ - interfaceResourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - } -} diff --git a/mtce/src/rmon/remoteloggingHdlr.cpp b/mtce/src/rmon/remoteloggingHdlr.cpp deleted file mode 100644 index 0a8e3eba..00000000 --- a/mtce/src/rmon/remoteloggingHdlr.cpp +++ /dev/null @@ -1,376 +0,0 @@ -/* - * Copyright (c) 2013-2017 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - -/** - * @file - * Wind River Titanium Cloud Platform remote logging Monitor Handler - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for storing dynamic resource names */ -#include -#include -#include "rmon.h" /* rmon header file */ -#include "rmonHttp.h" /* for rmon HTTP libEvent utilties */ -#include "rmonApi.h" -#include -#include -#include -#include -#include -#include -#include /* for ... RTMGRP_LINK */ -#include "nlEvent.h" /* for ... open_netlink_socket */ -#include "nodeEvent.h" /* for inotify */ -#include /* for ... json-c json string parsing */ -#include "jsonUtil.h" -#include -#include -#include - -static libEvent_type remoteLoggingAudit; // for system remotelogging-show - -static inline SFmAlarmDataT -create_remoteLogging_controller_connectivity_alarm (SFmAlarmDataT data, - AlarmFilter filter) -{ - snprintf (data.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", - filter.alarm_id); - data.alarm_state = FM_ALARM_STATE_SET; - snprintf(data.entity_type_id, FM_MAX_BUFFER_LENGTH, "system.host"); - snprintf(data.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", - filter.entity_instance_id); - data.severity = FM_ALARM_SEVERITY_MINOR; - snprintf(data.reason_text, sizeof(data.reason_text), - "Controller cannot establish connection with remote logging server."); - data.alarm_type = FM_ALARM_COMM; - data.probable_cause = FM_ALARM_COMM_SUBSYS_FAILURE; - data.service_affecting = FM_FALSE; - snprintf(data.proposed_repair_action, sizeof(data.proposed_repair_action), - "Ensure Remote Log Server IP is reachable from Controller through " - "OAM interface; otherwise contact next level of support."); - return data; -} - -// alarm data for all remote loggin alarms -static SFmAlarmDataT alarmData; - -int rmonHdlr_remotelogging_query (resource_config_type * ptr); - -// this is used to create a buffer to store output from a command -// that gets the connection status of a port. -// the command filters the /proc/net/tcp(udp) files leaving only the status -// you generally expect a 1 character integer value for the status -#define CONNECTION_STATUS_COMMAND_OUTPUT_BUFFER_SIZE 8 - -/***************************************************************************** - * - * Name : rmonHdlr_remotelogging_handler - * - * Purpose : Handles the remote logging response message - * - *****************************************************************************/ -void rmonHdlr_remotelogging_handler ( struct evhttp_request *req, void *arg ) -{ - - if ( !req ) - { - elog (" Request Timeout\n"); - remoteLoggingAudit.status = FAIL_TIMEOUT ; - goto _remote_logging_handler_done ; - } - - remoteLoggingAudit.status = rmonHttpUtil_status ( remoteLoggingAudit ) ; - if ( remoteLoggingAudit.status == HTTP_NOTFOUND ) - { - - goto _remote_logging_handler_done ; - } - else if ( remoteLoggingAudit.status != PASS ) - { - dlog (" remote logging HTTP Request Failed (%d)\n", - remoteLoggingAudit.status); - - goto _remote_logging_handler_done ; - } - - if ( rmonHttpUtil_get_response ( remoteLoggingAudit ) != PASS ) - goto _remote_logging_handler_done ; - -_remote_logging_handler_done: - /* This is needed to get out of the loop */ - event_base_loopbreak((struct event_base *)arg); -} - -/***************************************************************************** - * - * Name : rmonHdlr_remotelogging_query - * - * Purpose : Send a HTTP remotelogging show request - * - *****************************************************************************/ - -int rmonHdlr_remotelogging_query (resource_config_type * ptr) -{ - - // we want this handler to run once every 5 minutes - // rmon currently runs once every 30 seconds - static bool first_execution = true; - static int exec_counter = 9; - exec_counter = (exec_counter + 1) % 10; - if(exec_counter != 0) - { - return 0; - } - // extract the ip and port for the remote logging server - FILE* pFile; - string remote_ip_address = ""; - string remote_port = ""; - string transport_type = ""; - string line; - bool feature_enabled = false; - - std::ifstream syslog_config("/etc/syslog-ng/syslog-ng.conf"); - // look for this line in the config file: - // destination remote_log_server {tcp("128.224.140.219" port(514));}; - while(std::getline(syslog_config, line)) - { - // include remotelogging.conf is present if the feature is enabled - if(line.find("@include \"remotelogging.conf\"") == 0) - { - feature_enabled = true; - } - if(line.find("destination remote_log_server") != 0) - { - continue; - } - int start = line.find("{") + 1; - int end = line.find("(", start + 1); - transport_type= line.substr(start, end - start); - start = line.find("\"") + 1; - end = line.find("\"", start + 1); - remote_ip_address = line.substr(start, end - start); - start = line.find("port(") + 5; - end = line.find(")", start + 1); - remote_port = line.substr(start, end - start); - } - - syslog_config.close(); - - // cleanup of any alarms if the remotelogging feature is not enabled - // this is important for when users turn off the remote logging feature when an alarm is active - // if the line containing this information is not in config, remote logging is not used - if(remote_ip_address.empty() || remote_port.empty() || transport_type.empty() || !feature_enabled) - { - // currently, only controllers raise alarms - if(is_controller()) - { - // clear any raised alarms - if(ptr->alarm_raised) - { - rmon_ctrl_type* _rmon_ctrl_ptr; - _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); - AlarmFilter alarmFilter; - snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, REMOTE_LOGGING_CONTROLLER_CONNECTIVITY_ALARM_ID); - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, - "%s", _rmon_ctrl_ptr->my_hostname); - int rc; - if ((rc = rmon_fm_clear(&alarmFilter)) != FM_ERR_OK) - { - wlog ("Failed to clear stale remotelogging connectivity alarm for" - "entity instance id: %s error: %d", - alarmFilter.entity_instance_id, rc); - } - else - { - ptr->alarm_raised = false; - } - } - } - return 0; - } - - // construct the remote logging server IP string - // the files being looked at(/proc/net/tcp(udp)) uses hex values, so convert the - // string we got from the config file to that format - // - convert all numbers to hex and hex to capitals - // reverse ordering of the "ipv4" values - std::stringstream config_ip(remote_ip_address); // the ip string from the config file - std::stringstream proc_file_ip; // the ip string formatted to compare to /proc/net/tcp(udp) - int ipv = 4; - - // IPv4 - if(remote_ip_address.find(".") != string::npos) - { - // ipv4 example: config file 10.10.10.45, /proc/net/tcp 2D0A0A0A - int a, b, c, d; - char trash; - config_ip >> a >> trash >> b >> trash >> c >> trash >> d; - proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << d; - proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << c; - proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << b; - proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << a; - proc_file_ip << ":"; - proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(4) << atoi(remote_port.c_str()); - } - // IPv6 - else if (remote_ip_address.find(":") != string::npos) - { - ipv = 6; - // ipv6 example: config file 0:0:0:0:ffff:0:80e0:8d6c , /proc/net/tcp6 0000000000000000FFFF00006C8D0E080 - int a, b, c, d; - char trash; - // first, the hex that are in the same order from config to /proc/net/... - for(int i = 0; i < 6; i++) - { - config_ip >> std::hex >> a >> trash; - proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(4) << a; - } - - // now the hex that needs to be re ordered - config_ip >> std::hex >> a >> trash >> c; - b = (a & 0xFF); - a = (a >> 8); - d = (c & 0xFF); - c = (c >> 8); - - proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << d; - proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << c; - proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << b; - proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << a; - proc_file_ip << ":"; - proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(4) << atoi(remote_port.c_str()); - } - // garbage - else - { - wlog("Unrecognized ip format in syslog config file\n"); - } - - string connection_check_filename; - if(transport_type == "tcp") - { - connection_check_filename = "tcp"; - } - else if (transport_type == "udp") - { - connection_check_filename = "udp"; - } - // todo: eventually we will have TLS as a transport type and potentially others - else - { - wlog("Unrecognized remote logging transport type: %s \n", transport_type.c_str()); - } - - if(ipv == 6) - { - connection_check_filename = connection_check_filename + "6"; - } - - std::string command = "cat /proc/net/" + connection_check_filename +" | awk '{print $3 \" \" $4}' | grep " \ - + proc_file_ip.str() + " | awk '{print $2}'"; - if(!(pFile = popen(command.c_str(), "r"))) - { - elog ("Failed to execute command for getting remotelogging tcp port status"); - } - else - { - char cmd_output[CONNECTION_STATUS_COMMAND_OUTPUT_BUFFER_SIZE]; - int connection_status = 0; - rmon_ctrl_type* _rmon_ctrl_ptr; - _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); - AlarmFilter alarmFilter; - SFmAlarmDataT active_alarm; - - memset(cmd_output, 0, CONNECTION_STATUS_COMMAND_OUTPUT_BUFFER_SIZE); - fgets((char*) &cmd_output, CONNECTION_STATUS_COMMAND_OUTPUT_BUFFER_SIZE, pFile); - pclose(pFile); - std::stringstream s(cmd_output); - s >> std::hex >> connection_status; - - snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, REMOTE_LOGGING_CONTROLLER_CONNECTIVITY_ALARM_ID); - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, - "%s", _rmon_ctrl_ptr->my_hostname); - - if(first_execution) - { - if (fm_get_fault (&alarmFilter, &active_alarm) == FM_ERR_OK) - { - ptr->alarm_raised = true; - } - else - { - ptr->alarm_raised = false; - } - } - if(connection_status == 1) - { - if(is_controller()) - { - // connection is established, clear the alarm - if(ptr->alarm_raised) - { - int rc; - if ((rc = rmon_fm_clear(&alarmFilter)) != FM_ERR_OK) - { - wlog ("Failed to clear stale remotelogging connectivity alarm for" - "entity instance id: %s error: %d", - alarmFilter.entity_instance_id, rc); - } - else - { - ptr->alarm_raised = false; - } - } - } - } - else - { - if(is_controller()) - { - // connection is not established, raise an alarm - if (!ptr->alarm_raised) - { - int rc; - alarmData = \ - create_remoteLogging_controller_connectivity_alarm(alarmData, - alarmFilter); - - if ((rc = rmon_fm_set(&alarmData, NULL)) != FM_ERR_OK) - { - wlog("Failed to create alarm %s for entity instance id: %s" - "error: %d \n", REMOTE_LOGGING_CONTROLLER_CONNECTIVITY_ALARM_ID, - alarmData.entity_instance_id, (int) rc); - } - else - { - ptr->alarm_raised = true; - } - } - } - else - { - elog ("%s cannot connect to remote log server", _rmon_ctrl_ptr->my_hostname); - } - } - } - - return 0; -} - diff --git a/mtce/src/rmon/rmon.h b/mtce/src/rmon/rmon.h deleted file mode 100755 index 3d68e287..00000000 --- a/mtce/src/rmon/rmon.h +++ /dev/null @@ -1,852 +0,0 @@ -#ifndef __INCLUDE_RMON_HH__ -#define __INCLUDE_RMON_HH__ -/* - * Copyright (c) 2013-2017 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - -/* - * This implements the CGCS Resource Monitor ; /usr/local/bin/rmond - * The Resource monitor or rmon is a utility to provide: cpu, memory and - * filesystem usage and alarm stats both to the user and to registered client - * processes on the host it is running on. - * - * Call trace is as follows: - * daemon_init - * rmon_timer_init - * rmon_hdlr_init - * daemon_files_init - * daemon_signal_init - * daemon_configure - * ini_parse - * get_debug_options - * get_iface_macaddr - * get_iface_address - * get_iface_hostname - * socket_init - * rmon_msg_init - * setup_tx_port - * - * daemon_service_run - * wait for goenable signal - * rmon_send_event ( READY ) - * rmon_service - * _config_dir_load - * _config_files_load - * _forever - * service_events: - * _get_events every audit period seconds - * resource_handler handles the resource values and sends - * alarm messages through fm api to set or clear resource - * thresholds as well as notifying registered clients through - * the rmon client api. - * - * - * - * - * This daemon waits for a "goenabled" signal an then reads all the resource - * configuration files in: /etc/rmon.d and begins monitoring them accordingly. - * A resource confguration file is expected to contain the following information: - * - * [resource] - * resource = ; name of resource being monitored - * debounce = ; number of seconds to wait before degrade clear - * severity = ; minor, major, critical - * minor_threshold = ; minor resource utilization threshold - * major_threshold = ; major resource utilization threshold - * critical_threshold = ; critical resource utilization threshold - * num_tries = ; number of tries before the alarm is raised or cleared - * alarm_on = ; dictates whether maintainance gets alarms from rmon - * 1 for on, 0 for off - * - * Here is how it works ... - * - * Every audit period seconds the resources defined in the config files get - * monitored. If the resource ie. CPU usage crosses a threshold: - * (minor, major or critical) count times an alarm is raised and message is sent to - * all clients registered for the resource. If the resource usage drops below - * that threshold count times, the alarms are cleared and a message is sent to - * all registered clients in order to clear the alarm. The audit period as well as - * other rmon config options are specifiedin the: /etc/mtc/rmond.conf file with - * the following (example) information: - * - * ; CGTS Resource Monitor Configuration File - * [config] ; Configuration - * audit_period = 10 ; Resource polling period in seconds (1 - 120) - * - * rmon_tx_port = 2101 ; Transmit Event and Command Reply Port - * per_node = 0 ; enable (1) or disable (0) memory checking per processor node - * rmon_api_port = 2300 : Resource Monitor API Receive Port - * - * [defaults] - * - * [timeouts] - * start_delay = 10 ; managed range 1 .. 120 seconds - * - * [features] - * - * [debug] ; SIGHUP to reload - * debug_timer = 0 ; enable(1) or disable(0) timer logs (tlog) - * debug_msg = 0 ; enable(1) or disable(0) message logs (mlog) - * debug_state = 0 ; enable(1) or disable(0) state change logs (clog) - * debug_level = 0 ; decimal mask 0..15 (8,4,2,1) - * - * flush = 0 ; enable(1) or disable(0) force log flush (main loop) - * flush_thld = 2 ; if enabled - force flush after this number of loops - * - * debug_event = none ; Not used - * debug_filter = none ; Not used - * stress_test = 0 ; In-Service Stress test number - * - * To check the alarms that are raised the command: - * system alarm-list can be used. Rmon alarms have the following codes: - * - * 100.101: CPU usage threshold crossed - * 100.102: vSwitch CPU usage threshold crossed - * 100.103: Memory usage threshold crossed - * 100.104: Filesystem usage threshold crossed - * - * To register your process for rmon notifications using the rmon client api - * please see the files: rmon_api.h for usage of the api as well as: - * rmon_api_client_test.cpp and rmon_api_client_test.h for an example - * implementation for your process. - * - */ - /** - * @file - * Wind River CGCS Platform Resource Monitor Service Header - */ - -#include -#include -#include -#include -#include -#include -#include -#include /* for hostent */ -#include -#include -#include -#include -#include -#include -#include /* for close and usleep */ -#include -#include /* for round */ -#include "nodeBase.h" -#include "daemon_ini.h" /* Ini Parser Header */ -#include "daemon_common.h" /* Common definitions and types for daemons */ -#include "daemon_option.h" /* Common options for daemons */ -#include "nodeTimers.h" /* maintenance timer utilities start/stop */ -#include "nodeUtil.h" /* common utilities */ -#include "tokenUtil.h" /* for ... keystone_config_handler */ -#include "fmAPI.h" -#include "httpUtil.h" /* for ... libEvent */ -extern "C" -{ - #include "rmon_api.h" /* for ... resource monitoring utilities */ -} -/** - * @addtogroup RMON_base - * @{ - */ -using namespace std; - -#ifdef __AREA__ -#undef __AREA__ -#endif -#define __AREA__ "mon" - -/* openstack Identity version */ -#define OS_IDENTITY_VERSION_PREFIX ((const char *)"/v3") - -/* Config and resource files used by rmon */ -#define CONFIG_DIR ((const char *)"/etc/rmon.d") -#define INT_CONFIG_DIR ((const char *)"/etc/rmon_interfaces.d") -#define COMPUTE_VSWITCH_DIR ((const char *)"/etc/nova/compute_extend.conf") -#define COMPUTE_RESERVED_CONF ((const char *)"/etc/platform/worker_reserved.conf") -#define DYNAMIC_FS_FILE ((const char *)"/etc/rmonfiles.d/dynamic.conf") -#define STATIC_FS_FILE ((const char *)"/etc/rmonfiles.d/static.conf") - -// this path is different in Wind River Linux vs. CentOS. -// For the latter, we shall look specifically within -// the bonding interface device directory -#define INTERFACES_DIR ((const char *)"/sys/class/net/") -#define PLATFORM_DIR ((const char *)"/etc/platform/platform.conf") - -#define MOUNTS_DIR ((const char *)"/proc/mounts") -#define COMPUTE_CONFIG_PASS ((const char *)"/var/run/.config_pass") -#define COMPUTE_CONFIG_FAIL ((const char *)"/var/run/.config_fail") -#define RMON_FILES_DIR ((const char *)"/etc/rmonfiles.d") -#define NTPQ_QUERY_SCRIPT ((const char *)"query_ntp_servers.sh") - -/* Constant search keys used to update rmon resource usage */ -#define CPU_RESOURCE_NAME ((const char *)"Platform CPU Usage") -#define V_CPU_RESOURCE_NAME ((const char *)"vSwitch CPU Usage") -#define MEMORY_RESOURCE_NAME ((const char *)"Platform Memory Usage") -#define FS_RESOURCE_NAME ((const char *)"Platform Filesystem Usage") -#define INSTANCE_RESOURCE_NAME ((const char *)"Platform Nova Instances") -#define V_MEMORY_RESOURCE_NAME ((const char *)"vSwitch Memory Usage") -#define V_PORT_RESOURCE_NAME ((const char *)"vSwitch Port Usage") -#define V_INTERFACE_RESOURCE_NAME ((const char *)"vSwitch Interface Usage") -#define V_LACP_INTERFACE_RESOURCE_NAME ((const char *)"vSwitch LACP Interface Usage") -#define V_OVSDB_RESOURCE_NAME ((const char *)"vSwitch OVSDB Usage") -#define V_NETWORK_RESOURCE_NAME ((const char *)"vSwitch Network Usage") -#define V_OPENFLOW_RESOURCE_NAME ((const char *)"vSwitch Openflow Usage") -#define V_CINDER_THINPOOL_RESOURCE_NAME ((const char *)"Cinder LVM Thinpool Usage") -#define V_NOVA_THINPOOL_RESOURCE_NAME ((const char *)"Nova LVM Thinpool Usage") -#define REMOTE_LOGGING_RESOURCE_NAME ((const char *)"Remote Logging Connectivity") - -/* dynamic resources used for thin provisioning monitoring */ -#define CINDER_VOLUMES ((const char *)"cinder-volumes") -#define NOVA_LOCAL ((const char *)"nova-local") - -#define RMON_RESOURCE_NOT ((const char *)"read_dynamic_file_system") -#define RESPONSE_RMON_RESOURCE_NOT ((const char *)"/var/run/.dynamicfs_registered") - -#define POSTGRESQL_FS_PATH ((const char *)"/var/lib/postgresql") - -#define RESOURCE_DISABLE (0) - -/* Thin provisioning metadata monitoring */ -#define THINMETA_FSM_RETRY 3 -#define THINMETA_CONFIG_SECTION "thinpool_metadata" -#define THINMETA_DEFAULT_CRITICAL_THRESHOLD 0 // feature is disabled by default -#define THINMETA_DEFAULT_ALARM_ON 1 // alarm is enabled -#define THINMETA_DEFAULT_AUTOEXTEND_ON 1 // autoextend is enabled (only if monitoring is enabled!) -#define THINMETA_DEFAULT_AUTOEXTEND_BY 20 // autoextend by 20%, same as example in /etc/lvm/lvm.conf -#define THINMETA_DEFAULT_AUTOEXTEND_PERCENT 1 // autoextend by a percentage -#define THINMETA_DEFAULT_AUDIT_PERIOD 10 // seconds to perform audit, same as LVM (broken) audit of lvmetad -#define THINMETA_RESULT_BUFFER_SIZE (1024) // result for lvm commands may be bigger than default BUFFER_SIZE -#define THINMETA_INVALID_NAME ((const char *) "invalid name!") - -/* Constant search keys used to update rmon interface usage */ -#define MGMT_INTERFACE_NAME ((const char *)"mgmt") -#define INFRA_INTERFACE_NAME ((const char *)"infra") -#define OAM_INTERFACE_NAME ((const char *)"oam") -#define MGMT_INTERFACE_FULLNAME ((const char *)"management_interface") -#define OAM_INTERFACE_FULLNAME ((const char *)"oam_interface") -#define INFRA_INTERFACE_FULLNAME ((const char *)"infrastructure_interface") - - /* Daemon Config Constants */ -#define CONFIG_AUDIT_PERIOD 1 -#define PM_AUDIT_PERIOD 15 -#define NTP_AUDIT_PERIOD 600 //10 minutes -#define NTPQ_CMD_TIMEOUT 60 //1 minute -#define CONFIG_TX_PORT 2 -#define CONFIG_RX_PORT 4 -#define CONFIG_CRITICAL_THR 5 -#define CONFIG_NODE 12 -#define CONFIG_START_DELAY 20 - -/* rmon resource default percent thresholds */ -#define DEFAULT_MINOR (80) -#define DEFAULT_MAJOR (90) -#define DEFAULT_CRITICAL (95) -#define UNUSED_CRITICAL (101) - -/* processor node0 default memory thresholds */ -#define DEFAULT_MINOR_ABS_NODE0 (512) -#define DEFAULT_MAJOR_ABS_NODE0 (307) -#define DEFAULT_CRITICAL_ABS_NODE0 (102) -#define UNUSED_CRITICAL_ABS_NODE0 (0) - -/* processor node1 default memory thresholds */ -#define DEFAULT_MINOR_ABS_NODE1 (0) -#define DEFAULT_MAJOR_ABS_NODE1 (0) -#define DEFAULT_CRITICAL_ABS_NODE1 (0) - -/* absolute threshold array index */ -#define RMON_MINOR_IDX (0) -#define RMON_MAJOR_IDX (1) -#define RMON_CRITICAL_IDX (2) - -/* Defualt startup settings */ -#define DEFAULT_NUM_TRIES (2) /* Number of tries before an alarm is set or cleared */ -#define DEFAULT_ALARM_STATUS (1) /* Alarms are on by default */ -#define DEFAULT_PERCENT (1) /* Percentage thresholds are used by default */ -#define PERCENT_USED (1) /* Percent is used for the resource */ -#define PERCENT_UNUSED (0) /* Absolute values are used for the resource */ -#define DYNAMIC_ALARM (1) /* Filesystem alarm is a dynamic alarm, persisting among nodes */ -#define STATIC_ALARM (2) /* Filesystem alarm is a local, static resource */ -#define STANDARD_ALARM (3) /* Alarm is not a filesystem alarm */ - -#define HUGEPAGES_NODE 0 /* 0 or 1 for per hugepages node memory stats */ -#define PROCESSOR_NODE 0 /* 0 or 1 for per processor node memory stats */ -#define ALARM_OFF 0 /* Do not notify maintainance if alarm off */ -#define ALARM_ON 1 /* Notify maintainance if alarm on */ -#define PASS (0) -#define FAIL (1) - -/* Monitored Resource severity levels */ -#define SEVERITY_MINOR 0 -#define SEVERITY_MAJOR 1 -#define SEVERITY_CRITICAL 2 -#define SEVERITY_CLEARED 3 -#define MINORLOG_THRESHOLD (20) -#define PROCLOSS_THRESHOLD (5) -#define MAX_RESOURCES (100) -#define MAX_FILESYSTEMS (100) -#define MAX_BASE_CPU (100) - -#define DEGRADE_CLEAR_MSG ((const char *)("cleared_degrade_for_resource")) - -/* File System Custum Thresholds */ -#define TMPFS_MINOR (8) -#define TMPFS_MAJOR (6) -#define TMPFS_CRITICAL (4) -#define BOOTFS_MINOR (200) -#define BOOTFS_MAJOR (100) -#define BOOTFS_CRITICAL (50) - -#define MAX_FAIL_SEND (10) -#define MAX_SWACT_COUNT (10) - -/* Percent thresholds Database monitoring */ -#define FS_MINOR (70) -#define FS_MAJOR (80) -#define FS_CRITICAL (90) - -/* Resource Alarm ids */ -#define CPU_ALARM_ID ((const char *)"100.101") -#define V_CPU_ALARM_ID ((const char *)"100.102") -#define MEMORY_ALARM_ID ((const char *)"100.103") -#define FS_ALARM_ID ((const char *)"100.104") -#define INSTANCE_ALARM_ID ((const char *)"100.105") -#define OAM_PORT_ALARM_ID ((const char *)"100.106") -#define OAM_ALARM_ID ((const char *)"100.107") -#define MGMT_PORT_ALARM_ID ((const char *)"100.108") -#define MGMT_ALARM_ID ((const char *)"100.109") -#define INFRA_PORT_ALARM_ID ((const char *)"100.110") -#define INFRA_ALARM_ID ((const char *)"100.111") -#define VRS_PORT_ALARM_ID ((const char *)"100.112") //used for HP branch only -#define VRS_ALARM_ID ((const char *)"100.113") //used for HP branch only -#define NTP_ALARM_ID ((const char *)"100.114") -#define V_MEMORY_ALARM_ID ((const char *)"100.115") -#define V_CINDER_THINPOOL_ALARM_ID ((const char *)"100.116") -#define V_NOVA_THINPOOL_ALARM_ID ((const char *)"100.117") -#define THINMETA_ALARM_ID ((const char *)"800.103") - -// ripped from fm-api constants for Neutron AVS alarms -// being moved over to RMON -#define V_PORT_ALARM_ID ((const char *)"300.001") -#define V_INTERFACE_ALARM_ID ((const char *)"300.002") - -// remote logging alarm ID -#define REMOTE_LOGGING_CONTROLLER_CONNECTIVITY_ALARM_ID ((const char *)"100.118") - -// SDN specific alarms -#define V_OPENFLOW_CONTROLLER_ALARM_ID ((const char *)"300.012") -#define V_OPENFLOW_NETWORK_ALARM_ID ((const char *)"300.013") -#define V_OVSDB_MANAGER_ALARM_ID ((const char *)"300.014") -#define V_OVSDB_ALARM_ID ((const char *)"300.015") - -#define INTERFACE_NAME_LEN (10) -#define INTERFACE_UP (1) -#define INTERFACE_DOWN (0) -#define MAX_CLIENTS (100) -#define RMON_MAX_LEN (100) -#define MOUNTED (1) -#define NOT_MOUNTED (0) -#define MTC_EVENT_RMON_READY (0x0f0f0f0f) - -#define NTP_ERROR (255) - -/** Daemon Config Mask */ -#define CONF_MASK (CONFIG_AUDIT_PERIOD |\ - PM_AUDIT_PERIOD |\ - NTP_AUDIT_PERIOD |\ - NTPQ_CMD_TIMEOUT |\ - CONFIG_NODE |\ - CONFIG_START_DELAY |\ - CONFIG_TX_PORT |\ - CONFIG_RX_PORT |\ - CONFIG_CRITICAL_THR) - -#define CONF_RMON_API_MASK (CONF_PORT | \ - CONF_PERIOD | \ - CONF_TIMEOUT | \ - CONF_THRESHOLD) - - - -#define RMON_MIN_START_DELAY (1) -#define RMON_MAX_START_DELAY (120) - -#define RMON_MIN_AUDIT_PERIOD (10) /* Minimum audit period for resource if none specified */ -#define RMON_MAX_AUDIT_PERIOD (120) /* Maximum audit period for resource if none specified */ - -#define RMON_MIN_PM_PERIOD (60) /* Minimum pm period for resource if none specified */ -#define RMON_MAX_PM_PERIOD (600) /* Maximum pm period for resource if none specified */ - -#define RMON_MIN_NTP_AUDIT_PERIOD (10) /* Minimum audit period for resource if none specified */ -#define RMON_MAX_NTP_AUDIT_PERIOD (1200) /* Maximum audit period for resource if none specified */ - -/* Monitored Resource Config Bit Mask */ -#define CONF_RESOURCE (0x01) -#define CONF_STYLE (0x04) -#define CONF_SEVERITY (0x20) -#define CONF_INTERVAL (0x40) -#define CONF_DEBOUNCE (0x80) - -/* Usual buffer sizes */ -#define RATE_THROTTLE (6) -#define BUFFER_SIZE (128) -/* Monitored Resource stages for resource handler fsm */ -typedef enum -{ - RMON_STAGE__INIT, - RMON_STAGE__START, - RMON_STAGE__MANAGE, - RMON_STAGE__MONITOR_WAIT, - RMON_STAGE__MONITOR, - RMON_STAGE__RESTART_WAIT, - RMON_STAGE__IGNORE, - RMON_STAGE__FINISH, - RMON_STAGE__FAILED, - RMON_STAGE__FAILED_CLR, - RMON_STAGE__STAGES, -} rmonStage_enum ; - -typedef enum -{ - NTP_STAGE__BEGIN, - NTP_STAGE__EXECUTE_NTPQ, - NTP_STAGE__EXECUTE_NTPQ_WAIT, - NTP_STAGE__STAGES, -} ntpStage_enum ; - - /* The return values from the ntpq querie */ -typedef enum -{ - NTP_OK = 0, /* All NTP servers are reachable and one is selected */ - NTP_NOT_PROVISIONED = 1, /* No NTP servers are provisioned */ - NTP_NONE_REACHABLE = 2, /* None of the NTP servers are reachable */ - NTP_SOME_REACHABLE = 3, /* Some NTP servers are reachable and one selected */ - NTP_SOME_REACHABLE_NONE_SELECTED = 4 /* Some NTP servers are reachable but none is selected, will treat at as none reachable */ -} NTPQueryStatus; - -typedef enum -{ - RESOURCE_TYPE__UNKNOWN, - RESOURCE_TYPE__FILESYSTEM_USAGE, - RESOURCE_TYPE__MEMORY_USAGE, - RESOURCE_TYPE__CPU_USAGE, - RESOURCE_TYPE__DATABASE_USAGE, - RESOURCE_TYPE__NETWORK_USAGE, - RESOURCE_TYPE__PORT, - RESOURCE_TYPE__INTERFACE, - RESOURCE_TYPE__CONNECTIVITY, -} resType_enum ; - -/* Structure to store memory stats (KiB) */ -typedef struct -{ - unsigned long int MemTotal; - unsigned long int MemFree; - unsigned long int Buffers; - unsigned long int Cached; - unsigned long int SlabReclaimable; - unsigned long int CommitLimit; - unsigned long int Committed_AS; - unsigned long int HugePages_Total; - unsigned long int HugePages_Free; - unsigned long int FilePages; - unsigned long int Hugepagesize; - unsigned long int AnonPages; -} memoryinfo; - -#define RMON_API_MAX_LEN (100) -typedef struct -{ - int tx_sock ; /**< socket to monitored process */ - int tx_port ; /**< port to monitored process */ - struct sockaddr_in tx_addr ; /**< process socket attributes */ - char tx_buf[RMON_API_MAX_LEN]; /**< Server receive buffer */ - socklen_t len ; /**< Socket Length */ -} rmon_api_socket_type ; - -typedef struct -{ - - /* Config Items */ - unsigned int mask ; - resType_enum res_type ; /* specifies the generic resource type */ - const char * resource ; /* The name of the Resource being monitored */ - const char * severity ; /* MINOR, MAJOR or CRITICAL for each resource */ - unsigned int debounce ; /* Period to wait before clearing alarms */ - unsigned int minor_threshold; /* % Value for minor threshold crossing */ - unsigned int major_threshold; /* % Value for major threshold crossing */ - unsigned int critical_threshold; /* % Value for critical threshold crossing */ - unsigned int minor_threshold_abs_node0; /* Absolute value for minor threshold crossing processor node 0 */ - unsigned int major_threshold_abs_node0; /* Absolute value for major threshold crossing processor node 0 */ - unsigned int critical_threshold_abs_node0; /* Absolute value for critical threshold crossing processor node 0 */ - unsigned int minor_threshold_abs_node1; /* Absolute value for minor threshold crossing processor node 1 */ - unsigned int major_threshold_abs_node1; /* Absolute value for major threshold crossing processor node 1 */ - unsigned int critical_threshold_abs_node1; /* Absolute value for critical threshold crossing processor node 1 */ - unsigned int num_tries ; /* Number of times a resource has to be in - failed or cleared state before sending alarm */ - unsigned int alarm_status ; /* 1 or 0. If it is 0 threshold crossing alarms are not sent */ - unsigned int percent ; /* 1 or 0. If it is 1, the percentage is used, otherwise if 0, - the absolute value is used for thresholds crossing values */ - unsigned int alarm_type; /* standard, dynamic or static */ - - /* Dynamic Data */ - const char * type ; - const char * device ; - int i ; /* timer array index */ - unsigned int debounce_cnt ; /* running monitor debounce count */ - unsigned int minorlog_cnt ; /* track minor log count for thresholding */ - unsigned int count ; /* track the number of times the condition has been occured */ - bool failed ; /* track if the resource needs to be serviced by the resource handler */ - double resource_value ; /* Usage for the Linux blades: controller, worker and storage */ - double resource_prev ; /* the previous resource_value */ - int sev ; /* The severity of the failed resource */ - rmonStage_enum stage ; /* The stage the resource is in within the resource handler fsm */ - char alarm_id[FM_MAX_BUFFER_LENGTH] ; /* Used by FM API, type of alarm being raised */ - char errorMsg[ERR_SIZE]; - rmon_api_socket_type msg; - bool alarm_raised ; - int failed_send ; /* The number of times the rmon api failed to send a message */ - int mounted ; /* 1 or 0 depending on if the dynamic fs resource is mounted */ - int socket_id ; /* socket id corresponding to a physical processor */ - int response_error_log_throttle; /* log throttle counter for error in receiving response for resource info */ - int parse_error_log_throttle ; /* log throttle counter for failing to parse resource info */ - int key_error_log_throttle ; /* log throttle counter for failing to obtain resource info */ - int resource_monitor_throttle ; /* log throttle for the this resource being monitored */ -} resource_config_type ; - -typedef struct -{ - - /* Config Items */ - unsigned int mask ; - const char * resource ; /* The name of the Resource being monitored */ - const char * severity ; /* MINOR, MAJOR or CRITICAL for each resource */ - unsigned int debounce ; /* Period to wait before clearing alarms */ - unsigned int num_tries ; /* Number of times a resource has to be in - failed or cleared state before sending alarm */ - unsigned int alarm_status ; /* 1 or 0. If it is 0 threshold crossing alarms are not sent */ - - /* Dynamic Data */ - int i ; /* timer array index */ - char interface_one[20] ; /* primary interface */ - char interface_two[20] ; /* second interface if lagged */ - char bond[20] ; /* bonded interface name */ - bool lagged ; /* Lagged interface=true or not=false */ - unsigned int debounce_cnt ; /* running monitor debounce count */ - unsigned int minorlog_cnt ; /* track minor log count for thresholding */ - unsigned int count ; /* track the number of times the condition has been occured */ - bool failed ; /* track if the resource needs to be serviced by the resource handler */ - int resource_value ; /* 1 if the interface is up and 0 if it is down */ - int resource_value_lagged ; /* 1 if the interface is up and 0 if it is down for lagged interfaces */ - int sev ; /* The severity of the failed resource */ - rmonStage_enum stage ; /* The stage the resource is in within the resource handler fsm */ - char int_name[INTERFACE_NAME_LEN] ; /* Name of the tracked interface ex: eth1 */ - char alarm_id[FM_MAX_BUFFER_LENGTH] ; /* Used by FM API, type of alarm being raised */ - char alarm_id_port[FM_MAX_BUFFER_LENGTH] ; /* Used by FM API, type of alarm being raised for the ports */ - char errorMsg[ERR_SIZE]; - rmon_api_socket_type msg; - bool link_up_and_running; /* whether the interface is up or down initially */ - bool interface_used; /* true if the interface is configured */ - bool alarm_raised; - int failed_send; /* The number of times the rmon api failed to send a message */ - - -} interface_resource_config_type ; - -typedef struct -{ - - /* Config Items */ - const char * vg_name ; /* LVM Volume Group name */ - const char * thinpool_name ; /* LVM Thin Pool in VG to monitor */ - unsigned int critical_threshold ; /* critical alarm threshold percentage for metadata utilization, - 0 to disable monitoring*/ - unsigned int alarm_on ; /* 1 or 0. 1 to enable critical alarm, 0 to disable it */ - unsigned int autoextend_on ; /* 1 or 0. 1 to first try extending the metadata before - raising alarm, 0 for autoextend off */ - unsigned int autoextend_by ; /* autoextend by percentage or absolute value in MiB */ - unsigned int autoextend_percent ; /* use percent or MiB in autoexent_by */ - unsigned int audit_period ; /* frequency at which resources are polled, in seconds */ - - /* Dynamic Data */ - bool section_exists ; /* will be 1 if [THINMDA_CONFIG_SECTION] section is defined in - configuration file */ - double resource_value ; /* metadata usage percent */ - double resource_prev ; /* the previous value */ - bool alarm_raised ; /* track if alarm is raised to avoid re-raising */ - bool first_run ; /* to check for state consistency on first run */ - rmonStage_enum stage ; /* The stage the resource is in within the resource handler fsm */ - -} thinmeta_resource_config_type; - -/** Daemon Service messaging socket control structure **/ -typedef struct -{ - int rmon_tx_sock; /**< RMON API Tx Socket */ - int rmon_tx_port; /**< RMON API Tx Port */ - struct sockaddr_in rmon_tx_addr; /**< RMON API Tx Address */ - int rmon_rx_sock; /**< RMON API Rx Socket */ - int rmon_rx_port; /**< RMON API Rx Port */ - struct sockaddr_in rmon_rx_addr; /**< RMON API Rx Address */ - int netlink_sock; /**< Netlink event socket */ - int ioctl_sock; - msgSock_type mtclogd ; -} rmon_socket_type ; -rmon_socket_type * rmon_getSock_ptr ( void ); - -typedef struct -{ char resource[50]; - char registered_not[NOT_SIZE] ; /* The api notification the client has registerted for */ - char client_name[NOT_SIZE] ; /* The api notification the client has registerted for */ - - /** RMON API socket */ - /* ------------------------------------ */ - rmon_api_socket_type msg ; /**< Resource monitoring messaging interface */ - - /* RMON API Dynamic Data */ - /* ------------------------------ */ - bool resource_failed ; /**< resource monitoring failed signal */ - unsigned int tx_sequence ; /**< outgoing sequence number */ - unsigned int rx_sequence ; /**< incoming sequence number */ - bool waiting ; /**< waiting for response */ - int port ; - unsigned int msg_count ;/**< running pulse count */ - unsigned int b2b_miss_peak ; /**< max number of back to back misses */ - unsigned int b2b_miss_count ; /**< current back to back miss count */ - unsigned int afailed_count ; /**< total resouce mon'ing failed count */ - unsigned int recv_err_cnt ; /**< counts the receive errors */ - unsigned int send_err_cnt ; /**< counts the transmit errors */ - unsigned int send_msg_count ; /**< number of messages sent */ - unsigned int mesg_err_cnt ; /**< response message error count */ - unsigned int mesg_err_peak ; /**< response message error count */ - unsigned int adebounce_cnt ; /**< resource monitor debounce counter */ - bool resource_debounce ; /**< true = in resource mon'ing debounce */ - rmon_socket_type rx_sock ; /* rx socket for that client */ - -} registered_clients; - -void rmon_msg_init ( void ); -void rmon_msg_fini ( void ); -int setup_tx_port ( const char * iface , const char * mcast , int port ); -int rmon_send_event ( unsigned int event_cmd , const char * process_name_ptr ); - - -/* Note: Any addition to this struct requires explicit - * init in daemon_init. - * Cannot memset a struct contianing a string type. - **/ -typedef struct -{ - /* iface attributes ; hostname, ip, audit period and mac address */ - char my_hostname [MAX_HOST_NAME_SIZE+1]; - string my_macaddr ; - int audit_period ; /* Frequency at which resources are polled */ - int pm_period ; /* Frequency at which ceilometer PM's are created */ - int ntp_audit_period; /* Frequency at which we check if the NTP servers are still reachable */ - int ntpq_cmd_timeout; /* Max amount of time in seconds to wait for the ntpq command to complete */ - string my_address ; - int resources ; /**< Number of Monitored resources */ - int interface_resources ; /**< Number of monitored interface resources */ - int thinmeta_resources; /**< Number of monitored thinpool metadata resources */ - int per_node ; /* Memory checking per node enabled: 1 or disabled: 0 */ - int clients ; - int rmon_critical_thr ; - int fd; /* Used for inotify */ - int wd; /* Used for inotify */ - - unsigned int function ; - unsigned int subfunction ; - unsigned int nodetype ; - -} rmon_ctrl_type ; - -bool is_controller ( void ); - -/* Init tx message */ -void rmon_msg_init ( void ); - -/* Delete tx message */ -void rmon_msg_fini ( void ); - -/* Initizialize the settings from the rmond.conf file */ -int rmon_hdlr_init ( rmon_ctrl_type * ctrl_ptr ); - -/* Initialize the timers */ -void rmon_timer_init( void ); - -/* Service client register and deregister requests - * when rmon was not alive */ -void rmon_alive_notification (int & clients); - -/* Service inbox when rmon is born */ -int rmon_service_file_inbox ( int clients, char buf[RMON_MAX_LEN], bool add=true ); - -/* rmon_api functions */ -int rmon_service_inbox ( int clients ); - -/* Send set or clear alarm notification to registered clients */ -int rmon_send_request ( resource_config_type * ptr, int clients); - -/* send rmon interface resource set and clear alarm messages to registered client processes */ -int send_interface_msg ( interface_resource_config_type * ptr, int clients); - -/* Init rmon api tx and rx ports */ -int rmon_port_init ( int tx_port ); - -/* Main loop to poll and handle resource monitoring */ -void rmon_service (rmon_ctrl_type * ctrl_ptr); - -/* Update the number of registered clients */ -void update_total_clients (int total_clients); - -/* Add a registered client to the list of clients */ -void add_registered_client (registered_clients client); - -/* Read in the per resource specific thresholds */ -int rmon_resource_config ( void * user, - const char * section, - const char * name, - const char * value); - -/* Read in the per interface resource specific values */ -int rmon_interface_config ( void * user, - const char * section, - const char * name, - const char * value); - -/* Read in LVM Thinpool metadata resource specific values */ -int rmon_thinmeta_config ( void * user, - const char * section, - const char * name, - const char * value); - -/* Returns a registered client at a given index */ -registered_clients * get_registered_clients_ptr ( int index ); - -/* read the dynamic file systems file and send a response back */ -void process_dynamic_fs_file(); - -/* send the notification that the file has been read */ -int rmon_resource_response ( int clients ); - -/* Updates the interface data structure with the state (up or down) of the interface */ -void check_interface_status( interface_resource_config_type * ptr ); - -/* Check if the node is a worker node */ -bool check_worker(); - -/* Handle failed platform interfaces */ -void interface_handler( interface_resource_config_type * ptr ); - -/* Handle LVM thinpool metadata usage */ -int thinmeta_handler( thinmeta_resource_config_type * ptr ); - -/* Compute the thinpool metadata usage for a specific LVM thinpool */ -int calculate_metadata_usage(thinmeta_resource_config_type * ptr); - -/* Returns the reference to the rmon control pointer */ -rmon_ctrl_type * get_rmon_ctrl_ptr (); - -/* Initialize LVM Thin Pool Metadata monitoring */ -void thinmeta_init(thinmeta_resource_config_type * res, struct mtc_timer * timers, int count); - -/* Clears any previously raised interface alarms if rmon is restarted */ -void interface_alarming_init ( interface_resource_config_type * ptr ); - -/* Map an interface (mgmt, oam or infra) to a physical port */ -void init_physical_interfaces ( interface_resource_config_type * ptr ); - -/* returns true if the link is up for the specified interface */ -int get_link_state ( int ioctl_socket, char iface[20], bool * running_ptr ); - -/* Service state changes for monitored interfaces */ -int service_interface_events ( int nl_socket , int ioctl_socket ); - -/* Set the interface resource in the correct state for the interface resource handler */ -void service_resource_state ( interface_resource_config_type * ptr ); - -/* Get the interface resource by index */ -interface_resource_config_type * get_interface_ptr ( int index ); - -/* Get the resource by index */ -resource_config_type * get_resource_ptr ( int index ); - -/* Resource monitor handler cleanup */ -void rmon_hdlr_fini ( rmon_ctrl_type * ctrl_ptr ); - -void build_entity_instance_id ( resource_config_type *ptr, char *entity_instance_id); - -/* Resource monitor FM interface */ -void rmon_fm_init ( void ); -void rmon_fm_handler ( void ); -EFmErrorT rmon_fm_set ( const SFmAlarmDataT *alarm, fm_uuid_t *fm_uuid ); -EFmErrorT rmon_fm_clear ( AlarmFilter *alarmFilter ); -EFmErrorT rmon_fm_get ( AlarmFilter *alarmFilter, SFmAlarmDataT **alarm, unsigned int *num_alarm ); - -/* Save dynamic memory resource (both system memory and AVS memory) */ -int save_dynamic_mem_resource ( string resource_name, string criticality, - double r_value, int percent, int abs_values[3], - const char * alarm_id, int socket_id /*=0*/ ); - -/* Resource failure processing for percentage based thresholds */ -void process_failures ( resource_config_type * ptr ); -/* Resource failure processing for absolute based thresholds */ -void process_failures_absolute ( resource_config_type * ptr ); - - -// convert Severity level into literal defination -static inline string FmAlarmSeverity_to_string(EFmAlarmSeverityT severity) -{ - switch (severity) { - case FM_ALARM_SEVERITY_CLEAR: - return "clear"; - case FM_ALARM_SEVERITY_WARNING: - return "warning"; - case FM_ALARM_SEVERITY_MINOR: - return "minor"; - case FM_ALARM_SEVERITY_MAJOR: - return "major"; - case FM_ALARM_SEVERITY_CRITICAL: - return "critical"; - default: - return NULL; - } -} - -/**************************************************************************** - * - * Name : log_value - * - * Purpose : Log resource state values while avoiding log flooding for - * trivial fluxuations. - * - * Description: Recommends whether the current resource state value should - * be logged based on current, previous and step values. - * - * Caller should not generate such log if a false is returned. - * - * A true is returned if the currrent and previous resource values differ - * by +/- step amount. - * - * The caller specifies the step that can be overridden by a smaller value - * in rmond.conf:log_step value. - * - * If step is zero then a true is always returned in support of a debug mode - * where we get the current reading as a log on every audit. - * - * The callers previous value is updated to current whenever true is returned. - * - ****************************************************************************/ - -/* a default step value ; change of + or - 5 triggers log */ -#define DEFAULT_LOG_VALUE_STEP (5) - -bool log_value ( double & current, double & previous, int step ); - - -#endif /* __INCLUDE_RMON_HH__ */ diff --git a/mtce/src/rmon/rmonApi.h b/mtce/src/rmon/rmonApi.h deleted file mode 100644 index 70225e14..00000000 --- a/mtce/src/rmon/rmonApi.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef __INCLUDE_RMONAPI_H__ -#define __INCLUDE_RMONAPI_H__ -/* - * Copyright (c) 2013, 2017 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - -#include -#include - -#include "rmonHttp.h" - - /** - * @file - * Wind River CGTS Platform - * - * rmon API Header - - */ - -#define RMON_PUT_VSWITCH_OPER_LABEL "v1" -#define RMON_PUT_VSWITCH "/engine/stats" - - - -/* Poll request is a GET operation that looks like this ... - * - * http://localhost:9000/v1/engine/stats - * The following defines are used to help construct that request - * - */ - - -/** Initializes the module */ -int rmonApi_init ( string ip, int port ); - -/** Frees the module's dynamically allocated resources */ -void rmonApi_fini ( void ); - - -/**remote logging service request handlers */ - -void rmonHdlr_remotelogging_handler ( struct evhttp_request *req, void *arg ); -int rmonHdlr_remotelogging_query (resource_config_type * ptr); - -/**ceilometer sample create request handlers */ -void rmonHdlr_ceilometer_handler ( struct evhttp_request *req, void *arg ); -#endif diff --git a/mtce/src/rmon/rmonApi/Makefile b/mtce/src/rmon/rmonApi/Makefile deleted file mode 100644 index 486d3634..00000000 --- a/mtce/src/rmon/rmonApi/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -# -# Copyright (c) 2014-2018 Wind River Systems, Inc. -# -# SPDX-License-Identifier: Apache-2.0 -# - -SRCS = rmon_api.c -OBJS = $(SRCS:.c=.o) -INCLUDES = -I. -LDFLAGS = -shared -CFLAGS = -fPIC -g -O2 -Wall -Wextra -Werror -TARGET_LIB = librmonapi.so -lib: build -ifeq (,$(shell which ${CC})) -CC=gcc -endif - -STATIC_ANALYSIS_TOOL = cppcheck -STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) - -.c.o: - $(CC) $(INCLUDES) $(CFLAGS) $(EXTRACCFLAGS) -c $< -o $@ - -static_analysis: -ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) - $(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h -else - echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." -endif - -build: static_analysis ${TARGET_LIB} - -${TARGET_LIB}: ${TARGET_LIB}.${VER_MJR} - ln -sf $^ $@ - -${TARGET_LIB}.${VER_MJR}: ${TARGET_LIB}.${VER} - ln -sf $^ $@ - -${TARGET_LIB}.${VER}: $(OBJS) - $(CC) ${LDFLAGS} -Wl,-soname,${TARGET_LIB}.${VER_MJR} -o $@ $^ - -$(SRCS:.c=.d):%.d:%.c - $(CC) $(CFLAGS) -MM $< >$@ - -include $(SRCS:.c=.d) - -clean: - @rm -v -f *.o - @rm -v -f *.so - @rm -v -f *.so.* - @rm -v -f *.d diff --git a/mtce/src/rmon/rmonApi/rmon_api.c b/mtce/src/rmon/rmonApi/rmon_api.c deleted file mode 100644 index b6782d7e..00000000 --- a/mtce/src/rmon/rmonApi/rmon_api.c +++ /dev/null @@ -1,475 +0,0 @@ -/* - * Copyright (c) 2013, 2016 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - - /** - * @file - * Wind River CGCS Platform Resource Monitor Client Notification API Library - * See rmon_api.h for API header. - * - **/ - -#include "rmon_api.h" -#include - -/* Pass code */ -#ifndef PASS -#define PASS (0) -#endif - -/* Fail Code */ -#ifndef FAIL -#define FAIL (1) -#endif - -/* Retry Code */ -#ifndef RETRY -#define RETRY (2) -#endif - -/* maximum string and socket endpoint path length */ -#define RMON_MAX_LEN (100) - -/* initialization signature to gate functional - * api calls made prior to initialization */ -#define INIT_SIG (0xffffdead) - -/* rmon default messaging port */ -#define RMONTXPORT 2300 - -/** Control Structure */ -typedef struct -{ - unsigned int init ; /**< Init signature */ - - int client_rx_sock ; /**< inet pulse request rx socket */ - int client_rx_port ; /**< inet pulse request rx port number */ - struct sockaddr_in client_rx_addr ; /**< inet pulse request rx attributes */ - char client_rx_buf[RMON_MAX_LEN] ; - - int rmon_tx_sock ; /**< inet pulse response tx socket */ - int rmon_tx_port ; /**< inet pulse response tx port number */ - struct sockaddr_in rmon_tx_addr ; /**< inet pulse response tx attributes */ - char rmon_tx_buf[RMON_MAX_LEN] ; - - int rmon_rx_sock ; /**< inet pulse response rx socket */ - int rmon_rx_port ; /**< inet pulse response rx port number */ - struct sockaddr_in rmon_rx_addr ; /**< inet pulse response rx attributes */ - char rmon_rx_buf[RMON_MAX_LEN] ; - - char name[RMON_MAX_LEN] ; /**< name of process using this instance */ - - bool debug_mode ; /**< debug mode if true */ - int fit_code ; /**< fit code MAGIC, SEQ, PROCESS */ -} resource_mon_socket_type ; - -/* Instance Control Structure - Per Process Private Data */ -static resource_mon_socket_type rmon ; -/* Mutex For sending client process information to rmon */ -pthread_mutex_t client_mutex; - -int remove_rmon_client( const char * process_name_ptr, int socket ); - -int add_rmon_client ( const char * process_name_ptr, int port , const char * registration, int rx_port); - -int resource_monitor_initialize ( const char * process_name_ptr, int port , const char * registration); - -int resource_monitor_deregister( const char * process_name_ptr, int socket ); - -int resource_monitor_get_sel_obj ( void ); - -int remove_rmon_client( const char * process_name_ptr, int socket ); - -void resource_monitor_finalize (); - -int create_tx_socket(); - -/* Create and Setup Inet Transmit Socket - * return PASS (0) on success - * -# on kernel call error - * non-zero on internal error - * - **/ -int create_tx_socket( int rx_port ) -{ - int val = 1 ; - int ok = 1 ; - - rmon.rmon_tx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if ( 0 >= rmon.rmon_tx_sock ) - { - syslog ( LOG_ERR, "create_tx_socket failed to create 'tx' socket (%d:%m)", errno ); - return (-errno); - } - - if ( setsockopt ( rmon.rmon_tx_sock , SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)) == -1 ) - { - syslog ( LOG_WARNING, "create_tx_socket failed to set 'tx' socket as reusable (%d:%m)", errno ); - } - - /* Setup with localhost ip */ - memset(&rmon.rmon_tx_addr, 0, sizeof(struct sockaddr_in)); - rmon.rmon_tx_addr.sin_family = AF_INET ; - rmon.rmon_tx_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); - rmon.rmon_tx_addr.sin_port = htons(RMONTXPORT) ; - rmon.rmon_tx_port = RMONTXPORT ; - - /* Set socket to be non-blocking. */ - int rc = ioctl(rmon.rmon_tx_sock, FIONBIO, (char *)&ok); - if ( 0 > rc ) - { - syslog ( LOG_WARNING, "create_tx_socket failed to set 'tx' socket as non-blocking (%d:%m)\n", errno ); - } - - /* if the sock is already open then close it first */ - if ( rmon.rmon_rx_sock ) - { - close (rmon.rmon_rx_sock); - } - - rmon.rmon_rx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if ( 0 >= rmon.rmon_rx_sock ) - { - syslog ( LOG_WARNING, "create_rx_socket failed (%d:%m)\n", errno ); - return (-errno); - } - if ( setsockopt ( rmon.rmon_rx_sock , SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)) == -1 ) - { - syslog ( LOG_WARNING, "create_tx_socket failed to set 'rx' socket as reusable (%d:%m)", errno ); - } - - /* Setup with localhost ip */ - memset(&rmon.rmon_rx_addr, 0, sizeof(struct sockaddr_in)); - rmon.rmon_rx_addr.sin_family = AF_INET ; - rmon.rmon_rx_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); - rmon.rmon_rx_addr.sin_port = htons(rx_port) ; - rmon.rmon_rx_port = rx_port ; - - /* Set socket to be non-blocking. */ - rc = ioctl(rmon.rmon_rx_sock, FIONBIO, (char *)&ok); - if ( 0 > rc ) - { - syslog ( LOG_ERR, "create_tx_socket failed to set 'rx' socket as non-blocking (%d:%m)\n", errno ); - return -errno; - } - /* bind socket to the receive addr */ - if ( bind ( rmon.rmon_rx_sock, (const struct sockaddr *)&rmon.rmon_rx_addr, sizeof(struct sockaddr_in)) == -1 ) - { - syslog ( LOG_ERR, "failed to bind rmon 'rx' socket with port %d (%d:%m)\n", rx_port, errno ); - close (rmon.rmon_rx_sock); - rmon.rmon_rx_sock = 0 ; - return -errno; - } - return PASS; -} - -/* open lo socket */ -int add_rmon_client ( const char * process_name_ptr, int port , const char * registration, int rx_port) -{ - struct stat p ; - int val = 1 ; - memset ( &rmon, 0, sizeof(rmon)); - memset ( &p, 0 , sizeof(struct stat)); - - if ( registration == NULL ) - { - syslog ( LOG_INFO, "resource_monitor_initialize called with null registration info"); - return (0); - } - - syslog ( LOG_INFO , "Add Client '%s' to rmon (port:%d)\n", registration, port ); - - sprintf ( rmon.name, "/var/run/%s.rmon", process_name_ptr ); - - stat ( rmon.name, &p ) ; - if ((p.st_ino != 0 ) && (p.st_dev != 0)) - { - rmon.debug_mode = true ; - syslog ( LOG_INFO, "Enabling resource Monitor Debug Mode\n"); - if ( p.st_size ) - { - FILE * filename = fopen ( rmon.name, "rb" ) ; - if ( filename != NULL ) - { - memset ( &rmon.name, 0, RMON_MAX_LEN); - if ( fgets ( rmon.name, 20, filename ) != NULL ) - { - if ( !strncmp ( rmon.name, FIT_MAGIC_STRING, strlen (FIT_MAGIC_STRING))) - { - rmon.fit_code = FIT_MAGIC ; - syslog ( LOG_INFO, "Enabling FIT on 'magic calculation'\n"); - } - else if ( !strncmp ( rmon.name, FIT_SEQUENCE_STRING, strlen(FIT_SEQUENCE_STRING))) - { - rmon.fit_code = FIT_SEQ ; - syslog ( LOG_INFO, "Enabling FIT on 'sequence number'\n"); - } - else if ( !strncmp ( rmon.name, FIT_PROCESS_STRING, strlen(FIT_PROCESS_STRING))) - { - rmon.fit_code = FIT_PROCESS ; - syslog ( LOG_INFO, "Enabling FIT on 'process name'\n"); - } - else - { - syslog ( LOG_INFO, "Unsupported FIT string (%s)\n", rmon.name ); - } - } - fclose (filename); - } - else - { - syslog ( LOG_INFO, "Failed to open %s\n", rmon.name); - } - } - } - /* Init the control struct - includes all members */ - memset ( rmon.name, 0, RMON_MAX_LEN); - - if ( process_name_ptr ) - { - memcpy ( rmon.name, process_name_ptr, strlen (process_name_ptr)) ; - } - else - { - syslog ( LOG_INFO, "resource_monitor_initialize called with null process name"); - return (0); - } - - /*******************************************************/ - /* Create and Setup Inet Receive Socket */ - /*******************************************************/ - rmon.client_rx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if ( 0 >= rmon.client_rx_sock ) - { - syslog ( LOG_INFO, "add_rmon_client error:1"); - return (0); - } - - if ( setsockopt ( rmon.client_rx_sock , SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)) == -1 ) - { - syslog ( LOG_INFO, "%s failed to set socket as re-useable (%d:%s)\n", - process_name_ptr, errno, strerror(errno)); - } - - /* Setup with localhost ip */ - memset(&rmon.client_rx_addr, 0, sizeof(struct sockaddr_in)); - rmon.client_rx_addr.sin_family = AF_INET ; - rmon.client_rx_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); - rmon.client_rx_addr.sin_port = htons(port) ; - rmon.client_rx_port = port ; - - /* bind socket to the receive addr */ - if ( bind ( rmon.client_rx_sock, (const struct sockaddr *)&rmon.client_rx_addr, sizeof(struct sockaddr_in)) == -1 ) - { - syslog ( LOG_ERR, "failed to bind to rx socket with port %d\n", port ); - close (rmon.client_rx_sock); - rmon.client_rx_sock = 0 ; - return (0); - - } - - int rc = create_tx_socket ( rx_port ); - - if (rc != PASS ) - { - syslog ( LOG_ERR, "add_rmon_client failed to create_tx_socket (rc:%d)", rc ); - return (0); - } - if ((registration != NULL) && (rc == PASS)) - { - int bytes = 0; -#ifdef WANT_CLIENT_REGISTER_SOCKET_SEND - socklen_t len = sizeof(struct sockaddr_in) ; - - /* client registering, send rmon the resources registered for */ - memset(rmon.rmon_tx_buf, 0, sizeof(rmon.rmon_tx_buf)); - snprintf(rmon.rmon_tx_buf, sizeof(rmon.rmon_tx_buf), "%s %s %d", process_name_ptr, registration, port); - bytes = sendto ( rmon.rmon_tx_sock, &rmon.rmon_tx_buf, strlen(rmon.rmon_tx_buf), 0, - (struct sockaddr *) &rmon.rmon_tx_addr, sizeof(struct sockaddr_in)); - fd_set readfds; - struct timeval waitd; - bytes = 0; - - FD_ZERO(&readfds); - FD_SET(rmon.rmon_rx_sock, &readfds); - - waitd.tv_sec = WAIT_DELAY; - waitd.tv_usec = 0; - /* This is used as a delay up to select_timeout */ - select(FD_SETSIZE, &readfds, NULL, NULL, &waitd); - - if (FD_ISSET(rmon.rmon_rx_sock, &readfds)) - { - /* wait for the response from rmon to verify that the client is registered */ - memset(rmon.rmon_rx_buf, 0, sizeof(rmon.rmon_rx_buf)); - rmon.rmon_rx_buf[0] = 0; - bytes = recvfrom( rmon.rmon_rx_sock, rmon.rmon_rx_buf, RMON_MAX_LEN, 0, (struct sockaddr *)&rmon.rmon_rx_addr, &len ); - } -#endif - if (bytes <= 0) { - /* no respone, write the client name and notification to a file for later use */ - FILE * pFile; - memset(rmon.rmon_rx_buf, 0, sizeof(rmon.rmon_rx_buf)); - snprintf(rmon.rmon_rx_buf, sizeof(rmon.rmon_rx_buf), "%s %s %d", process_name_ptr, registration, port); - pFile = fopen (RMON_API_REG_DIR , "a+"); - if ( pFile ) - { - // take out a writer lock on this file to - // ensure that no other entity is writing to it - // at this time - int lock = flock(fileno(pFile), LOCK_EX); - if (lock < 0) { - syslog (LOG_ERR, "Failed to get exclusive lock on" - " '%s' (errno: %d)", RMON_API_REG_DIR, errno); - } else { - fprintf(pFile, "%s\n", rmon.rmon_rx_buf); - // release write lock - flock(fileno(pFile), LOCK_UN); - } - fclose(pFile); - } - else - { - syslog ( LOG_ERR, "Failed to open '%s'\n", RMON_API_REG_DIR ); - } - } - else - { - syslog ( LOG_ERR, "add_rmon_client send message succeeded"); - } - - /* Set init sig */ - rmon.init = INIT_SIG ; - - /* Return the socket descriptor */ - return (rmon.client_rx_sock); - } - else - { - syslog ( LOG_ERR, "Failed register due to previous failure\n"); - } - return (0); -} - -int rmon_notification ( const char * notification_name ) -{ - int port = RMONTXPORT; - int rc; - - /* send the message to check the dynamic file */ - memset(rmon.rmon_tx_buf, 0, sizeof(rmon.rmon_tx_buf)); - snprintf(rmon.rmon_tx_buf, sizeof(rmon.rmon_tx_buf), "%s %s %d", notification_name, RESOURCE_NOT, port); - rc = sendto ( rmon.rmon_tx_sock, &rmon.rmon_tx_buf, strlen(rmon.rmon_tx_buf), 0, - (struct sockaddr *) &rmon.rmon_tx_addr, sizeof(struct sockaddr_in)); - return rc; -} - -int resource_monitor_initialize ( const char * process_name_ptr, int port , const char * registration) -{ - /* use a mutex to prevent multiple clients from registering at once */ - int clt_rx_sock; - int rx_port = port - 1; - - pthread_mutex_lock(&client_mutex); - clt_rx_sock = add_rmon_client(process_name_ptr, port , registration, rx_port ); - pthread_mutex_unlock(&client_mutex); - - return clt_rx_sock; - -} - -int resource_monitor_deregister( const char * process_name_ptr, int socket ) -{ - /* use a mutex to prevent multiple clients from de-registering at once */ - int rc; - pthread_mutex_lock(&client_mutex); - rc = remove_rmon_client(process_name_ptr, socket); - pthread_mutex_unlock(&client_mutex); - - return rc; -} - - -/* */ -int resource_monitor_get_sel_obj ( void ) -{ - if (( rmon.init != INIT_SIG ) || ( rmon.client_rx_sock <= 0 )) - { - syslog (LOG_WARNING , "'%s' called with invalid init (sock:%d)\n", - __FUNCTION__, rmon.client_rx_sock); - } - - return (rmon.client_rx_sock); -} - -int remove_rmon_client( const char * process_name_ptr, int socket ) -{ - int rc; - int port = RMONTXPORT; - int bytes; - socklen_t len = sizeof(struct sockaddr_in); - - /* client deregistering, send rmon the client process name */ - memset(rmon.rmon_tx_buf, 0, sizeof(rmon.rmon_tx_buf)); - snprintf(rmon.rmon_tx_buf, sizeof(rmon.rmon_tx_buf), "%s %s %d", process_name_ptr, CLR_CLIENT, port); - rc = sendto ( rmon.rmon_tx_sock, &rmon.rmon_tx_buf, strlen(rmon.rmon_tx_buf), 0, - (struct sockaddr *) &rmon.rmon_tx_addr, sizeof(struct sockaddr_in)); - sleep(WAIT_DELAY); - /* wait for the response from rmon to verify that the client is de-registered */ - memset(rmon.rmon_rx_buf, 0, sizeof(rmon.rmon_rx_buf)); - rmon.rmon_rx_buf[0] = 0; - bytes = recvfrom( rmon.rmon_rx_sock, rmon.rmon_rx_buf, RMON_MAX_LEN, 0, (struct sockaddr *)&rmon.rmon_rx_addr, &len); - - if ((bytes <= 0) || (rmon.rmon_rx_buf[0] == 0)) { - - FILE * pFile; - memset(rmon.rmon_rx_buf, 0, sizeof(rmon.rmon_rx_buf)); - snprintf(rmon.rmon_tx_buf, sizeof(rmon.rmon_tx_buf), "%s %s %d", - process_name_ptr, CLR_CLIENT, port); - pFile = fopen (RMON_API_DEREG_DIR , "a+"); - if (pFile) { - // take out a writer lock on this file to - // ensure that no other entity is writing to it - // at this time - int lock = flock(fileno(pFile), LOCK_EX); - if (lock < 0) { - syslog (LOG_ERR, "Failed to get exclusive lock on" - " '%s' (errno: %d)", RMON_API_DEREG_DIR, errno); - } else{ - fprintf(pFile, "%s\n", rmon.rmon_rx_buf); - // release the lock - flock(fileno(pFile), LOCK_UN); - } - fclose(pFile); - } else { - syslog (LOG_ERR, "Failed to open '%s'\n", - RMON_API_DEREG_DIR ); - } - } - - if ( socket ) - { - /* close the client receive port */ - close (socket); - } - rc = PASS ; - - return rc; -} - -/* close the rmon ports */ -void resource_monitor_finalize () -{ - if ( rmon.rmon_tx_sock ) - { - close (rmon.rmon_tx_sock); - } - if ( rmon.rmon_rx_sock ) - { - close (rmon.rmon_rx_sock); - } - -} diff --git a/mtce/src/rmon/rmonApi/rmon_api.h b/mtce/src/rmon/rmonApi/rmon_api.h deleted file mode 100644 index eea51d28..00000000 --- a/mtce/src/rmon/rmonApi/rmon_api.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2014, 2016 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - - /** - * @file - * Wind River CGCS Platform Resource Monitor Client Notification API Library Header - */ - -#include /* for ... snprintf */ -#include /* for ... unlink, close and usleep */ -#include /* for ... socket */ -#include /* for ... domain socket type */ -#include /* for ... inet socket type */ -#include /* for ... inet_addr, inet_ntoa macro */ -#include /* for ... syslog */ -#include /* for ... EINTR, errno, strerror */ -#include /* for ... true and false */ -#include /* for ... file stat */ -#include /* for ... mutual exclusion */ -#include -#include -/** - - * This is a convenience module in support of resource monitoring notificiations to - * client processes - * - * Packaged as a shared library that processes can link to. - * - * This module provides four simple interfaces to that provide the following general functions - * - * - open an abstract socket interface for resource monitoring messaging - * - return the socket file descriptor for event driven selection - * - service events on socket - * - close the socket when done - * - * *Interfaces including work flow are* - * - * Init: - * - * resource_monitor_initialize ( "testClient" , 2302, CPU_USAGE ); - * - * Setup event driven handling: - * - * int resource_monitor_socket = resource_monitor_get_sel_obj(); - * FD_SET( resource_monitor_socket, &readfds); - * - * Main loop: - * - * if ( FD_ISSET(resource_monitor_socket, &readfds)) - * resource_monitor_dispatch (); - * - * Exit: - * - * resource_monitor_deregister("testClient", 2302); - * - */ - -/** Initialize the library and open the messaging socket(s). - * - * Creates socket and binds to named endpoint. - * - * Prints status or errors to syslog. - * - * @param process_name_ptr - char pointer to string containing monitored process name - * @param port - integer specifying the port number this process is listening on - * - * @returns The socket file descriptor on success or negative version of - * standard Linux error numbers (errno) codes from socket(2) or bind(2) - * - **/ - - -/* Notification resource types */ -#define CPU_USAGE ((const char *)"cpuUsage") -#define MEMORY_USAGE ((const char *)"memoryUsage") -#define FS_USAGE ((const char *)"fsUsage") -#define ALL_USAGE ((const char *)"allUsage") -#define CLR_CLIENT ((const char *)"clearClient") -#define RESOURCE_NOT ((const char *)"resourceNotification") -#define NOT_SIZE (100) -#define ERR_SIZE (100) -#define MAX_ERR_CNT (5) -/** Supplies the messaging socket file descriptor. - * - * @returns The created socket file descriptor for event driven select - * or zero if initialize was not called of there was error creating - * the socket. A notification message is sent to rmon to tell it that a new client - * is registering for a notification of type resource. From then on, rmon will send - * alarm set and clear messages for that resource to the process until it deregisters. - **/ -int resource_monitor_initialize ( const char * process_name_ptr, int port, const char * resource ); - - -int rmon_notification ( const char * notification_name ); - -/* returns the client socket fd */ -int resource_monitor_get_sel_obj ( void ); - -/** Close the rmon tx socket */ -void resource_monitor_finalize ( void ); - -/** Debug mode is enabled if the following file is found during initialize - * - * /var/run/.debug - * - * Failt Insertion Mode is enabled if the first word of line one - * of this file contains one of the following words - * - * sequence - corrupt the sequence number returned - * magic - corrupt the magic number returned - * process - corrupt the process name returned - * - */ - - -/* Deregister a client process from rmon notifications */ -int resource_monitor_deregister( const char * process_name_ptr, int socket ); - -/** FAult Insertion Mode Strings */ -#define FIT_MAGIC_STRING "magic" -#define FIT_SEQUENCE_STRING "sequence" -#define FIT_PROCESS_STRING "process" - -/** Fault Insertion Codes */ -#define FIT_NONE 0 -#define FIT_MAGIC 1 -#define FIT_SEQ 2 -#define FIT_PROCESS 3 -#define WAIT_DELAY (3) -#define PASS (0) -#define FAIL (1) - -/* location of file for registering clients */ -#define RMON_API_REG_DIR ((const char *)"/etc/rmonapi.d/register.txt") -/* location of file for deregistering clients */ -#define RMON_API_DEREG_DIR ((const char *)"/etc/rmonapi.d/deregister.txt") -/* location of file for the current registered clients */ -#define RMON_API_ACTIVE_DIR ((const char *)"/etc/rmonapi.d/active.txt") diff --git a/mtce/src/rmon/rmonApi/rmon_nodeMacro.h b/mtce/src/rmon/rmonApi/rmon_nodeMacro.h deleted file mode 100644 index 421c717a..00000000 --- a/mtce/src/rmon/rmonApi/rmon_nodeMacro.h +++ /dev/null @@ -1,76 +0,0 @@ -/* -* Copyright (c) 2013-2014 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* -*/ - - -#define CREATE_REUSABLE_INET_UDP_TX_SOCKET(ip, port, s, a, p, l, n, rc) \ -{ \ - int on = 1 ; \ - s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); \ - if ( 0 >= s ) \ - { \ - printf ("failed to create '%s' socket (%d:%s)\n", n, errno, strerror(errno)); \ - rc = FAIL_SOCKET_CREATE ; \ - } \ - else if ( setsockopt ( s , SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) \ - { \ - printf ("failed to make '%s' socket re-useable (%d:%s)\n", n, errno, strerror(errno)); \ - close(s); \ - s = 0 ; \ - rc = FAIL_SOCKET_OPTION ; \ - } \ - else \ - { \ - memset(&a, 0, sizeof(struct sockaddr_in)); \ - l = sizeof(a); \ - p = port ; \ - a.sin_family = AF_INET ; \ - a.sin_addr.s_addr = inet_addr(ip); \ - a.sin_port = htons(p) ; \ - } \ -} - -#define CREATE_NONBLOCK_INET_UDP_RX_SOCKET(ip, port, s, a, p, l, n, rc) \ -{ \ - int on = 1 ; \ - s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); \ - if ( 0 >= s ) \ - { \ - printf ("failed to create '%s' socket (%d:%s)\n", n, errno, strerror(errno)); \ - rc = FAIL_SOCKET_CREATE ; \ - } \ - else if ( setsockopt ( s , SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) \ - { \ - printf ("failed to make '%s' socket re-useable (%d:%s)\n", n, errno, strerror(errno)); \ - close(s); \ - s = 0 ; \ - rc = FAIL_SOCKET_OPTION ; \ - } \ - else if ( 0 > ioctl(s, FIONBIO, (char *)&on)) \ - { \ - printf ("failed to set '%s' socket non-blocking (%d:%s)\n", n, errno, strerror(errno)); \ - close(s); \ - s = 0 ; \ - rc = FAIL_SOCKET_NOBLOCK ; \ - } \ - else \ - { \ - memset(&a, 0, sizeof(struct sockaddr_in)); \ - l = sizeof(a); \ - p = port ; \ - a.sin_family = AF_INET ; \ - a.sin_addr.s_addr = inet_addr(ip); \ - a.sin_port = htons(p) ; \ - if ( bind ( s, (const struct sockaddr *)&a, sizeof(struct sockaddr_in)) == -1 ) \ - { \ - printf ( "failed to bind '%s' socket with port %d\n", n, p ); \ - close (s); \ - s = 0 ; \ - rc = -errno; \ - } \ - printf ("Listening on '%s' socket %s port %d\n", n, inet_ntoa(a.sin_addr), p); \ - } \ -} diff --git a/mtce/src/rmon/rmonApi/tests/Makefile b/mtce/src/rmon/rmonApi/tests/Makefile deleted file mode 100644 index fdfea4b4..00000000 --- a/mtce/src/rmon/rmonApi/tests/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -# -# Copyright (c) 2014-2018 Wind River Systems, Inc. -# -# SPDX-License-Identifier: Apache-2.0 -# - -SRCS = rmon_api_client_test.cpp -OBJS = $(SRCS:.cpp=.o) -LDLIBS = -lstdc++ -lrmonapi -INCLUDES = -I../ -I. -CCFLAGS = -g -O2 -Wall -Wextra -Werror - -all: build - -.cpp.o: - $(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@ - -build: $(OBJS) - $(CXX) $(CCFLAGS) $(OBJS) -L../ -L/usr/lib64/ $(LDLIBS) -o rmond_api_test - -clean: - @rm -v -f $(OBJ) rmond_api_test *.o *.a diff --git a/mtce/src/rmon/rmonApi/tests/rmon_api_client_test.cpp b/mtce/src/rmon/rmonApi/tests/rmon_api_client_test.cpp deleted file mode 100644 index fb7400e0..00000000 --- a/mtce/src/rmon/rmonApi/tests/rmon_api_client_test.cpp +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright (c) 2014-2015 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - - /** - * @file - * Wind River CGTS Platform Resource Monitor API Test Client - */ -/* - *This simulates a test client process to test out the rmon client notification - *api. To run: ./rmond_api_test - *If left blank it runs with the default port: 2302 and default process name. When testing - *with more than one client test process, these values must be entered. For help: - *./rmond_api_test --help - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; - -#include "../rmon_nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ -#include "rmon_api_client_test.h" - -extern "C" -{ - #include "../rmon_api.h" -} -#define MAX_HOST_NAME_SIZE (100) -#define FAIL_SOCKET_INIT -1 -#define FAIL_SOCKET_CREATE -2 -#define PASS 0 -#define FAIL 1 -#define LOOPBACK_IP "127.0.0.1" -#define RX_PORT 2302 -static char my_hostname [MAX_HOST_NAME_SIZE+1]; - - -/** - * Messaging Socket Control Struct - The allocated struct - * @see rmon_api_client_test.h for rmon_socket_type struct format. - */ -static rmon_socket_type rmon_sock ; -static rmon_socket_type * sock_ptr ; - -/** Client Config mask */ -#define CONFIG_CLIENT_MASK (CONFIG_AGENT_PORT |\ - CONFIG_CLIENT_API_PORT |\ - CONFIG_CLIENT_PORT) - - - - -/****************************/ -/* Initialization Utilities */ -/****************************/ - -/* Initialize the unicast api response message */ -/* One time thing ; tx same message all the time. */ -int rmon_message_init ( void ) -{ - /* Build the transmit api response message */ - memset ( &sock_ptr->tx_message, 0, sizeof (rmon_message_type)); - memcpy ( &sock_ptr->tx_message.m[RMON_HEADER_SIZE], my_hostname, strlen(my_hostname)); - return (PASS); -} - -int rmon_socket_init ( int port, const char * process_name ) -{ - - int on = 1 ; - int rc = PASS ; - - - /***********************************************************/ - /* Setup the RMON API Message Receive Socket */ - /***********************************************************/ - - CREATE_NONBLOCK_INET_UDP_RX_SOCKET ( LOOPBACK_IP, - port, - rmon_sock.rmon_api_sock, - rmon_sock.rmon_api_addr, - rmon_sock.rmon_api_port, - rmon_sock.rmon_api_len, - "rmon api socket receive", - rc ); - if ( rc ) return (rc) ; - - /* Open the active monitoring socket */ - rmon_sock.rmon_socket = resource_monitor_initialize ( process_name, port, ALL_USAGE ); - printf("Resource Monitor API Socket %d\n", rmon_sock.rmon_socket ); - if ( 0 > rmon_sock.rmon_socket ) - rmon_sock.rmon_socket = 0 ; - - /* Make the resource monitor api socket non-blocking */ - rc = ioctl(rmon_sock.rmon_socket, FIONBIO, (char *)&on); - if ( 0 > rc ) - { - printf("Failed to set rmon socket non-blocking (%d:%m)\n", errno ); - return (FAIL_SOCKET_NOBLOCK); - } - - - return (PASS); -} - -int daemon_init (int port, const char * process_name ) -{ - int rc = PASS ; - - /* Initialize socket construct and pointer to it */ - memset ( &rmon_sock, 0, sizeof(rmon_sock)); - sock_ptr = &rmon_sock ; - - /* Setup the resmon api rx messaging sockets */ - if ( (rc = rmon_socket_init (port, process_name)) != PASS ) - { - printf ("socket initialization failed (rc:%d)\n", rc ); - rc = FAIL_SOCKET_INIT; - } - return (rc); -} - -#define RMON_MAX_LEN (100) -int client_service_inbox ( const char * process_name) -{ - #define MAX_T 100 - int bytes = 0 ; - char buf[RMON_MAX_LEN] ; - socklen_t len = sizeof(struct sockaddr_in) ; - char str[RMON_MAX_LEN]; - int sequence = 0; - int rc = FAIL; - - do - { - memset ( buf,0,RMON_MAX_LEN); - memset ( str,0,RMON_MAX_LEN); - - bytes = recvfrom( rmon_sock.rmon_socket, buf, RMON_MAX_LEN, 0, (struct sockaddr *)&rmon_sock.client_sockAddr, &len); - if ( bytes > 0 ) - { - - sscanf ( buf, "%s %d", str, &sequence ); - if ( str[0] != '\0' ) - { - printf("%s \n",str); - - if (strstr(str, "cleared_alarms_for_resource:") != NULL) { - /* Sleep for 10 secs */ - sleep (10); - rc = resource_monitor_deregister( process_name, rmon_sock.rmon_socket ); - if ( rc == PASS ) { - printf("deregistered test client\n"); - break; - } - } - } - else - { - printf("Null string !\n"); - } - - } - else if (( 0 > bytes ) && ( errno != EINTR ) && ( errno != EAGAIN )) - { - printf("problem with test client recv \n"); - } - } while ( bytes > 0 ) ; - - return rc; -} - -#define MAX_LEN 300 -int main ( int argc, char *argv[] ) -{ - int rc = 0 ; - int port = RX_PORT; - const char * process_name = PROCESS_NAME; - - if ((argc > 1) && (strcmp(argv[1],"--help") == 0)) { - printf("usage: ./rmond_api_test \n"); - return 0; - } - else if (argc > 1) { - port = atoi(argv[1]); - } - if (argc > 2) { - process_name = argv[2]; - } - - - daemon_init(port, process_name); - rc = rmon_message_init(); - if (rc == PASS) { - - printf("socket initialized \n"); - } - - rmon_sock.rmon_socket = resource_monitor_get_sel_obj (); - std::list socks; - socks.clear(); - socks.push_front ( rmon_sock.rmon_socket ); - socks.sort(); - - /* Run test loop forever or until stop condition */ - for ( ; ; ) - { - - /* Initialize the timeval struct */ - rmon_sock.waitd.tv_sec = 20; - rmon_sock.waitd.tv_usec = 0; - - /* Initialize the master fd_set */ - FD_ZERO(&rmon_sock.readfds); - FD_SET(rmon_sock.rmon_socket, &rmon_sock.readfds); - - rc = select( socks.back()+1, - &rmon_sock.readfds, NULL, NULL, - &rmon_sock.waitd); - - /* If the select time out expired then */ - if (( rc < 0 ) || ( rc == 0 )) - { - /* Check to see if the select call failed. */ - /* ... but filter Interrupt signal */ - if (( rc < 0 ) && ( errno != EINTR )) - { - printf ("Socket Select Failed (rc:%d) %s \n", errno, strerror(errno)); - } - } - - - if ( FD_ISSET(rmon_sock.rmon_socket, &rmon_sock.readfds)) - { - printf("Resource Monitor API Select Fired got message from rmon:\n"); - rc = client_service_inbox(process_name); - - if (rc == PASS) { - break; - } - } - } - return 0; -} - diff --git a/mtce/src/rmon/rmonApi/tests/rmon_api_client_test.h b/mtce/src/rmon/rmonApi/tests/rmon_api_client_test.h deleted file mode 100644 index 4865a76c..00000000 --- a/mtce/src/rmon/rmonApi/tests/rmon_api_client_test.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2014 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - - /** - * @file - * Wind River CGTS Platform Resource Monitor API Test Client Header - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/** Maximum service fail count before action */ -#define MAX_FAIL_COUNT (1) - -#define RMON_HEADER_SIZE (15) -#define RMON_MAX_MSG (50) -#define FAIL_SOCKET_NOBLOCK (36) -#define FAIL_SOCKET_OPTION (38) - -/* default process name if none is specified */ -#define PROCESS_NAME ((const char *)"testClient") - - - -typedef struct -{ - /** Message buffer */ - char m [RMON_MAX_MSG]; - - /** Sequence number */ - unsigned int s ; - - /* Fast Lookup Clue Info */ - unsigned int c ; - - /* Status Flags */ - /* bit 0: Process Monitor Status: 1=running - * bit 1: tbd */ - unsigned int f ; - - /* reserved for future use */ - unsigned int r ; - -} rmon_message_type ; - -/** rmon notification api messaging socket control structure */ -typedef struct -{ - - /** Client (compute) socket address attributes - * - * Used by both controller and compute nodes - * when messaging to and from the compute - * node maintenance. - */ - struct sockaddr_in client_addr ; - socklen_t client_addr_len ; - - - /** Unix domain socket used to transmit on-node event messages - * to from other local services such as rmon - heartbeat service */ - int send_event_socket ; - struct sockaddr_un agent_domain ; - socklen_t agent_domain_len ; - - /** rmon api Socket using UDP Inet over 'lo' interface */ - int rmon_api_sock ; /**< receive rmon pulses socket */ - int rmon_api_port ; /**< the port */ - struct sockaddr_in rmon_api_addr ; /**< attributes */ - socklen_t rmon_api_len ; /**< length */ - - int rmon_socket ; /**< Active monitor socket */ - /** The addr and port are stored in the shared librmonapi.so library */ - - struct sockaddr_in client_sockAddr ; /**< Client socket attributes */ - socklen_t agentLen ; /**< Agent socket attr struct len */ - socklen_t clientLen ; /**< Client socket attr struct len */ - int tx_socket ; /**< general transmit socket ID */ - int rx_socket ; /**< general receive socket ID */ - rmon_message_type tx_message ; /**< transmit message */ - rmon_message_type rx_message ; /**< receive message */ - int rmon_client_port ; - int fail_count ; /**< Socket retry thresholding */ - - /* For select dispatch */ - struct timeval waitd ; - fd_set readfds; - msgSock_type mtclogd ; - -} rmon_socket_type ; - - diff --git a/mtce/src/rmon/rmonApi/tests/rmond_api_test b/mtce/src/rmon/rmonApi/tests/rmond_api_test deleted file mode 100755 index 2df1080a..00000000 Binary files a/mtce/src/rmon/rmonApi/tests/rmond_api_test and /dev/null differ diff --git a/mtce/src/rmon/rmonFM.cpp b/mtce/src/rmon/rmonFM.cpp deleted file mode 100644 index a40b63b6..00000000 --- a/mtce/src/rmon/rmonFM.cpp +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2015-2016 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - - /** - * @file - * Wind River CGCS Platform Resource Monitor Fault Management - * retry mechanism for fault set/clear failures due to communication - * error. - */ - -#include -#include "rmon.h" - - -typedef enum -{ - CLR_REQUEST = 0, - SET_REQUEST = 1 - -} fm_req_type_t; - - -typedef struct fm_req_info -{ - fm_req_type_t req_type; - union - { - AlarmFilter alarm_filter; - SFmAlarmDataT alarm; - - } _data; - struct fm_req_info *next; - -} fm_req_info_t; - - -typedef struct fm_req_queue -{ - fm_req_info_t *head; - fm_req_info_t *tail; - pthread_mutex_t mutex; - -} fm_req_queue_t; - - -static fm_req_queue_t fm_req_q; -static void rmon_fm_enq(fm_req_type_t req, void *data); -static void rmon_fm_deq(void); - - -/****************************/ -/* Initialization Utilities */ -/****************************/ -void rmon_fm_init (void) -{ - fm_req_q.head = NULL; - fm_req_q.tail = NULL; - pthread_mutex_init(&fm_req_q.mutex, NULL); -} - -void rmon_fm_fini (void) -{ - pthread_mutex_lock(&fm_req_q.mutex); - fm_req_info_t *i = fm_req_q.head; - while (i != NULL) { - fm_req_info_t *n = i->next; - delete i; - i = n; - } - fm_req_q.head = fm_req_q.tail = NULL; - pthread_mutex_unlock(&fm_req_q.mutex); - pthread_mutex_destroy(&fm_req_q.mutex); -} - -/***************************/ -/* handler function */ -/***************************/ -void rmon_fm_handler (void) -{ - while (fm_req_q.head != NULL) { - EFmErrorT err; - fm_req_info_t *fm_req = fm_req_q.head; - if (fm_req->req_type == CLR_REQUEST) { - ilog("clearing alarm %s", fm_req->_data.alarm_filter.entity_instance_id); - err = fm_clear_fault (&fm_req->_data.alarm_filter); - } - else { - ilog("setting alarm %s", fm_req->_data.alarm.entity_instance_id); - err = fm_set_fault (&fm_req->_data.alarm, NULL); - } - - if (err == FM_ERR_NOCONNECT) { - ilog("FM_ERR_NOCONNECT"); - return; - } - - rmon_fm_deq(); - } -} - -/*****************************/ -/* request functions */ -/*****************************/ -EFmErrorT rmon_fm_clear (AlarmFilter *alarmFilter) -{ - EFmErrorT err = FM_ERR_NOCONNECT; - if (fm_req_q.head == NULL) - err = fm_clear_fault (alarmFilter); - if (err == FM_ERR_NOCONNECT) { - ilog("retry clearing alarm %s", alarmFilter->entity_instance_id); - rmon_fm_enq (CLR_REQUEST, (void *) alarmFilter); - return FM_ERR_OK; - } - - return err; -} - -EFmErrorT rmon_fm_set (const SFmAlarmDataT *alarm, fm_uuid_t *fm_uuid) -{ - EFmErrorT err = FM_ERR_NOCONNECT; - if (fm_req_q.head == NULL) - err = fm_set_fault (alarm, fm_uuid); - if (err == FM_ERR_NOCONNECT) { - ilog("retry setting alarm %s", alarm->entity_instance_id); - rmon_fm_enq (SET_REQUEST, (void *) alarm); - return FM_ERR_OK; - } - - return err; -} - -EFmErrorT rmon_fm_get (AlarmFilter *alarmFilter, SFmAlarmDataT **alarm, unsigned int *num_alarm) -{ - unsigned int n = 0; - EFmErrorT err = FM_ERR_NOT_ENOUGH_SPACE; - - while (err == FM_ERR_NOT_ENOUGH_SPACE) { - /* get additional 3 more alarms at a time, as max. number of port alarms - is 6 (2 ports per interface: OAM, INFRA, MGMT */ - n += 3; - SFmAlarmDataT *list = (SFmAlarmDataT *) malloc(sizeof(SFmAlarmDataT) * n); - if (list != NULL) { - err = fm_get_faults (&alarmFilter->entity_instance_id, list, &n); - if (err == FM_ERR_OK) { - *alarm = list; - *num_alarm = n; - return FM_ERR_OK; - } - free(list); - } - else { - err = FM_ERR_NOMEM; - } - } - - *alarm = NULL; - *num_alarm = 0; - - return err; -} - - -/****************************/ -/* queue functions */ -/****************************/ -void rmon_fm_deq (void) -{ - pthread_mutex_lock (&fm_req_q.mutex); - fm_req_info_t *fm_req = fm_req_q.head; - if (fm_req->next == NULL) { - fm_req_q.head = fm_req_q.tail = NULL; - } - else { - fm_req_q.head = fm_req->next; - } - pthread_mutex_unlock (&fm_req_q.mutex); - delete fm_req; -} - -void rmon_fm_enq (fm_req_type_t req, void *data) -{ - fm_req_info_t *fm_req = new fm_req_info_t; - fm_req->next = NULL; - fm_req->req_type = req; - if (req == CLR_REQUEST) - fm_req->_data.alarm_filter = *((AlarmFilter *)data); - else - fm_req->_data.alarm = *((SFmAlarmDataT *)data); - pthread_mutex_lock (&fm_req_q.mutex); - if (fm_req_q.tail == NULL) { - fm_req_q.head = fm_req_q.tail = fm_req; - } - else { - fm_req_q.tail->next = fm_req; - fm_req_q.tail = fm_req; - } - pthread_mutex_unlock (&fm_req_q.mutex); -} - diff --git a/mtce/src/rmon/rmonHdlr.cpp b/mtce/src/rmon/rmonHdlr.cpp deleted file mode 100644 index 42669567..00000000 --- a/mtce/src/rmon/rmonHdlr.cpp +++ /dev/null @@ -1,4918 +0,0 @@ - /* - * Copyright (c) 2013-2017 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - -/** - * @file - * Wind River CGCS Platform Resource Monitor Handler - */ -#include "rmon.h" /* rmon header file */ -#include "rmonHttp.h" /* for rmon HTTP libEvent utilties */ -#include "rmonApi.h" /* vswitch calls */ -#include -#include -#include -#include -#include -#include -#include /* for storing dynamic resource names */ -#include -#include -#include -#include -#include -#include -#include -#include /* for ... RTMGRP_LINK */ -#include "nlEvent.h" /* for ... open_netlink_socket */ -#include "nodeEvent.h" /* for inotify */ -#include /* for ... json-c json string parsing */ -#include "jsonUtil.h" -#include "tokenUtil.h" /* for ... tokenUtil_new_token */ - -/* Preserve a local copy of a pointer to the control struct to - * avoid having to publish a get utility prototype into rmon.h */ -static rmon_ctrl_type * _rmon_ctrl_ptr = NULL ; -static interface_resource_config_type interface_resource_config[MAX_RESOURCES] ; -static resource_config_type resource_config[MAX_RESOURCES] ; -static thinmeta_resource_config_type thinmeta_resource_config[MAX_RESOURCES] ; -static registered_clients registered_clt[MAX_CLIENTS]; - -static libEvent_type ceilometerEvent; // for ceilometer REST API request -static libEvent tokenEvent; // for token request - -/* Used to set alarms through the FM API */ -static SFmAlarmDataT alarmData; -static struct mtc_timer rmonTimer_event ; -static struct mtc_timer rmonTimer_pm ; -static struct mtc_timer rmonTimer_ntp ; - -static struct mtc_timer rtimer[MAX_RESOURCES] ; -static struct mtc_timer thinmetatimer[MAX_RESOURCES] ; - -static ntpStage_enum ntp_stage ; /* The stage the ntp is in within the resource handler fsm */ -static int ntp_status ; /* status returned by the ntpq command */ -static int ntp_child_pid ; - -/* for dynamic resources */ -bool modifyingResources = false; -vector criticality_resource; -vector dynamic_resource; -vector types; -vector devices; -vector fs_index; -vector fs_state; - -/** List of config files */ -std::list config_files ; -std::list::iterator string_iter_ptr ; -std::list interface_config_files ; - -/* percent or abs value for fs resources */ -int fs_percent = 0; -int swact_count = 0; - -/* for cpu usage */ -time_t t1, t2; -int num_cpus = 0; -int num_base_cpus = 0; -int included_cpu[MAX_BASE_CPU]; - -static string hostUUID = ""; - -/* Initial cpu time */ -vector cpu_time_initial; -/* Later cpu time */ -vector cpu_time_later; - -void save_fs_resource ( string resource_name, string criticality, - int enabled, int percent, int abs_values[3], - int alarm_type, string type, string device, int mounted ); -void calculate_fs_usage( resource_config_type * ptr ); -void _space_to_underscore (string & str ); - -struct thread_data -{ - pid_t tid; - pid_t pid; - unsigned long long nr_switches_count; - bool thread_running; - double resource_usage; - resource_config_type * resource; -}; - -/* info passed to pthreads */ -struct thread_data t_data; -pthread_t thread; -pthread_mutex_t lock; - -/* strict memory accounting off = 0 or on = 1 */ -int IS_STRICT = 0; - -void mem_log_ctrl ( rmon_ctrl_type * ptr ) -{ -#define MAX_LEN 500 - char str[MAX_LEN] ; - snprintf (&str[0], MAX_LEN, "%s %s %s\n", - &ptr->my_hostname[0], - ptr->my_address.c_str(), - ptr->my_macaddr.c_str() ); - mem_log(str); -} - -void mem_log_resource ( resource_config_type * ptr ) -{ -#define MAX_LEN 500 - char str[MAX_LEN] ; - snprintf (&str[0], MAX_LEN, "Resource:%-15s Sev:%-8s Tries:%u Debounce:%d\n", - ptr->resource, ptr->severity, ptr->count, ptr->debounce); - mem_log(str); -} - -void mem_log_interface_resource ( interface_resource_config_type * ptr ) -{ -#define MAX_LEN 500 - char str[MAX_LEN] ; - snprintf (&str[0], MAX_LEN, "Resource:%-15s Sev:%-8s Debounce:%d\n", - ptr->resource, ptr->severity, ptr->debounce); - mem_log(str); -} - -int _config_dir_load (void); -int _config_files_load (void); - - -const char rmonStages_str [RMON_STAGE__STAGES][32] = -{ - "Handler-Init", - "Handler-Start", - "Manage-Restart", - "Monitor-Wait", - "Monitor-Resource", - "Restart-Wait", - "Ignore-Resource", - "Handler-Finish", - "Failed-Resource", - "Failed-Resource-clr", -} ; - -const char ntpStages_str [NTP_STAGE__STAGES][32] = -{ - "Begin", - "Execute-NTPQ", - "Execute-NTPQ-Wait", -} ; - -registered_clients * get_registered_clients_ptr ( int index ) -{ - if ( index <= _rmon_ctrl_ptr->clients ) - return ( ®istered_clt[index] ); - return ( NULL ); -} - -rmon_ctrl_type * get_rmon_ctrl_ptr () -{ - return _rmon_ctrl_ptr; -} - -interface_resource_config_type * get_interface_ptr ( int index ) -{ - if ( index <= _rmon_ctrl_ptr->interface_resources ) - return ( &interface_resource_config[index] ); - return ( NULL ); -} - -resource_config_type * get_resource_ptr ( int index ) -{ - if ( index >= 0 && index <= _rmon_ctrl_ptr->resources ) - return ( &resource_config[index] ); - return NULL; -} - -/***************************************************************************** - * - * Name : get_resource_index - * - * Purpose : Get the resource's index based on the name - * - *****************************************************************************/ -int get_resource_index ( const char *resource_name, int *index ) -{ - for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ ) - { - if ( strcmp(resource_config[i].resource, resource_name) == 0) - { - *index = i; - return (PASS); - } - } - return (FAIL); -} - -/***************************************************************************** - * - * Name : rmon_hdlr_fini - * - * Purpose : Clean up the resource monitor module - * - *****************************************************************************/ -void rmon_hdlr_fini ( rmon_ctrl_type * ctrl_ptr ) -{ - for ( int i = 0 ; i < ctrl_ptr->resources ; i++ ) - { - // mem_log ('\n'); - mem_log_resource ( &resource_config[i] ); - } - pthread_mutex_destroy(&lock); - /* Turn off inotify */ - //set_inotify_close ( ctrl_ptr->fd, ctrl_ptr->wd ); -} - -/***************************************************************************** - * - * Name : resourceStageChange - * - * Purpose : Put a resource in the requested stage for use by the resource handler - * - *****************************************************************************/ -int resourceStageChange ( resource_config_type * ptr , rmonStage_enum newStage ) -{ - if (( newStage < RMON_STAGE__STAGES ) && - ( ptr->stage < RMON_STAGE__STAGES )) - { - clog ("%s %s -> %s (%d->%d)\n", - ptr->resource, - rmonStages_str[ptr->stage], - rmonStages_str[newStage], - ptr->stage, newStage); - ptr->stage = newStage ; - return (PASS); - } - else - { - slog ("%s Invalid Stage (now:%d new:%d)\n", - ptr->resource, ptr->stage, newStage ); - ptr->stage = RMON_STAGE__FINISH ; - return (FAIL); - } -} - -/***************************************************************************** - * - * Name : ntpStageChange - * - * Purpose : Stage change handler for NTP resource - * - *****************************************************************************/ -int ntpStageChange ( ntpStage_enum newStage ) -{ - if ((newStage < NTP_STAGE__STAGES ) && - ( ntp_stage < NTP_STAGE__STAGES )) - { - clog ("NTP %s -> %s (%d->%d)\n", - ntpStages_str[ntp_stage], - ntpStages_str[newStage], - ntp_stage, newStage); - ntp_stage = newStage ; - return (PASS); - } - else - { - slog ("NTP Invalid Stage (now:%d new:%d)\n", ntp_stage, newStage ); - ntp_stage = NTP_STAGE__BEGIN ; - return (FAIL); - } -} - -/***************************************************************************** - * - * Name : _config_files_load - * - * Purpose : Load the content of each config file into resource_config[x] - * - *****************************************************************************/ -int _config_files_load (void) -{ - int i = 0 ; - /* Run Maintenance on Inventory */ - for ( string_iter_ptr = config_files.begin () ; - string_iter_ptr != config_files.end () ; - string_iter_ptr++ ) - { - if ( i >= MAX_RESOURCES ) - { - wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES ); - break ; - } - /* Read the resource config file */ - resource_config[i].mask = 0 ; - if (ini_parse( string_iter_ptr->data(), rmon_resource_config, - &resource_config[i]) < 0) - { - ilog("Read Failure : %s\n", string_iter_ptr->data() ); - } - - else - { - dlog ("Config File : %s\n", string_iter_ptr->c_str()); - - /* Init the timer for this resource */ - mtcTimer_reset ( rtimer[i] ) ; - rtimer[i].service = resource_config[i].resource ; - - resource_config[i].i = i ; - /* allow to clear an existing alarm if the first reading is good - after reboot - */ - resource_config[i].failed = false ; - resource_config[i].count = 0 ; - resource_config[i].resource_value = 0 ; - resource_config[i].resource_prev = 0 ; - resource_config[i].stage = RMON_STAGE__INIT ; - resource_config[i].sev = SEVERITY_CLEARED ; - resource_config[i].alarm_type = STANDARD_ALARM; - resource_config[i].failed_send = 0; - resource_config[i].alarm_raised = false; - - /* add the alarm ids for the FM API per resource monitored */ - if (strcmp(resource_config[i].resource, CPU_RESOURCE_NAME) == 0) { - /* platform cpu utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, CPU_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__CPU_USAGE ; - } - else if (strcmp(resource_config[i].resource, V_CPU_RESOURCE_NAME) == 0) { - /* vswitch cpu utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, V_CPU_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__CPU_USAGE ; - } - else if (strcmp(resource_config[i].resource, MEMORY_RESOURCE_NAME) == 0) { - /* platform memory utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, MEMORY_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__MEMORY_USAGE ; - } - else if (strcmp(resource_config[i].resource, V_MEMORY_RESOURCE_NAME) == 0) { - /* vswitch memory utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, V_MEMORY_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__MEMORY_USAGE ; - } - else if (strcmp(resource_config[i].resource, FS_RESOURCE_NAME) == 0) { - /* platform disk utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, FS_ALARM_ID); - resource_config[i].mounted = MOUNTED; - resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ; - } - else if (strcmp(resource_config[i].resource, INSTANCE_RESOURCE_NAME) == 0) { - /* platform disk utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, INSTANCE_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ; - } - else if (strcmp(resource_config[i].resource, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) { - /* platform virtual thin pool utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, V_CINDER_THINPOOL_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ; - } - else if (strcmp(resource_config[i].resource, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) { - /* platform virtual thin pool utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, V_NOVA_THINPOOL_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ; - } - else if (strcmp(resource_config[i].resource, V_PORT_RESOURCE_NAME) == 0) { - /* vswitch port utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, - V_PORT_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__PORT ; - } - else if (!strcmp(resource_config[i].resource, V_INTERFACE_RESOURCE_NAME) || - !strcmp(resource_config[i].resource, V_LACP_INTERFACE_RESOURCE_NAME)) { - /* vswitch interface(lacp or otherwise) utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, - V_INTERFACE_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__INTERFACE ; - } - else if (!strcmp(resource_config[i].resource, V_OVSDB_RESOURCE_NAME)) { - /* vswitch OVSDB manager utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, - V_OVSDB_MANAGER_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__DATABASE_USAGE ; - } - else if (!strcmp(resource_config[i].resource, V_OPENFLOW_RESOURCE_NAME)) { - /* vswitch Openflow utilization */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, - V_OPENFLOW_CONTROLLER_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__NETWORK_USAGE ; - } - else if (strcmp(resource_config[i].resource, REMOTE_LOGGING_RESOURCE_NAME) == 0) { - /* remote logging connectivity */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, - REMOTE_LOGGING_CONTROLLER_CONNECTIVITY_ALARM_ID); - resource_config[i].res_type = RESOURCE_TYPE__CONNECTIVITY ; - } - else - { - resource_config[i].res_type = RESOURCE_TYPE__UNKNOWN ; - } - - ilog ("Monitoring %2d: %s (%s)\n", - i, - resource_config[i].resource, - resource_config[i].severity); - mem_log_resource ( &resource_config[i] ); - i++; - - } - - } - - _rmon_ctrl_ptr->resources = i ; - ilog ("Monitoring %d Resources\n", _rmon_ctrl_ptr->resources ); - return (PASS); -} - -/***************************************************************************** - * - * Name : _inter_config_load - * - * Purpose : Load the content of each config file into interface_resource_config[x] - * - *****************************************************************************/ -int _inter_config_load (void) -{ - int i = 0 ; - - for ( string_iter_ptr = interface_config_files.begin () ; - string_iter_ptr != interface_config_files.end () ; - string_iter_ptr++ ) - { - if ( i >= MAX_RESOURCES ) - { - wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES ); - break ; - } - - /* Read the interface resource config file */ - resource_config[i].mask = 0 ; - if (ini_parse( string_iter_ptr->data(), rmon_interface_config, - &interface_resource_config[i]) < 0) - { - ilog("Read Failure : %s\n", string_iter_ptr->data() ); - } - - else - { - dlog ("Config File : %s\n", string_iter_ptr->c_str()); - ilog ("Monitoring %2d: %s (%s)\n", i, interface_resource_config[i].resource , - interface_resource_config[i].severity ); - - interface_resource_config[i].i = i ; - interface_resource_config[i].failed = false ; - interface_resource_config[i].stage = RMON_STAGE__INIT ; - interface_resource_config[i].sev = SEVERITY_CLEARED ; - interface_resource_config[i].failed_send = 0; - interface_resource_config[i].alarm_raised = false; - - /* add the alarm ids for the FM API per resource monitored */ - if (strcmp(interface_resource_config[i].resource, OAM_INTERFACE_NAME) == 0) { - /* add the alarm id for the FM API per resource monitored */ - snprintf(interface_resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, OAM_ALARM_ID); - snprintf(interface_resource_config[i].alarm_id_port, FM_MAX_BUFFER_LENGTH, OAM_PORT_ALARM_ID); - } - else if (strcmp(interface_resource_config[i].resource, MGMT_INTERFACE_NAME) == 0) { - /* add the alarm id for the FM API per resource monitored */ - snprintf(interface_resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, MGMT_ALARM_ID); - snprintf(interface_resource_config[i].alarm_id_port, FM_MAX_BUFFER_LENGTH, MGMT_PORT_ALARM_ID); - } - else if (strcmp(interface_resource_config[i].resource, INFRA_INTERFACE_NAME) == 0) { - /* add the alarm id for the FM API per resource monitored */ - snprintf(interface_resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, INFRA_ALARM_ID); - snprintf(interface_resource_config[i].alarm_id_port, FM_MAX_BUFFER_LENGTH, INFRA_PORT_ALARM_ID); - } - - mem_log_interface_resource ( &interface_resource_config[i] ); - i++; - - } - } - - _rmon_ctrl_ptr->interface_resources = i ; - ilog ("Monitoring %d Interface Resources\n", _rmon_ctrl_ptr->interface_resources ); - return (PASS); -} - -/***************************************************************************** - * - * Name : _thinmeta_config_load - * - * Purpose : Load the content of each config file into thinmeta_config[x] - * - *****************************************************************************/ -int _thinmeta_config_load (void) -{ - int i = 0 ; - - /* Set hard-coded defaults for all structures */ - for ( int j = 0; j < MAX_RESOURCES; j++) - { - thinmeta_resource_config_type * res; - res = &thinmeta_resource_config[i]; - res->critical_threshold = THINMETA_DEFAULT_CRITICAL_THRESHOLD; - res->alarm_on = THINMETA_DEFAULT_ALARM_ON; - res->autoextend_on = THINMETA_DEFAULT_AUTOEXTEND_ON; - res->autoextend_by = THINMETA_DEFAULT_AUTOEXTEND_BY; - res->autoextend_percent = THINMETA_DEFAULT_AUTOEXTEND_PERCENT; - res->audit_period = THINMETA_DEFAULT_AUDIT_PERIOD; - } - - /* Load resources */ - for ( string_iter_ptr = config_files.begin () ; - string_iter_ptr != config_files.end () ; - string_iter_ptr++ ) - { - if ( i >= MAX_RESOURCES ) - { - wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES ); - break ; - } - /* Read the resource config file */ - if (ini_parse( string_iter_ptr->data(), rmon_thinmeta_config, - &thinmeta_resource_config[i]) < 0) - { - ilog("Read Failure : %s\n", string_iter_ptr->data() ); - } - else - { - thinmeta_resource_config_type * res; - res = &thinmeta_resource_config[i]; - if (!res->section_exists) - { - dlog3 ("Config File : %s does not have a [%s] section\n", - string_iter_ptr->c_str(), THINMETA_CONFIG_SECTION); - continue; - } - dlog ("Config File : %s\n", string_iter_ptr->c_str()); - - /* validate loaded configuration */ - if (!res->vg_name || !res->thinpool_name) - { - elog("Invalid VG and/or thinpool names for thinpool metadata " - "in config file: %s, disabling monitoring", string_iter_ptr->c_str()); - res->critical_threshold = RESOURCE_DISABLE; - res->vg_name = THINMETA_INVALID_NAME; - res->thinpool_name = THINMETA_INVALID_NAME; - } - else if (res->critical_threshold > 99) - { - elog("Metadata monitoring error in config file: %s. Option critical_threshold > 99%%, " - "value in config file: %i, disabling monitoring", - string_iter_ptr->c_str(), res->critical_threshold) - res->critical_threshold = 0; - } - else if (res->alarm_on > 1) - { - elog("Metadata monitoring error in config file: %s. Option alarm_on is NOT boolean, " - "value in config file: %i, disabling monitoring", string_iter_ptr->c_str(), res->alarm_on); - res->critical_threshold = RESOURCE_DISABLE; - } - else if (res->autoextend_on > 1) - { - elog("Metadata monitoring error in config file: %s. Option autoextend_on is NOT boolean, " - "value in config file: %i, disabling monitoring", - string_iter_ptr->c_str(), res->autoextend_on) - res->critical_threshold = RESOURCE_DISABLE; - } - else if (res->autoextend_percent > 1) - { - elog("Metadata monitoring error in config file: %s. Option autoextend_percent is NOT boolean, " - "value in config file: %i, disabling monitoring", - string_iter_ptr->c_str(), res->autoextend_percent) - res->critical_threshold = RESOURCE_DISABLE; - } - else if ((res->autoextend_percent && res->autoextend_by > 100) || - (res->autoextend_on && res->autoextend_by < 1)) - { - elog("Metadata monitoring error in config file: %s. Option autoextend_by not in [1,100] interval, " - "value in config file: %i, disabling monitoring", - string_iter_ptr->c_str(), res->autoextend_by) - res->critical_threshold = RESOURCE_DISABLE; - } - else if ((res->audit_period < 1) || (res->audit_period > 10000)) - { - elog("Metadata monitoring error in config file: %s. Option audit_period not in [1,10000] interval, " - "value in config file: %i, disabling monitoring", - string_iter_ptr->c_str(), res->audit_period) - res->critical_threshold = RESOURCE_DISABLE; - } - - ilog ("%s/%s pool metadata monitored; resource index: %2d\n", res->vg_name , - res->thinpool_name, i ); - i++; - } - - } - - _rmon_ctrl_ptr->thinmeta_resources = i ; - ilog ("Monitoring %d Thinpool Metadata Resources\n", _rmon_ctrl_ptr->thinmeta_resources ); - return (PASS); -} - -/***************************************************************************** - * - * Name : rmon_hdlr_init - * - * Purpose : Init the handler but also support re-init that might occur over a SIGHUP - * - *****************************************************************************/ - -#define RMON_TIMER_TYPE__EVENT "event" -#define RMON_TIMER_TYPE__PM "pm" -#define RMON_TIMER_TYPE__NTP "ntp" -#define RMON_TIMER_TYPE__RES "resource" -#define RMON_TIMER_TYPE__THIN "thinpool" - -int rmon_hdlr_init ( rmon_ctrl_type * ctrl_ptr ) -{ - /* Save the control pointer */ - _rmon_ctrl_ptr = ctrl_ptr ; - - mtcTimer_init ( rmonTimer_event, LOCALHOST, RMON_TIMER_TYPE__EVENT) ; - mtcTimer_init ( rmonTimer_pm, LOCALHOST, RMON_TIMER_TYPE__PM ) ; - - if (is_controller()) - mtcTimer_init ( rmonTimer_ntp,LOCALHOST, RMON_TIMER_TYPE__NTP ) ; - - for ( int i = 0 ; i < MAX_RESOURCES ; i++ ) - mtcTimer_init ( rtimer[i], LOCALHOST, RMON_TIMER_TYPE__RES ); - ctrl_ptr->resources = 0 ; - - for ( int i = 0 ; i < MAX_RESOURCES ; i++ ) - mtcTimer_init ( thinmetatimer[i], LOCALHOST, RMON_TIMER_TYPE__THIN ); - ctrl_ptr->thinmeta_resources = 0 ; - - /* Initialize the Resource Monitor Array */ - memset ( (char*)&resource_config[0], 0, sizeof(resource_config_type)*MAX_RESOURCES); - memset ( (char*)&interface_resource_config[0], 0, sizeof(interface_resource_config_type)*MAX_RESOURCES); - memset ( (char*)&thinmeta_resource_config[0], 0, sizeof(thinmeta_resource_config_type)*MAX_RESOURCES); - memset ( (char*)®istered_clt[0], 0, sizeof(registered_clients)*MAX_CLIENTS); - - /* Read in the list of config files and their contents */ - load_filenames_in_dir ( CONFIG_DIR, config_files ) ; - /* Read in the list of interface config files and their contents */ - load_filenames_in_dir ( INT_CONFIG_DIR, interface_config_files ) ; - - _thinmeta_config_load(); - _config_files_load (); - // _inter_config_load (); - - /* init Thin Metadata Monitoring after config reload - including timers */ - thinmeta_init(thinmeta_resource_config, thinmetatimer, ctrl_ptr->thinmeta_resources); - - /* Log the control setting going into the main loop */ - mem_log_ctrl ( _rmon_ctrl_ptr ); - - /* Initialize instance mount monitoring */ - if (pthread_mutex_init(&lock, NULL) != 0) - { - elog("mutex init failed \n"); - } - - t_data.thread_running = false; - t_data.resource_usage = MOUNTED; - t_data.nr_switches_count = 0; - t_data.pid = getpid(); - - return (PASS) ; -} - -/***************************************************************************** - * - * Name : _set_severity - * - * Purpose : Restores the resource value and the severity of the alarm - * - *****************************************************************************/ -void _set_resource_usage ( string reason_text, resource_config_type * ptr ) -{ - unsigned int found; - string res_val; - size_t last_index; - string temp_val; - char resource_usage[10]; - - /* extract the resource value from the reason text */ - found = reason_text.find_last_of( ' ' ); - temp_val = reason_text.substr(found+1); - last_index = temp_val.find_first_not_of("0123456789"); - res_val = temp_val.substr(0, last_index); - snprintf (resource_usage, sizeof(resource_usage), res_val.c_str()); - sscanf(resource_usage, "%lf", &ptr->resource_value); -} - -/***************************************************************************** - * - * Name : build_entity_instance_id - * - * Purpose : build the alarm's entity_instance_id based on the - * resource type and alarm type. - * - *****************************************************************************/ -void build_entity_instance_id ( resource_config_type *ptr, char *entity_instance_id ) -{ - dlog ("resource name: %s, resource type: %s, alarm type: %d \n", ptr->resource, ptr->type, ptr->alarm_type); - - // Make certain the id is cleared - entity_instance_id[0] = 0; - - if ( ptr->alarm_type == DYNAMIC_ALARM ) - { - if ((ptr->type != NULL) && (strcmp(ptr->type, "lvg") == 0 )) - { - /* This case covers volume groups */ - /* Use host=.volumegroup=type for id*/ - snprintf((char*)entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.volumegroup=%s", _rmon_ctrl_ptr->my_hostname, ptr->resource); - } - else - { - /* Use host=.filesystem=type for id*/ - snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.filesystem=%s", _rmon_ctrl_ptr->my_hostname, ptr->resource); - } - } - else if ( ptr->alarm_type == STATIC_ALARM ) - { - /* Use host=.filesystem=type for id*/ - snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.filesystem=%s", _rmon_ctrl_ptr->my_hostname, ptr->resource); - } - else if ((ptr->alarm_type == STANDARD_ALARM) && (strstr(ptr->resource, V_MEMORY_RESOURCE_NAME) != NULL)) - { - /* AVS memory */ - snprintf(alarmData.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.processor=%d", _rmon_ctrl_ptr->my_hostname, ptr->socket_id); - } - else if (strstr(ptr->resource, V_CINDER_THINPOOL_RESOURCE_NAME) != NULL) - { - /* Cinder thin pool alarm should not be raised against a specific host */ - /* as the volumes are synced between controllers through drbd. */ - /* Instead we use a common entity instance id for both controllers. */ - snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, "host=controller"); - } - else - { - /* Use hostname for alarm */ - snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, _rmon_ctrl_ptr->my_hostname); - } - - dlog ("resource %s entity instance id: %s\n", ptr->resource, entity_instance_id); - - return; -} - - -/***************************************************************************** - * - * Name : thinpool_virtual_space_usage_init - * - * Purpose : Determine if we should monitor virtual usage or not: no purpose - * in doing so if thin provisioning is not used. - * - * Params : index - the index of the virtual space resource - * - * Return : None. - * - *****************************************************************************/ -void thinpool_virtual_space_usage_init(int index, - const char *poolName, - const char *poolOwner) { - - if (!poolName or !poolOwner) { - slog ("No poolName or poolOwner provided"); - return; - } - ilog("index = %d, poolName = %s, poolOwner = %s", index, poolName, poolOwner); - - /* Buffer (and its size) for keeping the initial result after executing - the above command. */ - char current_pool_type[BUFFER_SIZE]; - const unsigned int buffer_size = BUFFER_SIZE; - /* The command for seeing if the pool type is thin. */ - char lvm_thin_cmd[BUFFER_SIZE]; - const char *thin_pool_expected_result = NULL; - - MEMSET_ZERO(current_pool_type); - MEMSET_ZERO(lvm_thin_cmd); - - if (strcmp(poolName, "nova-local-pool") == 0) { - const char *nova_thin_pool_expected_result = "thin-pool"; - thin_pool_expected_result = nova_thin_pool_expected_result; - sprintf(lvm_thin_cmd, "lvs --segments | grep \"%s\" | awk '{print $5}'", poolName); - } - else if (strcmp(poolName, "cinder-volumes-pool") == 0) { - const char *cinder_thin_pool_expected_result = "thin"; - thin_pool_expected_result = cinder_thin_pool_expected_result; - sprintf(lvm_thin_cmd, "cat /etc/cinder/cinder.conf | awk -F = '/^lvm_type.*=.*/ { print $2; }' | tail -n 1 | tr -d ' '"); - } - else { - slog("Invalid pool name given."); - return; - } - - /* Result code. */ - int rc; - - /* Execute the command. */ - rc = execute_pipe_cmd(lvm_thin_cmd, current_pool_type, buffer_size); - - /* If the command has been executed successfuly, continue. */ - if (rc == PASS) { - if (current_pool_type != NULL) { - /* If the pool type is not thin, disable the alarm for virtual - usage. */ - ilog("%s current pool type is set to = %s", poolOwner, current_pool_type); - if(strcmp(current_pool_type, thin_pool_expected_result) != 0) { - resource_config[index].alarm_status = ALARM_OFF; - ilog("%s LVM Thinpool Usage alarm off: thin provisioning not used", poolOwner); - } else { - resource_config[index].alarm_status = ALARM_ON; - ilog("%s LVM Thinpool Usage alarm on: thin provisioning used", poolOwner); - } - } - } else { - resource_config[index].alarm_status = ALARM_OFF; - elog("%s LVM Thinpool monitoring state unknown ; alarm disabled (rc:%i)", - poolOwner, rc); - } -} - -/***************************************************************************** - * - * Name : virtual_space_usage_init - * - * Purpose : Determine if we should monitor virtual usage or not: no purpose - * in doing so if thin provisioning is not used. - * - * Return : None. - * - *****************************************************************************/ - -void virtual_space_usage_init(const char* resource_name) { - - ilog ("Initialize thin pools for resource %s\n", resource_name); - int index; - if ( get_resource_index( resource_name, &index ) == PASS ) { - - if (strcmp(resource_name, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) { - thinpool_virtual_space_usage_init(index,"cinder-volumes-pool","Cinder"); - - } else if (strcmp(resource_name, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) { - thinpool_virtual_space_usage_init(index, "nova-local-pool","Nova"); - } - } - else { - wlog ("failed get_resource_index for resource %s\n", resource_name); - } -} - -/***************************************************************************** - * - * Name : rmon_alarming_init - * - * Purpose : Clears any previously raised rmon alarms if rmon is restarted - * - *****************************************************************************/ -void rmon_alarming_init ( resource_config_type * ptr ) -{ - dlog ("resource name: %s, resource type: %s, alarm type: %d \n", ptr->resource, ptr->type, ptr->alarm_type); - - AlarmFilter alarmFilter; - - SFmAlarmDataT *active_alarm = (SFmAlarmDataT*) calloc (1, sizeof (SFmAlarmDataT)); - if (active_alarm == NULL) - { - elog("Failed to allocate memory for SFmAlarmDataT\n"); - return; - } - - build_entity_instance_id (ptr, alarmData.entity_instance_id); - - snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, ptr->alarm_id); - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, alarmData.entity_instance_id); - - if (fm_get_fault( &alarmFilter, active_alarm) == FM_ERR_OK) - { - if (active_alarm != NULL) { - - string reasonText(active_alarm->reason_text); - /* Set the resource severity */ - ptr->failed = true; - ptr->alarm_raised = true; - ptr->count = ptr->num_tries; - if ( active_alarm->severity == FM_ALARM_SEVERITY_MINOR ) - { - ptr->sev = SEVERITY_MINOR; - } - else if ( active_alarm->severity == FM_ALARM_SEVERITY_MAJOR ) - { - ptr->sev = SEVERITY_MAJOR; - if ( ptr->res_type == RESOURCE_TYPE__FILESYSTEM_USAGE ) - { - string err_res_name(ptr->resource); - _space_to_underscore(err_res_name); - - /* clear host degrade for fs usage alarms */ - snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), "%s %s:", - err_res_name.c_str(), - DEGRADE_CLEAR_MSG ); - - rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ); - } - } - else - { - ptr->sev = SEVERITY_CRITICAL; - } - resourceStageChange ( ptr, RMON_STAGE__MONITOR_WAIT ); - - if (strcmp(ptr->resource, INSTANCE_RESOURCE_NAME) != 0) - { - /* Set the resource severity */ - _set_resource_usage( reasonText, ptr ); - ilog ("%s setting previously failed resource alarm id: %s entity_instance_id: %s usage: %0.2f\n", - ptr->resource, ptr->alarm_id, alarmFilter.entity_instance_id, ptr->resource_value); - } - else - { - ilog ("%s setting previously failed resource alarm id: %s entity_instance_id: %s\n", - ptr->resource, ptr->alarm_id, alarmFilter.entity_instance_id); - } - } - } - free(active_alarm); -} - -/***************************************************************************** - * - * Name : send_clear_msg - * - * Purpose : Send a message to all registered clients to set the node to - * available (clear the degrade) - * - *****************************************************************************/ -void send_clear_msg ( int index ) -{ - int count = 0; - AlarmFilter alarmFilter; - - SFmAlarmDataT *active_alarm = (SFmAlarmDataT*) calloc (1, sizeof (SFmAlarmDataT)); - if (active_alarm == NULL) - { - elog("Failed to allocate memory for SFmAlarmDataT\n"); - return; - } - - string err_res_name(resource_config[index].resource); - _space_to_underscore(err_res_name); - snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, resource_config[index].alarm_id); - - build_entity_instance_id (&resource_config[index], alarmData.entity_instance_id); - - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, alarmData.entity_instance_id); - - /* Notify rmon clients of fault being cleared */ - snprintf(resource_config[index].errorMsg, sizeof(resource_config[index].errorMsg), - "%s cleared_alarms_for_resource:", err_res_name.c_str()); - - /* check if there is an alarm first for this resource. If there is not then the node */ - /* should not be in a degrade state */ - EFmErrorT ret = fm_get_fault( &alarmFilter, active_alarm); - if ( (ret == FM_ERR_OK) && (active_alarm != NULL) ) - { - while (( rmon_send_request ( &resource_config[index], _rmon_ctrl_ptr->clients ) != PASS ) && (count < 3 )) - { - wlog ("%s request send failed \n", resource_config[index].resource); - count++; - } - if (count > 2) - { - wlog ("%s request send failed, count:%d \n", resource_config[index].resource, count); - resource_config[index].failed_send++; - } - if ((resource_config[index].failed_send == MAX_FAIL_SEND) || (count < 3)) - { - /* Reset the values to defaults */ - swact_count = 0; - ilog("Setting resource: %s back to defaults \n", resource_config[index].resource); - resource_config[index].failed = false ; - resource_config[index].alarm_raised = false ; - resource_config[index].count = 0 ; - resource_config[index].sev = SEVERITY_CLEARED ; - resource_config[index].stage = RMON_STAGE__START ; - resource_config[index].failed_send = 0; - } - } - else //alarm not found or error - { - if (ret == FM_ERR_ENTITY_NOT_FOUND) - { - dlog ("Alarm not found for resource: %s entity_instance_id: %s \n", alarmFilter.alarm_id, alarmFilter.entity_instance_id); - } - else - { - wlog ("fm_get_fault failed for resource: %s entity_instance_id: %s err: %d\n", alarmFilter.alarm_id, - alarmFilter.entity_instance_id, ret); - } - - if (active_alarm == NULL) - { - elog("fm_get_fault returned null active_alarm\n"); - } - - swact_count++; - if (swact_count == MAX_SWACT_COUNT) - { - /* Reset the values to defaults */ - while (( rmon_send_request ( &resource_config[index], _rmon_ctrl_ptr->clients ) != PASS ) && (count < 3 )) - { - wlog ("%s request send failed \n", resource_config[index].resource); - count++; - } - swact_count = 0; - ilog("Setting resource: %s back to defaults \n", resource_config[index].resource); - resource_config[index].failed = false ; - resource_config[index].alarm_raised = false ; - resource_config[index].count = 0 ; - resource_config[index].sev = SEVERITY_CLEARED ; - resource_config[index].stage = RMON_STAGE__START ; - resource_config[index].failed_send = 0; - } - } - free(active_alarm); -} - -/***************************************************************************** - * - * Name : read_fs_file - * - * Purpose : read the memory mapped dynamic file system file - *****************************************************************************/ -void read_fs_file ( vector & dynamic_resources ) -{ - FILE * pFile; - char buf[MAX_LEN]; - int fd; - string delimiter = ","; - size_t pos; - string token; - struct stat fileInfo; - struct flock fl; - - memset ((char *)&fileInfo, 0 , sizeof(fileInfo)); - - fl.l_whence = SEEK_SET; - fl.l_start = 0; - fl.l_len = 0; - fl.l_pid = getpid(); - - pFile = fopen (DYNAMIC_FS_FILE , "r"); - if (pFile != NULL) { - - fd = fileno(pFile); - /* lock the file */ - fl.l_type = F_RDLCK; - - /* lock the file for read and write */ - fcntl(fd, F_SETLKW, &fl); - - if (fd == -1) - { - elog("Error opening file for reading"); - } - - if (fstat(fd, &fileInfo) == -1) - { - elog("Error getting the file size"); - } - - char *map = static_cast( mmap(0, fileInfo.st_size, PROT_READ, MAP_SHARED, fd, 0)); - if (map == MAP_FAILED) - { - elog("Error mmapping the file"); - } - string str(map); - - snprintf( buf, MAX_LEN, str.c_str()); - /* free the mmapped memory */ - if (munmap(map, fileInfo.st_size) == -1) - { - elog("Error un-mmapping the file"); - } - fclose(pFile); - /* unlock the file */ - fl.l_type = F_UNLCK; - fcntl(fd, F_SETLK, &fl); - - while ((pos = str.find(delimiter)) != string::npos) { - /* separate the resources from the file */ - token = str.substr(0, pos); - dynamic_resources.push_back(token); - dlog("reading resource %s \n", token.c_str()); - str.erase(0, pos + delimiter.length()); - } - } -} - -/***************************************************************************** - * - * Name : add_dynamic_fs_resource - * - * Purpose : Add the dynamic file system resources - *****************************************************************************/ -void add_dynamic_fs_resource ( bool send_response ) -{ -#ifdef WANT_FS_MONITORING - char resource[50]; - char temp_resource[50]; - char device [50]; - char mount_point[50]; - char temp_state[20]; - char type [50]; - char buf[200]; - string criticality = "critical"; - vector resource_list; - int absolute_thresholds[3]; - - memset(absolute_thresholds, 0, sizeof(absolute_thresholds)); - fs_index.clear(); - fs_state.clear(); - - /* get a list of all the dynamic fs mounts */ - read_fs_file(resource_list); - - for(std::vector::iterator it = resource_list.begin(); it != resource_list.end(); ++it) - { - string str = *it; - snprintf(buf, sizeof(buf), str.c_str()); - - // For resources without mounts the mount_point will be NULL - memset(&mount_point[0], 0, sizeof(mount_point)); - sscanf(buf, "%49s %19s %49s %49s %49s", temp_resource, temp_state, type, device, mount_point); - string state(temp_state); - - bool found = false; - - if (mount_point[0] != '\0') - { - // for resources with mounts, the resource name is the mount value - snprintf(resource, FM_MAX_BUFFER_LENGTH, mount_point); - } - else - { - // for resources without mounts, the resource name is the device value - snprintf(resource, FM_MAX_BUFFER_LENGTH, device); - } - - /* the dynamic file system is enabled, add it if need be */ - for (int i=0; i<_rmon_ctrl_ptr->resources; i++) - { - if ( strcmp(resource, resource_config[i].resource) == 0) - { - dlog ("resource %s already exists, update the state to %s \n", resource, state.c_str()); - /* resource already exists no need to add it again */ - /* update the state, it may have changed */ - fs_index.push_back(i); - fs_state.push_back(state); - found = true; - break; - } - } - - if (!found) // new resource to monitor, lets add it - { - int enabled_resource = ALARM_OFF; - if (strcmp(temp_state,"enabled") == 0) - { - enabled_resource = ALARM_ON; - } - - if (mount_point[0] != '\0') - { - save_fs_resource ( resource, criticality, enabled_resource, fs_percent, absolute_thresholds, DYNAMIC_ALARM, type, device, MOUNTED ); - } - else - { - save_fs_resource ( resource, criticality, enabled_resource, fs_percent, absolute_thresholds, DYNAMIC_ALARM, type, device, NOT_MOUNTED ); - } - - if (enabled_resource == ALARM_ON) { - calculate_fs_usage( &resource_config[_rmon_ctrl_ptr->resources - 1] ); - rmon_alarming_init( &resource_config[_rmon_ctrl_ptr->resources - 1] ); - } - } - } -#endif - if (send_response) - { -#ifdef WANT_FS_MONITORING - ilog ("sending response to dynamic FS add, to the rmon client\n"); -#else - ilog("dynamic filesystem monitoring moved to collectd\n"); -#endif - /* let the rmon client know that we are done with the file */ - rmon_resource_response(_rmon_ctrl_ptr->clients); - } -} - -/***************************************************************************** - * - * Name : clear_alarm_for_resource - * - * Purpose : Clear the alarm of the resource passed in - * - *****************************************************************************/ -void clear_alarm_for_resource ( resource_config_type * ptr ) -{ - dlog ("resource name: %s, resource type: %s, alarm type: %d \n", ptr->resource, ptr->type, ptr->alarm_type); - AlarmFilter alarmFilter; - - build_entity_instance_id (ptr, alarmData.entity_instance_id); - - snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, ptr->alarm_id); - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, alarmData.entity_instance_id); - - int ret = rmon_fm_clear(&alarmFilter); - if (ret == FM_ERR_OK) - { - ilog ("Cleared stale alarm %s for entity instance id: %s", alarmFilter.alarm_id, alarmFilter.entity_instance_id); - } - else if (ret == FM_ERR_ENTITY_NOT_FOUND) - { - dlog ("Stale alarm %s for entity instance id: %s was not found", alarmFilter.alarm_id, alarmFilter.entity_instance_id); - } - else - { - wlog ("Failed to clear stale alarm %s for entity instance id: %s error: %d", alarmFilter.alarm_id, alarmFilter.entity_instance_id, ret); - } -} - - -/***************************************************************************** - * - * Name : process_dynamic_fs_file - * - * Purpose : read the dynamic files directory and add the dynamic filesystem - * resources when the file is updated - *****************************************************************************/ -void process_dynamic_fs_file() -{ - int index = 0; - - pthread_mutex_lock(&lock); - modifyingResources = true; - pthread_mutex_unlock(&lock); - - add_dynamic_fs_resource(true); - - pthread_mutex_lock(&lock); - modifyingResources = false; - pthread_mutex_unlock(&lock); - - /* deal with changes of dynamic file system enabled state */ - for (unsigned int i=0; iclients > 0 ) - { - //send a clear degrade node - send_clear_msg(index); - } - - // we need to clear the resource's alarm if there was any set for this resource - clear_alarm_for_resource(&resource_config[index]); - } - else - { - /* There was no active alarm to clear */ - ilog("Setting resource: %s back to defaults \n", resource_config[index].resource); - resource_config[index].alarm_status = ALARM_OFF; - resource_config[index].failed = false; - resource_config[index].alarm_raised = false; - resource_config[index].count = 0 ; - resource_config[index].sev = SEVERITY_CLEARED ; - resource_config[index].stage = RMON_STAGE__START ; - } - } - else if ( strcmp(fs_state.at(i).c_str(), "enabled") == 0 ) - { - // resource has been enabled - if ( resource_config[index].alarm_status == ALARM_OFF ) - { - /* Turn the resource checking back on if it was off */ - resource_config[index].alarm_status = ALARM_ON; - - //reset values - resource_config[index].failed = false; - resource_config[index].alarm_raised = false; - resource_config[index].count = 0 ; - resource_config[index].sev = SEVERITY_CLEARED ; - resource_config[index].stage = RMON_STAGE__START ; - - rmon_alarming_init( &resource_config[index] ); - - ilog("%s is now enabled \n", resource_config[index].resource); - if (strcmp(resource_config[index].resource, CINDER_VOLUMES) == 0) - { - virtual_space_usage_init(V_CINDER_THINPOOL_RESOURCE_NAME); - } - if (strcmp(resource_config[index].resource, NOVA_LOCAL) == 0) - { - virtual_space_usage_init(V_NOVA_THINPOOL_RESOURCE_NAME); - } - } - else // alarm aready on (enabled) - { - ilog("%s is already enabled \n", resource_config[index].resource); - } - } - else - { - wlog("%s invalid dynamic file system state: %s \n", resource_config[index].resource, fs_state.at(i).c_str()); - } - } -} - -/***************************************************************************** - * - * Name : process_static_fs_file - * - * Purpose : Reads in the list of static file systems for monitoring - * - *****************************************************************************/ -void process_static_fs_file() -{ - FILE * pFile; - vector mounts; - char buf[MAX_LEN]; - char resource[50]; - char type[50]; - char device[50]; - bool found = false; - int enabled_resource = ALARM_ON; - string criticality = "critical"; - int absolute_thresholds[3] = {0}; - - pFile = fopen (STATIC_FS_FILE , "r"); - if (pFile != NULL) { - ifstream fin( STATIC_FS_FILE ); - string line; - - while( getline( fin, line )) { - /* process each line */ - mounts.push_back(line); - } - fclose(pFile); - - - for(std::vector::iterator it = mounts.begin(); it != mounts.end(); ++it) - { - string str = *it; - snprintf(buf, MAX_LEN, str.c_str()); - sscanf(buf, "%49s %49s %49s %d %d %d", resource, device, type, &absolute_thresholds[0], &absolute_thresholds[1], &absolute_thresholds[2]); - - if (!found) - { - if (fs_percent == PERCENT_USED) - { - /* do not use the absolute thresholds */ - memset(absolute_thresholds, 0, sizeof(absolute_thresholds)); - } - /* add the resource */ - save_fs_resource ( resource, criticality, enabled_resource, fs_percent, absolute_thresholds, STATIC_ALARM, type, device, MOUNTED ); - calculate_fs_usage( &resource_config[_rmon_ctrl_ptr->resources - 1] ); - } - } - } - else - { - elog("Error, no static file system file present at: %s\n", STATIC_FS_FILE); - } -} - -/***************************************************************************** - * - * Name : rmon_timer_handler - * - * Purpose : Looks up the timer ID and asserts the corresponding ringer - * - *****************************************************************************/ -void rmon_timer_handler ( int sig, siginfo_t *si, void *uc) -{ - timer_t * tid_ptr = (void**)si->si_value.sival_ptr ; - - /* Avoid compiler errors/warnings for parms we must - * have but currently do nothing with */ - UNUSED(sig); - UNUSED(uc); - - if ( !(*tid_ptr) ) - { - // tlog ("Called with a NULL Timer ID\n"); - return ; - } - - /* is event rmon timer */ - if ( *tid_ptr == rmonTimer_event.tid ) - { - mtcTimer_stop_int_safe ( rmonTimer_event); - rmonTimer_event.ring = true ; - } - - else if ( *tid_ptr == rmonTimer_pm.tid ) - { - mtcTimer_stop_int_safe ( rmonTimer_pm); - rmonTimer_pm.ring = true ; - } - - else if ( (is_controller()) && (*tid_ptr == rmonTimer_ntp.tid) ) - { - mtcTimer_stop_int_safe ( rmonTimer_ntp); - rmonTimer_ntp.ring = true ; - } - - else - { - bool found = false ; - for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ ) - { - if ( *tid_ptr == rtimer[i].tid ) - { - mtcTimer_stop_int_safe ( rtimer[i] ); - rtimer[i].ring = true ; - found = true ; - break ; - } - } - if ( !found ) - { - for ( int i = 0 ; i < _rmon_ctrl_ptr->thinmeta_resources ; i++ ) - { - if ( *tid_ptr == thinmetatimer[i].tid ) - { - mtcTimer_stop_int_safe ( thinmetatimer[i] ); - thinmetatimer[i].ring = true ; - found = true ; - break ; - } - } - } - if ( !found ) - { - /* try and cleanup by stopping this unknown timer via its tid */ - mtcTimer_stop_tid_int_safe (tid_ptr); - } - } -} - -/***************************************************************************** - * - * Name : clear_ntp_alarms - * - * Purpose : Loop through each current alarms and deleted them if the server - * is now reachable or the server no longer is assigned to ntpq - * - *****************************************************************************/ -void clear_ntp_alarms(std::list &non_reachable_ntp_servers, unsigned int alarm_count, SFmAlarmDataT *active_alarms, bool clear_major_alarm) -{ - dlog ("Total NTP alarm_count:%d", alarm_count); - AlarmFilter alarmFilter; - char alarm_to_search[FM_MAX_BUFFER_LENGTH]; - - fm_alarm_id alarm_id; - snprintf(alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID); - - // clear the major alarms if required - if (clear_major_alarm) - { - snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID ); - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.ntp", _rmon_ctrl_ptr->my_hostname); - - int ret = rmon_fm_clear(&alarmFilter); - if (ret != FM_ERR_OK) - { - if (ret != FM_ERR_ENTITY_NOT_FOUND) - { - wlog ("Failed to clear major alarm %s for entity instance id:%s error:%d", NTP_ALARM_ID, alarmFilter.entity_instance_id, ret); - } - } - else - { - ilog ("Cleared major alarm %s for entity instance id:%s", NTP_ALARM_ID, alarmFilter.entity_instance_id); - } - } - - if (active_alarms == NULL) - { - elog ("Null pointer for active_alarms"); - return; - } - - // clear minor alarms if required - bool found; - std::list::iterator iter; - std::list::iterator iter_bad_list; - - // for each NTP alarms in the system see if it match any of the invalid NTP servers - // if it does not match then the alarm must be removed since that NTP server - // is no longer being monitored or is now valid - for ( unsigned int i = 0; i < alarm_count; i++ ) - { - if ( ((active_alarms+i)->severity) == FM_ALARM_SEVERITY_MINOR ) - { - // Verify that this NTP minor alarm is still valid, This server could no longer exist or is now marked - // reachable - dlog ("Verify NTP minor alarm is still valid, entity instance id:%s", (active_alarms+i)->entity_instance_id); - - found = false; - - // check for stale minor alarm - for ( iter = non_reachable_ntp_servers.begin (); iter != non_reachable_ntp_servers.end (); iter++ ) - { - // e.g. host=controller-0.ntp=102.111.2.2 - snprintf(alarm_to_search, FM_MAX_BUFFER_LENGTH, "%s.ntp=%s", _rmon_ctrl_ptr->my_hostname, iter->c_str()); - - dlog ("Non reachable NTP server to search %s", iter->c_str()); - - if (strstr((active_alarms+i)->entity_instance_id, iter->c_str()) != NULL) - { - // server is in non reachable list, do not clear it - found = true; - dlog ("Alarm is still valid %s", iter->c_str()); - break; - } - } - - if (!found) - { - // lets clear it but only if it's this controller's alarm, it could be the peer controller's alarm - if (strstr((active_alarms+i)->entity_instance_id, _rmon_ctrl_ptr->my_hostname) != NULL) - { - snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID); - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", (active_alarms+i)->entity_instance_id); - - if (rmon_fm_clear(&alarmFilter) != FM_ERR_OK) - { - wlog ("Failed to clear minor alarm %s for entity instance id:%s", NTP_ALARM_ID, (active_alarms+i)->entity_instance_id); - } - else - { - ilog ("Cleared minor alarm %s for entity instance id:%s", NTP_ALARM_ID, (active_alarms+i)->entity_instance_id); - } - } - } - } - } -} - -/***************************************************************************** - * - * Name : ntp_query_results - * - * Purpose : Analyze the return code from script query_ntp_servers.sh. - * Create alarms if the servers are non reachable, Clear alarms if they are - * now reachable - * - *****************************************************************************/ -void ntp_query_results (int ntp_query_status ) -{ - dlog ("ntp_query_results ntp_query_status:%d", ntp_query_status); - - std::list non_reachable_ntp_servers; - - // if no NTP servers are provisioned on the system, we still need to clear old NTP - // alarms if there are any. But we do not need to read the tmp server file. - if (ntp_query_status != NTP_NOT_PROVISIONED) - { - // read the temp file which contains a list of reachable and non reachable servers - // this file is the output from the query_ntp_servers.sh script - - const char *server_info = "/tmp/ntpq_server_info"; - FILE *pFile; - pFile = fopen(server_info, "r"); - if (pFile != NULL) - { - const char * delim = ";\n\r"; - char * ip; - char line[500]; - - int pos = 0; - while ( memset(line, 0, sizeof(line)) && (fgets((char*) &line, sizeof(line), pFile) != NULL) ) - { - // the first line in the tmp file is the reachable servers, the second is the non reachable servers - if (pos == 1) - { - for (ip = strtok (line, delim); ip; ip = strtok (NULL, delim)) - { - non_reachable_ntp_servers.push_back(ip); - dlog("Found non reachable NTP servers:%s\n", ip); - } - break; - } - pos++; - } - fclose(pFile); - } - else - { - elog("Failed to open file: %s\n", server_info); - return; - } - } - - // retreive all the current NTP alarms - int rc; - unsigned int max_alarms=75; - fm_alarm_id alarm_id; - snprintf(alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID); - SFmAlarmDataT *active_alarms = (SFmAlarmDataT*) calloc (max_alarms, sizeof (SFmAlarmDataT)); - if (active_alarms == NULL) - { - elog ("Failed to allocate memory for NTP alarms"); - return; - } - - int ret = fm_get_faults_by_id( &alarm_id, active_alarms, &max_alarms); - if (!(ret == FM_ERR_OK || ret == FM_ERR_ENTITY_NOT_FOUND)) - { - elog ("fm_get_faults_by_id failed trying to retreive all the NTP alarms, error:%d", ret); - free(active_alarms); - return; - } - - // Clear alarms if required - - bool clear_major_alarm = false; - bool created_major_alarm = false; - - if ( ntp_query_status == NTP_NOT_PROVISIONED || ntp_query_status == NTP_SOME_REACHABLE || ntp_query_status == NTP_OK ) - { - // We are going to clear the major alarm since there is at least one server selected or - // no servers are provisioned - clear_major_alarm = true; - } - - // fm_get_faults_by_id returns the number of alarms found - if (max_alarms != 0) - { - // verify if alarms need to cleared and clear them - clear_ntp_alarms(non_reachable_ntp_servers, max_alarms, active_alarms, clear_major_alarm); - } - - // There are no NTP servers provisioned so there is no alarms to raise - if (ntp_query_status == NTP_NOT_PROVISIONED) - { - return; - } - - // Raise alarms if required - - // Set up alarms data - AlarmFilter alarmFilter; - snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), "Monitor and if condition persists, contact next level of support."); - snprintf(alarmData.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID); - strcpy(alarmData.uuid, ""); - snprintf(alarmData.entity_type_id, FM_MAX_BUFFER_LENGTH, "ntp"); - alarmData.alarm_state = FM_ALARM_STATE_SET; - alarmData.alarm_type = FM_ALARM_COMM; - alarmData.probable_cause = FM_ALARM_CAUSE_UNKNOWN; - alarmData.timestamp = 0; - alarmData.service_affecting = FM_FALSE; - alarmData.suppression = FM_FALSE; - - // Here we raise the major alarm if required - if (ntp_query_status == NTP_NONE_REACHABLE || ntp_query_status == NTP_SOME_REACHABLE_NONE_SELECTED) - { - wlog("NTP configuration does not contain any valid or reachable NTP servers"); - - // Check if alarm is raised already - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.ntp", _rmon_ctrl_ptr->my_hostname); - - bool found = false; - for ( unsigned int i = 0; i < max_alarms; i++ ) - { - if ( strncmp((active_alarms+i)->entity_instance_id, alarmFilter.entity_instance_id, sizeof((active_alarms+i)->entity_instance_id)) == 0 ) - { - // Alarm already exist - dlog("Alarm %s already raised for entity instance id:%s\n", NTP_ALARM_ID, alarmFilter.entity_instance_id); - found = true; - break; - } - } - - // Alarm does not exist so raise it - if (!found && !created_major_alarm) - { - // Alarm does not exist so raise it - alarmData.severity = FM_ALARM_SEVERITY_MAJOR; - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), "NTP configuration does not contain any valid or reachable NTP servers."); - snprintf(alarmData.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarmFilter.entity_instance_id); - - rc = rmon_fm_set(&alarmData, NULL); - if (rc == FM_ERR_OK ) - { - ilog("Alarm %s created for entity instance id:%s \n", NTP_ALARM_ID, alarmData.entity_instance_id); - created_major_alarm = true; - } - else - { - ilog("Failed to create alarm %s for entity instance id:%s error: %d \n", NTP_ALARM_ID, alarmData.entity_instance_id, (int)rc); - } - } - } - - // Here were raise alarms for individual servers - if (ntp_query_status != NTP_OK) - { - wlog("Some or all of the NTP servers are not reachable"); - std::list::iterator iter; - alarmData.severity = FM_ALARM_SEVERITY_MINOR; - - // Loop through all the non reachable NTP servers - // Check to see if an alarms is lready raised for the server. - // If we do not find an alarm for the server then we raise it - for ( iter = non_reachable_ntp_servers.begin (); iter != non_reachable_ntp_servers.end (); iter++ ) - { - bool found = false; - - // Build the alarm entity instatance id - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.ntp=%s", _rmon_ctrl_ptr->my_hostname, iter->c_str()); - - dlog("Search alarms for entity instance id:%s \n", alarmFilter.entity_instance_id); - for ( unsigned int i = 0; i < max_alarms; i++ ) - { - if ( strncmp((active_alarms+i)->entity_instance_id, alarmFilter.entity_instance_id, sizeof((active_alarms+i)->entity_instance_id)) == 0 ) - { - dlog("Alarm %s already raised for entity instance id:%s\n", NTP_ALARM_ID, alarmFilter.entity_instance_id); - found = true; - break; - } - } - - // If the NTP alarm was not found then raise one for this NTP server - if (!found) - { - - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), "NTP address %s is not a valid or a reachable NTP server.", iter->c_str() ); - snprintf(alarmData.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarmFilter.entity_instance_id); - - rc = rmon_fm_set(&alarmData, NULL); - if (rc == FM_ERR_OK ) - { - ilog("Alarm %s created for entity instance id:%s \n", NTP_ALARM_ID, alarmData.entity_instance_id); - } - else - { - ilog("Failed to create alarm %s for entity instance id:%s error:%d \n", NTP_ALARM_ID, alarmData.entity_instance_id, (int)rc); - } - } - } - } - - free(active_alarms); - return; -} - -/***************************************************************************** - * - * Name : query_ntp_servers - * - * Purpose : execute script query_ntp_servers.sh which run the "ntpq -np" - * which query the healths of the NTP servers. The script will return a - * status code and also create a temporate file which will save the list - * of reachable and non reachable NTP servers. This temp file is required - * to generate proper alarms - * - *****************************************************************************/ -int query_ntp_servers ( ) -{ - pid_t child_pid; - - dlog ("Main Pid:%d \n", getpid() ); - - ntp_child_pid = child_pid = fork (); - if (child_pid == 0) - { - dlog ("Child Pid:%d \n", getpid() ); - - char* argv[] = {(char*)NTPQ_QUERY_SCRIPT, NULL}; - char cmd[MAX_FILE_SIZE] ; - memset (cmd,0,MAX_FILE_SIZE); - - snprintf ( &cmd[0], MAX_FILE_SIZE, "%s/%s", RMON_FILES_DIR, NTPQ_QUERY_SCRIPT ); - - bool close_file_descriptors = true ; - if ( setup_child ( close_file_descriptors ) != PASS ) - { - exit(NTP_ERROR); - } - - /* Set child to ignore child exit */ - signal (SIGCHLD, SIG_DFL); - - /* Setup the exec arguement */ - int res = execv(cmd, argv); - elog ( "Failed to run %s return code:%d error:%s\n", cmd, res, strerror(errno) ); - exit (NTP_ERROR); - } - - if ( child_pid == -1 ) - { - elog ("Fork failed (%s)\n", strerror(errno)); - - /* TODO: Consider making this a critical fault - * after 100 retries. - * All possibilities based on man page are - * due to resource limitations and if that does - * not resolve in 100 retries then ip probably will never. - **/ - return (FAIL); - } - - return (PASS); -} - -/***************************************************************************** - * - * Name : rmonHdlr_ceilometer_handler - * - * Purpose : Handles the ceilometer sample create response message - * - *****************************************************************************/ -void rmonHdlr_ceilometer_handler( struct evhttp_request *req, void *arg ) -{ - if ( !req ) - { - elog (" Request Timeout\n"); - ceilometerEvent.status = FAIL_TIMEOUT; - goto _ceilometer_handler_done ; - } - - ceilometerEvent.status = rmonHttpUtil_status(ceilometerEvent); - if ( ceilometerEvent.status != PASS ) - { - elog ("ceilometer HTTP request Failed (%d)\n", ceilometerEvent.status); - rmonHttpUtil_get_response(ceilometerEvent); - goto _ceilometer_handler_done ; - } - -_ceilometer_handler_done: - event_base_loopbreak((struct event_base *)arg); -} - -/***************************************************************************** - * - * Name : generate_ceilometer_pm - * - * Purpose : Generate ceilometer PMs through the REST API - * - *****************************************************************************/ -void generate_ceilometer_pm ( string r_id, string m_id, string m_type, - string m_unit, string m_volume, - string m_metadata ) -{ - int rc = PASS; - daemon_config_type * cfg_ptr = daemon_get_cfg_ptr(); - string command_path=""; - string host_ip = cfg_ptr->keystone_auth_host; - int port = cfg_ptr->ceilometer_port; - int count = 0; - - rmonHttpUtil_libEvent_init ( &ceilometerEvent, CEILOMETER_EVENT_SIG, host_ip, port); - - ceilometerEvent.address.append("/v2/meters/"); - ceilometerEvent.address.append(m_id); - - ceilometerEvent.user_agent = "ceilometerclient.openstack.common.apiclient"; - - ceilometerEvent.payload = "[{"; - ceilometerEvent.payload.append("\"resource_id\":\""); - ceilometerEvent.payload.append(r_id); - ceilometerEvent.payload.append("\",\"counter_name\":\""); - ceilometerEvent.payload.append(m_id); - ceilometerEvent.payload.append("\",\"counter_type\":\""); - ceilometerEvent.payload.append(m_type); - ceilometerEvent.payload.append("\",\"counter_unit\":\""); - ceilometerEvent.payload.append(m_unit); - ceilometerEvent.payload.append("\",\"counter_volume\":\""); - ceilometerEvent.payload.append(m_volume); - ceilometerEvent.payload.append("\",\"resource_metadata\":"); - // the resource metadata is dictionary of key-value pairs - ceilometerEvent.payload.append(m_metadata); - ceilometerEvent.payload.append("}]"); - dlog ("Payload is : %s\n", ceilometerEvent.payload.c_str()); - - rc = rmonHttpUtil_api_request (CEILOMETER_SAMPLE_CREATE, ceilometerEvent, command_path); - do - { - if ( rc != PASS ) - { - count++; - wlog ("ceilometer failed request (%d) ... retrying (%d)\n", rc, count); - } - rmonHttpUtil_log_event (ceilometerEvent); - - } while ( ( rc!=PASS ) && ( count < REST_API_RETRY_COUNT ) ); - - if ( rc!= PASS ) - { - elog ("ceilometer sample create Failed (%d) (cnt:%d)\n", rc, count); - } -} - -void clear_rmon_api_counts ( registered_clients * ptr ) -{ - if ( ptr->b2b_miss_count > ptr->b2b_miss_peak ) - { - ptr->b2b_miss_peak = ptr->b2b_miss_count ; - } - - if ( ptr->mesg_err_cnt > ptr->mesg_err_peak ) - { - ptr->mesg_err_peak = ptr->mesg_err_cnt ; - } - ptr->b2b_miss_count = 0 ; - ptr->send_err_cnt = 0 ; - ptr->recv_err_cnt = 0 ; - ptr->mesg_err_cnt = 0 ; -} - -/***************************************************************************** - * - * Name : _space_to_underscore - * - * Purpose : Converts spaces in a string to underscores - * *****************************************************************************/ -void _space_to_underscore (string & str ) -{ - char space = ' '; - for(unsigned int i = 0; i < str.size(); i++) - { - if(str[i] == space) - { - str[i] = '_'; - } - } -} - -/***************************************************************************** - * - * Name : set_alarm_defaults - * - * Purpose : Set the defaults for the fm alarms - * *****************************************************************************/ -void set_alarm_defaults ( resource_config_type * ptr ) -{ - strcpy(alarmData.uuid, ""); - /* common data for all alarm messages */ - snprintf(alarmData.entity_type_id, FM_MAX_BUFFER_LENGTH, "system.host"); - - build_entity_instance_id (ptr, alarmData.entity_instance_id); - - alarmData.alarm_state = FM_ALARM_STATE_SET; - alarmData.alarm_type = FM_ALARM_OPERATIONAL; - alarmData.probable_cause = FM_ALARM_THRESHOLD_CROSSED; - alarmData.timestamp = 0; - alarmData.service_affecting = FM_FALSE; - alarmData.suppression = FM_TRUE; - snprintf(alarmData.alarm_id, FM_MAX_BUFFER_LENGTH, ptr->alarm_id); - -} - -/***************************************************************************** - * - * Name : resource_handler - * - * Purpose : Handle the failed resources and raise alarms through - * the FM API as well as calling a function to notify registered clients - *****************************************************************************/ -int resource_handler ( resource_config_type * ptr ) -{ - int rc = RETRY ; - AlarmFilter alarmFilter; - string err_res_name(ptr->resource); - _space_to_underscore(err_res_name); - - if ( ptr->stage < RMON_STAGE__STAGES ) - { - dlog2 ("%s %s Stage %d\n", ptr->resource, rmonStages_str[ptr->stage], ptr->stage ); - } - else - { - resourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - - switch ( ptr->stage ) - { - case RMON_STAGE__START: - { - dlog ( "%s failed:%d set_cnt:%d debounce_cnt:%d\n", - ptr->resource, - ptr->failed, - ptr->count, - ptr->debounce_cnt); - break ; - } - case RMON_STAGE__MANAGE: - { - /* send messages to maintnance in thresholds are crossed */ - if (ptr->alarm_status == ALARM_ON) - { - /* set up the fm api alarm defaults */ - set_alarm_defaults( ptr ); - if ( strcmp(ptr->resource, MEMORY_RESOURCE_NAME) == 0 ) - { - snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), - "Monitor and if condition persists, contact next level of support; may require additional memory on Host."); - } - else if ( strcmp(ptr->resource, INSTANCE_RESOURCE_NAME) == 0 ) - { - snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), - "Check Management and Infrastructure Networks and Controller or Storage Nodes."); - } - else - { - if ((ptr->type != NULL) && (strcmp(ptr->type, "lvg") == 0 )) - { - snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), - "Monitor and if condition persists, consider adding additional physical volumes to the volume group."); - } - else - { - snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), - "Monitor and if condition persists, contact next level of support."); - } - } - - if ( ptr->sev == SEVERITY_MINOR ) - { - alarmData.severity = FM_ALARM_SEVERITY_MINOR; - - if ( ptr->percent == PERCENT_USED ) { - - if ( ptr->alarm_type == STANDARD_ALARM ) - { - ilog ("%s threshold exceeded; threshold: %d%%, actual: %.2f%%. \n", - ptr->resource, ptr->minor_threshold, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "%s threshold exceeded; threshold: %u%%, actual: %.2f%%.", - ptr->resource, ptr->minor_threshold, ptr->resource_value); - } - else { - ilog ("Filesystem threshold exceeded; threshold: %d%%, actual: %.2f%%. \n", - ptr->minor_threshold, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "Filesystem exceeded; threshold: %u%%, actual: %.2f%%.", - ptr->minor_threshold, ptr->resource_value); - } - } else { - if ( ptr->alarm_type == STANDARD_ALARM ) - { - ilog ("%s threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n", - ptr->resource, ptr->minor_threshold_abs_node0, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "%s threshold exceeded; threshold: %uMB, remaining value: %.2fMB.", - ptr->resource, ptr->minor_threshold_abs_node0, ptr->resource_value); - } else { - ilog ("Filesystem threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n", - ptr->minor_threshold_abs_node0, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "Filesystem threshold exceeded; threshold: %uMB, remaining value: %.2fMB.", - ptr->minor_threshold_abs_node0, ptr->resource_value); - } - } - snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), - "%s minor_threshold_set", err_res_name.c_str()); - } - else if ( ptr->sev == SEVERITY_MAJOR ) - { - alarmData.severity = FM_ALARM_SEVERITY_MAJOR; - - if (strcmp(ptr->resource, INSTANCE_RESOURCE_NAME) != 0) - { - if (ptr->percent == PERCENT_USED){ - if ( ptr->alarm_type == STANDARD_ALARM ) - { - ilog ("%s threshold exceeded; threshold: %d%%, actual: %.2f%%. \n", - ptr->resource, ptr->major_threshold, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "%s threshold exceeded; threshold: %u%%, actual: %.2f%%.", - ptr->resource, ptr->major_threshold, ptr->resource_value); - } - else { - ilog ("Filesystem threshold exceeded; threshold: %d%%, actual: %.2f%%. \n", - ptr->major_threshold, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "Filesystem threshold exceeded; threshold: %u%%, actual: %.2f%%.", - ptr->major_threshold, ptr->resource_value); - } - } else { - if ( ptr->alarm_type == STANDARD_ALARM ) - { - ilog ("%s threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n", - ptr->resource, ptr->major_threshold_abs_node0, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "%s threshold exceeded; threshold: %uMB, remaining value: %.2fMB.", - ptr->resource, ptr->major_threshold_abs_node0, ptr->resource_value); - } else { - ilog ("Filesystem threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n", - ptr->major_threshold_abs_node0, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "Filesystem threshold exceeded; threshold: %uMB, remaining value: %.2fMB.", - ptr->major_threshold_abs_node0, ptr->resource_value); - } - } - } - else if (strcmp(ptr->resource, INSTANCE_RESOURCE_NAME) == 0) - { - /* instance alarming is a special case of alarm */ - wlog ("No access to remote VM volumes.\n"); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "No access to remote VM volumes."); - } - - if ( ptr->res_type == RESOURCE_TYPE__FILESYSTEM_USAGE ) - { - snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), - "%s %s",err_res_name.c_str(), DEGRADE_CLEAR_MSG ); - } - else - { - snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), - "%s major_threshold_set",err_res_name.c_str()); - } - } - else if ( ptr->sev == SEVERITY_CRITICAL ) - { - alarmData.severity = FM_ALARM_SEVERITY_CRITICAL; - - if (ptr->percent == PERCENT_USED){ - if ( ptr->alarm_type == STANDARD_ALARM ) - { - ilog ("%s threshold exceeded; threshold: %d%%, actual: %.2f%%. \n", - ptr->resource, ptr->critical_threshold, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "%s threshold exceeded; threshold: %u%%, actual: %.2f%%.", - ptr->resource, ptr->critical_threshold, ptr->resource_value); - } - else { - ilog ("Filesystem threshold exceeded; threshold: %d%%, actual: %.2f%%. \n", - ptr->critical_threshold, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "Filesystem threshold exceeded; threshold: %u%%, actual: %.2f%%.", - ptr->critical_threshold, ptr->resource_value); - } - } else { - if ( ptr->alarm_type == STANDARD_ALARM ) - { - ilog ("%s threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n", - ptr->resource, ptr->critical_threshold_abs_node0, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "%s threshold exceeded; threshold: %uMB, remaining value: %.2fMB.", - ptr->resource, ptr->critical_threshold_abs_node0, ptr->resource_value); - } else { - ilog ("Filesystem threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n", - ptr->critical_threshold_abs_node0, ptr->resource_value); - snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), - "Filesystem threshold exceeded; threshold: %uMB, remaining value: %.2fMB.", - ptr->critical_threshold_abs_node0, ptr->resource_value); - } - } - snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), - "%s major_threshold_set",err_res_name.c_str()); - } - - rc = rmon_fm_set(&alarmData, NULL); - if (rc == FM_ERR_OK ) { - ilog("%s: %s alarm\n", - ptr->resource, - FmAlarmSeverity_to_string(alarmData.severity).c_str()); - ptr->alarm_raised = true; - } else { - ilog("%s: %s alarm failed (rc:%d)\n", - ptr->resource, - FmAlarmSeverity_to_string(alarmData.severity).c_str(), - (int)rc); - } - - if (ptr->alarm_raised) - { - if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND)) - { - /* If degrade debounce is non-zero then this - * alarm condition is candidate for host degrade */ - if (ptr->debounce) - { - if ( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) - { - ptr->failed_send++; - wlog ("%s request send failed (count:%d)\n", - ptr->resource, - ptr->failed_send ); - } - else - { - ptr->failed_send = 0; - } - } - } - else - { - ptr->failed_send = 0; - } - resourceStageChange ( ptr, RMON_STAGE__MONITOR_WAIT ); - } - } - else { - resourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - - break; - } - - case RMON_STAGE__IGNORE: - { - - //nothing to do here, go to the finished stage - resourceStageChange ( ptr, RMON_STAGE__FINISH ); - - break ; - } - - case RMON_STAGE__MONITOR_WAIT: - { - if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND) && (ptr->failed_send > 0)) - { - if ( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) - { - wlog ("%s request send failed \n", ptr->resource); - ptr->failed_send++; - } - else - { - ptr->failed_send = 0; - } - } - break; - } - - case RMON_STAGE__FINISH: - { - if ((ptr->alarm_status == ALARM_ON) && (ptr->alarm_raised)) - { - snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, ptr->alarm_id); - - build_entity_instance_id (ptr, alarmData.entity_instance_id); - - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, alarmData.entity_instance_id); - ilog ("%s alarm clear\n", ptr->resource ); - - /* clear the alarm */ - EFmErrorT ret = rmon_fm_clear(&alarmFilter); - if (( ret == FM_ERR_OK ) || ( ret == FM_ERR_ENTITY_NOT_FOUND )) - { - if (ret == FM_ERR_ENTITY_NOT_FOUND) - { - dlog ("%s alarm clear failed, entity '%s' not found", - ptr->resource, alarmData.entity_instance_id); - } - - snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), "%s cleared_alarms_for_resource", err_res_name.c_str()); - if ( (_rmon_ctrl_ptr->clients > 0) && ( ptr->failed_send < MAX_FAIL_SEND ) && (ret == FM_ERR_OK) ) - { - while (( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) && - ( ptr->failed_send < MAX_FAIL_SEND )) - { - wlog ("%s request send failed \n", ptr->resource); - ptr->failed_send++; - } - - ptr->alarm_raised = false; - ptr->failed_send = 0; - ptr->failed = false ; - ptr->count = 0 ; - ptr->sev = SEVERITY_CLEARED ; - ptr->stage = RMON_STAGE__START ; - } - else - { - ptr->alarm_raised = false; - ptr->failed_send = 0; - ptr->failed = false ; - ptr->count = 0 ; - ptr->sev = SEVERITY_CLEARED ; - ptr->stage = RMON_STAGE__START ; - } - } - else - { - wlog("%s alarm clear failed, entity '%s' (rc:%d)\n", - ptr->resource, - alarmData.entity_instance_id, - ret); - } - } - else - { - ptr->alarm_raised = false; - ptr->failed_send = 0; - ptr->failed = false ; - ptr->count = 0 ; - ptr->sev = SEVERITY_CLEARED ; - ptr->stage = RMON_STAGE__START ; - } - rc = PASS ; - break ; - } - default: - { - slog ("%s Invalid stage (%d)\n", ptr->resource, ptr->stage ); - - /* Default to finish for invalid case. - * If there is an issue then it will be detected */ - resourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - } - return rc; -} - -/***************************************************************************** - * - * Name : process_failures - * - * Purpose : Check whether a percentage resource is to be failed or a failure - * threshold is to be cleared by the resource_handler - * - *****************************************************************************/ -void process_failures ( resource_config_type * ptr ) -{ - if (ptr->stage == RMON_STAGE__INIT) - { - /* first time after restart/reboot, clear the alarm if the first reading is good */ - resourceStageChange ( ptr, RMON_STAGE__START ); - if (ptr->resource_value < ptr->minor_threshold) - { - // assuming we left as alarm on last time - ptr->alarm_status = ALARM_ON; - ptr->alarm_raised = true; - ptr->failed = true; - ilog("%s Setting the state to FINISH\n", ptr->resource); - resourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - // Now we start counting as normal ... - } - else - { - if (ptr->failed) - { - /* If the resource is already failed, check to see if it is to be cleared */ - if ((( ptr->sev == SEVERITY_MINOR) && ( ptr->resource_value < ptr->minor_threshold )) || - (( ptr->sev == SEVERITY_MAJOR) && ( ptr->resource_value < ptr->major_threshold )) || - (( ptr->sev == SEVERITY_CRITICAL) && ( ptr->resource_value < ptr->critical_threshold ))) - { - if (ptr->count > ptr->num_tries) - ptr->count = ptr->num_tries; - - if (ptr->count > 0) - ptr->count--; - - if (ptr->count == 0) { - ptr->sev = SEVERITY_CLEARED; - ilog("%s Setting the state to FINISH\n", ptr->resource); - resourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - } - else - { - /* While in failed state, the resource usage must sustain normal level - * num_tries number of times before an alarm can be cleared. Keep incrementing the counter - * as it will be set to num_tries in the above block as soon as resource usage returns to - * normal level.*/ - ptr->count++; - - // rmon needs to send degrade assert message periodically as the - // condition might be cleared by maintenance over controller swact. - // - // added meaning to the debounce config setting. - // must be non-zero to degrade the host. - if ((ptr->alarm_raised) && (ptr->debounce) && - (_rmon_ctrl_ptr->clients > 0)) - { - if ( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) - { - ptr->failed_send++ ; - wlog ("%s request send failed (count:%d)\n", - ptr->resource, - ptr->failed_send); - } - else - { - mlog ("%s rmon_send_request ok\n", ptr->resource ); - ptr->failed_send = 0 ; - } - } - else - { - /* typical path for resources that - * - do not degrade host - * - do not raise alarms */ - dlog ("%s: alarm:%d debounce:%d clients:%d\n", - ptr->resource, - (ptr->alarm_raised), - (ptr->debounce), - (_rmon_ctrl_ptr->clients)); - } - } - } - } - - /* Check to see if a resource is over the failure thresholds for: minor, major and critical failures */ - if (( ptr->resource_value >= ptr->minor_threshold ) && - ( ptr->resource_value < ptr->major_threshold ) - && (ptr->sev != SEVERITY_MINOR)) - { - ptr->count++; - if ( ptr->count >= ptr->num_tries) { - ptr->failed = true; - ptr->sev = SEVERITY_MINOR; - resourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - } - - else if (( ptr->resource_value >= ptr->major_threshold ) && - ( ptr->resource_value < ptr->critical_threshold ) - && (ptr->sev != SEVERITY_MAJOR)) - { - ptr->count++; - if ( ptr->count >= ptr->num_tries){ - ptr->failed = true; - ptr->sev = SEVERITY_MAJOR; - resourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - } - else if (( ptr->resource_value >= ptr->critical_threshold )&& - (ptr->sev != SEVERITY_CRITICAL)) - { - ptr->count++; - if (ptr->count >= ptr->num_tries){ - ptr->failed = true; - ptr->sev = SEVERITY_CRITICAL; - resourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - } - else - { - /* if the host experienced a resource blip in the previous audit run and usage - * is now back at the normal level, decrement the count.*/ - if ((!ptr->failed) && (ptr->count > 0)){ - ptr->count--; - dlog("Resource %s is back at the normal level, count is set to %d", ptr->resource, ptr->count); - } - } -} - -/***************************************************************************** - * - * Name : process_failures_absolute - * - * Purpose : Check whether an absolute resource is to be failed or a - * failure threshold is to be cleared by the resource_handler - * - *****************************************************************************/ -void process_failures_absolute ( resource_config_type * ptr ) -{ - int node = 0; - - if (strcmp(ptr->resource,"processor_node1") == 0) - { - /* per node memory checking is enabled */ - node = 1; - } - - if (ptr->failed) { - /* If the resource is already failed, check to see if it is to be cleared */ - if (node == 0) { - - if ((( ptr->sev == SEVERITY_MINOR) && ( ptr->resource_value > ptr->minor_threshold_abs_node0 )) || - (( ptr->sev == SEVERITY_MAJOR) && ( ptr->resource_value > ptr->major_threshold_abs_node0 )) || - (( ptr->sev == SEVERITY_CRITICAL) && ( ptr->resource_value > ptr->critical_threshold_abs_node0 ))) - { - if (ptr->count > ptr->num_tries) - ptr->count = ptr->num_tries; - if (ptr->count > 0) - ptr->count--; - - if (ptr->count == 0) { - ptr->sev = SEVERITY_CLEARED; - resourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - } - else - { - /* While in failed state, the resource usage must sustain normal level - * num_tries number of times before an alarm can be cleared. Keep incrementing the counter - * as it will be set to num_tries in the above block as soon as resource usage returns to - * normal level.*/ - ptr->count++; - } - } - else { - - if ((( ptr->sev == SEVERITY_MINOR) && ( ptr->resource_value > ptr->minor_threshold_abs_node1 )) || - (( ptr->sev == SEVERITY_MAJOR) && ( ptr->resource_value > ptr->major_threshold_abs_node1 )) || - (( ptr->sev == SEVERITY_CRITICAL) && ( ptr->resource_value > ptr->critical_threshold_abs_node1 ))) - { - if (ptr->count > ptr->num_tries) - ptr->count = ptr->num_tries; - if (ptr->count > 0) - ptr->count--; - - if (ptr->count == 0) { - ptr->sev = SEVERITY_CLEARED; - resourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - } - else - { - /* While in failed state, the resource usage must sustain normal level - * num_tries number of times before an alarm can be cleared. Keep incrementing the counter - * as it will be set to num_tries in the above block as soon as resource usage returns to - * normal level.*/ - ptr->count++; - } - } - } - - if (node == 0) { - /* Check to see if a resource is over the failure thresholds for: minor, major and critical failures node 0 */ - if (( ptr->resource_value <= ptr->minor_threshold_abs_node0 ) && - ( ptr->resource_value > ptr->major_threshold_abs_node0 ) && - (ptr->sev != SEVERITY_MINOR)) - { - ptr->count++; - if ( ptr->count >= ptr->num_tries){ - ptr->failed = true; - ptr->sev = SEVERITY_MINOR; - resourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - } - - else if (( ptr->resource_value <= ptr->major_threshold_abs_node0 ) && - ( ptr->resource_value > ptr->critical_threshold_abs_node0 ) && - (ptr->sev != SEVERITY_MAJOR)) - { - ptr->count++; - if ( ptr->count >= ptr->num_tries){ - ptr->failed = true; - ptr->sev = SEVERITY_MAJOR; - resourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - } - else if (( ptr->resource_value < ptr->critical_threshold_abs_node0 )&& - (ptr->sev != SEVERITY_CRITICAL)) - { - ptr->count++; - if (ptr->count >= ptr->num_tries){ - ptr->failed = true; - ptr->sev = SEVERITY_CRITICAL; - resourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - } - else - { - /* if the host experienced a resource blip in the previous audit run and usage - * is now back at the normal level, decrement the count.*/ - if ((!ptr->failed) && (ptr->count > 0)){ - ptr->count--; - dlog("Resource %s is back at the normal level, count is set to %d", ptr->resource, ptr->count); - } - } - } else { - - /* Check to see if a resource is over the failure thresholds for: minor, major and critical failures node 1 */ - if (( ptr->resource_value <= ptr->minor_threshold_abs_node1 ) && - ( ptr->resource_value > ptr->major_threshold_abs_node1 ) && - (ptr->sev != SEVERITY_MINOR)) - { - ptr->count++; - if ( ptr->count >= ptr->num_tries){ - ptr->failed = true; - ptr->sev = SEVERITY_MINOR; - resourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - } - else if (( ptr->resource_value <= ptr->major_threshold_abs_node1 ) && - ( ptr->resource_value > ptr->critical_threshold_abs_node1 ) && - (ptr->sev != SEVERITY_MAJOR)) - { - ptr->count++; - if ( ptr->count >= ptr->num_tries){ - ptr->failed = true; - ptr->sev = SEVERITY_MAJOR; - resourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - } - else if (( ptr->resource_value < ptr->critical_threshold_abs_node1 )&& - (ptr->sev != SEVERITY_CRITICAL)) - { - ptr->count++; - if (ptr->count >= ptr->num_tries){ - ptr->failed = true; - ptr->sev = SEVERITY_CRITICAL; - resourceStageChange ( ptr, RMON_STAGE__MANAGE); - } - } - else - { - /* if the host experienced a resource blip in the previous audit run and usage - * is now back at the normal level, decrement the count.*/ - if ((!ptr->failed) && (ptr->count > 0)){ - ptr->count--; - dlog("Resource %s is back at the normal level, count is set to %d", ptr->resource, ptr->count); - } - } - } -} - -void update_total_clients (int total_clients) -{ - _rmon_ctrl_ptr->clients = total_clients; -} - -void add_registered_client (registered_clients client) -{ - - registered_clt[_rmon_ctrl_ptr->clients] = client; - ilog("added registered client: %s \n", client.client_name); -} - -/***************************************************************************** - * - * Name : add_fs_resource - * - * Purpose : Add a dynamic or static fs resource by reading - * the: /etc/rmonfiles.d/dynamic.conf file - *****************************************************************************/ -void add_fs_resource ( int resource_index, int criticality_index, int enabled, - int percent, int abs_values[3], int alarm_type, - int types_index, int devices_index, int mounted ) -{ - int fs_resource_index; - get_resource_index( FS_RESOURCE_NAME, &fs_resource_index ); - - int i = _rmon_ctrl_ptr->resources; - - if (i >= MAX_RESOURCES) { - wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES ); - } - else { - - resource_config[i].resource = dynamic_resource.at(resource_index).c_str(); - resource_config[i].severity = criticality_resource.at(criticality_index).c_str(); - resource_config[i].type = types.at(types_index).c_str(); - resource_config[i].device = devices.at(devices_index).c_str(); - resource_config[i].critical_threshold = UNUSED_CRITICAL; // initialization - resource_config[i].critical_threshold_abs_node0 = UNUSED_CRITICAL_ABS_NODE0; - - resource_config[i].num_tries = DEFAULT_NUM_TRIES; - resource_config[i].alarm_status = enabled; - resource_config[i].percent = percent; - resource_config[i].mounted = mounted; - resource_config[i].alarm_type = alarm_type; - resource_config[i].debounce = resource_config[fs_resource_index].debounce; - - // percentage based threshold measure - switch (percent) { - case PERCENT_USED: - if (abs_values[0] == 0) { - // if this is a static mounted file system resource - // then use common threshold values provided for the - // File System Resource - if ( (alarm_type == STATIC_ALARM) && (mounted == MOUNTED) ) { - resource_config[i].minor_threshold = - resource_config[fs_resource_index].minor_threshold; - - resource_config[i].major_threshold = - resource_config[fs_resource_index].major_threshold; - - if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { - resource_config[i].critical_threshold = - resource_config[fs_resource_index].critical_threshold; - } - resource_config[i].num_tries = - resource_config[fs_resource_index].num_tries; - } - else { - /* There are no specific percent thresholds for - the dynamic resource, use defaults */ - resource_config[i].minor_threshold = FS_MINOR; - resource_config[i].major_threshold = FS_MAJOR; - if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { - resource_config[i].critical_threshold = FS_CRITICAL; - } - } - } - else if (abs_values[0] != 0) { - /* Specific percent thresholds are defined for the dynamic resource */ - resource_config[i].minor_threshold = abs_values[0]; - resource_config[i].major_threshold = abs_values[1]; - if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { - resource_config[i].critical_threshold = abs_values[2]; - } - } - break; - - case PERCENT_UNUSED: - if (abs_values[0] == 0) { - // if this is a static mounted file system then use common - // threshold values provided for the File System Resource - if ( (alarm_type == STATIC_ALARM) && (mounted == MOUNTED) ) { - resource_config[i].minor_threshold_abs_node0 = - resource_config[fs_resource_index].minor_threshold_abs_node0; - - resource_config[i].major_threshold_abs_node0 = - resource_config[fs_resource_index].major_threshold_abs_node0; - - if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { - resource_config[i].critical_threshold_abs_node0 = DEFAULT_CRITICAL_ABS_NODE0; - } - resource_config[i].num_tries = - resource_config[fs_resource_index].num_tries; - } - else { - /* If the percent thresholds are selected - * use the default thresholds for the absolute - * value thresholds for the dynamic resource */ - resource_config[i].minor_threshold_abs_node0 = DEFAULT_MINOR_ABS_NODE0; - resource_config[i].major_threshold_abs_node0 = DEFAULT_MAJOR_ABS_NODE0; - if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { - resource_config[i].critical_threshold_abs_node0 = DEFAULT_CRITICAL_ABS_NODE0; - } - } - } - else if (abs_values[0] != 0) { - /* Specific absolute value thresholds are specified for the dynamic resource */ - resource_config[i].minor_threshold_abs_node0 = abs_values[0]; - resource_config[i].major_threshold_abs_node0 = abs_values[1]; - if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { - resource_config[i].critical_threshold_abs_node0 = abs_values[2]; - } - } - break; - } - - ilog ("Monitoring %2d: %-20s (%s) (%s)\n", i, resource_config[i].resource , - resource_config[i].severity, (enabled ? "enabled" : "disabled") ); - - /* Init the timer for this resource */ - mtcTimer_init ( rtimer[i] ) ; - - rtimer[i].hostname = "localhost" ; - rtimer[i].service = resource_config[i].resource ; - resource_config[i].i = i; - resource_config[i].failed = false ; - resource_config[i].count = 0 ; - resource_config[i].stage = RMON_STAGE__START ; - resource_config[i].sev = SEVERITY_CLEARED ; - resource_config[i].failed_send = 0; - resource_config[i].alarm_raised = false; - resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ; - - /* add the alarm id for the FM API per resource monitored */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, FS_ALARM_ID); - - mem_log_resource ( &resource_config[i] ); - - i++; - _rmon_ctrl_ptr->resources = i; - } -} - -/***************************************************************************** - * - * Name : save_dynamic_resource - * - * Purpose : Loops through resources and only adds a dynamic file system - * resource if it does not yet exist - ******************************************************************************/ -void save_fs_resource ( string resource_name, string criticality, - int enabled, int percent, - int abs_values[3], int alarm_type, - string type, string device, int mounted) -{ - - size_t resource_index; - size_t criticality_index; - size_t types_index; - size_t devices_index; - - bool newResource = true; - - for (int k=0; k< _rmon_ctrl_ptr->resources; k++) { - - if (strcmp(resource_config[k].resource, resource_name.c_str()) == 0) { - newResource = false; - break; - } - } - - if (newResource == true) { - dlog ("%s(%s) fs resource add in %s state\n", resource_name.c_str(), - criticality.c_str(), (enabled) ? "enabled" : "disabled"); - dynamic_resource.push_back(resource_name); - resource_index = dynamic_resource.size() - 1; - /* add the criticality value to a vector for permenant storage */ - criticality_resource.push_back(criticality); - criticality_index = criticality_resource.size() - 1; - types.push_back(type); - types_index = types.size() - 1; - devices.push_back(device); - devices_index = devices.size() - 1; - add_fs_resource ( resource_index, criticality_index, enabled, percent, abs_values, alarm_type, types_index, devices_index, mounted ); - } -} - -/***************************************************************************** - * - * Name : add_dynamic_mem_resource - * - * Purpose : Add a dynamic memory resource at runtime based on the name and criticality. - * The resource has both custom or default percent and absolute thresholds. - * *****************************************************************************/ -int add_dynamic_mem_resource ( int resource_index, int criticality_index, - double r_value, int percent, int abs_values[3], - const char * alarm_id, int socket_id=0 ) -{ - - int i = _rmon_ctrl_ptr->resources; - int new_index = i; - if (i >= MAX_RESOURCES) { - wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES ); - } - else { - - resource_config[i].resource = dynamic_resource.at(resource_index).c_str(); - resource_config[i].severity = criticality_resource.at(criticality_index).c_str(); - - if ((percent == 1) && (abs_values[0] == 0)) { - /* There are no specific percent thresholds for the dynamic resource, use defaults */ - resource_config[i].minor_threshold = DEFAULT_MINOR; - resource_config[i].major_threshold = DEFAULT_MAJOR; - if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { - resource_config[i].critical_threshold = DEFAULT_CRITICAL; - } else { - resource_config[i].critical_threshold = UNUSED_CRITICAL; - } - } - else if ((percent == 1) && (abs_values[0] != 0)) { - /* Specific percent thresholds are defined for the dynamic resource */ - resource_config[i].minor_threshold = abs_values[0]; - resource_config[i].major_threshold = abs_values[1]; - if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { - resource_config[i].critical_threshold = abs_values[2]; - } else { - resource_config[i].critical_threshold = UNUSED_CRITICAL; - } - } - - if ((percent == 0) && (abs_values[0] == 0)) { - /* If the percent thresholds are selected use the default thresholds for the absolute - * value thresholds for the dynamic resource */ - resource_config[i].minor_threshold_abs_node0 = DEFAULT_MINOR_ABS_NODE0; - resource_config[i].major_threshold_abs_node0 = DEFAULT_MAJOR_ABS_NODE0; - if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { - resource_config[i].critical_threshold_abs_node0 = DEFAULT_CRITICAL_ABS_NODE0; - } else { - resource_config[i].critical_threshold_abs_node0 = UNUSED_CRITICAL_ABS_NODE0; - } - resource_config[i].minor_threshold_abs_node1 = DEFAULT_MINOR_ABS_NODE1; - resource_config[i].major_threshold_abs_node1 = DEFAULT_MAJOR_ABS_NODE1; - resource_config[i].critical_threshold_abs_node1 = DEFAULT_CRITICAL_ABS_NODE1; - } - else if ((percent == 0) && (abs_values[0] != 0)) { - /* Specific absolute value thresholds are specified for the dynamic resource */ - resource_config[i].minor_threshold_abs_node0 = abs_values[0]; - resource_config[i].major_threshold_abs_node0 = abs_values[1]; - if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { - resource_config[i].critical_threshold_abs_node0 = abs_values[2]; - } else { - resource_config[i].critical_threshold_abs_node0 = UNUSED_CRITICAL_ABS_NODE0; - } - resource_config[i].minor_threshold_abs_node1 = DEFAULT_MINOR_ABS_NODE1; - resource_config[i].major_threshold_abs_node1 = DEFAULT_MAJOR_ABS_NODE1; - resource_config[i].critical_threshold_abs_node1 = DEFAULT_CRITICAL_ABS_NODE1; - } - - resource_config[i].num_tries = DEFAULT_NUM_TRIES; - resource_config[i].alarm_status = DEFAULT_ALARM_STATUS; - resource_config[i].percent = percent; - - ilog ("Monitoring %2d: Dynamic Resource- %s (%s)\n", i, resource_config[i].resource , - resource_config[i].severity ); - - /* Init the timer for this resource */ - mtcTimer_init ( rtimer[i] ) ; - - rtimer[i].hostname = "localhost" ; - rtimer[i].service = resource_config[i].resource ; - resource_config[i].i = i; - resource_config[i].failed = false ; - resource_config[i].count = 0 ; - resource_config[i].resource_value = r_value ; - resource_config[i].resource_prev = r_value ; - resource_config[i].stage = RMON_STAGE__START ; - resource_config[i].sev = SEVERITY_CLEARED ; - resource_config[i].alarm_type = STANDARD_ALARM; - resource_config[i].failed_send = 0; - resource_config[i].alarm_raised = false; - resource_config[i].socket_id = socket_id; - - /* add the alarm id for the FM API per resource monitored */ - snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, alarm_id); - - mem_log_resource ( &resource_config[i] ); - i++; - _rmon_ctrl_ptr->resources = i; - } - return new_index; -} - -/***************************************************************************** - * - * Name : save_dynamic_mem_resource - * - * Purpose : Loops through resources and only adds a memory resource if it does not yet - * exist - ******************************************************************************/ -int save_dynamic_mem_resource ( string resource_name, string criticality, - double r_value, int percent, int abs_values[3], - const char * alarm_id, int socket_id=0 ) -{ - - size_t resource_index; - size_t criticality_index; - bool newResource = true; - int updated_index; - - for (int k=0; k< _rmon_ctrl_ptr->resources; k++) { - - if (strcmp(resource_config[k].resource, resource_name.c_str()) == 0) { - resource_config[k].resource_value= - resource_config[k].resource_prev = r_value; - updated_index = k; - newResource = false; - break; - } - } - - if (newResource == true) { - dynamic_resource.push_back(resource_name); - resource_index = dynamic_resource.size() - 1; - /* add the criticality value to a vector for permenant storage */ - criticality_resource.push_back(criticality); - criticality_index = criticality_resource.size() - 1; - updated_index = add_dynamic_mem_resource(resource_index, criticality_index, - r_value, percent, abs_values, - alarm_id, socket_id); - rmon_alarming_init( &resource_config[updated_index] ); - resource_config[updated_index].resource_prev = - resource_config[updated_index].resource_value= r_value; - } - return updated_index; -} - -/***************************************************************************** - * - * Name : calculate_fs_usage - * - * Purpose : Calculate the file system usage as a percentage or an absolute value - * for the number of MiB remaining overall and in a specific fs. The calculation - * is done by executing the df command and getting the response for each type - * of filesystem being monitored. - *****************************************************************************/ -void calculate_fs_usage ( resource_config_type * ptr ) -{ - dlog("%s, is mounted resource: %d is enabled: %d\n", ptr->resource, ptr->mounted, ptr->alarm_status); - - FILE *pFile; - int last_index; - char fsLine[128]; - char buf[200]; - double fsUsage = 0; - char mounted_on[50], file_system[50], capacity[10]; - unsigned long long size, used, available; - string res_val; - double cap_percent; - double MiB = 1024.0; - double free_units = 0; - double usage_percents = 0; - double total_units = 0; - - if (ptr->mounted == MOUNTED) - { - if (strcmp(ptr->resource, FS_RESOURCE_NAME) == 0) - { - // We do not calculate the total for filesystem - // Resource FS_RESOURCE_NAME represents the total filesystem - return; - } - else - { - snprintf(buf, sizeof(buf), "timeout 2 df -T -P --local %s 2>/dev/null", ptr->resource); - } - - /* convert output of "df -P" from KiB to MiB */ - if(!(pFile = popen(buf, "r"))) - { - elog("Error, command df is not executed on resource: %s\n", ptr->resource); - } - else - { - while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL)) - { - sscanf(fsLine, "%49s %*s %llu %llu %llu %9s %49s", file_system, &size, &used, &available, capacity, mounted_on); - if (strcmp(mounted_on, ptr->resource) == 0) - { - string temp_val(capacity); - // exclude percentage (%) sign - last_index = temp_val.find_first_not_of("0123456789"); - res_val = temp_val.substr(0, last_index); - snprintf(capacity, sizeof(capacity), res_val.c_str()); - sscanf(capacity, "%lf", &cap_percent); - - if (ptr->percent == PERCENT_USED) - { - fsUsage = cap_percent; - ptr->resource_value = fsUsage; - if ( log_value ( ptr->resource_value, - ptr->resource_prev, - DEFAULT_LOG_VALUE_STEP ) ) - { - plog("filesystem: %s usage: %.2f%%\n", - ptr->resource, ptr->resource_value); - } - } - else - { - fsUsage = (double) (((100 - cap_percent) / 100) * size); - fsUsage = fsUsage / MiB; - ptr->resource_value = fsUsage; - if ( log_value ( ptr->resource_value, - ptr->resource_prev, - DEFAULT_LOG_VALUE_STEP ) ) - { - plog("filesystem: %s has %f (MiB) (free)\n", - ptr->resource, ptr->resource_value); - } - } - - // The size of the file system is 2X the user specified size to allow upgrades. - // Currently we are alarming on the used size but instead the alarming should be based on used size /2. - // As a result there is no indication to the user that they have may have eaten into the reserved space - // for upgrades resulting in an aborted upgrade. - if (strcmp(mounted_on, POSTGRESQL_FS_PATH) == 0) - { - ptr->resource_value = ptr->resource_value / 2; - } - } - } - } - pclose(pFile); - } - else if(strcmp(ptr->resource, NOVA_LOCAL) == 0) - { - /*rmon queries the thin pool usage if the volume group is nova-local*/ - snprintf(buf, sizeof(buf), "timeout 2 lvdisplay -C --noheadings --nosuffix -o data_percent --units m " - "/dev/nova-local/nova-local-pool 2>/dev/null"); - - if(!(pFile = popen(buf, "r"))) - { - elog("Error, command lvdisplay free units is not executed \n"); - } - else - { - while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL)) - { - usage_percents = atof(fsLine); - } - pclose(pFile); - } - ptr->resource_value = usage_percents; - if ( log_value ( ptr->resource_value, - ptr->resource_prev, - DEFAULT_LOG_VALUE_STEP )) - { - plog("filesystem: %s, usage: %f%% \n", ptr->resource, ptr->resource_value); - } - } - else if(strcmp(ptr->resource, CINDER_VOLUMES) == 0) - { - /*rmon queries the thin pool usage if the volume group is cinder-volumes*/ - snprintf(buf, sizeof(buf), "timeout 2 lvdisplay -C --noheadings --nosuffix -o data_percent --units m " - "/dev/cinder-volumes/cinder-volumes-pool 2>/dev/null"); - - if(!(pFile = popen(buf, "r"))) - { - elog("Error, command lvdisplay free units is not executed \n"); - } - else - { - while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL)) - { - usage_percents = atof(fsLine); - } - pclose(pFile); - } - ptr->resource_value = usage_percents; - if ( log_value ( ptr->resource_value, - ptr->resource_prev, - DEFAULT_LOG_VALUE_STEP )) - { - plog("filesystem: %s, usage: %.2f%% \n", ptr->resource, ptr->resource_value); - } - } - else - { - /* for the unmounted dynamic file system resources, use the vgdisplay command to get vg free units */ - snprintf(buf, sizeof(buf), "timeout 2 vgdisplay -C --noheadings --nosuffix -o vg_free --units m %s 2>/dev/null", ptr->resource); - - if(!(pFile = popen(buf, "r"))) - { - elog("Error, command vgdisplay free units is not executed \n"); - } - else - { - while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL)) - { - free_units = atof(fsLine); - } - pclose(pFile); - } - - /* for the unmounted dynamic file system resources, use the vgdisplay command to get vg size */ - snprintf(buf, sizeof(buf), "timeout 2 vgdisplay -C --noheadings --nosuffix -o vg_size --units m %s 2>/dev/null", ptr->resource ); - - if(!(pFile = popen(buf, "r"))) - { - elog("Error, command vgdisplay total units is not executed \n"); - } - else - { - while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL)) - { - total_units = atof(fsLine); - } - pclose(pFile); - } - - if ( ptr->percent == PERCENT_USED ) - { - if (total_units != 0) - { - ptr->resource_value = (double) (( (total_units - free_units) / total_units ) * 100); - } - else - { - ptr->resource_value = 0; - } - if ( log_value ( ptr->resource_value, - ptr->resource_prev, - DEFAULT_LOG_VALUE_STEP )) - { - plog("volume-group: %s, usage: %.2f%%\n", ptr->resource, ptr->resource_value); - } - } - else - { - ptr->resource_value = free_units; - if ( log_value ( ptr->resource_value, - ptr->resource_prev, - DEFAULT_LOG_VALUE_STEP )) - { - plog("volume-group: %s, %.2f (MiB) free\n", ptr->resource, ptr->resource_value); - } - } - } -} - -/***************************************************************************** - * - * Name : init_memory_checking - * - * Purpose : Get the memory accounting used either 0: overcommit or 1: strict - *****************************************************************************/ -void init_memory_accounting() -{ - - const char *strict_memory_file = "/proc/sys/vm/overcommit_memory"; - - ifstream mem_file ( strict_memory_file ); - string strict_line; - - if (mem_file.is_open()) - { - - while ( getline (mem_file, strict_line) ) { - IS_STRICT = atoi(strict_line.c_str()); - } - mem_file.close(); - - } - -} - -/***************************************************************************** - * - * Name : thinpool_calcVirtUsage - * - * Purpose : Obtain the percentage of the used virtual space in thin - * provisioning. - * - * Params : index - the index of the monitored resource (virtual space) - * - * Return : PASS/FAIL - * - *****************************************************************************/ -int thinpool_calcVirtUsage(int index, - const char *poolName, - const char *poolOwner, - const char *allocParam) { - - /* Initialize the variables used in calculating the virtual usage. */ - double provisioned_capacity = 0; - double total_capacity = 0; - double allocation_ratio = 1; - double ratio = 0; - double MiB = 1024.0; - - /* Buffer (and its size) for keeping the initial result after executing - the above commands. */ - char result[BUFFER_SIZE]; - const unsigned int buffer_size = BUFFER_SIZE; - - /* Return code. */ - int rc; - - /* Save the necessary commands for obtaining the information about virtual - thin pool usage: provisioned capacity, total capacity and maximum - oversubscription ratio. */ - const char *provisioned_capacity_cmd = NULL; - const char *allocation_ratio_cmd = NULL; - char total_capacity_cmd[BUFFER_SIZE]; - - snprintf(total_capacity_cmd, sizeof(total_capacity_cmd), - "lvs --units m --segments | grep \"%s\" | awk '{print $6}' | sed '$s/.$//'", - poolName); - - if (strcmp (poolOwner, "Cinder") == 0) { - const char *cinder_provisioned_capacity_cmd ="lvs --units m | grep \"volume-[.]*\" | awk '{ sum+=$4} END {print sum}'"; - const char *cinder_allocation_ratio_cmd = "cat /etc/cinder/cinder.conf | grep \"^max_over_subscription_ratio\" | cut -d '=' -f 2"; - provisioned_capacity_cmd = cinder_provisioned_capacity_cmd; - allocation_ratio_cmd = cinder_allocation_ratio_cmd; - } else if (strcmp (poolOwner, "Nova") == 0) { - const char *nova_provisioned_capacity_cmd = "lvs --units m | grep \"[.]*_disk\" | awk '{ sum+=$4} END {print sum}'"; - provisioned_capacity_cmd = nova_provisioned_capacity_cmd; - } - /* Determine the provisioned capacity. */ - rc = execute_pipe_cmd(provisioned_capacity_cmd, result, buffer_size); - if (rc != PASS) { - wlog("%s LVM Thinpool ; unable to query provisioned capacity (rc:%i)", - poolOwner, rc); - return (FAIL); - } - provisioned_capacity = atof(result); - dlog("%s LVM Thinpool provisioned capacity is %f", poolOwner, provisioned_capacity); - - /* If the threshold is of percentage type, then also determine the total - thin pool capacity and the max oversubscription ratio. */ - rc = execute_pipe_cmd(total_capacity_cmd, result, buffer_size); - if (rc != PASS) { - elog("%s LVM Thinpool ; unable to query total capacity (rc:%i)", - poolOwner, rc); - return (FAIL); - } - total_capacity = atof(result); - dlog("%s LVM Thinpool total capacity is %f", - poolOwner, total_capacity); - - if (strcmp (poolOwner, "Cinder") == 0) { - rc = execute_pipe_cmd(allocation_ratio_cmd, result, buffer_size); - if (rc != PASS) { - elog("%s LVM Thinpool %s ratio could not be determined (rc:%i)", - allocParam, poolOwner, rc); - return (FAIL); - } - allocation_ratio = atof(result); - } else if (strcmp (poolOwner, "Nova") == 0) { - allocation_ratio = 1.0; - } - dlog("%s LVM Thinpool %s is %f", poolOwner, allocParam, allocation_ratio); - - /* If the allocation_ratio is 0 or hasn't been found, its default - value should be 1. */ - if (allocation_ratio == 0) - allocation_ratio = 1; - - /* Compute the current virtual space usage of the thin pool. */ - if (total_capacity != 0){ - ratio = provisioned_capacity / (total_capacity * allocation_ratio) * 100; - } else { - /*3 minutes (30 sec * rate_throttle = 180 sec)*/ - /* Change the warning log to a debug log to avoid generating this log in - rmond.log when Cinder is Ceph backended. Once the repackaging of cinder_virtual_resource.conf - and nova_virtual_resource.conf is done, we will change it back to warning log. */ - dlog("%s LVM Thinpool total capacity is 0\n", poolOwner); - return (FAIL); - } - - /* Update the resource value configuration. */ - if (resource_config[index].percent == 1) { - resource_config[index].resource_value = ratio; - if ( log_value ( resource_config[index].resource_value, - resource_config[index].resource_prev, - DEFAULT_LOG_VALUE_STEP )) - { - plog("%s LVM Thinpool Usage: %.2f%%", poolOwner, ratio); - } - } - else { - resource_config[index].resource_value = - ((total_capacity * allocation_ratio) - provisioned_capacity) * MiB; - if ( log_value ( resource_config[index].resource_value, - resource_config[index].resource_prev, - DEFAULT_LOG_VALUE_STEP )) - { - plog("%s LVM Thinpool has %.2f (MiB) free", - poolOwner, - resource_config[index].resource_value); - } - } - return (PASS); -} -/***************************************************************************** - * - * Name : calculate_virtual_space_usage - * - * Purpose : Obtain the percentage of the used virtual space in thin - * provisioning. - * - * Params : index - the index of the monitored resource (virtual space) - * - * Return : PASS/FAIL - * - *****************************************************************************/ -int calculate_virtual_space_usage(int index, const char* constant) { - int rc = 0; - if (strcmp(constant, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) { - rc = thinpool_calcVirtUsage(index, - "cinder-volumes-pool", - "Cinder", - "max_over_subscription_ratio"); - } else if (strcmp(constant, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) { - rc = thinpool_calcVirtUsage(index, - "nova-local-pool", - "Nova", - "disk_allocation_ratio"); - } - - return rc; -} - -/***************************************************************************** - * - * Name : calculate_memory_usage - * - * Purpose : Calculate the memory usage as a percentage or absolute value for the - * number of MiB left. The overall average memory usage as well as the per NUMA - * node memory usage is computed. - *****************************************************************************/ -void calculate_memory_usage( int index ) { - - const char *mem_info = "/proc/meminfo"; - FILE *pFile; - char memoryLine[40]; - char attribute_name[30]; - double memUsage, memUsageHuge; - char *line0 = &memoryLine[0]; - char *line3 = &memoryLine[3]; - char *line10 = &memoryLine[10]; - unsigned long int value; - unsigned long int avail = 0; - unsigned long int memTotal; - int resource_name_size = 100; - string resource_name_huge = "processor_hugepages_"; - string resource_name = "processor_"; - char numa_node[resource_name_size]; - string criticality = "critical"; - double MiB = 1024.0; - int absolute_thresholds[3]; - memoryinfo memInfo; - struct dirent *ent; - DIR *numa_node_dir; - vector numa_files; - vector node_files; - - memset ( (char*)&memInfo, 0, sizeof(memoryinfo)); - - if ((pFile = fopen(mem_info, "r")) == NULL){ - dlog("failed to open: /proc/meminfo \n"); - } - - else { - - while (memset(memoryLine, 0, sizeof(memoryLine)) && (fgets((char*) &memoryLine, sizeof(memoryLine), pFile) != NULL)) { - - if (*line3 == 'T') { - /* match MemTotal */ - value = 0UL; - if (sscanf(memoryLine, "MemTotal: %lu", &value) == 1) { - memInfo.MemTotal = value; - continue; - } - } else if (*line3 == 'F') { - /* match MemFree */ - value = 0UL; - if (sscanf(memoryLine, "MemFree: %lu", &value) == 1) { - memInfo.MemFree = value; - continue; - } - } else if (*line3 == 'f') { - /* match Buffers */ - value = 0UL; - if (sscanf(memoryLine, "Buffers: %lu", &value) == 1) { - memInfo.Buffers = value; - continue; - } - } else if (*line3 == 'h') { - /* match Cached */ - value = 0UL; - if (sscanf(memoryLine, "Cached: %lu", &value) == 1) { - memInfo.Cached = value; - continue; - } - } else if ((*line0 == 'S') && (*line3 == 'c')) { - /* match Slab Reclaimable */ - value = 0UL; - if (sscanf(memoryLine, "SReclaimable: %lu", &value) == 1) { - memInfo.SlabReclaimable = value; - continue; - } - } else if ((*line0 == 'C') && (*line10 == 't')) { - /* match CommitLimit */ - value = 0UL; - if (sscanf(memoryLine, "CommitLimit: %lu", &value) == 1) { - memInfo.CommitLimit = value; - continue; - } - } else if ((*line0 == 'C') && (*line10 == 'A')) { - /* match Committed_AS */ - value = 0UL; - if (sscanf(memoryLine, "Committed_AS: %lu", &value) == 1) { - memInfo.Committed_AS = value; - continue; - } - } else if ((*line0 == 'H') && (*line10 == 'T')) { - /* match Hugepages_Total */ - value = 0UL; - if (sscanf(memoryLine, "HugePages_Total: %lu", &value) == 1) { - memInfo.HugePages_Total = value; - continue; - } - } - else if ((*line0 == 'H') && (*line10 == 'z')) { - /* match Hugepagesize */ - value = 0UL; - if (sscanf(memoryLine, "Hugepagesize: %lu", &value) == 1) { - memInfo.Hugepagesize = value; - continue; - } - } - else if ((*line0 == 'A') && (*line3 == 'n')) { - /* match AnonPages */ - value = 0UL; - if (sscanf(memoryLine, "AnonPages: %lu", &value) == 1) { - memInfo.AnonPages = value; - continue; - } - } - } - fclose(pFile); - } - - avail = memInfo.MemFree + memInfo.Buffers + memInfo.Cached + memInfo.SlabReclaimable; - memTotal = avail + memInfo.AnonPages; - dlog("memTotal: %lu\n", memTotal); - - /* average memory utilization */ - if (IS_STRICT == 1) { - /* strict memory checking enabled */ - if (resource_config[index].percent == 1) { - memUsage = (double) memInfo.Committed_AS / memInfo.CommitLimit; - memUsage = memUsage * 100; - } else { - memUsage = (double) (memInfo.CommitLimit - memInfo.Committed_AS) / MiB; - } - } else { - if (resource_config[index].percent == 1) - { - memUsage = (double) memInfo.AnonPages / memTotal; - memUsage = memUsage * 100; - } else - { - memUsage = (double) avail / MiB; - } - } - resource_config[index].resource_value = memUsage; - if (resource_config[index].percent == 1) - { - if ( log_value ( resource_config[index].resource_value, - resource_config[index].resource_prev, - DEFAULT_LOG_VALUE_STEP )) - { - plog("%s: %.2f%%\n", - resource_config[index].resource, memUsage); - } - } - else - { - if ( log_value ( resource_config[index].resource_value, - resource_config[index].resource_prev, - DEFAULT_LOG_VALUE_STEP )) - { - plog("%s: %.2f (MiB) free\n", - resource_config[index].resource, memUsage); - } - } - if ((numa_node_dir= opendir ("/sys/devices/system/node/")) != NULL) { - /* print all the files and directories within directory */ - while ((ent = readdir (numa_node_dir)) != NULL) { - if (strstr(ent->d_name, "node") != NULL) { - numa_files.push_back(ent->d_name); - } - } - closedir (numa_node_dir); - } - - /* loop through all NUMA nodes to get memory usage per NUMA node */ - for (unsigned int p=0; pper_node == 1) { - /* if set to 1 get the per NUMA node memory values */ - memset(absolute_thresholds, 0, sizeof(absolute_thresholds)); - avail = memInfo.MemFree + memInfo.FilePages + memInfo.SlabReclaimable; - memTotal = avail + memInfo.AnonPages; - /* NUMA node memory usage */ - if (resource_config[index].percent == 1) { - memUsage = (double) memInfo.AnonPages / memTotal; - memUsage = memUsage * 100; - dlog("Memory Usage %s: %.2f%% \n", resource_name.c_str(), memUsage); - } else { - memUsage = (double) avail / MiB; - dlog("Memory Available %s: %.2f MB \n", resource_name.c_str(), memUsage); - } - /* initialize a new dynamic resource for the NUMA node if it does not already exist */ - save_dynamic_mem_resource ( resource_name, criticality, memUsage, resource_config[index].percent, - absolute_thresholds, MEMORY_ALARM_ID ); - } - - - if (HUGEPAGES_NODE == 1) { - /* huge pages memory usage for the NUMA node */ - if (memInfo.HugePages_Total != 0){ - if (resource_config[index].percent == 1){ - memUsageHuge = (double) (memInfo.HugePages_Total - memInfo.HugePages_Free) / memInfo.HugePages_Total; - memUsageHuge = memUsageHuge * 100; - dlog("Memory Usage %s: %.2f%% \n", resource_name_huge.c_str(), memUsageHuge); - } else { - memUsageHuge = (double) memInfo.HugePages_Free * (memInfo.Hugepagesize/MiB) ; - dlog("Memory Available %s: %.2f MB \n", resource_name_huge.c_str(), memUsageHuge); - } - save_dynamic_mem_resource ( resource_name_huge, criticality, memUsageHuge, resource_config[index].percent, - absolute_thresholds, MEMORY_ALARM_ID ); - } - } - resource_name_huge = "processor_hugepages_"; - resource_name = "processor_"; - } -} - -/***************************************************************************** - * - * Name : get_cpu_time - * - - * Purpose : Parse per-cpu hi-resolution scheduling stats - * - *****************************************************************************/ -int get_cpu_time( unsigned long long * cpu_time ) -{ -#define MAX_STRING_SIZE (19) - - const char *sched_stat = "/proc/schedstat"; - FILE * pFile; - char cpu_line[500]; - unsigned long long value; - int version = 0; - int index = 0; - char cpu_time_len[50]; - - if ((pFile = fopen(sched_stat, "r")) == NULL){ - dlog("failed to open: /proc/schedstat \n"); - return (FAIL); - } - - else { - /* Parse per-cpu hi-resolution scheduling stats */ - while (memset(cpu_line, 0, sizeof(cpu_line)) && (fgets((char*) &cpu_line, sizeof(cpu_line), pFile) != NULL)) { - - if (version != 15){ - /* only version 15 is supported */ - if (sscanf(cpu_line, "version %llu", &value) == 1) { - version = (int) value; - } - } - else if ((strstr(cpu_line, "cpu") != NULL) && (version == 15)) - { - sscanf(cpu_line, "%*s %*s %*s %*s %*s %*s %*s %49s ",cpu_time_len); - if (((unsigned)strlen(cpu_time_len)) < MAX_STRING_SIZE) { - /* get the cpu time values for each cpu which is the 7th field */ - sscanf(cpu_line, "%*s %*s %*s %*s %*s %*s %*s %llu ",&value); - cpu_time[index++] = value; - } - else { - elog("%s exceeded 2^64 for cpu stats cannot calculate cpu usage\n", cpu_time_len); - cpu_time[index++] = 0; - } - } - - } - fclose(pFile); - } - - return (PASS); -} - -/***************************************************************************** - * - * Name : cpu_monitoring_init - * - - * Purpose : Get the base cpu list if running on a compute. Also get the number - * of cpus from: /proc/cpuinfo - *****************************************************************************/ -void cpu_monitoring_init() -{ - - string base_cpu=""; - FILE * pFile; - string delimiter = ",", delimiterTwo = "-"; - size_t pos = 0; - string token; - char cpu_line[100]; - const char *cpu_info = "/proc/cpuinfo"; - char processor[20]; - - pFile = fopen (COMPUTE_RESERVED_CONF , "r"); - if (pFile != NULL){ - ilog("File %s is present\n", COMPUTE_RESERVED_CONF); - ifstream fin( COMPUTE_RESERVED_CONF ); - string line; - - while( getline( fin, line ) ) { - /* process each line */ - if( line.find ("PLATFORM_CPU_LIST=") != string::npos ) { - stringstream ss( line ); - getline( ss, base_cpu, '=' ); // token = string before = - getline( ss, base_cpu, '=' ); // token = string after = - ilog("Found PLATFORM_CPU_LIST set to %s in file %s\n", base_cpu.c_str(), COMPUTE_RESERVED_CONF); - } - } - fclose (pFile); - } - - if (base_cpu.compare("") != 0) - { - /* get base cpus if they are available */ - if ((pos = base_cpu.find(delimiter)) != string::npos) { - - /* if the base cpus are listed with a comma, ex: 1,2 */ - base_cpu = base_cpu + delimiter; - while ((pos = base_cpu.find(delimiter)) != string::npos) { - token = base_cpu.substr(0, pos); - included_cpu[num_base_cpus++] = atoi(token.c_str()); - base_cpu.erase(0, pos + delimiter.length()); - } - } else if ((pos = base_cpu.find(delimiterTwo)) != string::npos) { - - /* if the base cpus are listed with a dash, ex: 1-3 */ - base_cpu = base_cpu + delimiterTwo; - token = base_cpu.substr(0, pos); - int first_cpu = atoi(token.c_str()); - base_cpu.erase(0, pos + delimiterTwo.length()); - pos = base_cpu.find(delimiterTwo); - token = base_cpu.substr(0, pos); - int last_cpu = atoi(token.c_str()); - - /* loop through the list of base cpus */ - for (num_base_cpus=0; num_base_cpus<=(last_cpu - first_cpu); num_base_cpus++){ - included_cpu[num_base_cpus++] = first_cpu++; - } - } - - if (num_base_cpus == 0) { - /* only one base cpu available */ - included_cpu[num_base_cpus++] = atoi(base_cpu.c_str()); - } - } - - ilog("Number of base CPUs for this node is %d \n", num_base_cpus); - - /* get the number of cpus */ - if ((pFile = fopen(cpu_info, "r")) == NULL){ - wlog("failed to open: /proc/cpuinfo \n"); - } - - else { - - /* Parse per-cpu hi-resolution scheduling stats */ - while (memset(cpu_line, 0, sizeof(cpu_line)) && (fgets((char*) &cpu_line, sizeof(cpu_line), pFile) != NULL)) { - - sscanf(cpu_line, "%19s %*s %*s", processor); - if (strcmp(processor, "processor") == 0) { - num_cpus++; - } - } - fclose(pFile); - } - - ilog("Number of CPUs for this node is %d \n", num_cpus); -} - -/***************************************************************************** - * - * Name : calculate_linux_usage - * - * Purpose : Calculate the cpu usage for Linux cards: controller, compute, storage - * The calculation runs as a delta. The first time the function is called no - * valid cpu calculation occurs. From the second time onwards, the cpu uasge is - * calculated by taking the delta from the previous time the function was called - * - *****************************************************************************/ -int calculate_linux_usage( resource_config_type * ptr ) -{ - - double delta_seconds; - unsigned long long cpu_occupancy[num_cpus]; - unsigned long long cpu_delta_time; - unsigned long long total_avg_cpu = 0; - unsigned int counted_cpu=0; - int rc; - unsigned long long cpu_time[num_cpus]; - - if (cpu_time_initial.size() == 0) { - /* get the cpu time initially if the first cpu time does not exist */ - rc = get_cpu_time( cpu_time ); - /* get the first timestamp */ - time(&t1); - - if (rc != PASS) - { - wlog("Failed get_cpu_time \n"); - return (FAIL); - } - - for (int x=0; xresource_value = 0; - } - else { - /* get the later cpu time if the first cpu time exists */ - rc = get_cpu_time( cpu_time ); - - if (rc != PASS) - { - wlog("Failed get_cpu_time \n"); - return (FAIL); - } - - /* get the later timestamp */ - time(&t2); - - for (int x=0; xresource_value = (double) (total_avg_cpu / counted_cpu); - /* clear the old cpu times and set the current times as the old times */ - cpu_time_initial.clear(); - for (int x=0; xresource_value, - ptr->resource_prev, - LINUX_CPU_LOG_VALUE_STEP )) - { - plog("%s: %.2f%% (average)\n", ptr->resource, ptr->resource_value); - } - } - - return (PASS); -} - -/* Read the node UUID from the: /etc/platform/platform.conf file */ -void _readUUID () -{ - FILE * pFile; - const char *platformFile = "/etc/platform/platform.conf"; - - pFile = fopen (platformFile , "r"); - if (pFile != NULL) { - ifstream fin( platformFile ); - string line; - - while( getline( fin, line ) ) { - /* process each line */ - if( line.find ("UUID=") != string::npos ) { - stringstream ss( line ); - getline( ss, hostUUID, '=' ); // token = string before = - getline( ss, hostUUID, '=' ); // token = string after = - } - } - fclose (pFile); - } -} - -/***************************************************************************** - * - * Name : _load_rmon_interfaces - * - * Purpose : Update the monitored network interfaces from the: - * /etc/plaform/interfaces file - *****************************************************************************/ -void _load_rmon_interfaces () -{ - - rmon_socket_type * sock_ptr = rmon_getSock_ptr (); - - /* initialize interface monitoring */ - for ( int j = 0 ; j < _rmon_ctrl_ptr->interface_resources; j++ ) - { - init_physical_interfaces ( &interface_resource_config[j] ); - } - - for (int i=0; i<_rmon_ctrl_ptr->interface_resources; i++) - { - if ( interface_resource_config[i].interface_used == true ) - { - /* set the link state for all the primary physical interfaces */ - if ( get_link_state ( sock_ptr->ioctl_sock, interface_resource_config[i].interface_one, &interface_resource_config[i].link_up_and_running ) ) - { - interface_resource_config[i].link_up_and_running = false ; - interface_resource_config[i].resource_value = INTERFACE_DOWN; - wlog ("Failed to query %s operational state ; defaulting to down\n", interface_resource_config[i].interface_one) ; - } - else - { - ilog ("%s link is: %s\n", interface_resource_config[i].interface_one, interface_resource_config[i].link_up_and_running ? "Up" : "Down" ); - if (interface_resource_config[i].link_up_and_running) - { - interface_resource_config[i].resource_value = INTERFACE_UP; - } - else - { - interface_resource_config[i].resource_value = INTERFACE_DOWN; - interface_resource_config[i].failed = true; - } - } - if (interface_resource_config[i].lagged == true) - { - /* set the link state for all the lagged physical interfaces */ - if ( get_link_state ( sock_ptr->ioctl_sock, interface_resource_config[i].interface_two, &interface_resource_config[i].link_up_and_running ) ) - { - interface_resource_config[i].link_up_and_running = false ; - wlog ("Failed to query %s operational state ; defaulting to down\n", interface_resource_config[i].interface_two) ; - } - else - { - ilog ("%s link is: %s\n", interface_resource_config[i].interface_two, interface_resource_config[i].link_up_and_running ? "Up" : "Down" ); - if (interface_resource_config[i].link_up_and_running) - { - interface_resource_config[i].resource_value_lagged = INTERFACE_UP; - } - else - { - interface_resource_config[i].resource_value_lagged = INTERFACE_DOWN; - interface_resource_config[i].failed = true; - } - } - } - } - } - - for ( int j = 0 ; j < _rmon_ctrl_ptr->interface_resources; j++ ) - { - interface_alarming_init ( &interface_resource_config[j] ); - } -} - -/***************************************************************************** - * - * Name : resource_stall_monitor - * - * Purpose : Detects stalls in the resource monitoring threads - ******************************************************************************/ -int resource_stall_monitor ( resource_config_type * ptr, pid_t tid, pid_t pid) -{ - #define MAX_SCHEDSTAT_LEN (128) - char file_path [MAX_FILENAME_LEN] ; - char schedstat [MAX_SCHEDSTAT_LEN] ; - FILE * fp ; - int rc = PASS; - unsigned long long nr_switches_old = t_data.nr_switches_count; - - snprintf ( &file_path[0], MAX_FILENAME_LEN, "/proc/%d/task/%d/schedstat", pid, tid ); - fp = fopen (file_path, "r" ); - if ( fp ) - { - /* check to see if the thread is stalled */ - memset ( schedstat, 0 , MAX_SCHEDSTAT_LEN ); - if ( fgets ( &schedstat[0], MAX_SCHEDSTAT_LEN, fp) != NULL) - { - if ( sscanf ( schedstat, "%*s %*s %llu", &t_data.nr_switches_count) >= 1 ) - { - dlog ("%s: nr_count: %llu, nr_count_old: %llu \n", ptr->resource, t_data.nr_switches_count, nr_switches_old); - if ((nr_switches_old != t_data.nr_switches_count) && (ptr->failed)) - { - /* Clear the stall monitor alarm */ - ilog("%s thread has unstalled \n", ptr->resource); - ptr->sev = SEVERITY_CLEARED; - t_data.nr_switches_count = 0; - resourceStageChange ( ptr, RMON_STAGE__FINISH ); - } - } - else - { - wlog ("Failed to get schedstat from (%s)\n", file_path); - rc = FAIL; - } - } - else - { - wlog ("failed to read from (%s)\n", file_path ); - rc = FAIL; - } - fclose(fp); - } - else - { - wlog ("Failed to open (%s)\n", file_path); - rc = FAIL; - } - - if ((((nr_switches_old == t_data.nr_switches_count) && (ptr->sev != SEVERITY_MAJOR))) || - (rc == FAIL)) - { - /* thread has stalled raise alarm */ - elog("%s thread has stalled \n", ptr->resource); - ptr->sev = SEVERITY_MAJOR; - ptr->failed = true; - resourceStageChange ( ptr, RMON_STAGE__MANAGE ); - } - - return rc; -} - -/***************************************************************************** - * - * Name : check_instance_file - * - * Purpose : Thread spawned by rmon to check if: /etc/nova/instances is mounted. - * It needs to be a thread because of NFS hang issues. - * - *****************************************************************************/ -void *check_instance_file(void *threadarg) -{ - struct thread_data *res_data; - FILE * pFile; - FILE *testFile; - string line; - struct stat p; - const char *instances_dir = "/etc/nova/instances"; - const char *test_file = "/etc/nova/instances/.rmon_test"; - - res_data = (struct thread_data *) threadarg; - - pthread_mutex_lock(&lock); - res_data->thread_running = true; - res_data->tid = syscall(SYS_gettid); - pthread_mutex_unlock(&lock); - - dlog("%s process id: %d, thread id: %d \n", res_data->resource->resource, res_data->pid, res_data->tid); - res_data->resource_usage = NOT_MOUNTED; - pFile = fopen (MOUNTS_DIR , "r"); - - /* query /proc/mounts and make sure the /etc/nova/instances file system is there */ - if (pFile != NULL) - { - ifstream fin( MOUNTS_DIR ); - while( getline( fin, line ) ) - { - /* process each line */ - if( line.find (instances_dir) != string::npos ) - { - /* the mount is present */ - res_data->resource_usage = MOUNTED; - break; - } - } - fclose (pFile); - } - - if ( res_data->resource_usage == MOUNTED ) - { - /* put the test file in and check that it is accessible */ - testFile = fopen(test_file, "w"); - if (testFile != NULL) - { - fclose (testFile); - if( remove( test_file ) != 0 ) - { - elog("Failure in removing rmond test file: %s \n", test_file); - } - } - else - { - res_data->resource_usage = NOT_MOUNTED; - } - } - - if (res_data->resource_usage == NOT_MOUNTED) - { - /* fail the resource */ - stat (COMPUTE_CONFIG_PASS, &p); - if ((p.st_ino != 0 ) || (p.st_dev != 0)) - { - pthread_mutex_lock(&lock); - if (res_data->resource->sev != SEVERITY_MAJOR) - { - res_data->resource->sev = SEVERITY_MAJOR; - res_data->resource->failed = true; - resourceStageChange ( res_data->resource, RMON_STAGE__MANAGE ); - } - pthread_mutex_unlock(&lock); - } - } - else if ((res_data->resource_usage == MOUNTED) && (res_data->resource->failed)) - { - pthread_mutex_lock(&lock); - res_data->resource->sev = SEVERITY_CLEARED; - resourceStageChange ( res_data->resource, RMON_STAGE__FINISH ); - pthread_mutex_unlock(&lock); - } - - pthread_mutex_lock(&lock); - res_data->thread_running = false; - pthread_mutex_unlock(&lock); - - pthread_exit(NULL); -} - - -/***************************************************************************** - * - * Name : postPMs - * - * Purpose : create samples for each resource in Ceilometer - * - *****************************************************************************/ -int _postPMs () -{ - char meta_data[MAX_LEN]; - if ( hostUUID.empty() ) - { - /* keep trying to get the host UUID if it is not present */ - _readUUID(); - } - - if ( !hostUUID.empty() ) - { - // indicate the platform hostname as metadata for all resources - char *hoststring = strdup(_rmon_ctrl_ptr->my_hostname); - if (hoststring) { - char *host = strtok(hoststring,"="); - host = strtok(NULL, "="); - snprintf(&meta_data[0], MAX_LEN, "{\"host\":\"%s\"}", host); - free(hoststring); - } - - for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ ) - { - ostringstream strs; - strs << resource_config[i].resource_value ; - string res_val = strs.str(); - - if (strcmp(resource_config[i].resource, CPU_RESOURCE_NAME) == 0) { - /* cpu resource pm */ - generate_ceilometer_pm ( hostUUID, "platform.cpu.util", "delta", "%", - res_val, string(meta_data) ); - } - else if (strcmp(resource_config[i].resource, MEMORY_RESOURCE_NAME) == 0) { - /* memory resource pm */ - if (resource_config[i].percent == 1) { - generate_ceilometer_pm ( hostUUID, "platform.mem.util", "delta", "%", - res_val, string(meta_data) ); - } else { - generate_ceilometer_pm ( hostUUID, "platform.mem.util", "gauge", "MB", - res_val, string(meta_data) ); - } - } - else if (strcmp(resource_config[i].resource, FS_RESOURCE_NAME) == 0) { - /* filesystem resource pm */ - if (resource_config[i].percent == 1) { - generate_ceilometer_pm ( hostUUID, "platform.fs.util", "delta", "%", - res_val, string(meta_data) ); - } else { - generate_ceilometer_pm ( hostUUID, "platform.fs.util", "gauge", "MB", - res_val, string(meta_data) ); - } - } - } // end of resource loop - } - return (PASS); -} - -/***************************************************************************** - * - * Name : _get_events - * - * Purpose : query each resource and extract the required usage values - * - *****************************************************************************/ - -extern bool is_cpe ( void ); -extern bool is_worker ( void ); - -void _get_events (void) -{ - int rc; - string v_cpu; - FILE * pFile; - - if ( _rmon_ctrl_ptr->clients == 0 ) - { - wlog ("Monitoring with no registered clients\n"); - } - - for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ ) - { - const char *resource = resource_config[i].resource; - ilog_throttled ( resource_config[i].resource_monitor_throttle, 120, - "Monitoring '%s'\n", - resource ); - - if (strcmp(resource, CPU_RESOURCE_NAME) == 0) - { - /* linux cards: controller, compute and storage cpu utilization */ - rc = calculate_linux_usage( &resource_config[i] ); - if ( rc == PASS ) - { - /* get if the resource is failed to be used by resource handler */ - process_failures ( &resource_config[i]); - } - } - else if (!strcmp(resource, V_CPU_RESOURCE_NAME) || - !strcmp(resource, V_MEMORY_RESOURCE_NAME) || - !strcmp(resource, V_PORT_RESOURCE_NAME) || - !strcmp(resource, V_INTERFACE_RESOURCE_NAME) || - !strcmp(resource, V_LACP_INTERFACE_RESOURCE_NAME) || - !strcmp(resource, V_OVSDB_RESOURCE_NAME) || - !strcmp(resource, V_NETWORK_RESOURCE_NAME) || - !strcmp(resource, V_OPENFLOW_RESOURCE_NAME)) - { - /* ensure that configuration has completed before computing - * vswitch resource utilization */ - if ( !daemon_is_file_present ( CONFIG_COMPLETE_WORKER ) ) - continue ; - - pFile = fopen (COMPUTE_VSWITCH_DIR , "r"); - if (pFile != NULL){ - fclose (pFile); - } - else - { - wlog ("%s failed to open %s\n", resource, COMPUTE_VSWITCH_DIR); - } - } - else if (strstr(resource_config[i].resource, V_MEMORY_RESOURCE_NAME) != NULL) - { - /* vswitch memory with specific sockets */ - /* skip these ones as they are already taken care of above */ - } - else if(strcmp(resource, REMOTE_LOGGING_RESOURCE_NAME) == 0) - { - rmonHdlr_remotelogging_query(&resource_config[i]); - } - else if (strcmp(resource, INSTANCE_RESOURCE_NAME) == 0) - { - /* do not perform this check if we are not on a compute node. - * its not valid on storage not combo load */ - if ( !is_worker () ) - continue ; - - if ( !daemon_is_file_present ( CONFIG_COMPLETE_WORKER ) ) - continue ; - - /* nova instances mount check */ - pFile = fopen (COMPUTE_VSWITCH_DIR , "r"); - if (pFile != NULL) - { - rc = PASS ; - pthread_mutex_lock(&lock); - if (!t_data.thread_running) - { - pthread_attr_t attr ; - t_data.resource = &resource_config[i]; - pthread_attr_init (&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - /* launch a thread to monitor the /etc/nova/instances mount */ - rc = pthread_create(&thread, &attr, check_instance_file, (void *) &t_data); - if (rc) - { - elog("%s ERROR; return code from pthread_create() is %d\n", - resource, rc); - } - pthread_attr_destroy (&attr); - } - else - { - /* If thread is still running check that it is not stalled */ - resource_stall_monitor(&resource_config[i], t_data.tid, t_data.pid); - } - pthread_mutex_unlock(&lock); - fclose (pFile); - } - } - else if (strcmp(resource, MEMORY_RESOURCE_NAME) == 0) { - /* memory utilization */ - calculate_memory_usage(i); - /* get if the resource is failed to be used by resource handler */ - if (resource_config[i].percent == PERCENT_USED) { - process_failures ( &resource_config[i]); - } else { - process_failures_absolute ( &resource_config[i]); - } - - } - else if ((strcmp(resource, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) && - (resource_config[i].alarm_status == ALARM_ON)) { - /* virtual thin pool space utilization */ - rc = calculate_virtual_space_usage(i, V_CINDER_THINPOOL_RESOURCE_NAME); - /* only check resource for fail and clear if it is active */ - if (rc == PASS) { - if (resource_config[i].percent == PERCENT_USED) { - /* get if the resource is failed to be used by resource handler */ - process_failures (&resource_config[i]); - } else { - process_failures_absolute (&resource_config[i]); - } - } - } - else if ((strcmp(resource, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) && - (resource_config[i].alarm_status == ALARM_ON)){ - /* do not perform this check if we are not on a compute node. - * its not valid on storage not combo load */ - if ( !is_worker () && !is_cpe () ) - continue ; - - if ( !daemon_is_file_present ( CONFIG_COMPLETE_WORKER ) ) - continue ; - - /* virtual thin pool space utilization */ - rc = calculate_virtual_space_usage(i, V_NOVA_THINPOOL_RESOURCE_NAME); - /* only check resource for fail and clear if it is active */ - if (rc == PASS) { - if (resource_config[i].percent == PERCENT_USED) { - /* get if the resource is failed to be used by resource handler */ - process_failures (&resource_config[i]); - } else { - process_failures_absolute (&resource_config[i]); - } - } - } - else if (strcmp(resource, FS_RESOURCE_NAME) == 0) { - /* file system utilization */ - /* do nothing as we calculate individual file system location and not the total */ - } - else { - /* dynamic file system resource */ - - pthread_mutex_lock(&lock); - if ((resource_config[i].alarm_status == ALARM_ON) && (modifyingResources == false)) - { - /* only calculate the resource usage if file systems aren't being added */ - calculate_fs_usage( &resource_config[i] ); - - /* only check resource for fail and clear if it is active */ - if (resource_config[i].percent == PERCENT_USED) { - /* get if the resource is failed to be used by resource handler */ - process_failures ( &resource_config[i]); - } else { - process_failures_absolute ( &resource_config[i]); - } - } - else if ((resource_config[i].alarm_status == ALARM_OFF) && (modifyingResources == false) - && (resource_config[i].failed == true)) - { - //send a clear message - send_clear_msg(i); - - // we need to clear the resource's alarm if there was any set for this resource - clear_alarm_for_resource(&resource_config[i]); - } - pthread_mutex_unlock(&lock); - } - } // end of rmon resources - - /* - * since interface resources are event based resourcs, i.e. - * they would only be called when netlink socket reports a - * link state event, we need to run a periodic audit on them - * as part of RMON event audit. - * This audit shall resend interface degrade statuses to maintaince - * if interface is in failed state - */ - for ( int j = 0; j < _rmon_ctrl_ptr->interface_resources; j++ ) - { - if ( interface_resource_config[j].interface_used && - interface_resource_config[j].failed == true ) - { - send_interface_msg ( &interface_resource_config[j], - _rmon_ctrl_ptr->clients ); - } - } -} - -int kill_running_process ( int pid ) -{ - int result = kill ( pid, 0 ); - if ( result == 0 ) - { - result = kill ( pid, SIGKILL ); - if ( result == 0 ) - { - wlog ("NTP process kill succeeded (%d)\n", pid ); - } - else - { - elog ("NTP process kill failed (%d)\n", pid ); - } - } - return (PASS); -} - -/* SIGCHLD handler support - for waitpid */ -static bool rmon_sigchld_received = false ; -void daemon_sigchld_hdlr ( void ) -{ - dlog("Received SIGCHLD ...\n"); - - int status = 0; - pid_t tpid = 0; - - while ( 0 < ( tpid = waitpid ( -1, &status, WNOHANG | WUNTRACED ))) - { - dlog("NTP query script returned WIFEXITED:%d and WEXITSTATUS:%d for pid:%d\n", WIFEXITED(status), WEXITSTATUS(status), tpid); - - if (tpid == ntp_child_pid) - { - rmon_sigchld_received = true ; - - /* no need to wait for a timeout since we got a response, force a ring */ - rmonTimer_ntp.ring = true; - ntp_status = WEXITSTATUS(status); - } - else - { - dlog ("PID:%d lookup failed ; reaped likely after timeout\n", tpid ); - ntp_status = NTP_ERROR; - } - } -} - -int ntp_audit_handler ( ) -{ - if ( ntp_stage >= NTP_STAGE__STAGES ) - { - wlog ("Invalid ntp_stage (%d) ; correcting\n", ntp_stage ); - ntpStageChange ( NTP_STAGE__BEGIN); - } - - switch ( ntp_stage ) - { - // First state - case NTP_STAGE__BEGIN: - { - mtcTimer_start ( rmonTimer_ntp, rmon_timer_handler, _rmon_ctrl_ptr->ntp_audit_period ); - dlog ("Start NTP period timer (%d secs) %p\n", _rmon_ctrl_ptr->ntp_audit_period, rmonTimer_ntp.tid); - ntpStageChange ( NTP_STAGE__EXECUTE_NTPQ ); - break ; - } - - // Execute the ntpq command - case NTP_STAGE__EXECUTE_NTPQ: - { - if ( rmonTimer_ntp.ring == true ) //wake up from NTP period - { - ntp_status = PASS; - mtcTimer_start ( rmonTimer_ntp, rmon_timer_handler, _rmon_ctrl_ptr->ntpq_cmd_timeout ); - dlog ("Start NTPQ command timer (%d secs) %p\n", _rmon_ctrl_ptr->ntpq_cmd_timeout, rmonTimer_ntp.tid); - - // Execute the ntpq command - int rc = query_ntp_servers(); - if (rc != PASS) - { - elog ("NTP execute_status_command returned a failure (%d)\n", rc); - ntp_status = NTP_ERROR; - } - - ntpStageChange ( NTP_STAGE__EXECUTE_NTPQ_WAIT ); - } - break ; - } - - // Wait for the ntpq command to finish and process results - case NTP_STAGE__EXECUTE_NTPQ_WAIT: - { - // Give the command time to execute. The daemon_sigchld_hdlr will force - // a ring when the command execute successfully or returns a failure - if ( ( rmonTimer_ntp.ring == true) || (ntp_status == NTP_ERROR ) ) - { - // Stop the NTP timer if still running - if ( rmonTimer_ntp.tid ) - { - mtcTimer_stop ( rmonTimer_ntp ); - } - - if (( !rmon_sigchld_received) || (ntp_status == NTP_ERROR)) - { - if ( rmon_sigchld_received == false ) - { - elog ("NTPQ command execution timed out (pid:%d)\n", ntp_child_pid ); - } - - elog ("NTPQ returned an execution failure (rc:%d) (pid:%d)\n", ntp_status, ntp_child_pid); - if (ntp_child_pid != 0) - { - kill_running_process ( ntp_child_pid ); - } - } - else - { - dlog ("NTPQ command was successful ; analyzing results\n"); - ntp_query_results(ntp_status); - } - - ntpStageChange ( NTP_STAGE__BEGIN ); - ntp_child_pid = 0; - rmon_sigchld_received = false; - } - break; - } - - default: - { - elog ("NTP invalid ntp_stage (%d)\n", ntp_stage ); - - /* Default to first state for invalid case. there is an issue then it will be detected */ - ntpStageChange ( NTP_STAGE__BEGIN ); - } - } - return (PASS); -} - - -/***************************************************************************** - * - * Name : rmon_service - * - * Purpose : main loop for monitoring resources - * - *****************************************************************************/ -void rmon_service (rmon_ctrl_type * ctrl_ptr) -{ - fd_set readfds; - struct timeval waitd; - std::list socks; - rmon_socket_type * sock_ptr = rmon_getSock_ptr (); - - /* initialize FM handler */ - rmon_fm_init(); - - /* ignore SIGPIPE on swacts */ - signal(SIGPIPE, SIG_IGN); - - /* initialize the memory accounting: either Strict or OOM */ - init_memory_accounting(); - /* initialize the cpu monitoring defaults */ - cpu_monitoring_init(); - _readUUID(); - /* Start an event timer for the interval of the resources being monitored */ - ilog ("Starting 'Event Monitor' timer (%d secs) \n", ctrl_ptr->audit_period); - mtcTimer_start ( rmonTimer_event, rmon_timer_handler, 1 ); - - ilog ("Starting 'PM Monitor' timer (%d secs) \n", ctrl_ptr->pm_period); - mtcTimer_start ( rmonTimer_pm, rmon_timer_handler,ctrl_ptr->pm_period); - - if (is_controller()) - { - ntp_stage = NTP_STAGE__BEGIN; - } - - /* Get an Authentication Token */ - ilog ("%s Requesting initial token\n", ctrl_ptr->my_hostname ); - tokenEvent.status = tokenUtil_new_token ( tokenEvent, ctrl_ptr->my_hostname ); - if ( tokenEvent.status != PASS ) - { - elog ("Failed to get authentication token (%d)\n", tokenEvent.status); - if ( tokenEvent.base ) - { - slog ("%s token base:%p\n", - ctrl_ptr->my_hostname, - tokenEvent.base); - } - } - - /* service all the register and deregister requests in the queue */ - rmon_alive_notification( _rmon_ctrl_ptr->clients ); - - ilog ("registered clients: %d\n", _rmon_ctrl_ptr->clients); - -#ifdef WANT_FS_MONITORING - - /* Initialize the resource specific configuration */ - for (int j=0; j<_rmon_ctrl_ptr->resources; j++) - { - if ( strcmp(resource_config[j].resource, FS_RESOURCE_NAME) == 0 ) { - /* determine whether percent or absolute values are used */ - /* determine if virtual thin pool memory usage alarm should be on or off */ - fs_percent = resource_config[j].percent; - } - } - /* add the static filesystem resources */ - process_static_fs_file(); - /* initialize the resource alarms */ - for (int j=0; j<_rmon_ctrl_ptr->resources; j++) - { - rmon_alarming_init ( &resource_config[j] ); - } - - /* add any dynamic resources from before */ - add_dynamic_fs_resource(false); -#else - ilog("static filesystem monitoring moved to collectd\n"); -#endif - - /* Clear any stale dynamic alarms that can be caused by dynamic resources. */ - /* An alarm become stale for example if it was raised against a local volumn group (lvg) and */ - /* later on the lvg is deleted. The node will come up and the lvg resource will not longer exist and */ - /* it's related alarms not refreshed. Dynamic alarms are any alarms which it's resource can be */ - /* provisioned. */ - - AlarmFilter alarmFilter; - unsigned int max_alarms=75; - char alarm_to_search[FM_MAX_BUFFER_LENGTH]; - - fm_alarm_id alarm_id; - snprintf(alarm_id, FM_MAX_BUFFER_LENGTH, FS_ALARM_ID); - - SFmAlarmDataT *active_alarms = (SFmAlarmDataT*) calloc (max_alarms, sizeof (SFmAlarmDataT)); - if (active_alarms != NULL) - { - /* get all the current alarms with id of FS_ALARM_ID which are alarms related to the file system */ - /* fm_get_faults_by_id returns the number of alarms found */ - if (fm_get_faults_by_id( &alarm_id, active_alarms, &max_alarms) == FM_ERR_OK) - { - bool found = false; - for ( unsigned int i = 0; i < max_alarms; i++ ) - { - /* only get the 100.104 alarms */ - if ((strncmp((active_alarms+i)->alarm_id, FS_ALARM_ID, sizeof((active_alarms+i)->alarm_id)) == 0) - && (strstr((active_alarms+i)->entity_instance_id, _rmon_ctrl_ptr->my_hostname) != NULL) ) - { - found = false; - for (int j=0; j<_rmon_ctrl_ptr->resources; j++) - { - /* since we build the entity_instance_id with multiple data we must recreate it */ - snprintf(alarm_to_search, FM_MAX_BUFFER_LENGTH, "%s.volumegroup=%s", _rmon_ctrl_ptr->my_hostname, resource_config[j].resource); - if (strncmp(alarm_to_search, (active_alarms+i)->entity_instance_id, sizeof(alarm_to_search)) == 0) - { - found = true; - break; - } - - snprintf(alarm_to_search, FM_MAX_BUFFER_LENGTH, "%s.filesystem=%s", _rmon_ctrl_ptr->my_hostname, resource_config[j].resource); - if (strncmp(alarm_to_search, (active_alarms+i)->entity_instance_id, sizeof(alarm_to_search)) == 0) - { - found = true; - break; - } - - // We found the resource but lets check if the alarm is enable for it, if it's not - // we want to clear that alarm - if (found) - { - if (resource_config[j].alarm_status == ALARM_OFF) - { - found = false; - } - } - } - if (!found) - { - /* the alarm did not match any current resources so let's clear it */ - snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, (active_alarms+i)->alarm_id ); - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, (active_alarms+i)->entity_instance_id); - - ilog ("Clearing stale alarm %s for entity instance id: %s", (active_alarms+i)->alarm_id, (active_alarms+i)->entity_instance_id); - - if (rmon_fm_clear(&alarmFilter) != FM_ERR_OK) - { - wlog ("Failed to clear stale alarm for entity instance id: %s", (active_alarms+i)->entity_instance_id); - } - } - } - } - } - free(active_alarms); - } - else - { - elog ("Failed to allocate memory for clearing stale dynamic alarms"); - } - - if (( sock_ptr->ioctl_sock = open_ioctl_socket ( )) <= 0 ) - { - elog ("Failed to create ioctl socket"); - } - - /* Not monitoring address changes RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR */ - if (( sock_ptr->netlink_sock = open_netlink_socket ( RTMGRP_LINK )) <= 0 ) - { - elog ("Failed to create netlink listener socket"); - } - - /* load the current interfaces for monitoring */ - _load_rmon_interfaces(); - - socks.clear(); - socks.push_front (sock_ptr->rmon_tx_sock); - socks.push_front (sock_ptr->netlink_sock); - socks.sort(); - - for (;;) { - /* Accomodate for hup reconfig */ - FD_ZERO(&readfds); - FD_SET(sock_ptr->rmon_tx_sock, &readfds); - FD_SET(sock_ptr->netlink_sock, &readfds); - waitd.tv_sec = 0; - waitd.tv_usec = SOCKET_WAIT ; - tokenUtil_log_refresh (); - - /* This is used as a delay up to select timeout ; SOCKET_WAIT */ - select( socks.back()+1, &readfds, NULL, NULL, &waitd); - if (FD_ISSET(sock_ptr->rmon_tx_sock, &readfds)) - { - _rmon_ctrl_ptr->clients = rmon_service_inbox ( _rmon_ctrl_ptr->clients ); - } - else if (FD_ISSET(sock_ptr->netlink_sock, &readfds)) - { - dlog ("netlink socket fired\n"); - if ( service_interface_events ( sock_ptr->netlink_sock, sock_ptr->ioctl_sock ) != PASS ) - { - elog ("service_interface_events failed \n"); - } - } - - /* Manage the health of the resources */ - if ( rmonTimer_event.ring == true ) - { - // restart the audit period timer - mtcTimer_start ( rmonTimer_event, rmon_timer_handler, ctrl_ptr->audit_period ); - /* service all the register and deregister requests in the queue */ - rmon_alive_notification( _rmon_ctrl_ptr->clients ); - _get_events ( ); - } - - if ( rmonTimer_pm.ring == true ) - { - mtcTimer_start ( rmonTimer_pm, rmon_timer_handler, ctrl_ptr->pm_period ); - tokenUtil_token_refresh ( tokenEvent, ctrl_ptr->my_hostname ); - _postPMs(); - } - - /* loop through all the resource timers waiting for a ring */ - for ( int j = 0 ; j < ctrl_ptr->resources ; j++ ) - { - if (resource_config[j].failed == true) { - /* Run the FSM for this failed resource */ - resource_handler ( &resource_config[j]); - } - } - - /* loop through all the interface resources */ - for ( int j = 0 ; j < ctrl_ptr->interface_resources ; j++ ) - { - if (interface_resource_config[j].failed == true) { - /* Run the FSM for this failed interface */ - interface_handler ( &interface_resource_config[j] ); - } - } - - /* loop thorough all the LVM thinpool metadata resources waiting for a ring */ - for ( int j = 0; j < ctrl_ptr->thinmeta_resources; j++ ) - { - if (thinmeta_resource_config[j].critical_threshold) { - // a threshold of 0 disables monitoring - if (thinmetatimer[j].ring == true) { - // restart the audit period timer - mtcTimer_start ( thinmetatimer[j], rmon_timer_handler, - thinmeta_resource_config[j].audit_period ); - dlog("%s/%s running audit (resource index: %i)", - thinmeta_resource_config[j].vg_name, - thinmeta_resource_config[j].thinpool_name, j) - /* Handle resource */ - int k; - for (k = THINMETA_FSM_RETRY; k > 0; k--) { - // call again the FSM in case it instructs us to RETRY - if(thinmeta_handler(&thinmeta_resource_config[j]) != RETRY) { - break; - } - } - if (k == 0) { - dlog("%s/%s too many state changes in FSM at: %i stage!", - thinmeta_resource_config[j].vg_name, - thinmeta_resource_config[j].thinpool_name, - thinmeta_resource_config[j].stage); - } - } - } - } - - /* handle RMON FM interface */ - rmon_fm_handler (); - - daemon_signal_hdlr (); - } - -} - -/**************************************************************************** - * - * Name : log_value - * - * Purpose : Log resource state values while avoiding log flodding for - * trivial fluxuations. - * - * Description: Recommends whether the current resource state value should - * be logged based on current, previous and step values. - * - * Caller should not generate such log if a false is returned. - * - * A true is returned if the currrent and previous resource values differ - * by +/- step amount. - * - * The caller specifies the step that can be overridden by a smaller value - * in rmond.conf:log_step value. - * - * If step is zero then a true is always returned in support of a debug mode - * where we get the current reading as a log on every audit. - * - * The callers previous value is updated to current whenever true is returned. - * - ****************************************************************************/ -bool log_value ( double & current, double & previous, int step ) -{ - /* Support step override for debug purposes - * Allows for more frequent logging */ - int _step = daemon_get_cfg_ptr()->log_step ; - - /* a lower value from the conf file takes precidence */ - if ( _step > step ) - _step = step ; - - if (( round(current) >= ( round(previous) + _step )) || - ( round(current) <= ( round(previous) - _step ))) - { - previous = current ; - return true ; - } - return false ; -} diff --git a/mtce/src/rmon/rmonHttp.cpp b/mtce/src/rmon/rmonHttp.cpp deleted file mode 100644 index cc40fc38..00000000 --- a/mtce/src/rmon/rmonHttp.cpp +++ /dev/null @@ -1,674 +0,0 @@ -/* - * Copyright (c) 2013, 2017 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - -/** - * @file - * Wind River CGTS Platform rmon HTTP Utilities. - * - */ - -#include -#include -#include -#include /* for .. close and usleep */ -#include /* for .. system */ -#include /* for config dir reading */ -#include /* for the list of conf file names */ -#include /* for ... syslog */ -#include /* for ... waitpid */ -#include "rmon.h" -#include "tokenUtil.h" /* for ... tokenUtil_get_ptr */ -using namespace std; - -//#include "nodeClass.h" /* for ... maintenance class nodeLinkClass */ -#include "rmonHttp.h" /* this module header */ -//#include "rmonJsonUtil.h" /* Json Utilities */ -#include "rmonApi.h" - - -extern void rmonHdlr_remotelogging_handler ( struct evhttp_request *req, void *arg ); -extern void rmonHdlr_ceilometer_handler( struct evhttp_request *req, void *arg ); - -void rmonHttpUtil_free_base ( libEvent_type & event ); - - -static node_inv_type default_inv ; - -/***************************************************************************** - * - * Name : rmonHttpUtil_libEvent_init - * - * Purpose : Initialize the libEvent message for the HTTP request - * - *****************************************************************************/ -int rmonHttpUtil_libEvent_init ( libEvent_type *ptr , - string service, - string ip, - int port ) -{ - ptr->type = EVHTTP_REQ_GET ; /* request type GET/PUT/PATCH etc */ - - /* Characteristics */ - ptr->ip = ip ; - ptr->port = port ; - ptr->hostname = "default" ; - - /* Controls */ - ptr->status = FAIL ; /* The handler must run to make this PASS */ - ptr->active = false ; - ptr->mutex = false ; - ptr->stuck = 0 ; - ptr->found = false ; - ptr->count = 0 ; - //ptr->stage = 0 ; - ptr->result = "" ; - ptr->timeout = 0 ; - - /* Personality */ - ptr->service = service ; - ptr->request = RMON_SERVICE_NONE ; - - /* Execution Data */ - ptr->entity_path.clear() ; - ptr->entity_path_next.clear() ; - ptr->address.clear(); - ptr->payload.clear(); - ptr->response.clear(); - ptr->user_agent.clear(); - - /* Better to access a default struct than a bad pointer */ - ptr->inv_info_ptr = &default_inv ; - - /* Check for memory leaks */ - if ( ptr->base ) - { - slog ("rmon http base memory leak avoidance (%p) fixme !!\n", ptr->base); - event_base_free(ptr->base); - } - /* Create event base - like opening a socket */ - ptr->base = event_base_new(); - if ( ! ptr->base ) - { - elog ("Failed to create '%s' libEvent (event_base_new)\n", - ptr->service.c_str()); - - return(FAIL_EVENT_BASE) ; - } - return (PASS); -} - - -void rmonHttpUtil_start_timer ( libEvent_type & event ) -{ - clock_gettime (CLOCK_MONOTONIC, &event.start_ts ); -} - -void rmonHttpUtil_stop_timer ( libEvent_type & event ) -{ - clock_gettime (CLOCK_MONOTONIC, &event.stop_ts ); -} - -/* *********************************************************************** - * - * Name : rmonHttpUtil_free_conn - * - * Description: Free an event's connection memory if it exists. - * - * ************************************************************************/ -void rmonHttpUtil_free_conn ( libEvent_type & event ) -{ - if ( event.conn ) - { - dlog ("rmond Free Connection (%p)\n", event.conn ); - evhttp_connection_free ( event.conn ); - event.conn = NULL ; - } - else - { - wlog ("rmond Already Freed Connection\n"); - } -} - -/* *********************************************************************** - * - * Name : rmonHttpUtil_free_base - * - * Description: Free an event's base memory if it exists. - * - * ************************************************************************/ -void rmonHttpUtil_free_base ( libEvent_type & event ) -{ - /* Free the base */ - if ( event.base ) - { - dlog ("rmond Free Base (%p)\n", event.base ); - - event_base_free(event.base); - event.base = NULL ; - if ( event.conn ) - { - dlog ("rmond Free Connection (%p) --------- along with base\n", event.conn ); - evhttp_connection_free ( event.conn ); - event.conn = NULL ; - } - } - else - { - wlog ("rmond Already Freed Event Base\n"); - } -} - -/***************************************************************************** - * - * Name : rmonHttpUtil_connect_new - * - * Purpose : generic HTTP Conect utility - * - *****************************************************************************/ -int rmonHttpUtil_connect_new ( libEvent_type & event ) -{ - if ( event.base ) - { - /* Open an http connection to specified IP and port */ - event.conn = evhttp_connection_base_new ( event.base, NULL, - event.ip.c_str(), - event.port ); - if ( event.conn ) - { - dlog("connect successfull \n"); - return(PASS) ; - } - else - { - elog ("Failed to create http connection (evhttp_connection_base_new)\n"); - return (FAIL_CONNECT); - } - } - else - { - elog ("Null Event base\n"); - return (FAIL_EVENT_BASE); - } -} - -/* generic HTTP Conect utility */ -int rmonHttpUtil_request_new ( libEvent_type & event, - void(*hdlr)(struct evhttp_request *, void *)) -{ - int rc = PASS ; - - /* make a new request and bind the event handler to it */ - event.req = evhttp_request_new( hdlr , event.base ); - if ( ! event.req ) - { - dlog ("call to 'evhttp_request_new' returned NULL\n"); - rc = FAIL ; - } - - return (rc); -} - -/* Fill in the output buffer */ -/* return of 0 or -1 are errors */ -int rmonHttpUtil_payload_add ( libEvent_type & event ) -{ - int rc = PASS ; - - /* Returns the output buffer. */ - event.buf = evhttp_request_get_output_buffer ( event.req ); - - /* Check for no buffer */ - if ( ! event.buf ) - { - elog ("evhttp_request_get_output_buffer returned null (%p)\n", event.req ); - rc = FAIL ; - } - else - { - /* write the payload into the buffer */ - rc = evbuffer_add_printf ( event.buf, "%s", event.payload.c_str()); - if ( rc == -1 ) - { - elog ("evbuffer_add_printf returned error (-1)\n"); - rc = FAIL ; - } - else if ( rc == 0 ) - { - elog ("no data added to output buffer (len=0)\n"); - rc = FAIL ; - } - else - { - rc = PASS ; - } - } - return (rc); -} - -/* get the output buffer length and convert it to a string that is returned */ -string rmonHttpUtil_payload_len ( libEvent_type * ptr ) -{ - string body_len ; - char len_str[10] ; - int len = evbuffer_get_length ( ptr->req->output_buffer ) ; - if (( len == -1 ) || ( len == 0 )) - { - body_len = "" ; - } - else - { - memset ( &len_str[0], 0 , 10 ); - sprintf ( &len_str[0], "%d", len ); - body_len = len_str ; - dlog3 ("%s Buffer Len:%s\n", ptr->hostname.c_str(), body_len.c_str() ); - } - return ( body_len ); -} - -int rmonHttpUtil_header_add ( libEvent_type * ptr, http_headers_type * hdrs_ptr ) -{ - int rc = PASS ; - - if ( hdrs_ptr->entries > MAX_HEADERS ) - { - elog ("%s Too many headers (%d:%d)\n", - ptr->hostname.c_str(), MAX_HEADERS, hdrs_ptr->entries ); - return FAIL ; - } - for ( int i = 0 ; i < hdrs_ptr->entries ; i++ ) - { - /* Add the header */ - rc = evhttp_add_header( ptr->req->output_headers, - hdrs_ptr->entry[i].key.c_str() , - hdrs_ptr->entry[i].value.c_str()); - if ( rc ) - { - elog ("evhttp_add_header returned failure (%d:%s:%s)\n", rc, - hdrs_ptr->entry[i].key.c_str(), - hdrs_ptr->entry[i].value.c_str()); - rc = FAIL ; - break ; - } - } - return (rc); -} - -/***************************************************************************** - * - * Name : rmonHttpUtil_request_make - * - * Purpose : Make the HTTP request - * - *****************************************************************************/ -int rmonHttpUtil_request_make ( libEvent_type * ptr, - enum evhttp_cmd_type type, - string path ) -{ - return (evhttp_make_request( ptr->conn, ptr->req, type, path.data())); -} - -/***************************************************************************** - * - * Name : rmonHttpUtil_status - * - * Purpose : Get the status of the HTTP request - * - *****************************************************************************/ -int rmonHttpUtil_status ( libEvent_type & event ) -{ - int rc = PASS ; - - event.status = evhttp_request_get_response_code (event.req); - switch (event.status) - { - case HTTP_OK: - case 201: - case 202: - case 203: - case 204: - { - dlog3 ("%s HTTP_OK (%d)\n", event.hostname.c_str(), event.status ); - event.status = PASS ; - break; - } - case 401: - { - /* Authentication error - refresh the token */ - rc = RETRY ; - break ; - } - case 0: - { - dlog ("%s Status: 0\n", event.hostname.c_str()); - event.status = FAIL_HTTP_ZERO_STATUS ; - rc = FAIL_HTTP_ZERO_STATUS ; - break ; - } - default: - { - dlog ("%s Status: %d\n", event.hostname.c_str(), event.status ); - rc = event.status ; - break; - } - } - return (rc); -} - - - -/***************************************************************************** - * - * Name : rmonHttpUtil_api_request - * - * Purpose : Issue a HTTP REST API Request - * - *****************************************************************************/ -#define URL_LEN 200 -int rmonHttpUtil_api_request ( rmon_libEvent_enum request, - libEvent_type & event, - string command_path ) - -{ - http_headers_type hdrs ; - enum evhttp_cmd_type type = EVHTTP_REQ_PUT ; - int timeout = 1 ; - int hdr_entry = 0 ; - string payload = "" ; - int rc = FAIL ; - void(*handler)(struct evhttp_request *, void *) = NULL ; - - if ( request == REMOTE_LOGGING_REQUEST ) - { - /* Bind the handler for the request */ - handler = &rmonHdlr_remotelogging_handler ; - - /* The type of HTTP request */ - type = EVHTTP_REQ_GET ; - - /* set the timeout */ - timeout = HTTP_REMOTELOGGING_TIMEOUT ; - } - - else if ( request == CEILOMETER_SAMPLE_CREATE ) - { - /* Bind the handler for the request */ - handler = &rmonHdlr_ceilometer_handler ; - - /* The type of HTTP request */ - type = EVHTTP_REQ_POST ; - - /* set the timeout */ - timeout = HTTP_CEILOMETER_TIMEOUT ; - } - - else - { - slog ("%s Unsupported Request (%d)\n", event.hostname.c_str(), request); - return (FAIL_BAD_CASE); - } - - /* Establish connection */ - if ( rmonHttpUtil_connect_new ( event )) - { - return (FAIL_CONNECT); - } - - /* Create request */ - if ( rmonHttpUtil_request_new ( event, handler )) - { - return (FAIL_REQUEST_NEW); - } - - if ( type == EVHTTP_REQ_POST ) - { - /* Add payload to the output buffer but only for POST request. */ - if ( rmonHttpUtil_payload_add ( event ) ) - { - event.status = FAIL_PAYLOAD_ADD; - return ( event.status ); - } - if ( daemon_get_cfg_ptr()->debug_json ) - { - if ((!string_contains(event.payload,"token")) && - (!string_contains(event.payload,"assword"))) - { - jlog ("%s Payload : %s\n", event.hostname.c_str(), - event.payload.c_str() ); - } - else - { - jlog ("%s Payload : ... contains private content ...\n", - event.hostname.c_str()); - } - } - } - - if ( !command_path.empty() ) - { - event.address = command_path ; - dlog (" Address : %s\n", event.address.c_str()); - } - - /* Build the HTTP Header */ - hdrs.entry[hdr_entry].key = "Host" ; - hdrs.entry[hdr_entry].value = event.ip ; - hdr_entry++; - hdrs.entry[hdr_entry].key = "X-Auth-Project-Id" ; - hdrs.entry[hdr_entry].value = "admin"; - hdr_entry++; - - - hdrs.entry[hdr_entry].key = "Content-Type" ; - hdrs.entry[hdr_entry].value = "application/json" ; - hdr_entry++; - hdrs.entry[hdr_entry].key = "Accept" ; - hdrs.entry[hdr_entry].value = "application/json" ; - hdr_entry++; - - if ( request == CEILOMETER_SAMPLE_CREATE ) - { - hdrs.entry[hdr_entry].key = "User-Agent" ; - hdrs.entry[hdr_entry].value = event.user_agent ; - hdr_entry++; - - hdrs.entry[hdr_entry].key = "X-Auth-Token" ; - hdrs.entry[hdr_entry].value = tokenUtil_get_ptr()->token ; - hdr_entry++; - } - - hdrs.entry[hdr_entry].key = "Connection" ; - hdrs.entry[hdr_entry].value = "close" ; - hdr_entry++; - hdrs.entries = hdr_entry ; - - /* Add the headers */ - if ( rmonHttpUtil_header_add ( &event, &hdrs )) - { - return (FAIL_HEADER_ADD); - } - - rc = rmonHttpUtil_request_make ( &event, type, event.address.data() ); - - if ( rc == PASS ) - { - /* Send the message with timeout */ - evhttp_connection_set_timeout(event.req->evcon, timeout); - event_base_dispatch(event.base); - rmonHttpUtil_free_conn ( event ); - rmonHttpUtil_free_base ( event ); - - return(event.status) ; - } - elog ("%s Call to 'evhttp_make_request' failed (rc:%d)\n", - event.hostname.c_str(), rc); - - return (FAIL_MAKE_REQUEST); -} - -/***************************************************************************** - * - * Name : rmonHttpUtil_receive - * - * Purpose : Get the HTTP request response into a libEvent object - * - *****************************************************************************/ -int rmonHttpUtil_receive ( libEvent_type & event ) -{ - /* Send the request but don't wait for the response */ - // int rc = event_base_loop(event.base, EVLOOP_NONBLOCK) ; - int rc = event_base_loop(event.base, EVLOOP_ONCE) ; - switch ( rc ) - { - case PASS: /* 0 */ - { - /* Set in-progress flag */ - if ( event.active == false ) - { - /* look at the reported handler status */ - if ( event.status != PASS ) - rc = event.status ; - - rmonHttpUtil_log_event ( event ); - } - else - { - rc = RETRY ; - } - break ; - } - case 1: - { - dlog ("%s %s No Events Pending (1)\n", - event.hostname.c_str(), - event.service.c_str()); - rc = FAIL ; - break ; - } - case -1: - { - event.active = false ; - elog ("%s %s Failed event_base_loop (-1)\n", - event.hostname.c_str(), - event.service.c_str()); - rc = FAIL ; - break ; - } - default: - { - event.active = false ; - slog ("%s %s Failed event_base_loop - Unexpected Return (%d)\n", - event.hostname.c_str(), - event.service.c_str(), rc ); - rc = FAIL ; - break ; - } - } - return (rc); -} - -/* Get the length of the json response - * Deal with oversized messages. - * - * Get the length of the buffer so we can - * allocate one big enough to copy too. - */ -int rmonHttpUtil_get_length ( libEvent_type & event ) -{ - event.response_len = evbuffer_get_length (event.req->input_buffer); - if ( event.response_len == 0 ) - { - dlog ("%s %s Request Failed - Zero Length Response\n", - event.hostname.c_str(), - event.service.c_str()); - event.status = FAIL_JSON_ZERO_LEN ; - } - else - { - event.status = PASS ; - } - return ( event.status ); -} - -/* Load the response string into the event struct */ -int rmonHttpUtil_get_response ( libEvent_type & event ) -{ - if ( rmonHttpUtil_get_length ( event ) == PASS ) - { - size_t real_len ; - - /* Get a stack buffer, zero it, copy to it and terminate it */ - char * stack_buf_ptr = (char*)malloc (event.response_len+1); - memset ( stack_buf_ptr, 0, event.response_len+1 ); - real_len = evbuffer_remove( event.req->input_buffer, stack_buf_ptr, - event.response_len); - - if ( real_len != event.response_len ) - { - wlog ("%s %s Length differs from removed length (%ld:%ld)\n", - event.hostname.c_str(), - event.service.c_str(), - event.response_len, - real_len ); - } - - /* Terminate the buffer , this is where the +1 above is required. - * Without it there is memory corruption reported by Linux */ - *(stack_buf_ptr+event.response_len) = '\0'; - - /* Store the response */ - event.response = stack_buf_ptr ; - dlog ("%s Response: %s\n", event.hostname.c_str(), event.response.c_str()); - - free (stack_buf_ptr); - } - return ( event.status ); -} - -/***************************************************************************** - * - * Name : rmonHttpUtil_log_event - * - * Purpose : Log the HTTP event - * - *****************************************************************************/ -void rmonHttpUtil_log_event ( libEvent_type & event ) -{ - string event_sig = daemon_get_cfg_ptr()->debug_event ; - - dlog3 ("Event Signature (%s)\n", event_sig.c_str()); - if ( !event_sig.compare(event.service) || (event.status)) - { - if ( !event.address.empty() ) - { - log_event ("%s %s Address : %s\n", event.hostname.c_str(), event_sig.c_str(), event.address.c_str()); - } - if (!event.payload.empty()) - { - if ((!string_contains(event.payload,"token")) && - (!string_contains(event.payload,"assword"))) - { - log_event ("%s %s Payload : %s\n", event.hostname.c_str(), event_sig.c_str(), event.payload.c_str()); - } - else - { - log_event ("%s %s Payload : ... contains private content ...\n", event.hostname.c_str(), event_sig.c_str()); - } - } - if ( !event.response.empty() ) - { - if ((!string_contains(event.payload,"token")) && - (!string_contains(event.payload,"assword"))) - { - log_event ("%s %s Response: %s\n", event.hostname.c_str(), event_sig.c_str(), event.response.c_str()); - } - else - { - log_event ("%s %s Response: ... contains private content ...\n", event.hostname.c_str(), event_sig.c_str()); - } - } - } -} diff --git a/mtce/src/rmon/rmonHttp.h b/mtce/src/rmon/rmonHttp.h deleted file mode 100644 index c9c95029..00000000 --- a/mtce/src/rmon/rmonHttp.h +++ /dev/null @@ -1,161 +0,0 @@ -#ifndef __INCLUDE_rmonHTTPUTIL_H__ -#define __INCLUDE_rmonHTTPUTIL_H__ -/* - * Copyright (c) 2013, 2017 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - - /** - * @file - * Wind River CGTS Platform rmon - * - * libevent HTTP support utilities and control structure support header - */ - -#include /* for ... string */ -#include /* for ... http libevent client */ - -using namespace std; - -/** Maximum libevent response message size in bytes. */ -#define MAX_EVENT_LEN (16384) - -#define HTTP_VSWITCH_TIMEOUT (10) -#define HTTP_REMOTELOGGING_TIMEOUT (10) -#define HTTP_CEILOMETER_TIMEOUT (10) - -#define VSWITCH_EVENT_SIG "vswitchEvent" -#define REMOTE_LOGGING_EVENT_SIG "remoteLoggingEvent" -#define CEILOMETER_EVENT_SIG "ceilometerEvent" - -/** Request Type Enums for the common rmonHttpUtil_request utility */ -typedef enum { - RMON_SERVICE_NONE, - VSWITCH_REQUEST, - REMOTE_LOGGING_REQUEST, - CEILOMETER_SAMPLE_CREATE -} rmon_libEvent_enum ; - -/** Local event control structure for REST API services - * - * Keystone and Inventory - * - */ -typedef struct -{ - /** Execution Controls */ - bool mutex ; /**< single operation at a time */ - bool active ; /**< true if waiting on response */ - int stuck ; /**< Count mutex active stuck state */ - int status ; /**< Execution Status */ - string result ; /**< Command specific result str */ - bool found ; /**< true if query was found */ - int timeout ; /**< Request timeout */ - int count ; /**< retry recover counter */ - int fails ; /**< fail counter */ - int retries ; /**< retry counter ; for receive */ - string service ; /**< Service being executed */ - string hostname ; /**< Target hostname */ - string uuid ; /**< The UUID for this request */ - string ip ; /**< Server IP address */ - rmon_libEvent_enum request ; - int port ; /**< Server port number */ - string user_agent ; /**< set the User-Agent header */ - - enum evhttp_cmd_type type; /**< HTTP Request Type ; PUT/GET */ - struct event_base *base; /**< libEvent API service base */ - struct evhttp_connection *conn; /**< HTTP connection ptr */ - struct evhttp_request *req ; /**< HTTP request ptr */ - struct evbuffer *buf ; /**< HTTP output buffer ptr */ - struct evbuffer_ptr evp ; /**< HTTP output buffer ptr */ - - /** Timestamps used to measure the responsiveness of REST API */ - struct timespec start_ts ; /**< Request Dispatch Timestamp */ - struct timespec stop_ts ; /**< Response Handler Timestamp */ - - string entity_path ; /**< HTTP entity request string */ - string entity_path_next ; /**< next entity request string */ - - /** Result Info */ - node_inv_type * inv_info_ptr ; /**< Inventory data pointer */ - string address ; /**< http url address */ - string payload ; /**< the request's payload */ - size_t response_len ; /**< the json response length */ - string response ; /**< the json response string */ -} libEvent_type; - -int rmonHttpUtil_libEvent_init ( libEvent_type * ptr , - string service, - string ip, - int port ); - -void rmonHttpUtil_start_timer ( libEvent_type & event ); -void rmonHttpUtil_stop_timer ( libEvent_type & event ); -void rmonHttpUtil_log_time ( libEvent_type & event ); - - -/** Maximum number of headers that can be added to an HTTP message. */ -#define MAX_HEADERS (10) - -#if 0 -/** A header entry type. */ -typedef struct -{ - string key ; /**< the header label. */ - string value ; /**< the header value. */ -} http_header_entry_type; - -/** The header entry table. */ -typedef struct -{ - int entries ; /**< Number of entries in the header table. */ - http_header_entry_type entry[MAX_HEADERS]; /**< entry array. */ -} http_headers_type ; -#endif - -/** Add payload to the HTTP message body. */ -int rmonHttpUtil_payload_add ( libEvent_type & event ); - -/** Add all headers in header table to the HTTP connection message. */ -int rmonHttpUtil_header_add ( libEvent_type * ptr, http_headers_type * hdrs_ptr ); - -/** Create an HTTP request. */ -int rmonHttpUtil_request_make ( libEvent_type * ptr, enum evhttp_cmd_type type, string path ); - -/** Open a connection to an HTTP server. */ -int rmonHttpUtil_connect_new ( libEvent_type & event ); - -/** Get a new HTTP request pointer. */ -int rmonHttpUtil_request_new ( libEvent_type & event, - void(*hdlr)(struct evhttp_request *, void *)); - -/** Common REST API Request Utility */ -int rmonHttpUtil_api_request ( rmon_libEvent_enum request, - libEvent_type & event, - string command_path ); - -/** Common REST API Request Utility */ -int rmonHttpUtil_request ( libEvent_type & event , bool block, - void(*hdlr)(struct evhttp_request *, void *)); - -/** Common REST API Receive Utility for non-blocking requests */ -int rmonHttpUtil_receive ( libEvent_type & event ); - -/** HTTP response status checker */ -int rmonHttpUtil_status ( libEvent_type & event ); - -/** TODO: FIXME: Get the payload string length. */ -string rmonHttpUtil_payload_len ( libEvent_type * ptr ); - -/** Get the length of the json response */ -int rmonHttpUtil_get_length ( libEvent_type & event ); - -/** Load the json response into the event struct */ -int rmonHttpUtil_get_response ( libEvent_type & event ); - -/** print event filtered event */ -void rmonHttpUtil_log_event ( libEvent_type & event ); - -#endif /* __INCLUDE_rmonHTTPUTIL_H__ */ diff --git a/mtce/src/rmon/rmonInit.cpp b/mtce/src/rmon/rmonInit.cpp deleted file mode 100644 index 53c5b0f6..00000000 --- a/mtce/src/rmon/rmonInit.cpp +++ /dev/null @@ -1,616 +0,0 @@ -/* - * Copyright (c) 2013-2017 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - - /** - * @file - * Wind River CGCS Platform Resource Monitor Service Initialization - */ - -#include "rmon.h" - -/* File definitions */ -#define CONFIG_FILE ((const char *)"/etc/mtc/rmond.conf") - - -static rmon_ctrl_type rmon_ctrl ; -rmon_ctrl_type * get_ctrlPtr ( void ) ; - -static daemon_config_type rmon_config ; -daemon_config_type * daemon_get_cfg_ptr () { return &rmon_config ; } - -/* Cleanup exit handler */ -void daemon_exit ( void ) -{ - rmon_msg_fini (); - rmon_hdlr_fini ( &rmon_ctrl ); - daemon_dump_info (); - daemon_files_fini (); - exit (0); -} - -bool is_worker ( void ) -{ - if (( rmon_ctrl.function == WORKER_TYPE ) && ( rmon_ctrl.subfunction == CGTS_NODE_NULL )) - return (true); - else - return (false); -} - -bool is_controller ( void ) -{ - if ( rmon_ctrl.function == CONTROLLER_TYPE ) - return (true); - else - return (false); -} - -bool is_cpe ( void ) -{ - if (( rmon_ctrl.function == CONTROLLER_TYPE ) && ( rmon_ctrl.subfunction == WORKER_TYPE )) - return (true); - else - return (false); -} - -/***************************************************************************** - * - * Name : rmon_config_handler - * - * Purpose : Startup config read from file: rmond.conf - * - *****************************************************************************/ -static int rmon_config_handler ( void * user, - const char * section, - const char * name, - const char * value) -{ - daemon_config_type* config_ptr = (daemon_config_type*)user; - - if (MATCH("config", "audit_period")) - { - config_ptr->audit_period = atoi(value); - config_ptr->mask |= CONFIG_AUDIT_PERIOD ; - } - else if (MATCH("config", "pm_period")) - { - config_ptr->pm_period = atoi(value); - config_ptr->mask |= PM_AUDIT_PERIOD ; - } - else if (MATCH("config", "ntp_audit_period")) - { - config_ptr->ntp_audit_period = atoi(value); - config_ptr->mask |= NTP_AUDIT_PERIOD ; - } - else if (MATCH("config", "ntpq_cmd_timeout")) - { - config_ptr->ntpq_cmd_timeout = atoi(value); - config_ptr->mask |= NTPQ_CMD_TIMEOUT ; - } - else if (MATCH("config", "rmon_tx_port")) - { - config_ptr->rmon_tx_port = atoi(value); - config_ptr->mask |= CONFIG_TX_PORT ; - } - else if (MATCH("config", "per_node")) - { - config_ptr->per_node = atoi(value); - config_ptr->mask |= CONFIG_NODE ; - } - else if (MATCH("timeouts", "start_delay")) - { - config_ptr->start_delay = atoi(value); - config_ptr->mask |= CONFIG_START_DELAY ; - } - else if (MATCH("config", "rmon_api_tx_port")) - { - config_ptr->rmon_api_tx_port = atoi(value); - config_ptr->mask |= CONFIG_TX_PORT ; - } - else if (MATCH("config", "critical_threshold")) - { - config_ptr->rmon_critical_thr = atoi(value); - config_ptr->mask |= CONFIG_CRITICAL_THR ; - } - else if (MATCH("config", "log_step")) - { - config_ptr->log_step = atoi(value); - } - return (PASS); -} - -/***************************************************************************** - * - * Name : rmon_interface_config - * - * Purpose : Read interface resource config file settings into the daemon configuration - * - *****************************************************************************/ -int rmon_interface_config ( void * user, - const char * section, - const char * name, - const char * value) -{ - int rc = FAIL ; - interface_resource_config_type * ptr = (interface_resource_config_type*)user; - - if (MATCH("resource", "resource")) - { - ptr->mask |= CONF_RESOURCE ; - ptr->resource = strdup(value); - dlog ("Resource : %s\n", ptr->resource); - rc = PASS ; - } - else if (MATCH("resource", "severity")) - { - ptr->mask |= CONF_SEVERITY ; - ptr->severity = strdup(value); - dlog ("Severity : %s\n", ptr->severity ); - rc = PASS ; - } - else if (MATCH("resource", "debounce")) - { - /* A zero value prevents degrade accompanying any alarm */ - ptr->mask |= CONF_DEBOUNCE ; - ptr->debounce = atoi(value); - dlog ("Debounce : %d\n", ptr->debounce ); - rc = PASS ; - } - else if (MATCH("resource", "num_tries")) - { - ptr->num_tries = atoi(value); - dlog ("Number of Tries : %d\n", ptr->num_tries ); - rc = PASS ; - } - else if (MATCH("resource", "alarm_on")) - { - ptr->alarm_status= atoi(value); - dlog ("Resource Alarm Status : %d\n", ptr->alarm_status); - rc = PASS ; - } - - - return (rc); -} - -/***************************************************************************** - * - * Name : rmon_thinmeta_config - * - * Purpose : Read resource config file settings into the daemon configuration - * - *****************************************************************************/ -int rmon_thinmeta_config ( void * user, - const char * section, - const char * name, - const char * value) -{ - int rc = FAIL ; - thinmeta_resource_config_type * ptr = (thinmeta_resource_config_type*)user; - - if(strcmp(section, "thinpool_metadata") == 0) - { - // This configuration item has the thinpool metadata section - ptr->section_exists = true; - } - - if (MATCH(THINMETA_CONFIG_SECTION, "vg_name")) - { - ptr->vg_name = strdup(value); - dlog ("Thinpool VG Name : %s\n", ptr->vg_name); - rc = PASS ; - } - else if (MATCH(THINMETA_CONFIG_SECTION, "thinpool_name")) - { - ptr->thinpool_name = strdup(value); - dlog ("Thinpool Thinpool Name : %s\n", ptr->thinpool_name); - rc = PASS ; - } - else if (MATCH(THINMETA_CONFIG_SECTION, "critical_threshold")) - { - ptr->critical_threshold = atoi(value); - dlog ("Thinpool Critical Alarm Threshold : %d%%\n", ptr->critical_threshold); - rc = PASS ; - } - else if (MATCH(THINMETA_CONFIG_SECTION, "alarm_on")) - { - ptr->alarm_on = atoi(value); - dlog ("Thinpool Metadata alarm_on : %s\n", ptr->alarm_on? "On": "Off"); - rc = PASS ; - } - else if (MATCH(THINMETA_CONFIG_SECTION, "autoextend_on")) - { - ptr->autoextend_on = atoi(value); - dlog ("Thinpool Metadata autoextend : %s\n", ptr->autoextend_on? "On": "Off"); - rc = PASS ; - } - else if (MATCH(THINMETA_CONFIG_SECTION, "autoexent_by")) - { - ptr->autoextend_by = atoi(value); - dlog ("Metadata Autoextend by : %d\n", ptr->autoextend_by); - rc = PASS ; - } - else if (MATCH(THINMETA_CONFIG_SECTION, "autoextend_percent")) - { - ptr->autoextend_percent = atoi(value); - dlog ("Thinpool Metadata Autoextend by : %s\n", - ptr->autoextend_percent? "percents": "absolute value (MiB)"); - rc = PASS ; - } - else if (MATCH(THINMETA_CONFIG_SECTION, "audit_period")) - { - ptr->audit_period = atoi(value); - dlog ("Metadata Audit Period : %ds\n", ptr->audit_period); - rc = PASS ; - } - - return (rc); -} - -/***************************************************************************** - * - * Name : rmon_resource_config - * - * Purpose : Read resource config file settings into the daemon configuration - * - *****************************************************************************/ -int rmon_resource_config ( void * user, - const char * section, - const char * name, - const char * value) -{ - int rc = FAIL ; - resource_config_type * ptr = (resource_config_type*)user; - - if (MATCH("resource", "resource")) - { - ptr->mask |= CONF_RESOURCE ; - ptr->resource = strdup(value); - dlog ("Resource : %s\n", ptr->resource); - rc = PASS ; - } - else if (MATCH("resource", "severity")) - { - ptr->mask |= CONF_SEVERITY ; - ptr->severity = strdup(value); - dlog ("Severity : %s\n", ptr->severity ); - rc = PASS ; - } - else if (MATCH("resource", "debounce")) - { - ptr->mask |= CONF_DEBOUNCE ; - ptr->debounce = atoi(value); - dlog ("Debounce : %d\n", ptr->debounce ); - rc = PASS ; - } - else if (MATCH("resource", "minor_threshold")) - { - ptr->minor_threshold = atoi(value); - dlog ("Minor Threshold : %d\n", ptr->minor_threshold ); - rc = PASS ; - } - else if (MATCH("resource", "major_threshold")) - { - ptr->major_threshold = atoi(value); - dlog ("Major Threshold : %d\n", ptr->major_threshold ); - rc = PASS ; - } - else if (MATCH("resource", "critical_threshold")) - { - ptr->critical_threshold = atoi(value); - dlog ("Critical Threshold : %d\n", ptr->critical_threshold ); - rc = PASS ; - } - else if (MATCH("resource", "minor_threshold_abs_node0")) - { - ptr->minor_threshold_abs_node0 = atoi(value); - dlog ("Minor Threshold Absolute Node 0 : %d\n", ptr->minor_threshold_abs_node0 ); - rc = PASS ; - } - else if (MATCH("resource", "major_threshold_abs_node0")) - { - ptr->major_threshold_abs_node0 = atoi(value); - dlog ("Major Threshold Absolute Node 0 : %d\n", ptr->major_threshold_abs_node0 ); - rc = PASS ; - } - else if (MATCH("resource", "critical_threshold_abs_node0")) - { - ptr->critical_threshold_abs_node0 = atoi(value); - dlog ("Critical Threshold Absolute Node 0 : %d\n", ptr->critical_threshold_abs_node0 ); - rc = PASS ; - } - else if (MATCH("resource", "minor_threshold_abs_node1")) - { - ptr->minor_threshold_abs_node1 = atoi(value); - dlog ("Minor Threshold Absolute Node 1 : %d\n", ptr->minor_threshold_abs_node1 ); - rc = PASS ; - } - else if (MATCH("resource", "major_threshold_abs_node1")) - { - ptr->major_threshold_abs_node1 = atoi(value); - dlog ("Major Threshold Absolute Node 1 : %d\n", ptr->major_threshold_abs_node1 ); - rc = PASS ; - } - else if (MATCH("resource", "critical_threshold_abs_node1")) - { - ptr->critical_threshold_abs_node1 = atoi(value); - dlog ("Critical Threshold Absolute Node 1 : %d\n", ptr->critical_threshold_abs_node1 ); - rc = PASS ; - } - else if (MATCH("resource", "num_tries")) - { - ptr->num_tries = atoi(value); - dlog ("Number of Tries : %d\n", ptr->num_tries ); - rc = PASS ; - } - else if (MATCH("resource", "alarm_on")) - { - ptr->alarm_status= atoi(value); - dlog ("Resource Alarm Status : %d\n", ptr->alarm_status); - rc = PASS ; - } - else if (MATCH("resource", "percent")) - { - ptr->percent= atoi(value); - dlog ("Resource Percent : %d\n", ptr->percent); - rc = PASS ; - } - - return (rc); -} - -/***************************************************************************** - * - * Name : daemon_configure - * - * Purpose : Read process config file settings into the daemon configuration - * - *****************************************************************************/ -int daemon_configure ( void ) -{ - int rc = PASS ; - - if (ini_parse( CONFIG_FILE, rmon_config_handler, &rmon_config) < 0) - { - elog("Can't load '%s'\n", CONFIG_FILE ); - return (FAIL_LOAD_INI); - } - - if (ini_parse(MTCE_INI_FILE, keystone_config_handler, &rmon_config) < 0) - { - elog ("Can't load '%s'\n", MTCE_INI_FILE ); - return (FAIL_LOAD_INI); - } - - get_debug_options ( CONFIG_FILE, &rmon_config ); - - /* Verify loaded config against an expected mask - * as an ini file fault detection method */ - if ( rmon_config.mask != CONF_MASK ) - { - elog ("Error: Agent configuration failed (%x)\n", - ((-1 ^ rmon_config.mask) & CONF_MASK)); - return (FAIL_INI_CONFIG); - } - - /* Manage the daemon pulse period setting - ensure in bound values */ - if ( rmon_config.audit_period < RMON_MIN_AUDIT_PERIOD ) - { - rmon_ctrl.audit_period = RMON_MIN_AUDIT_PERIOD ; - } - else if ( rmon_config.audit_period > RMON_MAX_AUDIT_PERIOD ) - { - rmon_ctrl.audit_period = RMON_MAX_AUDIT_PERIOD ; - } - else - { - rmon_ctrl.audit_period = rmon_config.audit_period ; - } - ilog("Event Audit Period: %d secs\n", rmon_ctrl.audit_period ); - rmon_ctrl.rmon_critical_thr = rmon_config.rmon_critical_thr; - - /* Manage the ceilometer pm period setting - ensure in bound values */ - if ( rmon_config.pm_period < RMON_MIN_PM_PERIOD ) - { - rmon_ctrl.pm_period = RMON_MIN_PM_PERIOD ; - } - else if ( rmon_config.pm_period > RMON_MAX_PM_PERIOD ) - { - rmon_ctrl.pm_period = RMON_MAX_PM_PERIOD ; - } - else - { - rmon_ctrl.pm_period = rmon_config.pm_period ; - } - ilog("PM Audit Period: %d\n", rmon_ctrl.pm_period ); - - /* Manage the NTP query pulse period setting - ensure in bound values */ - if ( rmon_config.ntp_audit_period < RMON_MIN_NTP_AUDIT_PERIOD ) - { - rmon_ctrl.ntp_audit_period = RMON_MIN_NTP_AUDIT_PERIOD ; - } - else if ( rmon_config.ntp_audit_period > RMON_MAX_NTP_AUDIT_PERIOD ) - { - rmon_ctrl.ntp_audit_period = RMON_MAX_NTP_AUDIT_PERIOD ; - } - else - { - rmon_ctrl.ntp_audit_period = rmon_config.ntp_audit_period ; - } - ilog("NTP Audit Period: %d secs\n", rmon_ctrl.ntp_audit_period ); - - - // NTPQ Command timeout - if ( rmon_config.ntpq_cmd_timeout >= rmon_ctrl.ntp_audit_period ) - { - rmon_ctrl.ntpq_cmd_timeout = NTPQ_CMD_TIMEOUT ; - wlog("NTPQ command timeout (%d secs) should be less than ntp_audit_period (%d secs) ; forcing default\n", - rmon_ctrl.ntpq_cmd_timeout, rmon_ctrl.ntp_audit_period ); - } - else - { - rmon_ctrl.ntpq_cmd_timeout = rmon_config.ntpq_cmd_timeout ; - } - ilog("NTPQ command timeout: %d secs\n", rmon_ctrl.ntpq_cmd_timeout ); - - rmon_ctrl.per_node = rmon_config.per_node; - - return (rc); -} - -/****************************/ -/* Initialization Utilities */ -/****************************/ - -/* Construct the messaging sockets * - * 1. receive socket (mtc_client_rx_socket) * - * 2. transmit socket (mtc_client_tx_socket) */ -int socket_init ( void ) -{ - int rc; - - rmon_msg_init ( ); - /* Init the resource monitor api tx port. - * This is the port that the rmon client api uses to - * inform rmon of any registering or deregistering client - * processes */ - rc = rmon_port_init ( rmon_config.rmon_api_tx_port ); - - return (rc); -} - -/***************************************************************************** - * - * Name : daemon_init - * - * Purpose : initialize the daemon and sockets - * - *****************************************************************************/ -int daemon_init ( string iface, string nodetype_str ) -{ - int rc = PASS ; - char temp_hostname [MAX_HOST_NAME_SIZE+1]; - - /* init the control struct */ - memset ( &rmon_ctrl.my_hostname[0], 0, MAX_HOST_NAME_SIZE+1); - rmon_ctrl.my_macaddr = "" ; - rmon_ctrl.my_address = "" ; - rmon_ctrl.resources = 0 ; - rmon_ctrl.clients = 0 ; - - /* Assign interface to config */ - rmon_config.mgmnt_iface = (char*)iface.data() ; - - if ( daemon_files_init ( ) != PASS ) - { - elog ("Pid, log or other files could not be opened\n"); - return ( FAIL_FILES_INIT ) ; - } - - if ( set_host_functions ( nodetype_str, &rmon_ctrl.nodetype, &rmon_ctrl.function, &rmon_ctrl.subfunction ) != PASS ) - { - elog ("failed to extract nodetype\n"); - return ( FAIL_NODETYPE ); - } - - /* Bind signal handlers */ - if ( daemon_signal_init () != PASS ) - { - elog ("daemon_signal_init failed\n"); - return ( FAIL_SIGNAL_INIT ); - } - - /************************************************************************ - * There is no point continuing with init ; i.e. running daemon_configure, - * initializing sockets and trying to query for an ip address until the - * daemon's configuration requirements are met. Here we wait for those - * flag files to be present before continuing. - ************************************************************************ - * Wait for /etc/platform/.initial_config_complete & /var/run/goenabled */ - daemon_wait_for_file ( CONFIG_COMPLETE_FILE , 0); - daemon_wait_for_file ( GOENABLED_MAIN_PASS , 0); - - /* Configure the daemon */ - if ( (rc = daemon_configure ( )) != PASS ) - { - elog ("Daemon service configuration failed (rc:%i)\n", rc ); - rc = FAIL_DAEMON_CONFIG ; - } - - /* This ensures any link aggregation interface overrides the physical */ - rmon_config.mgmnt_iface = daemon_get_iface_master ( rmon_config.mgmnt_iface ); - - /* Log the startup settings */ - ilog("Interface : %s\n", rmon_config.mgmnt_iface ); - ilog("TX Interface: %d\n", rmon_config.rmon_tx_port ); - - get_iface_macaddr ( rmon_config.mgmnt_iface, rmon_ctrl.my_macaddr ); - get_iface_address ( rmon_config.mgmnt_iface, rmon_ctrl.my_address , true ); - get_iface_hostname ( rmon_config.mgmnt_iface, &temp_hostname[0] ); - - strcat(rmon_ctrl.my_hostname, "host=" ); - strcat(rmon_ctrl.my_hostname, temp_hostname); - - if ( (rc = rmon_hdlr_init (&rmon_ctrl)) != PASS ) - { - ilog ("rmon_hdlt_init failed\n"); - rc = FAIL_HDLR_INIT ; - } - - /* Setup the messaging sockets */ - else if ( (rc = socket_init ( )) != PASS ) - { - elog ("socket initialization failed (rc:%d)\n", rc ); - rc = FAIL_SOCKET_INIT ; - } - - return (rc); -} - -/***************************************************************************** - * - * Name : daemon_service_run - * - * Purpose : The main rmon service launch - * - * Waits for initial config complete and then go enabled pass flag files - * before starting resource monitoring. - * - *****************************************************************************/ -void daemon_service_run ( void ) -{ - rmon_service ( &rmon_ctrl ); - daemon_exit (); -} - -/* Push daemon state to log file */ -void daemon_dump_info ( void ) -{ - daemon_dump_membuf_banner (); - daemon_dump_membuf(); -} - -const char MY_DATA [100] = { "eieio\n" } ; -const char * daemon_stream_info ( void ) -{ - return (&MY_DATA[0]); -} - -/***************************************************************************** - * - * Name : daemon_run_testhead - * - * Purpose : Run the rmon test suite by sending alarms to maintainance - * (To be used in Sprint 11 for testing) - * - *****************************************************************************/ -int daemon_run_testhead ( void ) -{ - /* Clear All */ - return (FAIL); -} - - diff --git a/mtce/src/rmon/rmonMsg.cpp b/mtce/src/rmon/rmonMsg.cpp deleted file mode 100644 index 6660c40a..00000000 --- a/mtce/src/rmon/rmonMsg.cpp +++ /dev/null @@ -1,778 +0,0 @@ -/* - * Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - - /** - * @file - * Wind River CGCS Platform Resource Monitor Messaging API - * This class implements a server that accepts client processes - * registering and deregistering for rmon notifications. This class - * also implements a send function to send alarm messages and clear - * messages to the clients registered for a particular resource. - */ - -#include /* for config dir reading */ -#include /* for the list of conf file names */ -#include -#include -#include -#include -#include "rmon.h" -#include "rmonApi/rmon_nodeMacro.h" - - -/** - * Messaging Socket Control Struct - The allocated struct - */ - -static rmon_socket_type rmon_sock; -rmon_socket_type * rmon_getSock_ptr ( void ) -{ - return ( &rmon_sock ); -} - -msgSock_type * get_mtclogd_sockPtr ( void ) -{ - return (&rmon_sock.mtclogd); -} - -/****************************/ -/* Initialization Utilities */ -/****************************/ - -/* Init the messaging socket control structure - * The following messaging interfaces use this structure and - * are initialized separately - * */ - -void rmon_msg_init ( void ) -{ - memset(&rmon_sock, 0, sizeof(rmon_sock)); -} - -void rmon_msg_fini ( void ) -{ - if ( rmon_sock.rmon_tx_sock ) { - close (rmon_sock.rmon_tx_sock); - } if ( rmon_sock.rmon_rx_sock ) { - close (rmon_sock.rmon_rx_sock); - } if ( rmon_sock.netlink_sock ) { - close (rmon_sock.netlink_sock); - } if ( rmon_sock.ioctl_sock ) { - close (rmon_sock.ioctl_sock); - } -} - - /*Initialize the default rmon tx socket from the socket provided in: - /etc/rmond.conf */ -int rmon_port_init ( int tx_port ) -{ - int val = 1 ; - int rc = FAIL ; - if ( tx_port ) - { - rmon_sock.rmon_tx_port = tx_port ; - - rmon_sock.rmon_tx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if ( 0 >= rmon_sock.rmon_tx_sock ) - return (-errno); - - if ( setsockopt ( rmon_sock.rmon_tx_sock , SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)) == -1 ) - { - wlog ( "rmon: failed to set rmon api tx socket as re-useable (%d:%m)\n", errno ); - } - - /* Set socket to be non-blocking. */ - rc = ioctl(rmon_sock.rmon_tx_sock, FIONBIO, (char *)&val); - if ( 0 > rc ) - { - elog ("Failed to set rmon tx socket non-blocking\n"); - } - - /* Setup with localhost ip */ - memset(&rmon_sock.rmon_tx_addr, 0, sizeof(struct sockaddr_in)); - rmon_sock.rmon_tx_addr.sin_family = AF_INET ; - // rmon_sock.rmon_addr.sin_addr.s_addr = htonl(INADDR_ANY); - rmon_sock.rmon_tx_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); - rmon_sock.rmon_tx_addr.sin_port = htons(rmon_sock.rmon_tx_port) ; - - /* bind socket to the receive addr */ - if ( bind ( rmon_sock.rmon_tx_sock, (const struct sockaddr *)&rmon_sock.rmon_tx_addr, sizeof(struct sockaddr_in)) == -1 ) - { - elog ( "failed to bind to 'tx' socket with port %d (%d:%m)\n", tx_port, errno ); - close (rmon_sock.rmon_tx_sock); - rmon_sock.rmon_tx_sock = 0 ; - return (-errno); - } - } - else - { - elog ("No tx port specified\n"); - } - - return (rc) ; -} - -/* Open a socket for a new client process */ -int open_resource_socket ( char str[RMON_MAX_LEN], char registered_not[RMON_MAX_LEN], int port ) -{ - int rc = FAIL ; - int on = 1; - registered_clients clt; - - memset((char*)&clt, 0, sizeof(clt)); - strcpy(clt.registered_not, registered_not); - - clt.rx_sock.rmon_rx_port = port - 1 ; - clt.rx_sock.rmon_rx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - - if ( 0 >= clt.rx_sock.rmon_rx_sock ) - { - elog ("failed to open 'rx' socket (%d:%m)", errno ); - return (-errno); - } - - if ( setsockopt ( clt.rx_sock.rmon_rx_sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) - { - wlog ( "rmon: failed to set rmon api rx socket as re-useable (%d:%m)\n", errno); - } - - /* Set socket to be non-blocking. */ - rc = ioctl(clt.rx_sock.rmon_rx_sock, FIONBIO, (char *)&on); - if ( 0 > rc ) - { - elog ("Failed to set rmon rx socket non-blocking\n"); - } - - /* Setup with localhost ip */ - memset(&clt.rx_sock.rmon_rx_addr, 0, sizeof(struct sockaddr_in)); - clt.rx_sock.rmon_rx_addr.sin_family = AF_INET ; - // rmon_sock.rmon_addr.sin_addr.s_addr = htonl(INADDR_ANY); - clt.rx_sock.rmon_rx_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); - clt.rx_sock.rmon_rx_addr.sin_port = htons(clt.rx_sock.rmon_rx_port) ; - clt.port = port; - strcpy(clt.client_name, str); - - /* Prop the port numnber into the message struct */ - if ( clt.port ) { - clt.msg.tx_port = clt.port ; - } - - if ( clt.msg.tx_port ) - { - /* if the sock is already open then close it first */ - if ( clt.msg.tx_sock ) - { - wlog ("%s open on already open socket %d, closing first\n", - clt.client_name, clt.msg.tx_sock ); - close (clt.msg.tx_sock); - } - clt.msg.tx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if ( 0 >= clt.msg.tx_sock ) - { - elog ("failed to open 'tx' socket (%d:%m)", errno ); - return (-errno); - } - - /* Setup with localhost ip */ - memset(&clt.msg.tx_addr, 0, sizeof(struct sockaddr_in)); - clt.msg.tx_addr.sin_family = AF_INET ; - clt.msg.tx_addr.sin_addr.s_addr = inet_addr(LOOPBACK_IP); - clt.msg.tx_addr.sin_port = htons(clt.msg.tx_port) ; - - /* Make the resource monitor client api socket non-blocking */ - rc = ioctl(clt.msg.tx_sock , FIONBIO, (char *)&on); - if ( 0 > rc ) - { - elog("Failed to set rmon socket non-blocking\n"); - } - - add_registered_client(clt); - - rc = PASS ; - } - else - { - elog ("%s has no port specified\n", clt.client_name ); - } - return (rc) ; -} - -/* close the client process socket */ -void close_resource_socket ( registered_clients * ptr ) -{ - if ( ptr->msg.tx_sock ) { - close ( ptr->msg.tx_sock ); - } -} - -/* remove a client from the list of registered clients */ -int delete_client ( int clients, int index ) -{ - /* close the client socket first */ - close_resource_socket( get_registered_clients_ptr(index)); - if (index == (clients -1 )) { - registered_clients *tmp_ptr = get_registered_clients_ptr(index); - memset(tmp_ptr, 0 , sizeof(*tmp_ptr)); - } - else { - - for (int j = index; j < (clients - 1); j++) - { - registered_clients * clt = get_registered_clients_ptr(j); - registered_clients * cltTwo = get_registered_clients_ptr(j+1); - *clt = *cltTwo; - cltTwo = clt; - } - } - clients--; - ilog("deleted registered client, %d clients left \n",clients); - return clients; -} - -void send_response( char message[RMON_MAX_LEN], registered_clients * clt ) -{ - int rc; - /* send a message to the rmon api to tell it the client is registered or deregistered */ - rc = sendto ( clt->rx_sock.rmon_rx_sock, - &message , - strlen ( message ), 0, - (struct sockaddr *) &clt->rx_sock.rmon_rx_addr, - sizeof(struct sockaddr_in)); - - if ( 0 >= rc ) - { - elog ("sendto error (%d:%s)\n", errno , strerror(errno)); - } - -} - -/* Send the outstanding registration and deregistration messages to rmon */ -void rmon_alive_notification (int & clients) -{ - FILE * pFile; - char * line = NULL; - size_t len = RMON_MAX_LEN; - ssize_t read; - vector active_clients; - vector dereg_clients; - char buf[RMON_MAX_LEN]; - - active_clients.clear(); - dereg_clients.clear(); - - /* service deregister requests in queue */ - pFile = fopen (RMON_API_DEREG_DIR , "r"); - if (pFile != NULL) { - // take out a reader lock on this file incase another - // entity is exclusively writing to it at this time - flock(fileno(pFile), LOCK_SH); - while ((read = getline(&line, &len, pFile)) != -1) { - clients = rmon_service_file_inbox(clients, line, false ); - string str(line, find(line, line + len, '\0')); - /* add the deregistered clients to the list to avoid relaunching them */ - dereg_clients.push_back(str); - } - // release shared lock - flock(fileno(pFile), LOCK_UN); - fclose(pFile); - } - - /* In the case that rmon restarts or rmon_alive_notifaction() - * is called periodically, and the clients have not re-registered, - * then attempt registration from active.txt, ONLY for clients - * that are not already in the registered_client framework - */ - pFile = fopen (RMON_API_ACTIVE_DIR , "r"); - if (pFile != NULL) { - // take out a reader lock on this file incase another - // entity is exclusively writing to it at this time - flock(fileno(pFile), LOCK_SH); - ifstream fin( RMON_API_ACTIVE_DIR ); - string readLine; - - while (getline(fin, readLine)) { - if ((dereg_clients.empty()) || - (find(dereg_clients.begin(), dereg_clients.end(), readLine) == dereg_clients.end())) { - /* only add a previously active client if it has not de-registered */ - active_clients.push_back(readLine); - } - } - // release shared lock - flock(fileno(pFile), LOCK_UN); - fclose(pFile); - } - // remove(RMON_API_ACTIVE_DIR); - for (unsigned int i=0; i active_clients; - vector new_active_clients; - char lineBuf[NOT_SIZE]; - - active_clients.clear(); - new_active_clients.clear(); - - pFile = fopen (RMON_API_ACTIVE_DIR , "r"); - - if (pFile != NULL) { - // take out a reader lock which will block - // if a writer has exclusive access to this - // file. - flock(fileno(pFile), LOCK_SH); - ifstream fin( RMON_API_ACTIVE_DIR ); - string readLine; - - while (getline(fin, readLine)) { - active_clients.push_back(readLine); - } - // release shared lock - flock(fileno(pFile), LOCK_UN); - fclose(pFile); - } - - for (unsigned int i=0; iclient_name, str) == 0 ) - { - found = true; - if (strcmp(CLR_CLIENT, registered_not) == 0) { - /* the client process wants to deregister, delete it and close it's socket */ - remove_active_client(clt->client_name); - total_clients = delete_client(clients, j); - break; - } - break; - } - } - - /* only add a client process if it is not already added */ - if (!found) { - - ilog("registering client \n"); - if ( str[0] != '\0' ) - { - rc = open_resource_socket(str, registered_not, port); - if (rc == FAIL) { - wlog("resource client port open failed \n"); - } else if (rc==PASS) { - - total_clients++; - if ( add == true ) - { - /* Add the client to the active clients file */ - pFile = fopen (RMON_API_ACTIVE_DIR , "a+"); - if (pFile) - { - // take out a writer lock on this file to - // ensure that no other entity is writing to it - // at this time - int lock = flock(fileno(pFile), LOCK_EX); - if (lock < 0) - { - elog("Failed to get an exclusive on" - " '%s' (errno: %d)", RMON_API_ACTIVE_DIR, errno); - } - else - { - ilog ("adding %s to %s\n", active_buf, RMON_API_ACTIVE_DIR ); - fprintf(pFile, "%s\n", active_buf); - - // release the lock - flock(fileno(pFile), LOCK_UN); - } - fclose(pFile); - } - else - { - elog("Failed to open file %s", RMON_API_ACTIVE_DIR); - } - } - else - { - dlog ("avoid adding duplicate entry\n"); - } - } - } - else - { - wlog ("Null string !\n"); - } - } - - return total_clients; -} - -/* Service client registration and deregistration requests from select */ -int rmon_service_inbox ( int clients ) -{ - #define MAX_T (3) - int count = 0 ; - int bytes = 0 ; - char buf[RMON_MAX_LEN] ; - char active_buf[RMON_MAX_LEN] ; - socklen_t len = sizeof(struct sockaddr_in) ; - int rc; - unsigned int port = 0 ; - bool found = false; - int total_clients = clients; - char str[RMON_MAX_LEN] ; - char registered_not[RMON_MAX_LEN]; - FILE * pFile; - - memset ( buf,0,RMON_MAX_LEN); - memset ( str,0,RMON_MAX_LEN); - bytes = recvfrom( rmon_sock.rmon_tx_sock, buf, RMON_MAX_LEN, 0, (struct sockaddr *)&rmon_sock.rmon_tx_addr, &len); - if ( bytes > 0 ) - { - buf[RMON_MAX_LEN-1] = '\0'; - sscanf ( buf, "%99s %99s %u", str, registered_not, &port ); //RMON_MAX_LEN is defined as 100 - strcpy( active_buf, buf ); - - if ( strcmp(str, RMON_RESOURCE_NOT) != 0 ) - { - for (int j=0; jclient_name, str) == 0 ) { - - found = true; - memset ( buf,0,RMON_MAX_LEN ); - strcpy( buf, "client_already_registered"); - send_response(buf, clt); - - if (strcmp(CLR_CLIENT, registered_not) == 0) { - /* the client process wants to deregister, delete it and close it's socket */ - total_clients = delete_client(clients, j); - memset ( buf,0,RMON_MAX_LEN); - strcpy( buf, "deregistered_client"); - send_response(buf, clt); - break; - } - break; - } - } - - /* only add a client process if it is not already added */ - if (!found) { - - ilog("registering client \n"); - if ( str[0] != '\0' ) - { - rc = open_resource_socket(str, registered_not, port); - - if (rc == FAIL) { - dlog("resource client port open failed \n"); - } else if (rc==PASS) { - - memset ( buf,0,RMON_MAX_LEN ); - strcpy( buf, "registered_client"); - registered_clients * clt = get_registered_clients_ptr(clients); - send_response(buf, clt); - total_clients++; - /* Add the client to the active clients file */ - pFile = fopen (RMON_API_ACTIVE_DIR , "a+"); - if (pFile) - { - // take out a writer lock on this file to - // ensure that no other entity is writing to it - // at this time - int lock = flock(fileno(pFile), LOCK_EX); - if (lock < 0) - { - elog("Failed to get an exclusive on" - " '%s' (errno: %d)", RMON_API_ACTIVE_DIR, errno); - } - else - { - fprintf(pFile, "%s\n", active_buf); - // release the lock - flock(fileno(pFile), LOCK_UN); - } - fclose(pFile); - } - else - { - elog("Failed to open file %s", RMON_API_ACTIVE_DIR); - } - } - } - else - { - wlog ("Null string !\n"); - } - } - } - else if ( strcmp(str, RMON_RESOURCE_NOT) == 0 ) { - /* read the dynamic file systems file and send a response back */ - process_dynamic_fs_file(); - } - } - else if (( 0 > bytes ) && ( errno != EINTR ) && ( errno != EAGAIN )) - { - wlog_throttled ( count , MAX_T, "receive error (%d:%s)\n", errno, strerror(errno)); - } - - return total_clients; -} - -/* send resource response */ -int rmon_resource_response ( int clients ) -{ - int rc = FAIL ; - - for (int j=0; jwaiting = true; - if(( strcmp(clt->registered_not, RMON_RESOURCE_NOT) == 0)) { - /* only send to clients that are registered for the rmon api updates */ - clt->rx_sequence = 0 ; - memset ( clt->msg.tx_buf, 0, RMON_MAX_LEN ); - strcpy( clt->msg.tx_buf, "done_reading_dynamic_file_systems") ; - dlog("sending: %s on socket: %d bytes: %lu \n", clt->msg.tx_buf, clt->msg.tx_sock, strlen(clt->msg.tx_buf)); - rc = sendto (clt->msg.tx_sock, - clt->msg.tx_buf , - strlen ( clt->msg.tx_buf), 0, - (struct sockaddr *) &clt->msg.tx_addr, - sizeof(struct sockaddr_in)); - if ( rc < 0 ) - { - elog ("%s sendto error (%d:%s) (%s) (%s)\n", - clt->client_name, - errno , strerror(errno), - clt->msg.tx_buf, - inet_ntoa(clt->msg.tx_addr.sin_addr)); - clt->send_err_cnt++ ; - } - else - { - mlog ("%s\n", &clt->msg.tx_buf[0] ); - clt->waiting = false; - clt->send_err_cnt = 0; - clt->send_msg_count++ ; - rc = PASS ; - } - /* - * In certain rare instances, the UDP response packet - * sent back to the rmon client (over localhost), may - * be lost, resulting in the rmon client waiting indefinately - * (or until timeout). As a fail-safe, we will also set an - * the acknowledgement flag file that the client can - * look at on timeout - */ - daemon_log(RESPONSE_RMON_RESOURCE_NOT, ""); - } - } - return (rc); -} - -/* send rmon resource set and clear alarm messages to registered client processes */ -int rmon_send_request ( resource_config_type * ptr, int clients) -{ - dlog("%s, number of clients: %d\n", ptr->resource, clients); - int rc = FAIL ; - int total_clients = clients; - - for (int j=0; jwaiting = true; - - dlog("registered notification client: %s\n", clt->registered_not); - if(( strcmp(clt->registered_not, ptr->resource) == 0) || ( strcmp(clt->registered_not, ALL_USAGE) == 0)) { - /* only send to clients that are registered for the resource type in question */ - clt->rx_sequence = 0 ; - memset ( clt->msg.tx_buf, 0, RMON_MAX_LEN ); - sprintf ( clt->msg.tx_buf, "%s %u", ptr->errorMsg, ++clt->tx_sequence ) ; - mlog( "%s sending: %s on socket: %d bytes: %lu\n", - ptr->resource, - clt->msg.tx_buf, - clt->msg.tx_sock, - strlen(clt->msg.tx_buf)); - rc = sendto (clt->msg.tx_sock, - clt->msg.tx_buf , - strlen ( clt->msg.tx_buf), 0, - (struct sockaddr *) &clt->msg.tx_addr, - sizeof(struct sockaddr_in)); - if ( rc < 0 ) - { - elog ("%s %s sendto error (%d:%s) rc: (%d) (%s) (%s)\n", - ptr->resource, - clt->client_name, - errno , strerror(errno), - rc, - clt->msg.tx_buf, - inet_ntoa(clt->msg.tx_addr.sin_addr)); - clt->send_err_cnt++ ; - if (clt->send_err_cnt >= MAX_ERR_CNT) { - /* assume the client process is killed, deregister the client */ - ilog("%s client process: %s is not responding, deregistering it \n", ptr->resource, clt->client_name); - total_clients = delete_client(clients, j); - update_total_clients(total_clients); - } - } - else - { - mlog ("%s %s\n", ptr->resource, &clt->msg.tx_buf[0] ); - clt->waiting = false; - clt->send_err_cnt = 0; - clt->send_msg_count++ ; - rc = PASS ; - } - } - } - return (rc); -} - -/* send rmon interface resource set and clear alarm messages to registered client processes */ -int send_interface_msg ( interface_resource_config_type * ptr, int clients) -{ - int rc = FAIL ; - int total_clients = clients; - - for (int j=0; jwaiting = true; - if(( strcmp(clt->registered_not, ptr->resource) == 0) || ( strcmp(clt->registered_not, ALL_USAGE) == 0)) { - /* only send to clients that are registered for the resource type in question */ - clt->rx_sequence = 0 ; - memset ( clt->msg.tx_buf, 0, RMON_MAX_LEN ); - sprintf ( clt->msg.tx_buf, "%s %u", ptr->errorMsg, ++clt->tx_sequence ) ; - mlog("sending: %s on socket: %d bytes: %lu\n", - clt->msg.tx_buf, - clt->msg.tx_sock, - strlen(clt->msg.tx_buf)); - rc = sendto (clt->msg.tx_sock, - clt->msg.tx_buf , - strlen ( clt->msg.tx_buf), 0, - (struct sockaddr *) &clt->msg.tx_addr, - sizeof(struct sockaddr_in)); - if ( 0 >= rc ) - { - elog ("%s sendto error (%d:%s) (%s) (%s)\n", - clt->client_name, - errno , strerror(errno), - clt->msg.tx_buf, - inet_ntoa(clt->msg.tx_addr.sin_addr)); - clt->send_err_cnt++ ; - if (clt->send_err_cnt >= MAX_ERR_CNT) { - /* assume the client process is killed, deregister the client */ - ilog("client process: %s is not responding, deregistering it \n", clt->client_name); - total_clients = delete_client(clients, j); - update_total_clients(total_clients); - } - } - else - { - mlog ("%s\n", &clt->msg.tx_buf[0] ); - clt->waiting = false; - clt->send_err_cnt = 0; - clt->send_msg_count++ ; - rc = PASS ; - } - } - } - return (rc); -} diff --git a/mtce/src/rmon/rmon_resource_notify/Makefile b/mtce/src/rmon/rmon_resource_notify/Makefile deleted file mode 100644 index 726875b1..00000000 --- a/mtce/src/rmon/rmon_resource_notify/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright (c) 2014-2018 Wind River Systems, Inc. -# -# SPDX-License-Identifier: Apache-2.0 -# - -SRCS = rmon_resource_notify.cpp -OBJS = $(SRCS:.cpp=.o) -LDLIBS = -lstdc++ -lrmonapi -lrt -INCLUDES = -I. -I/usr/include/mtce-daemon -I/usr/include/mtce-common -INCLUDES += -I.. -CCFLAGS = -g -O2 -Wall -Wextra -Werror - -STATIC_ANALYSIS_TOOL = cppcheck -STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) - -all: build - -.cpp.o: - $(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@ - -static_analysis: -ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) - $(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h -else - echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." -endif - -build: clean static_analysis $(OBJS) - $(CXX) $(CCFLAGS) $(OBJS) -L../../common -L../../daemon -L../rmonApi/ $(LDLIBS) $(EXTRALDFLAGS) -o rmon_resource_notify - -clean: - @rm -v -f $(OBJ) rmon_resource_notify *.o *.a diff --git a/mtce/src/rmon/rmon_resource_notify/rmon_resource_notify.cpp b/mtce/src/rmon/rmon_resource_notify/rmon_resource_notify.cpp deleted file mode 100644 index 61de8ca3..00000000 --- a/mtce/src/rmon/rmon_resource_notify/rmon_resource_notify.cpp +++ /dev/null @@ -1,552 +0,0 @@ -/* - * Copyright (c) 2013-2017 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - -/** - * @file - * Wind River CGTS Platform Resource Monitor Resource Notify - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; - -#include "../rmonApi/rmon_nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ -#include "rmon_resource_notify.h" - -extern "C" -{ -#include "../rmonApi/rmon_api.h" -} -#define LOOPBACK_IP "127.0.0.1" -#define RX_PORT (2304) - -static char my_hostname [MAX_HOST_NAME_SIZE+1]; -static rmon_socket_type rmon_sock ; -static rmon_socket_type * sock_ptr ; - -/** Client Config mask */ -#define CONFIG_CLIENT_MASK (CONFIG_AGENT_PORT |\ - CONFIG_CLIENT_API_PORT |\ - CONFIG_CLIENT_PORT) - -/****************************/ -/* Initialization Utilities */ -/****************************/ - -/* Initialize the unicast api response message */ -/* One time thing ; tx same message all the time. */ -int rmon_message_init ( void ) -{ - /* Build the transmit api response message */ - memset ( &sock_ptr->tx_message, 0, sizeof (rmon_message_type)); - memcpy ( &sock_ptr->tx_message.m[RMON_HEADER_SIZE], my_hostname, strlen(my_hostname)); - return (PASS); -} - -int rmon_socket_init ( int port, const char * process_name ) -{ - - int on = 1 ; - int rc = PASS ; - - CREATE_NONBLOCK_INET_UDP_RX_SOCKET ( LOOPBACK_IP, - port, - rmon_sock.rmon_api_sock, - rmon_sock.rmon_api_addr, - rmon_sock.rmon_api_port, - rmon_sock.rmon_api_len, - "rmon api socket receive", - rc ); - if ( rc ) return (rc) ; - - /* Open the monitoring socket */ - rmon_sock.rmon_socket = resource_monitor_initialize ( process_name, port, RMON_RESOURCE_NOT ); - //ilog("Resource Monitor API Socket %d\n", rmon_sock.rmon_socket); - if ( 0 > rmon_sock.rmon_socket ) - { - close_syslog(); - return (FAIL); - } - - /* Make the socket non-blocking */ - rc = ioctl(rmon_sock.rmon_socket, FIONBIO, (char *)&on); - if ( 0 > rc ) - { - //elog("Failed to set rmon socket non-blocking (%d:%m)\n", errno ); - return (FAIL_SOCKET_NOBLOCK); - } - return (PASS); -} - - -int daemon_init (int port, const char * process_name ) -{ - int rc = PASS ; - - /* Initialize socket construct and pointer to it */ - memset ( &rmon_sock, 0, sizeof(rmon_sock)); - sock_ptr = &rmon_sock ; - - /* Setup the resmon api rx messaging sockets */ - if ( (rc = rmon_socket_init (port, process_name)) != PASS ) - { - // elog("socket initialization failed (rc:%d)\n", rc); - rc = FAIL_SOCKET_INIT; - } - return (rc); -} - -#define RMON_MAX_LEN (100) -int client_service_inbox () -{ -#define MAX_T 100 - int bytes = 0 ; - char buf[RMON_MAX_LEN] ; - socklen_t len = sizeof(struct sockaddr_in) ; - char str[RMON_MAX_LEN]; - int rc = FAIL; - - do - { - memset ( buf,0,RMON_MAX_LEN); - memset ( str,0,RMON_MAX_LEN); - - bytes = recvfrom( rmon_sock.rmon_socket, buf, RMON_MAX_LEN, 0, (struct sockaddr *)&rmon_sock.client_sockAddr, &len); - if ( bytes > 0 ) - { - sscanf ( buf, "%99s", str); - if ( str[0] != '\0' ) - { - if ( strcmp(str, RMON_DONE) == 0) - { - return (PASS); - } - } - return (FAIL); - } - else if (( 0 > bytes ) && ( errno != EINTR ) && ( errno != EAGAIN )) - { - //ilog("problem with test client recv \n"); - } - } while ( bytes > 0 ) ; - - return rc; -} - -/* Maximum length of the dynamic resource list */ -#define DYNAMIC_RESOURCE_MAX (1024) - -int main ( int argc, char *argv[] ) -{ - int rc; - int port = RX_PORT; - const char * process_name = PROCESS_NAME; - char res_name[30]; - char state[20]; - char mount[50]; - char type[20]; - char device[50]; - char volume_group[50]; - string delimiter = ","; - unsigned long long timeout = DEFAULT_RESPONSE_TIMEOUT; - char dynamic_res[DYNAMIC_RESOURCE_MAX]; - char resource_name [50]; - struct stat fileInfo; - struct timespec start, stop; - struct flock fl; - int fd; - bool toNotify = false; - vector dynamic_resources; - size_t pos; - string token; - - open_syslog(); - - memset ((char *)&fileInfo, 0 , sizeof(fileInfo)); - memset(&res_name[0], 0, sizeof(res_name)); - memset(&state[0], 0, sizeof(state)); - memset(&mount[0], 0, sizeof(mount)); - memset(&type[0], 0, sizeof(type)); - memset(&device[0], 0, sizeof(device)); - memset(&volume_group[0], 0, sizeof(volume_group)); - memset(&dynamic_res[0], 0, sizeof(dynamic_res)); - memset(&resource_name[0], 0, sizeof(resource_name)); - - fl.l_whence = SEEK_SET; - fl.l_start = 0; - fl.l_len = 0; - fl.l_pid = getpid(); - - if ((argc > 1) && (strcmp(argv[1],"--help") == 0)) { - printf("usage: rmon_resource_notify " - "--resource-name " - "--resource-state " - "--resource-type " - "--device " - "--mount-point " - "--volume_group " - "--timeout \n"); - close_syslog(); - return FAIL; - } - - for (int i=0; i(mmap(0, fileInfo.st_size, PROT_READ, MAP_SHARED, fd, 0)); - if (map == MAP_FAILED) - { - close(fd); - // elog("Error mmapping the file"); - close_syslog(); - return (FAIL); - } - string oldFile(map); - /* extract the resource name */ - sscanf(dynamic_res, "%49s", resource_name); - string newResource(resource_name); - string updatedResource(dynamic_res); - dynamic_resources.clear(); - - if ( oldFile.find(updatedResource) == string::npos ) - { - if ( oldFile.find(newResource) != string::npos ) - { - /* the resource exists, update it in the file */ - while ((pos = oldFile.find(delimiter)) != string::npos) - { - /* separate the resources from the file */ - token = oldFile.substr(0, pos); - if (token.find(newResource) == string::npos) - { - dynamic_resources.push_back(token); - } - oldFile.erase(0, pos + delimiter.length()); - } - oldFile = ""; - for (unsigned int i=0; i(mmap(0, textsize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)); - if (map == MAP_FAILED) - { - close(fd); - // elog("Error mmapping the file"); - close_syslog(); - return (FAIL); - } - - /* write the resource into memory */ - memcpy(map, text.c_str(), textsize); - - /* write the updated list to the disk */ - if (msync(map, textsize, MS_SYNC) == -1) - { - ; // elog("Could not sync the file to disk"); - } - - - /* free the mmapped memory */ - if (munmap(map, textsize) == -1) - { - /* unlock the file */ - fl.l_type = F_UNLCK; - fcntl(fd, F_SETLK, &fl); - // elog("Error un-mmapping the file"); - close_syslog(); - return (FAIL); - } - } - close(fd); - /* unlock the file */ - fl.l_type = F_UNLCK; - fcntl(fd, F_SETLK, &fl); - if (!toNotify) - { - close_syslog(); - return (PASS); - } - - /* Check to see if rmond is running */ - rc = system("pidof rmond"); - if (WEXITSTATUS(rc) != 0) - { - return (PASS); - } - - rc = daemon_init(port, process_name); - if (rc == PASS) { - - if( clock_gettime( CLOCK_MONOTONIC, &start) == -1 ) { - // elog("clock gettime \n" ); - close_syslog(); - return (FAIL); - } - - rmon_message_init(); - rmon_sock.rmon_socket = resource_monitor_get_sel_obj (); - std::list socks; - socks.clear(); - socks.push_front ( rmon_sock.rmon_socket ); - socks.sort(); - // remove the rmon resource notify flag file - // as this will be reset by rmon - remove (RESPONSE_RMON_RESOURCE_NOT); - /* signal to rmon that the dynamic file has been written */ - rc = rmon_notification ( RMON_RESOURCE_NOT ); - - for ( ; ; ) - { - /* Initialize the timeval struct */ - rmon_sock.waitd.tv_sec = 0; - rmon_sock.waitd.tv_usec = SELECT_TIMEOUT * 100; - - /* Initialize the master fd_set */ - FD_ZERO(&rmon_sock.readfds); - FD_SET(rmon_sock.rmon_socket, &rmon_sock.readfds); - - rc = select( socks.back()+1, - &rmon_sock.readfds, NULL, NULL, - &rmon_sock.waitd); - - /* If the select time out expired then */ - if (( rc < 0 ) || ( rc == 0 )) - { - /* Check to see if the select call failed. */ - /* ... but filter Interrupt signal */ - if (( rc < 0 ) && ( errno != EINTR )) - { - //ilog("Socket Select Failed (rc:%d) %s \n", errno, strerror(errno)); - } - } - - if ( FD_ISSET(rmon_sock.rmon_socket, &rmon_sock.readfds)) - { - rc = client_service_inbox(); - - if (rc == PASS) { - close_syslog(); - return PASS; - } - } - - if ( clock_gettime( CLOCK_MONOTONIC, &stop) == -1 ) { - // elog("clock gettime\n"); - return (FAIL); - } - unsigned long delta = (stop.tv_sec - start.tv_sec) * 1000 + (stop.tv_nsec - start.tv_nsec) / 1000000; - if (delta > timeout) - { - /* we exceeded the timeout. - * It may have happened that rmon - * sent its acknowledgment but that response - * got lost. In that case check for the flag file - * as a last ditch effort - */ - if (access(RESPONSE_RMON_RESOURCE_NOT, F_OK) != -1) { - close_syslog() - return (PASS); - } - close_syslog(); - return (FAIL); - } - } - } - close_syslog(); - return FAIL; -} - diff --git a/mtce/src/rmon/rmon_resource_notify/rmon_resource_notify.h b/mtce/src/rmon/rmon_resource_notify/rmon_resource_notify.h deleted file mode 100644 index 74dbba2d..00000000 --- a/mtce/src/rmon/rmon_resource_notify/rmon_resource_notify.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2013-2015 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - - /** - * @file - * Wind River CGTS Platform Resource Monitor Resource Notify Header - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "nodeBase.h" -#include "nodeUtil.h" - -/** Maximum service fail count before action */ -#define MAX_FAIL_COUNT (1) - -#define RMON_HEADER_SIZE (15) -#define RMON_MAX_MSG (50) -#define MAX_COUNT (3) -#define SELECT_TIMEOUT (100) -#define DEFAULT_RESPONSE_TIMEOUT (120 * 1000) // 2mins - -/* default process name if none is specified */ -#define PROCESS_NAME ((const char *)"rmonResourceNotify") - -#define RMON_DONE ((const char *)"done_reading_dynamic_file_systems") -#define DYNAMIC_FS_FILE ((const char *)"/etc/rmonfiles.d/dynamic.conf") -#define RMON_RESOURCE_NOT ((const char *)"read_dynamic_file_system") -#define RESPONSE_RMON_RESOURCE_NOT ((const char *)"/var/run/.dynamicfs_registered") - -typedef struct -{ - /** Message buffer */ - char m [RMON_MAX_MSG]; - - /** Sequence number */ - unsigned int s ; - - /* Fast Lookup Clue Info */ - unsigned int c ; - - /* Status Flags */ - unsigned int f ; - - /* reserved for future use */ - unsigned int r ; - -} rmon_message_type ; - -/** rmon resource notify socket control structure */ -typedef struct -{ - - struct sockaddr_in client_addr ; - socklen_t client_addr_len ; - - /** Unix domain socket used to transmit on-node event messages - * to from other local services such as rmon */ - int send_event_socket ; - struct sockaddr_un agent_domain ; - socklen_t agent_domain_len ; - - /** rmon api Socket using UDP Inet over 'lo' interface */ - int rmon_api_sock ; /**< receive rmon pulses socket */ - int rmon_api_port ; /**< the port */ - struct sockaddr_in rmon_api_addr ; /**< attributes */ - socklen_t rmon_api_len ; /**< length */ - - int rmon_socket ; /**< Active monitor socket */ - /** The addr and port are stored in the shared librmonapi.so library */ - - struct sockaddr_in client_sockAddr ; /**< Client socket attributes */ - socklen_t agentLen ; /**< Agent socket attr struct len */ - socklen_t clientLen ; /**< Client socket attr struct len */ - int tx_socket ; /**< general transmit socket ID */ - int rx_socket ; /**< general receive socket ID */ - rmon_message_type tx_message ; /**< transmit message */ - rmon_message_type rx_message ; /**< receive message */ - int rmon_client_port ; - int fail_count ; /**< Socket retry thresholding */ - - /* For select dispatch */ - struct timeval waitd ; - fd_set readfds; - msgSock_type mtclogd ; - -} rmon_socket_type ; - - diff --git a/mtce/src/rmon/scripts/cinder_virtual_resource.conf b/mtce/src/rmon/scripts/cinder_virtual_resource.conf deleted file mode 100644 index 03d19dff..00000000 --- a/mtce/src/rmon/scripts/cinder_virtual_resource.conf +++ /dev/null @@ -1,27 +0,0 @@ -[resource] -resource = Cinder LVM Thinpool Usage -debounce = 20 ; number of seconds to wait before degrade clear -severity = critical ; minor, major, critical -minor_threshold = 70 ; minor Cinder LVM Thinpool utilization threshold percentage -major_threshold = 80 ; major Cinder LVM Thinpool utilization threshold percentage -critical_threshold = 90 ; critical Cinder LVM Thinpool utilization threshold percentage (use 101 if unused) -minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 -major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 -critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) -minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 -major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 -critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 -num_tries = 2 ; number of tries before the alarm is raised -alarm_on = 1 ; 1 for alarm on, 0 for alarm off -percent = 1 ; 1 for percentage used, 0 for absolute value (Cinder LVM Thinpool available in MiB) (default is 1) - -[thinpool_metadata] -vg_name = cinder-volumes ; name of parent VG -thinpool_name = cinder-volumes-pool ; name of LV Thin Pool to monitor -critical_threshold = 80 ; critical alarm threshold percentage for metadata utilization -alarm_on = 1 ; 1 to raise critical alarm on threshold, 0 not to raise alarm -autoextend_on = 1 ; 1 to first try extending the metadata on threshold, 0 for autoextend off. -autoexent_by = 20 ; autoextend by a percentage or absolute value in MiB -autoextend_percent = 1 ; percent or MiB -audit_period = 10 ; interval to performa the audit - diff --git a/mtce/src/rmon/scripts/infrastructure_resource.conf b/mtce/src/rmon/scripts/infrastructure_resource.conf deleted file mode 100644 index ca0956ac..00000000 --- a/mtce/src/rmon/scripts/infrastructure_resource.conf +++ /dev/null @@ -1,6 +0,0 @@ -[resource] -resource = infra -debounce = 20 ; number of seconds to wait before degrade clear -severity = critical ; minor, major, critical -num_tries = 2 ; number of tries before the alarm is raised -alarm_on = 1 ; 1 for alarm on, 0 for alarm off diff --git a/mtce/src/rmon/scripts/instance_resource.conf b/mtce/src/rmon/scripts/instance_resource.conf deleted file mode 100644 index 0b2d004c..00000000 --- a/mtce/src/rmon/scripts/instance_resource.conf +++ /dev/null @@ -1,16 +0,0 @@ -[resource] -resource = Platform Nova Instances -debounce = 20 ; number of seconds to wait before degrade clear -severity = critical ; minor, major, critical -minor_threshold = 80 ; minor memory utilization threshold percentage -major_threshold = 90 ; major memory utilization threshold percentage -critical_threshold = 95 ; critical memory utilization threshold percentage (use 101 if unsed) -minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 -major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 -critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) -minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 -major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 -critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 -num_tries = 2 ; number of tries before the alarm is raised -alarm_on = 1 ; 1 for alarm on, 0 for alarm off -percent = 1 ; 1 for percentage used, 0 for absolute value (memory available in MiB) (default is 1) diff --git a/mtce/src/rmon/scripts/management_resource.conf b/mtce/src/rmon/scripts/management_resource.conf deleted file mode 100644 index f7f3a513..00000000 --- a/mtce/src/rmon/scripts/management_resource.conf +++ /dev/null @@ -1,6 +0,0 @@ -[resource] -resource = mgmt -debounce = 20 ; number of seconds to wait before degrade clear -severity = critical ; minor, major, critical -num_tries = 2 ; number of tries before the alarm is raised -alarm_on = 1 ; 1 for alarm on, 0 for alarm off diff --git a/mtce/src/rmon/scripts/nova_virtual_resource.conf b/mtce/src/rmon/scripts/nova_virtual_resource.conf deleted file mode 100644 index da968712..00000000 --- a/mtce/src/rmon/scripts/nova_virtual_resource.conf +++ /dev/null @@ -1,16 +0,0 @@ -[resource] -resource = Nova LVM Thinpool Usage -debounce = 20 ; number of seconds to wait before degrade clear -severity = critical ; minor, major, critical -minor_threshold = 70 ; minor Nova LVM Thinpool threshold percentage -major_threshold = 80 ; major Noval LVM Thinpool utilization threshold percentage -critical_threshold = 90 ; critical Nova LVM Thinpool utilization threshold percentage (use 101 if unused) -minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 -major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 -critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) -minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 -major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 -critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 -num_tries = 2 ; number of tries before the alarm is raised -alarm_on = 1 ; 1 for alarm on, 0 for alarm off -percent = 1 ; 1 for percentage used, 0 for absolute value (Nova LVM Thinpool available in MiB) (default is 1) diff --git a/mtce/src/rmon/scripts/oam_resource.conf b/mtce/src/rmon/scripts/oam_resource.conf deleted file mode 100644 index d1b8692f..00000000 --- a/mtce/src/rmon/scripts/oam_resource.conf +++ /dev/null @@ -1,6 +0,0 @@ -[resource] -resource = oam -debounce = 20 ; number of seconds to wait before degrade clear -severity = critical ; minor, major, critical -num_tries = 2 ; number of tries before the alarm is raised -alarm_on = 1 ; 1 for alarm on, 0 for alarm off diff --git a/mtce/src/rmon/scripts/remotelogging_resource.conf b/mtce/src/rmon/scripts/remotelogging_resource.conf deleted file mode 100644 index e8540c42..00000000 --- a/mtce/src/rmon/scripts/remotelogging_resource.conf +++ /dev/null @@ -1,6 +0,0 @@ -[resource] -resource = Remote Logging Connectivity -debounce = 20 ; number of seconds to wait before degrade clear -severity = minor ; minor, major, critical -num_tries = 2 ; number of tries before the alarm is raised -alarm_on = 1 ; 1 for alarm on, 0 for alarm off diff --git a/mtce/src/rmon/scripts/rmon b/mtce/src/rmon/scripts/rmon deleted file mode 100644 index 241b4dea..00000000 --- a/mtce/src/rmon/scripts/rmon +++ /dev/null @@ -1,97 +0,0 @@ -#! /bin/sh -# -# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. -# -# SPDX-License-Identifier: Apache-2.0 -# - -# -# chkconfig: 2345 95 95 -# -### BEGIN INIT INFO -# Provides: rmon -# Default-Start: 3 5 -# Default-Stop: 0 1 2 6 -# Short-Description: Resource Monitor daemon -### END INIT INFO - -# echo "7:3:respawn:/usr/local/bin/rmond" >> /etc/inittab - -. /etc/init.d/functions - -DAEMON_NAME="rmond" -DAEMON="/usr/local/bin/${DAEMON_NAME}" -IFACE="" - -if [ ! -e "$DAEMON" ] ; then - logger "$DAEMON is missing" - exit 1 -fi - -RETVAL=0 - -PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin -export PATH - -case "$1" in - start) - # Prevent multipe starts - if [ -e /var/lock/subsys/${DAEMON_NAME} ] ; then - if [ -n "`pidof ${DAEMON_NAME}`" ] ; then - exit $RETVAL - else - rm -f /var/lock/subsys/${DAEMON_NAME} - fi - fi - echo -n "Starting $DAEMON_NAME: " - start-stop-daemon --start -b -x ${DAEMON} -- - RETVAL=$? - if [ $RETVAL -eq 0 ] ; then - echo "OK" - touch /var/lock/subsys/${DAEMON_NAME} - else - echo "FAIL" - fi - ;; - - stop) - echo -n "Stopping ${DAEMON_NAME}: " - if [ -n "`pidof ${DAEMON_NAME}`" ] ; then - killproc ${DAEMON_NAME} - fi - if [ -n "`pidof ${DAEMON_NAME}`" ] ; then - echo "FAIL" - else - echo "OK" - - # remove lockfile - rm -f /var/lock/subsys/${DAEMON_NAME} - fi - ;; - - restart) - $0 stop - sleep 1 - $0 start - ;; - - status) - pid=`pidof ${DAEMON_NAME}` - RETVAL=$? - if [ ${RETVAL} -eq 0 ] ; then - echo "${DAEMON_NAME} is running" - else - echo "${DAEMON_NAME} is NOT running" - fi - ;; - - condrestart) - [ -f /var/lock/subsys/${DAEMON_NAME} ] && $0 restart - ;; - - *) - echo "usage: $0 { start | stop | status | restart | condrestart | status }" - ;; -esac - -exit $RETVAL diff --git a/mtce/src/rmon/scripts/rmon.conf b/mtce/src/rmon/scripts/rmon.conf deleted file mode 100644 index b0df981d..00000000 --- a/mtce/src/rmon/scripts/rmon.conf +++ /dev/null @@ -1,21 +0,0 @@ -[process] ; The block label -process = rmond ; The name of the process to be monitored. -service = rmon ; The name of the service known to systemd -pidfile = /var/run/rmond.pid ; The path to process pidfile -script = /etc/init.d/rmon ; The path and restart script file name -style = lsb ; The type of script file. Only lsb is supported -severity = major ; Process failure severity - ; critical : host is failed - ; major : host is degraded - ; minor : log is generated -restarts = 3 ; Number of back to back unsuccessful restarts before severity assertion -interval = 10 ; Number of seconds to wait between back-to-back unsuccessful restarts -debounce = 20 ; Number of seconds the process needs to run before declaring - ; it as running O.K. after a restart. - ; Time after which back-to-back restart count is cleared. -startuptime = 5 ; Seconds to wait after process start before starting the debounce monitor -mode = passive ; Monitoring mode: passive (default) or active - ; passive: process death monitoring (default: always) - ; active: heartbeat monitoring, i.e. request / response messaging - - diff --git a/mtce/src/rmon/scripts/rmon.logrotate b/mtce/src/rmon/scripts/rmon.logrotate deleted file mode 100755 index 88d75c98..00000000 --- a/mtce/src/rmon/scripts/rmon.logrotate +++ /dev/null @@ -1,43 +0,0 @@ -#daily -nodateext - -/var/log/rmond.log -{ - nodateext - size 100M - start 1 - missingok - rotate 20 - compress - postrotate - sharedscripts - postrotate - systemctl reload syslog-ng > /dev/null 2>&1 || true - endscript -} - -/var/log/rmond_notify.log -{ - size 10M - start 1 - missingok - rotate 10 - compress - sharedscripts - postrotate - systemctl reload syslog-ng > /dev/null 2>&1 || true - endscript -} - -/var/log/rmond.dump -{ - size 10M - start 1 - rotate 10 - compress - missingok - sharedscripts - postrotate - systemctl reload syslog-ng > /dev/null 2>&1 || true - endscript -} diff --git a/mtce/src/rmon/scripts/rmon.service b/mtce/src/rmon/scripts/rmon.service deleted file mode 100644 index 5fd53ed3..00000000 --- a/mtce/src/rmon/scripts/rmon.service +++ /dev/null @@ -1,23 +0,0 @@ -[Unit] -Description=Titanium Cloud Maintenance Resource Monitor -After=network.target syslog.service config.service -Before=pmon.service - -[Service] -Type=forking -ExecStart=/etc/rc.d/init.d/rmon start -ExecStop=/etc/rc.d/init.d/rmon stop -ExecReload=/etc/rc.d/init.d/rmon reload -PIDFile=/var/run/rmond.pid - -# Failure handling -TimeoutStartSec=10s -TimeoutStopSec=10s - -# Auto recovery by pmond -Restart=no -RestartSec=10 - -[Install] -WantedBy=multi-user.target - diff --git a/mtce/src/rmon/scripts/rmon_reload_on_cpe.sh b/mtce/src/rmon/scripts/rmon_reload_on_cpe.sh deleted file mode 100644 index eea8fb56..00000000 --- a/mtce/src/rmon/scripts/rmon_reload_on_cpe.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2015-2016 Wind River Systems, Inc. -# -# SPDX-License-Identifier: Apache-2.0 -# - -source "/etc/init.d/log_functions.sh" - -# is it a worker subfunction on a CPE system -isCompute () -{ - [ -f /etc/platform/platform.conf ] || return 0 - res=$(grep "subfunction" /etc/platform/platform.conf | grep "controller,worker" | wc -l) - - if [ "$res" -eq 0 ] ; then - return 0 - else - return 1 - fi -} - -# only reload rmon if it is a CPE system -isCompute - -if [[ "$?" -eq 0 ]]; then - log "Cannot run on a non CPE system." - exit 0 -fi - -if [ ! -f /var/run/.worker_config_complete ]; then - log "Cannot run prior to worker configuration complete." - exit 0 -fi - -################################################################################################# -# Temporarily switch this to a process kill instead of reload due to a problem found -# in the rmon config reload handling. A clone Jira was created to track the fix that will migrate -# this back to a reload. -################################################################################################# -# rc=`pkill -hup rmond` -# log "rmond config reload (rc=$rc)" - -/usr/local/sbin/pmon-restart rmond -logger "requesting graceful rmon restart in goenabled test on cpe" - -exit 0 diff --git a/mtce/src/rmon/scripts/rmond.conf b/mtce/src/rmon/scripts/rmond.conf deleted file mode 100755 index c5f5689f..00000000 --- a/mtce/src/rmon/scripts/rmond.conf +++ /dev/null @@ -1,34 +0,0 @@ -; CGTS Resource Monitor Configuration File -[config] ; Configuration -audit_period = 30 ; Resource polling period in seconds (1 - 120) -pm_period = 300 ; Period For posting PMs to Ceilometer -ntp_audit_period = 600 ; Resource polling period for querying NTP servers in seconds (10 - 1200) -ntpq_cmd_timeout = 60 ; Max amount of time in seconds to wait for the ntpq command to complete - -rmon_tx_port = 2101 ; Transmit Event and Command Reply Port -per_node = 0 ; enable (1) or disable (0) memory checking per processor node -rmon_api_tx_port = 2300 ; Resource Monitor API tx Port -critical_threshold = 1 ; enable (1) or disable (0) critical threshold checking - -log_step = 20 ; create resource value logs when readings cross this +/- value threshold - ; - this represents highest step for all resources - ; - resources can override to smaller step - -[timeouts] -start_delay = 10 ; managed range 1 .. 120 seconds - -[features] - -[debug] ; SIGHUP to reload -debug_timer = 0 ; enable(1) or disable(0) timer logs (tlog) -debug_msg = 0 ; enable(1) or disable(0) message logs (mlog) -debug_state = 0 ; enable(1) or disable(0) state change logs (clog) -debug_level = 0 ; decimal mask 0..15 (8,4,2,1) -debug_all = 0 ; - -flush = 1 ; enable(1) or disable(0) force log flush (main loop) -flush_thld = 5 ; if enabled - force flush after this number of loops - -debug_event = none ; Not used -debug_filter = none ; Not used -stress_test = 0 ; In-Service Stress test number diff --git a/mtce/src/rmon/scripts/virtual_resource.conf b/mtce/src/rmon/scripts/virtual_resource.conf deleted file mode 100644 index 7031d113..00000000 --- a/mtce/src/rmon/scripts/virtual_resource.conf +++ /dev/null @@ -1,16 +0,0 @@ -[resource] -resource = Cinder LVM Backend Usage -debounce = 20 ; number of seconds to wait before degrade clear -severity = critical ; minor, major, critical -minor_threshold = 70 ; minor Cinder LVM Backend threshold percentage -major_threshold = 80 ; major Cinder LVM Backend utilization threshold percentage -critical_threshold = 90 ; critical Cinder LVM Backend utilization threshold percentage (use 101 if unused) -minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 -major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 -critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) -minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 -major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 -critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 -num_tries = 2 ; number of tries before the alarm is raised -alarm_on = 1 ; 1 for alarm on, 0 for alarm off -percent = 1 ; 1 for percentage used, 0 for absolute value (Cinder LVM Backend available in MiB) (default is 1) diff --git a/mtce/src/rmon/thinmetaHdlr.cpp b/mtce/src/rmon/thinmetaHdlr.cpp deleted file mode 100644 index c7184bf8..00000000 --- a/mtce/src/rmon/thinmetaHdlr.cpp +++ /dev/null @@ -1,396 +0,0 @@ -/* - * Copyright (c) 2017 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* - */ - -/** - * @file - * Wind River Titanium Cloud Platform, LVM Thinpool Metadata Monitor Handler - */ -#include "rmon.h" /* rmon header file */ - -/* Used to set interface alarms through the FM API */ -static SFmAlarmDataT alarmData; - -/******************************************************************************* - * - * Name : _build_entity_instance_id - * - * Purpose : Build the entity instance id needed by our alarm - * - * *****************************************************************************/ -void thinmeta_init(thinmeta_resource_config_type * res, struct mtc_timer * timers, int count) { - if (count > MAX_RESOURCES) { - elog("Thinpool metadata resource 'count' is: %i, maximum number or resources is: %i, " - "initializing count to max!", - count, MAX_RESOURCES); - count = MAX_RESOURCES; - } - - for (int i = 0; i < count; i++) { - - /* Mark first execution after reloading the configuration */ - res[i].first_run = true; - - /* Init timer defaults for this resource */ - mtcTimer_init ( timers[i] ) ; - timers[i].hostname = "localhost" ; - timers[i].service = res[i].thinpool_name ; - timers[i].ring = true; // set it to true for the initial run - - } -} - -/******************************************************************************* - * - * Name : _build_entity_instance_id - * - * Purpose : Build the entity instance id needed by our alarm - * - * *****************************************************************************/ -void _build_entity_instance_id(thinmeta_resource_config_type * ptr, char * entity) { - if (!entity) { - elog("%s/%s pool alarm failed to create entity instance id, 'entity' is NULL!", - ptr->vg_name, ptr->thinpool_name); - return; - } - rmon_ctrl_type * _rmon_ctrl_ptr; - _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); - - snprintf(entity, sizeof(alarmData.entity_instance_id), - "%s.lvmthinpool=%s/%s", _rmon_ctrl_ptr->my_hostname, ptr->vg_name, ptr->thinpool_name); -} - -/******************************************************************************* - * - * Name : _set_thinmeta_alarm - * - * Purpose : Set or clears the threshold alarm - * - * *****************************************************************************/ -void _set_thinmeta_alarm( thinmeta_resource_config_type * ptr) -{ - strcpy(alarmData.uuid, ""); - strcpy(alarmData.entity_type_id ,"system.host"); - _build_entity_instance_id(ptr, alarmData.entity_instance_id); - alarmData.alarm_state = FM_ALARM_STATE_SET; - alarmData.alarm_type = FM_ALARM_OPERATIONAL; - alarmData.probable_cause = FM_ALARM_STORAGE_PROBLEM; - if ( ptr->autoextend_on ) { - snprintf(alarmData.reason_text , sizeof(alarmData.reason_text), - "Metadata usage for LVM thin pool %s/%s " - "exceeded threshold and automatic extension failed; " - "threshold: %u%%, actual: %.2f%%.", - ptr->vg_name, ptr->thinpool_name, - ptr->critical_threshold, ptr->resource_value); - snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), - "Increase Storage Space Allotment for Cinder on the 'lvm' backend. " - "Consult System Administration Manual for more details. " - "If problem persists, contact next level of support."); - } - else { - snprintf(alarmData.reason_text , sizeof(alarmData.reason_text), - "Metadata usage for LVM thin pool %s/%s exceeded threshold; " - "threshold: %u%%, actual: %.2f%%.", - ptr->vg_name, ptr->thinpool_name, ptr->critical_threshold, ptr->resource_value); - snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), - "Extend the metadata LV with 'lvextend --poolmetadatasize " - "+M %s/%s'. " - "Consult System Administration Manual for more details. " - "If problem persists, contact next level of support.", - ptr->vg_name, ptr->thinpool_name); - } - alarmData.timestamp = 0; - alarmData.service_affecting = FM_FALSE; - alarmData.suppression = FM_TRUE; - alarmData.severity = FM_ALARM_SEVERITY_CRITICAL; - strcpy(alarmData.alarm_id, THINMETA_ALARM_ID); - - dlog("%s/%s pool exceeding usage threshold, raising alarm\n", ptr->vg_name, ptr->thinpool_name); - int ret = rmon_fm_set(&alarmData, NULL) == FM_ERR_OK; - if (ret == FM_ERR_OK || ret == FM_ERR_ALARM_EXISTS) { - if (!ptr->alarm_raised) { - // log only once to avoid filling logs - ilog("%s/%s pool exceeding usage threshold, alarm raised", ptr->vg_name, ptr->thinpool_name); - ptr->alarm_raised = true; - } - } - else { - elog("Creation of alarm %s for entity instance id: %s failed. Error: %d \n", - alarmData.alarm_id, alarmData.entity_instance_id, ret); - ptr->alarm_raised = false; - } -} - -/***************************************************************************** - * - * Name : _clear_thinmeta_alarm - * - * Purpose : Clear the alarm of the resource passed in - * - *****************************************************************************/ -void _clear_thinmeta_alarm ( thinmeta_resource_config_type * ptr ) -{ - dlog ("%s/%s below threshold, clearing alarm\n", ptr->vg_name, ptr->thinpool_name); - AlarmFilter alarmFilter; - - _build_entity_instance_id (ptr, alarmData.entity_instance_id); - - snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, THINMETA_ALARM_ID); - snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, alarmData.entity_instance_id); - - int ret = rmon_fm_clear(&alarmFilter); - if (ret == FM_ERR_OK) { - ilog ("Cleared stale alarm %s for entity instance id: %s", - alarmFilter.alarm_id, alarmFilter.entity_instance_id); - ptr->alarm_raised = false; - } - else if (ret == FM_ERR_ENTITY_NOT_FOUND) { - if (!ptr->first_run) { - wlog ("Alarm %s for entity instance id: %s was not found", - alarmFilter.alarm_id, alarmFilter.entity_instance_id); - } - ptr->alarm_raised = false; - } - else { - elog ("Failed to clear stale alarm %s for entity instance id: %s error: %d", - alarmFilter.alarm_id, alarmFilter.entity_instance_id, ret); - ptr->alarm_raised = true; - } -} - -/***************************************************************************** - * - * Name : is_pool_ready - * - * Purpose : Check if an LVM Thin Pool is configured - * Return : PASS/FAIL - * - *****************************************************************************/ -bool is_pool_ready(thinmeta_resource_config_type * ptr) { - char result[BUFFER_SIZE]; - int rc = PASS; - char cmd[BUFFER_SIZE]; - snprintf(cmd, sizeof(cmd), "timeout 2 lvs --noheadings -o vg_name,lv_name --separator / %s/%s", - ptr->vg_name, ptr->thinpool_name); - rc = execute_pipe_cmd(cmd, result, sizeof(result)); - if (rc == 5 || rc == 1) { // ECMD_FAILED or ECMD_PROCESSED - // pool or VG was not found or not ready - return false; - } - else if (rc) { - // unexpected error - elog("%s/%s pool config query failed", ptr->vg_name, ptr->thinpool_name); - wlog("...cmd: '%s' exit status: %i result: '%s'", cmd, rc, result); - return false; - } - return true; -} - -/***************************************************************************** - * - * Name : calculate_metadata_usage - * - * Purpose : Obtain the percentage of used metadata space for a thin pool - * in thin provisioning. - * Return : PASS/FAIL - * - *****************************************************************************/ -int calculate_metadata_usage(thinmeta_resource_config_type * ptr) { - char result[BUFFER_SIZE]; - int rc = PASS; - char meta_usage_cmd[BUFFER_SIZE]; - - snprintf(meta_usage_cmd, sizeof(meta_usage_cmd), - "set -o pipefail; timeout 2 lvs -o metadata_percent --noheadings %s/%s | tr -d ' '", - ptr->vg_name, ptr->thinpool_name); - rc = execute_pipe_cmd(meta_usage_cmd, result, sizeof(result)); - if (rc == 1) { // ECMD_PROCESSED - // sometimes lvs command fail to process, not critical just retry in this case - dlog("%s/%s pool metadata usage query failed\n", ptr->vg_name, ptr->thinpool_name); - dlog("...cmd: '%s' exit status: %i result: '%s'\n", meta_usage_cmd, rc, result); - rc = execute_pipe_cmd(meta_usage_cmd, result, sizeof(result)); - } - if (rc != PASS) { - elog("%s/%s pool metadata usage query failed", ptr->vg_name, ptr->thinpool_name); - wlog("...cmd: '%s' exit status: %i result: '%s'", meta_usage_cmd, rc, result); - return (FAIL); - } - ptr->resource_value = atof(result); - if ( log_value ( ptr->resource_value, - ptr->resource_prev, - DEFAULT_LOG_VALUE_STEP ) ) - { - plog("%s/%s pool metadata usage is: %.2f%%\n", - ptr->vg_name, ptr->thinpool_name, ptr->resource_value); - } - return rc; -} - -/***************************************************************************** - * - * Name : extend_thinpool_metadata - * - * Purpose : Extend the Logical Volume used by LVM Thin Pool metadata - * Return : PASS/FAIL - * - *****************************************************************************/ -int extend_thinpool_metadata(thinmeta_resource_config_type * ptr) { - char result[THINMETA_RESULT_BUFFER_SIZE]; - int rc = PASS; - char cmd[BUFFER_SIZE]; - - dlog(">>> ptr->autoextend_percent: %i", ptr->autoextend_percent); - dlog("%s/%s pool, extending metadata by %i%s\n", ptr->vg_name, ptr->thinpool_name, - ptr->autoextend_by, ptr->autoextend_percent? "%": "MiB"); - if (ptr->autoextend_percent) { - char meta_lv_name[BUFFER_SIZE]; - /* Get metadata LV name - * 'lvextend --poolmetadatasize' parameter is only allowed in MiB not percents. - * For percent we need to rely on 'lvextend -l...%LV', but we first have to get - * the real name of the metadata LV */ - snprintf(cmd, sizeof(cmd), - "set -o pipefail; timeout 2 lvs %s/%s -o metadata_lv --noheadings | " - "tr -d '[] '", - ptr->vg_name, ptr->thinpool_name); - rc = execute_pipe_cmd(cmd, meta_lv_name, sizeof(meta_lv_name)); - if (rc != PASS) { - elog("%s/%s pool metadata name query failed. Aborting auto extend.", - ptr->vg_name, ptr->thinpool_name); - return (FAIL); - } - dlog("%s/%s pool metadata LV name is: %s\n", - ptr->vg_name, ptr->thinpool_name, meta_lv_name); - /* Extend metadata cmd*/ - snprintf(cmd, sizeof(cmd), - "timeout 10 lvextend -l +%u%%LV %s/%s", - ptr->autoextend_by, ptr->vg_name, meta_lv_name); - } - else { - /* Extend metadata cmd*/ - snprintf(cmd, sizeof(cmd), - "timeout 10 lvextend --poolmetadatasize +%uM %s/%s", - ptr->autoextend_by, ptr->vg_name, ptr->thinpool_name); - } - rc = execute_pipe_cmd(cmd, result, sizeof(result)); - if (rc != PASS) { - dlog("%s/%s pool metadata size extension failed\n", ptr->vg_name, ptr->thinpool_name); - dlog("...cmd: '%s' exit status: %i result: '%s'\n", cmd, rc, result); - return (FAIL); - } - return rc; -} - -/***************************************************************************** - * - * Name : thinmeta_handler - * - * Purpose : Handle the metadata usage and raise alarms through the FM API - * - *****************************************************************************/ -int thinmeta_handler( thinmeta_resource_config_type * ptr ) { - if (!ptr) { - elog ("Function called with NULL pointer!"); - return (PASS); - } - switch ( ptr->stage ) { - case RMON_STAGE__INIT: - { - /* Check if pool is ready */ - dlog("%s/%s pool config query", ptr->vg_name, ptr->thinpool_name); - if (!is_pool_ready(ptr)) { - ilog("%s/%s pool not ready, monitoring will be resumed when ready", - ptr->vg_name, ptr->thinpool_name); - ptr->stage = RMON_STAGE__MONITOR_WAIT; - } - else { - dlog("%s/%s pool ready", ptr->vg_name, ptr->thinpool_name); - ptr->stage = RMON_STAGE__MONITOR; - return (RETRY); // execute next stage immediately - } - break; - } - case RMON_STAGE__MONITOR_WAIT: - { - /* Waiting for pool to be ready*/ - if (is_pool_ready(ptr)) { - ilog("%s/%s pool ready, starting monitoring", - ptr->vg_name, ptr->thinpool_name); - ptr->stage = RMON_STAGE__MONITOR; - return (RETRY); // execute next stage immediately - } - break; - } - case RMON_STAGE__MONITOR: - { - dlog("%s/%s pool metadata usage monitoring", ptr->vg_name, ptr->thinpool_name); - /* calculate usage. The first time we calculate thinpool meta - * usage is to get the baseline resource value, if it exceeds - * the critical threshold and if the resource configuration - * allows us to autoextend thinpools then we do an extend - * operation and then check again if our thinpool usage has - * fallen below the critical watermark. */ - if(calculate_metadata_usage(ptr) == FAIL) { - ptr->stage = RMON_STAGE__INIT; - return (RETRY); // execute next stage immediately - break; - } - - /* act on thresholds */ - if((ptr->alarm_raised || ptr->first_run) && - ptr->resource_value < ptr->critical_threshold) { - // clear alarm - _clear_thinmeta_alarm(ptr); - } - else if(ptr->resource_value >= ptr->critical_threshold) { - if (ptr->autoextend_on) { - // Extend metadata - // Retry at each pass (failures are fast) till successful, in case - // our VG is extended on the fly and we suddenly get enough space. - // Log operation and error only once to avoid filling log file. - if(!ptr->alarm_raised) { - ilog("%s/%s pool metadata will be extended by: %i%s", - ptr->vg_name, ptr->thinpool_name, - ptr->autoextend_by, ptr->autoextend_percent? "%": "MiB"); - } - if(extend_thinpool_metadata(ptr) == PASS) { - // after extension recalculate metadata usage - if(calculate_metadata_usage(ptr) == FAIL) { - // this was successful < 1s ago, should not happen! - elog("%s/%s pool second metadata usage calculation failed!", - ptr->vg_name, ptr->thinpool_name); - } - } - else { - if(!ptr->alarm_raised) { - elog("%s/%s pool metadata extension failed ", - ptr->vg_name, ptr->thinpool_name); - } - } - } - if ((ptr->resource_value >= ptr->critical_threshold) && // resource_value may change - ptr->alarm_on) { - // raise alarm (if autoextend is disabled or failed) - _set_thinmeta_alarm(ptr); - } - else if (ptr->alarm_on && (ptr->alarm_raised || ptr->first_run)) { - // condition also needed if alarm existed prior to rmon startup - _clear_thinmeta_alarm(ptr); - } - } - /* Mark first run as complete */ - ptr->first_run = false; - break; - } - default: - { - slog ("%s/%s Invalid stage (%d)\n", ptr->vg_name, ptr->thinpool_name, ptr->stage); - /* Default to init for invalid case */ - ptr->stage = RMON_STAGE__INIT; - return (RETRY); // execute next stage immediately - } - } - return (PASS); -} diff --git a/mtce/src/scripts/dmemchk.sh b/mtce/src/scripts/dmemchk.sh index 1f3e8fbe..d9e00c28 100755 --- a/mtce/src/scripts/dmemchk.sh +++ b/mtce/src/scripts/dmemchk.sh @@ -89,8 +89,8 @@ function helpMessage { echo "" echo "sudo memchk -t 60 --C mtcClient mtcAgent ... Check PSS and RSS values of the processes belonging to mtcClient" echo " and mtcAgent every 60 seconds (1 minute)" - echo "sudo memchk -t 3600 --C pmond rmond hwmond ... Check PSS and RSS values of pmond, rmond and hwmond every 3600s (1h)" - echo "sudo memchl --C pmond rmond hwmond ... Check PSS and RSS values of commands using default period of 3600s (1h)" + echo "sudo memchk -t 3600 --C pmond hwmond ... Check PSS and RSS values of pmond, and hwmond every 3600s (1h)" + echo "sudo memchl --C pmond hwmond ... Check PSS and RSS values of commands using default period of 3600s (1h)" echo "--------------------------------------------------------------------------------------" exit 0 } diff --git a/mtce/src/scripts/mtc.conf b/mtce/src/scripts/mtc.conf index 0060df60..6673ac8a 100644 --- a/mtce/src/scripts/mtc.conf +++ b/mtce/src/scripts/mtc.conf @@ -74,7 +74,6 @@ hbs_client_mgmnt_port = 2106 ; Management Interface Heartbeat Pulse Request Rx P hbs_client_infra_port = 2116 ; Infrastructure Interface Heartbeat Pulse Request Rx Port hwmon_cmd_port = 2114 ; hwmond Command Rx Port Number pmon_pulse_port = 2109 ; Process Monitor I'm Alive pulse Port Rx Port -rmon_event_port = 2302 ; Resource Monitor Event Port Rx Port sched_delay_threshold = 300 ; scheduler delay time in msecs that will trigger ; a scheduler history dump daemon_log_port = 2121 ; daemon logger port