Collectd+InfluxDb-RMON Replacement(ALL METRICS) P1
This is the primary update that introduces collectd monitoring and sample storage into the influxdb database. Two new packages are introduced by this update - collectd-extensions package which includes - newly developed collectd platform memory, cpu and filesystem plugins - note that the example, ntpq and interface plugins are not complete and are not enabled by this update. - pmond process monitoring / recovery support for collectd - updated service file for pidfile management ; needed by pmond - influxdb-extensions package which includes - pmond process monitoring / recovery support for influxdb - updated service file for pidfile management ; needed by pmond - log rotate support for influxdb Change-Id: I06511fecb781781ed5491c926ad4b1273a1bc23b Signed-off-by: Jack Ding <jack.ding@windriver.com>
This commit is contained in:
parent
7e0cf4b205
commit
892489acd7
@ -101,4 +101,6 @@ extended/memcached
|
||||
devtools/puppet-modules/openstack/puppet-memcached-3.0.2
|
||||
devtools/puppet-modules/openstack/puppet-horizon-9.5.0
|
||||
devtools/puppet-modules/openstack/puppet-swift-11.3.0
|
||||
monitoring/collectd-extensions
|
||||
monitoring/influxdb-extensions
|
||||
kubernetes/kubernetes
|
||||
|
10
monitoring/collectd-extensions/PKG-INFO
Normal file
10
monitoring/collectd-extensions/PKG-INFO
Normal file
@ -0,0 +1,10 @@
|
||||
Metadata-Version: 1.1
|
||||
Name: collectd-extensions
|
||||
Version: 1.0
|
||||
Summary: collectd-extensions
|
||||
Home-page:
|
||||
Author: Windriver
|
||||
Author-email: info@windriver.com
|
||||
License: windriver
|
||||
Description: Titanium Cloud collectd extensions
|
||||
Platform: UNKNOWN
|
19
monitoring/collectd-extensions/centos/build_srpm.data
Normal file
19
monitoring/collectd-extensions/centos/build_srpm.data
Normal file
@ -0,0 +1,19 @@
|
||||
SRC_DIR="$PKG_BASE"
|
||||
|
||||
COPY_LIST="$PKG_BASE/src/LICENSE \
|
||||
$PKG_BASE/src/collectd.conf.pmon \
|
||||
$PKG_BASE/src/collectd.service \
|
||||
$PKG_BASE/src/fm_notifier.py \
|
||||
$PKG_BASE/src/mtce_notifier.py \
|
||||
$PKG_BASE/src/python_plugins.conf \
|
||||
$PKG_BASE/src/cpu.py \
|
||||
$PKG_BASE/src/cpu.conf \
|
||||
$PKG_BASE/src/memory.py \
|
||||
$PKG_BASE/src/memory.conf \
|
||||
$PKG_BASE/src/df.conf \
|
||||
$PKG_BASE/src/ntpq.py \
|
||||
$PKG_BASE/src/ntpq.conf \
|
||||
$PKG_BASE/src/example.py \
|
||||
$PKG_BASE/src/example.conf"
|
||||
|
||||
TIS_PATCH_VER=1
|
@ -0,0 +1,90 @@
|
||||
Summary: Titanium Server collectd Package
|
||||
Name: collectd-extensions
|
||||
Version: 1.0
|
||||
Release: 0%{?_tis_dist}.%{tis_patch_ver}
|
||||
License: windriver
|
||||
Group: base
|
||||
Packager: Wind River <info@windriver.com>
|
||||
URL: unknown
|
||||
|
||||
# create the files tarball
|
||||
Source0: %{name}-%{version}.tar.gz
|
||||
Source1: collectd.service
|
||||
Source2: collectd.conf.pmon
|
||||
|
||||
# collectd python plugin files - notifiers
|
||||
Source3: fm_notifier.py
|
||||
Source4: mtce_notifier.py
|
||||
|
||||
# collectd python plugin files - resource plugins
|
||||
Source11: cpu.py
|
||||
Source12: memory.py
|
||||
Source14: example.py
|
||||
Source15: ntpq.py
|
||||
|
||||
# collectd plugin conf files into /etc/collectd.d
|
||||
Source100: python_plugins.conf
|
||||
Source101: cpu.conf
|
||||
Source102: memory.conf
|
||||
Source103: df.conf
|
||||
Source104: example.conf
|
||||
Source105: ntpq.conf
|
||||
|
||||
BuildRequires: systemd-devel
|
||||
|
||||
Requires: systemd
|
||||
Requires: collectd
|
||||
Requires: /bin/systemctl
|
||||
|
||||
%description
|
||||
Titanium Cloud collectd extensions
|
||||
|
||||
%define debug_package %{nil}
|
||||
%define local_unit_dir %{_sysconfdir}/systemd/system
|
||||
%define local_plugin_dir %{_sysconfdir}/collectd.d
|
||||
%define local_python_extensions_dir /opt/collectd/extensions/python
|
||||
%define local_config_extensions_dir /opt/collectd/extensions/config
|
||||
|
||||
%prep
|
||||
%setup
|
||||
|
||||
%build
|
||||
|
||||
%install
|
||||
install -m 755 -d %{buildroot}%{_sysconfdir}
|
||||
install -m 755 -d %{buildroot}%{local_unit_dir}
|
||||
install -m 755 -d %{buildroot}%{local_plugin_dir}
|
||||
install -m 755 -d %{buildroot}%{local_config_extensions_dir}
|
||||
install -m 755 -d %{buildroot}%{local_python_extensions_dir}
|
||||
|
||||
# support files ; service and pmon conf
|
||||
install -m 644 %{SOURCE1} %{buildroot}%{local_unit_dir}
|
||||
install -m 600 %{SOURCE2} %{buildroot}%{local_config_extensions_dir}
|
||||
|
||||
# collectd python plugin files - notifiers
|
||||
install -m 700 %{SOURCE3} %{buildroot}%{local_python_extensions_dir}
|
||||
install -m 700 %{SOURCE4} %{buildroot}%{local_python_extensions_dir}
|
||||
|
||||
# collectd python plugin files - resource plugins
|
||||
install -m 700 %{SOURCE11} %{buildroot}%{local_python_extensions_dir}
|
||||
install -m 700 %{SOURCE12} %{buildroot}%{local_python_extensions_dir}
|
||||
install -m 700 %{SOURCE14} %{buildroot}%{local_python_extensions_dir}
|
||||
install -m 700 %{SOURCE15} %{buildroot}%{local_python_extensions_dir}
|
||||
|
||||
# collectd plugin conf files into /etc/collectd.d
|
||||
install -m 600 %{SOURCE100} %{buildroot}%{local_plugin_dir}
|
||||
install -m 600 %{SOURCE101} %{buildroot}%{local_plugin_dir}
|
||||
install -m 600 %{SOURCE102} %{buildroot}%{local_plugin_dir}
|
||||
install -m 600 %{SOURCE103} %{buildroot}%{local_plugin_dir}
|
||||
install -m 600 %{SOURCE104} %{buildroot}%{local_plugin_dir}
|
||||
install -m 600 %{SOURCE105} %{buildroot}%{local_plugin_dir}
|
||||
|
||||
%clean
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
|
||||
%files
|
||||
%defattr(-,root,root,-)
|
||||
%config(noreplace) %{local_unit_dir}/collectd.service
|
||||
%{local_plugin_dir}/*
|
||||
%{local_config_extensions_dir}/*
|
||||
%{local_python_extensions_dir}/*
|
202
monitoring/collectd-extensions/src/LICENSE
Normal file
202
monitoring/collectd-extensions/src/LICENSE
Normal file
@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
18
monitoring/collectd-extensions/src/collectd.conf.pmon
Normal file
18
monitoring/collectd-extensions/src/collectd.conf.pmon
Normal file
@ -0,0 +1,18 @@
|
||||
[process]
|
||||
process = collectd
|
||||
service = collectd
|
||||
style = lsb
|
||||
pidfile = /var/run/collectd.pid
|
||||
severity = major ; minor, major, critical
|
||||
restarts = 3 ; restart retries before error assertion
|
||||
interval = 5 ; number of seconds to wait between restarts
|
||||
debounce = 10 ; number of seconds that a process needs to remain
|
||||
; running before degrade is removed and retry count
|
||||
; is cleared.
|
||||
startuptime = 3 ; Seconds to wait after process start before starting the debounce monitor
|
||||
mode = passive ; Monitoring mode: passive (default) or active
|
||||
; passive: process death monitoring (default: always)
|
||||
; active : heartbeat monitoring, i.e. request / response messaging
|
||||
; ignore : do not monitor or stop monitoring
|
||||
quorum = 0 ; process is in the host watchdog quorum
|
||||
|
14
monitoring/collectd-extensions/src/collectd.service
Normal file
14
monitoring/collectd-extensions/src/collectd.service
Normal file
@ -0,0 +1,14 @@
|
||||
[Unit]
|
||||
Description=Collectd statistics daemon and extension services
|
||||
Documentation=man:collectd(1) man:collectd.conf(5)
|
||||
After=local-fs.target network-online.target
|
||||
Requires=local-fs.target network-online.target
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
ExecStart=/usr/sbin/collectd
|
||||
ExecStartPost=/bin/bash -c 'echo $MAINPID > /var/run/collectd.pid'
|
||||
ExecStopPost=/bin/rm -f /var/run/collectd.pid
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
22
monitoring/collectd-extensions/src/cpu.conf
Normal file
22
monitoring/collectd-extensions/src/cpu.conf
Normal file
@ -0,0 +1,22 @@
|
||||
# For stock plugin only
|
||||
# Uncomment to compare stock to TiS plugin readings
|
||||
# ---------------------
|
||||
# <Plugin cpu>
|
||||
# ReportByCpu false
|
||||
# ReportByState false
|
||||
# ValuesPercentage true
|
||||
# </Plugin>
|
||||
|
||||
<Plugin "threshold">
|
||||
<Plugin "cpu">
|
||||
<Type "percent">
|
||||
Instance "used"
|
||||
Persist true
|
||||
PersistOK true
|
||||
WarningMax 90.00
|
||||
FailureMax 95.00
|
||||
Hits 2
|
||||
Invert false
|
||||
</Type>
|
||||
</Plugin>
|
||||
</Plugin>
|
253
monitoring/collectd-extensions/src/cpu.py
Executable file
253
monitoring/collectd-extensions/src/cpu.py
Executable file
@ -0,0 +1,253 @@
|
||||
#
|
||||
# Copyright (c) 2018 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
############################################################################
|
||||
#
|
||||
# This file is the collectd 'Platform CPU Usage' Monitor.
|
||||
#
|
||||
# The Platform CPU Usage is calculated as an averaged percentage of
|
||||
# platform core usage since the previous sample.
|
||||
#
|
||||
# Init Function:
|
||||
# - if compute_reserved.conf exists then query/store PLATFORM_CPU_LIST
|
||||
#
|
||||
############################################################################
|
||||
import os
|
||||
import time
|
||||
import collectd
|
||||
|
||||
debug = False
|
||||
|
||||
PASS = 0
|
||||
FAIL = 1
|
||||
|
||||
PATH = '/proc/cpuinfo'
|
||||
COMPUTE_RESERVED_CONF = '/etc/nova/compute_reserved.conf'
|
||||
|
||||
PLUGIN = 'platform cpu usage plugin'
|
||||
|
||||
|
||||
# CPU Control class
class CPU:
    """Persistent state for the platform cpu usage calculation.

    One module-level instance carries samples between collectd read
    callbacks.
    """

    hostname = ""           # hostname for sample notification message
    usage = float(0.0)      # averaged cpu usage percentage (last sample)

    processors = int(0)     # number of processors for all cpus case
    cpu_list = []           # list of CPUs to calculate combined usage for
    cpu_time = []           # schedstat time for each CPU (current pass)
    cpu_time_last = []      # schedstat time for each CPU (previous pass)
    time_last = float(0.0)  # wall clock time of the previous sample

    def log_error(self, err_str):
        """Print an error log with plugin name prefixing the log."""
        collectd.error("%s %s" % (PLUGIN, err_str))


# Instantiate the class
c = CPU()
|
||||
|
||||
|
||||
# The collectd configuration interface.
# collectd requires this callback to be defined ; there is nothing to
# configure for this plugin.
def config_func(config):
    """Collectd config callback ; logs only, nothing to configure."""
    collectd.info('%s config function' % PLUGIN)
|
||||
|
||||
|
||||
# Get the platform cpu list and number of cpus reported by /proc/cpuinfo
def init_func():
    """Plugin init ; learn which cpus to monitor.

    On compute nodes the platform cpu list is read from
    compute_reserved.conf (PLATFORM_CPU_LIST) ; mixed '#,#' and '#-#'
    forms are supported. When no platform list is found, every
    processor reported by /proc/cpuinfo is monitored.
    """
    # get current hostname
    c.hostname = os.uname()[1]

    collectd.info('%s init function for %s' % (PLUGIN, c.hostname))

    raw_list = ""
    if os.path.exists(COMPUTE_RESERVED_CONF):
        with open(COMPUTE_RESERVED_CONF, 'r') as infile:
            for line in infile:
                if 'PLATFORM_CPU_LIST' in line:
                    val = line.split("=")
                    # strip the newline and the surrounding quotes
                    raw_list = val[1].strip('\n')[1:-1].strip('"')
                    break

    if raw_list:

        # Convert the cpu list fetched from the compute
        # reserved file into an integer list.
        # Handle mix of number list #,# and number range #-#
        split_list = raw_list.split(',')
        if debug:
            collectd.info('%s split list: %s' % (PLUGIN, split_list))
        for cpu in split_list:
            if cpu.find('-') == -1:
                # add individual cpu # with assumed ',' delimiter
                c.cpu_list.append(int(cpu))
            else:
                # add all in range #-# (inclusive)
                cpu_range = cpu.split('-')
                if len(cpu_range) == 2:
                    first = int(cpu_range[0])
                    last = int(cpu_range[1]) + 1
                    for i in range(first, last):
                        c.cpu_list.append(i)

    # with the full CPU list in hand we can now just read their samples
    if debug:
        collectd.info('%s full cpu list: %s' %
                      (PLUGIN, c.cpu_list))

    try:
        # fix: use the PATH constant instead of a duplicated literal and
        # let the context manager close the file on every path (the
        # original only closed it on the success path)
        with open(PATH) as f:

            # an empty platform list means monitor every cpu found
            _want_all_cpus = (len(c.cpu_list) == 0)

            c.processors = 0
            for line in f:
                name_value = [s.strip() for s in line.split(':', 1)]
                if len(name_value) != 2:
                    continue

                # match 'processor' / 'Processor' lines
                name = name_value[0]
                if 'rocessor' in name:
                    if _want_all_cpus is True:
                        c.cpu_list.append(int(c.processors))
                    c.processors += 1

            collectd.info('%s has found %d cpus total' %
                          (PLUGIN, c.processors))
            collectd.info('%s monitoring %d cpus %s' %
                          (PLUGIN, len(c.cpu_list), c.cpu_list))
    except EnvironmentError as e:
        # fix: collectd.error() takes a single message string ; the
        # stray UserWarning second argument was a warnings.warn()
        # leftover and raised TypeError on this path
        c.log_error(str(e))
|
||||
|
||||
|
||||
# Calculate the CPU usage sample
def read_func():
    """Compute and dispatch the averaged platform cpu usage sample.

    Reads /proc/schedstat (version 15 only). The first pass records
    baseline per-cpu schedstat times and returns ; later passes compute
    the percent occupancy of each monitored cpu since the previous
    pass, average them and dispatch the result as cpu/percent/used.
    """
    try:
        f = open('/proc/schedstat')
    except EnvironmentError as e:
        c.log_error('file open failed ; ' + str(e))
        return FAIL

    # fix: context manager closes the file on every path ; the original
    # leaked the descriptor on the early returns below
    with f:
        # schedstat time for each CPU
        c.cpu_time = []

        # Loop over each line ...
        # get the output version ; only 15 is supported
        # get the cpu time from each line starting with 'cpux ....'
        for line in f:

            # break each line into name/value pairs ;
            # fix: skip blank or malformed lines instead of raising
            # ValueError on the unpack
            line_split = [s.strip() for s in line.split(' ', 1)]
            if len(line_split) != 2:
                continue
            name, value = line_split

            # get the output version.
            if 'ersion' in name:
                try:
                    c.version = int(value)
                except ValueError as e:
                    c.log_error('got invalid schedstat version ; ' +
                                str(e))
                    # TODO: Consider raising an alarm here.
                    return FAIL

            # only version 15 is supported ;
            # fix: getattr guards a schedstat file with no version line
            # (c.version would otherwise be unset -> AttributeError)
            if getattr(c, 'version', None) != 15:
                collectd.error('%s unsupported schedstat version [%d]' %
                               (PLUGIN, getattr(c, 'version', 0)))
                return 0

            if 'cpu' in name:
                # get the cpu number for each line
                if int(name.replace('cpu', '')) in c.cpu_list:
                    _in_list = True
                else:
                    _in_list = False

                # get cpu time for each cpu that is valid
                if len(c.cpu_list) == 0 or _in_list is True:
                    value_split = value.split(' ')
                    # field 7 is the cumulative running time ;
                    # fix: guard a short line before indexing it
                    if len(value_split) > 6:
                        c.cpu_time.append(float(value_split[6]))
                        if debug:
                            collectd.info(
                                '%s %s schedstat is %s [%s]' %
                                (PLUGIN, name, value_split[6], value))

    # Now that we have the cpu time recorded for each monitored cpu
    _cpu_count = int(0)
    if len(c.cpu_time_last) == 0:
        # First pass ; just record the baseline times.
        c.time_last = time.time()
        if c.cpu_list:
            # This is a compute node.
            # Do not include vswitch or pinned cpus in calculation.
            c.cpu_time_last = [float(t) for t in c.cpu_time]
        if debug:
            collectd.info('%s cpu time ; first pass ; %s' %
                          (PLUGIN, c.cpu_time))
        return PASS

    _time_this = time.time()
    _time_delta = _time_this - c.time_last
    c.total_avg_cpu = 0
    cpu_occupancy = []
    if debug:
        collectd.info('%s cpu time ; this pass ; %s -> %s' %
                      (PLUGIN, c.cpu_time_last, c.cpu_time))

    if not c.cpu_list:
        collectd.info('%s no cpus to monitor' % PLUGIN)
        return 0

    # fix: a zero interval cannot be averaged ; retry on the next audit
    if _time_delta <= 0:
        return PASS

    # This is a compute node.
    # Do not include vswitch or pinned cpus in calculation.
    for cpu in c.cpu_list:
        if cpu >= c.processors:
            c.log_error(' got out of range cpu number')
        else:
            _delta = (c.cpu_time[_cpu_count] - c.cpu_time_last[_cpu_count])
            _delta = _delta / 1000000 / _time_delta
            cpu_occupancy.append(float((100 * _delta) / 1000))
            c.total_avg_cpu += cpu_occupancy[_cpu_count]
            if debug:
                collectd.info('%s cpu %d - count:%d [%s]' %
                              (PLUGIN, cpu, _cpu_count, cpu_occupancy))
            _cpu_count += 1

    # fix: avoid ZeroDivisionError when every cpu was out of range
    if _cpu_count == 0:
        c.log_error('no cpu samples to average')
        return FAIL

    c.usage = c.total_avg_cpu / _cpu_count
    if debug:
        collectd.info('%s reports %.2f %% usage (averaged)' %
                      (PLUGIN, c.usage))

    # Prepare for next audit ; move now to last
    c.cpu_time_last = c.cpu_time
    c.time_last = _time_this

    # Dispatch usage value to collectd
    val = collectd.Values(host=c.hostname)
    val.plugin = 'cpu'
    val.type = 'percent'
    val.type_instance = 'used'
    val.dispatch(values=[c.usage])

    return 0
|
||||
|
||||
|
||||
# Hook the plugin callbacks into collectd.
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)
|
38
monitoring/collectd-extensions/src/df.conf
Normal file
38
monitoring/collectd-extensions/src/df.conf
Normal file
@ -0,0 +1,38 @@
|
||||
<Plugin df>
|
||||
ValuesPercentage true
|
||||
IgnoreSelected false
|
||||
ReportByDevice false
|
||||
ReportInodes false
|
||||
ValuesAbsolute false
|
||||
MountPoint "/"
|
||||
MountPoint "/tmp"
|
||||
MountPoint "/dev"
|
||||
MountPoint "/dev/shm"
|
||||
MountPoint "/var/run"
|
||||
MountPoint "/var/log"
|
||||
MountPoint "/var/lock"
|
||||
MountPoint "/boot"
|
||||
MountPoint "/scratch"
|
||||
MountPoint "/opt/cgcs"
|
||||
MountPoint "/opt/platform"
|
||||
MountPoint "/opt/extension"
|
||||
MountPoint "/etc/nova/instances"
|
||||
MountPoint "/var/lib/rabbitmq"
|
||||
MountPoint "/var/lib/postgresql"
|
||||
MountPoint "/var/lib/ceph/mon"
|
||||
MountPoint "/opt/backups"
|
||||
</Plugin>
|
||||
|
||||
<Plugin "threshold">
|
||||
<Plugin "df">
|
||||
<Type "percent_bytes">
|
||||
Instance "used"
|
||||
WarningMax 80.00
|
||||
FailureMax 90.00
|
||||
Persist true
|
||||
PersistOK true
|
||||
Hits 2
|
||||
Invert false
|
||||
</Type>
|
||||
</Plugin>
|
||||
</Plugin>
|
13
monitoring/collectd-extensions/src/example.conf
Normal file
13
monitoring/collectd-extensions/src/example.conf
Normal file
@ -0,0 +1,13 @@
|
||||
<Plugin "threshold">
|
||||
<Plugin "example">
|
||||
<Type "percent">
|
||||
Instance "used"
|
||||
Persist true
|
||||
PersistOK true
|
||||
WarningMax 51.00
|
||||
FailureMax 75.00
|
||||
Hits 1
|
||||
Invert false
|
||||
</Type>
|
||||
</Plugin>
|
||||
</Plugin>
|
75
monitoring/collectd-extensions/src/example.py
Executable file
75
monitoring/collectd-extensions/src/example.py
Executable file
@ -0,0 +1,75 @@
|
||||
#
# Copyright (c) 2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# Example collectd python plugin ; dispatches a random 'percent'
# sample on every audit interval.
#

import os
import random
import collectd

PLUGIN = 'random number plugin'


# Plugin state that persists across collectd read callbacks.
class ExampleObject:
    hostname = ""               # local hostname used for sample dispatch
    plugin_data = ['1', '100']  # [low, high] random sample range


obj = ExampleObject()


# The config function - called once on collectd process startup
def config_func(config):
    """
    Configure the plugin
    """
    # look for a 'Data' entry of the form "<low> <high>"
    for node in config.children:
        if node.key.lower() == 'data':
            obj.plugin_data = str(node.values[0]).split(' ')
            collectd.info("%s configured data '%d:%d'" %
                          (PLUGIN,
                           int(obj.plugin_data[0]),
                           int(obj.plugin_data[1])))
            return 0

    collectd.info('%s config function' % PLUGIN)
    return 0


# The init function - called once on collectd process startup
def init_func():
    """One-time plugin init ; record the local hostname."""
    obj.hostname = os.uname()[1]
    return 0


# The sample read function - called on every audit interval
def read_func():
    """Generate one random sample and dispatch it to collectd."""
    lo = int(obj.plugin_data[0])
    hi = int(obj.plugin_data[1])
    sample = random.randint(lo, hi)

    # Dispatch usage value to collectd
    val = collectd.Values(host=obj.hostname)
    val.plugin = 'example'
    val.type = 'percent'
    val.type_instance = 'used'
    val.dispatch(values=[sample])
    return 0


# register the config, init and read functions
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)
|
1191
monitoring/collectd-extensions/src/fm_notifier.py
Executable file
1191
monitoring/collectd-extensions/src/fm_notifier.py
Executable file
File diff suppressed because it is too large
Load Diff
13
monitoring/collectd-extensions/src/interface.conf
Normal file
13
monitoring/collectd-extensions/src/interface.conf
Normal file
@ -0,0 +1,13 @@
|
||||
<Plugin "threshold">
|
||||
<Plugin "interface">
|
||||
<Type "absolute">
|
||||
Instance "state"
|
||||
Persist true
|
||||
PersistOK true
|
||||
WarningMin 50
|
||||
FailureMin 0
|
||||
# Hits 2
|
||||
Invert false
|
||||
</Type>
|
||||
</Plugin>
|
||||
</Plugin>
|
129
monitoring/collectd-extensions/src/interface.py
Executable file
129
monitoring/collectd-extensions/src/interface.py
Executable file
@ -0,0 +1,129 @@
|
||||
#
|
||||
# Copyright (c) 2018 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
############################################################################
|
||||
#
|
||||
# This is the Host Interface Monitor plugin for Collectd.
|
||||
#
|
||||
# Only mgmnt , infra and oam interfaces are supported with the following
|
||||
# mapping specified in /etc/platform/platform.conf
|
||||
#
|
||||
# mgmnt - management_interface     | all hosts  | mandatory
|
||||
# infra - infrastructure_interface | any host   | optional
|
||||
# oam   - oam_interface            | controller | mandatory
|
||||
#
|
||||
# This plugin reports link state in the following way.
|
||||
#
|
||||
# The plugin init function learns interface names from platform.conf
|
||||
#
|
||||
#
|
||||
############################################################################
|
||||
import os
|
||||
import random
|
||||
import collectd
|
||||
import tsconfig.tsconfig as tsc
|
||||
|
||||
PLUGIN = 'interface plugin'

# static variables

# labels used to locate interface names in /etc/platform/platform.conf
PLATFORM_CONF_MGMNT_LABEL = "management_interface="
PLATFORM_CONF_INFRA_LABEL = "infrastructure_interface="
PLATFORM_CONF_OAM_LABEL = "oam_interface="

# monitored network names
NETWORK_MGMNT = 'mgmnt'
NETWORK_INFRA = 'infra'
NETWORK_OAM = 'oam'


class iface:
    """One monitored network interface and its optional bond slaves."""

    def __init__(self, n, m, s):
        # master interface record: network label, device name, link state
        self.master = {'network': n, 'name': m, 'state': 'down', 'slaves': s}
        # slave records ; only populated for bonded interfaces
        self.slave1 = {}
        self.slave2 = {}
        # demo link-state value as a percentage (100 = fully up)
        self.state = int(100)


# NOTE(review): this class name shadows the 'object' builtin ;
# consider renaming in a follow-up change.
class object:
    """Plugin state that persists across collectd read callbacks."""
    hostname = ''

    def __init__(self):
        # one optional iface entry per monitored network
        self.NETWORKS = {}
        self.NETWORKS[NETWORK_MGMNT] = None
        self.NETWORKS[NETWORK_INFRA] = None
        self.NETWORKS[NETWORK_OAM] = None


obj = object()
|
||||
|
||||
|
||||
# The config function - called once on collectd process startup
def config_func(config):
    """Configure the plugin (no options are supported yet)."""
    collectd.debug('%s config function' % PLUGIN)
    return 0
|
||||
|
||||
|
||||
# The init function - called once on collectd process startup
def init_func():
    """
    Initialize the plugin.

    Records the local hostname and learns the mgmnt/infra/oam master
    interface names from /etc/platform/platform.conf.
    """
    # get current hostname
    obj.hostname = os.uname()[1]

    # map each platform.conf label to the network it configures ;
    # replaces the original triplicated parse blocks.
    labels = ((PLATFORM_CONF_MGMNT_LABEL, NETWORK_MGMNT),
              (PLATFORM_CONF_INFRA_LABEL, NETWORK_INFRA),
              (PLATFORM_CONF_OAM_LABEL, NETWORK_OAM))

    # get the master interface names from /etc/platform/platform.conf
    with open(tsc.PLATFORM_CONF_FILE, 'r') as infile:
        for line in infile:
            for label, network in labels:
                # match at start-of-line so commented-out or embedded
                # occurrences of the label are not treated as config.
                if line.startswith(label):
                    name = line.split('=')[1].replace('\n', '')
                    obj.NETWORKS[network] = iface(network, name, 0)
                    collectd.info("%s monitoring %s interface : %s" %
                                  (PLUGIN, network,
                                   obj.NETWORKS[network].master['name']))
                    break

    return 0
|
||||
|
||||
|
||||
# The sample read function - called on every audit interval
def read_func():
    """Dispatch a demo link-state sample for the management interface."""
    # cycle the demo state value: 100 -> 75 -> 50 -> 25 -> 0 -> 100 ...
    mgmnt = obj.NETWORKS[NETWORK_MGMNT]
    if mgmnt.state == 0:
        mgmnt.state = 100
    else:
        mgmnt.state -= 25

    # Dispatch usage value to collectd
    sample = collectd.Values(host=obj.hostname)
    sample.plugin = 'interface'
    sample.plugin_instance = 'mgmnt'
    sample.type = 'absolute'
    sample.type_instance = 'used'
    sample.dispatch(values=[mgmnt.state])
    return 0


# register the config, init and read functions
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)
|
21
monitoring/collectd-extensions/src/memory.conf
Normal file
21
monitoring/collectd-extensions/src/memory.conf
Normal file
@ -0,0 +1,21 @@
|
||||
# For stock plugin only
|
||||
# Uncomment to compare stock to tiS plugin readings
|
||||
# ---------------------
|
||||
# <Plugin memory>
|
||||
# ValuesAbsolute false
|
||||
# ValuesPercentage true
|
||||
# </Plugin>
|
||||
|
||||
<Plugin "threshold">
|
||||
<Plugin "memory">
|
||||
<Type "percent">
|
||||
Instance "used"
|
||||
Persist true
|
||||
PersistOK true
|
||||
WarningMax 80.00
|
||||
FailureMax 90.00
|
||||
Hits 2
|
||||
Invert false
|
||||
</Type>
|
||||
</Plugin>
|
||||
</Plugin>
|
181
monitoring/collectd-extensions/src/memory.py
Executable file
181
monitoring/collectd-extensions/src/memory.py
Executable file
@ -0,0 +1,181 @@
|
||||
#
|
||||
# Copyright (c) 2018 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
############################################################################
|
||||
#
|
||||
# This file is the collectd 'Platform Memory Usage' Monitor.
|
||||
#
|
||||
# The Platform Memory Usage is calculated from /proc/meminfo fields
# sampled on each audit interval.
#
# Init Function:
#  - read /proc/sys/vm/overcommit_memory to learn the accounting mode
|
||||
#
|
||||
############################################################################
|
||||
import os
|
||||
import collectd
|
||||
|
||||
debug = False
|
||||
|
||||
# general return codes
|
||||
PASS = 0
|
||||
FAIL = 1
|
||||
|
||||
PLUGIN = 'platform memory usage'
|
||||
|
||||
|
||||
# Memory usage control class
class MEM:
    # hostname for sample notification message
    hostname = ""
    # source file queried for memory statistics
    cmd = '/proc/meminfo'
    # computed memory usage as a percentage
    value = float(0.0)

    # raw /proc/meminfo fields of interest
    memTotal_kB = 0
    memFree_kB = 0
    buffers = 0
    cached = 0
    SReclaimable = 0
    CommitLimit = 0
    Committed_AS = 0
    HugePages_Total = 0
    Hugepagesize = 0
    AnonPages = 0

    # derived values
    avail = 0
    total = 0
    # /proc/sys/vm/overcommit_memory setting (1 => strict accounting)
    strict = 0


# Instantiate the class
obj = MEM()
|
||||
|
||||
|
||||
def config_func(config):
    """
    Configure the memory usage plugin.

    Supports a 'path' option that overrides the default
    /proc/meminfo query source.
    """
    for child in config.children:
        if child.key.lower() == 'path':
            obj.cmd = str(child.values[0])
            collectd.info("%s configured query command: '%s'" %
                          (PLUGIN, obj.cmd))
            return 0

    collectd.info("%s no config command provided ; "
                  "defaulting to '%s'" %
                  (PLUGIN, obj.cmd))
|
||||
|
||||
|
||||
# Learn the hostname and the kernel memory overcommit policy
def init_func():
    """Initialize the plugin ; hostname and overcommit accounting mode."""
    # get current hostname
    obj.hostname = os.uname()[1]

    overcommit_fn = '/proc/sys/vm/overcommit_memory'
    if os.path.exists(overcommit_fn):
        with open(overcommit_fn, 'r') as infile:
            for line in infile:
                # single-value file ; first line is the policy
                obj.strict = int(line)
                break

    collectd.info("%s strict:%d" % (PLUGIN, obj.strict))
|
||||
|
||||
|
||||
# Calculate the memory usage sample
def read_func():
    """
    Sample platform memory usage and dispatch it as a percentage.

    Reads /proc/meminfo (or the configured override), derives platform
    memory usage and dispatches it to collectd as memory/percent/used.
    Returns PASS on success and FAIL on error.
    """
    meminfo = {}
    try:
        with open(obj.cmd) as fd:
            for line in fd:
                meminfo[line.split(':')[0]] = line.split(':')[1].strip()

    except EnvironmentError as e:
        # fix: original format string was "%s unable to read from %s ; str(e)"
        # which dropped the path and printed a literal 'str(e)'.
        collectd.error("%s unable to read from %s ; %s" %
                       (PLUGIN, obj.cmd, str(e)))
        return FAIL

    # remove the 'unit' (kB) suffix that might be on some of the lines
    for key in meminfo:
        value_unit = [u.strip() for u in meminfo[key].split(' ', 1)]
        if len(value_unit) == 2:
            # first element is the numeric value ; second is the unit
            meminfo[key] = float(value_unit[0])
        else:
            meminfo[key] = float(meminfo[key])

    try:
        obj.memTotal_kB = float(meminfo['MemTotal'])
        obj.memFree_kB = float(meminfo['MemFree'])
        obj.buffers = float(meminfo['Buffers'])
        obj.cached = float(meminfo['Cached'])
        obj.SReclaimable = float(meminfo['SReclaimable'])
        obj.CommitLimit = float(meminfo['CommitLimit'])
        obj.Committed_AS = float(meminfo['Committed_AS'])
        obj.HugePages_Total = float(meminfo['HugePages_Total'])
        obj.Hugepagesize = float(meminfo['Hugepagesize'])
        obj.AnonPages = float(meminfo['AnonPages'])
    except KeyError as e:
        # a malformed or truncated meminfo source ; fail this sample
        collectd.error("%s %s missing expected field ; %s" %
                       (PLUGIN, obj.cmd, str(e)))
        return FAIL

    # 'available' platform memory approximation
    obj.avail = float(obj.memFree_kB + obj.buffers +
                      obj.cached + obj.SReclaimable)
    obj.total = float(obj.avail + obj.AnonPages)

    # strict (overcommit_memory=1) accounting uses the commit ratio ;
    # otherwise report anonymous pages against usable memory.
    try:
        if obj.strict == 1:
            obj.value = float(obj.Committed_AS) / float(obj.CommitLimit)
        else:
            obj.value = float(obj.AnonPages) / float(obj.total)
    except ZeroDivisionError:
        # guard against a zero CommitLimit/total rather than crashing
        collectd.error("%s divide by zero ; CommitLimit:%f total:%f" %
                       (PLUGIN, obj.CommitLimit, obj.total))
        return FAIL

    obj.value = float(obj.value * 100)

    collectd.debug('%s reports %.2f %% usage' %
                   (PLUGIN, obj.value))

    # Dispatch usage value to collectd
    val = collectd.Values(host=obj.hostname)
    val.plugin = 'memory'
    val.type = 'percent'
    val.type_instance = 'used'
    val.dispatch(values=[obj.value])

    return PASS


collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)
|
379
monitoring/collectd-extensions/src/mtce_notifier.py
Executable file
379
monitoring/collectd-extensions/src/mtce_notifier.py
Executable file
@ -0,0 +1,379 @@
|
||||
#
|
||||
# Copyright (c) 2018 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# This file is the collectd 'Maintenance' Notifier.
|
||||
#
|
||||
# Collects provides information about each event as an object passed to the
|
||||
# notification handler ; the notification object.
|
||||
#
|
||||
# object.host - the hostname
|
||||
#
|
||||
# object.plugin - the name of the plugin aka resource
|
||||
# object.plugin_instance - plugin instance string i.e. say mountpoint
|
||||
# for df plugin
|
||||
# object.type, - the unit i.e. percent or absolute
|
||||
# object.type_instance - the attribute i.e. free, used, etc
|
||||
#
|
||||
# object.severity - a integer value 0=OK , 1=warning, 2=failure
|
||||
# object.message - a log-able message containing the above along
|
||||
# with the value
|
||||
#
|
||||
# This notifier manages requesting mtce to assert or clear its collectd
|
||||
# host-degrade-cause flag based on notification messages sent from collectd.
|
||||
#
|
||||
# Messages to maintenance are throttled ONE_EVERY while this state is the
|
||||
# same as last state.
|
||||
#
|
||||
# Message is sent on every state change
|
||||
# from clear to assert or
|
||||
# from assert to clear
|
||||
#
|
||||
# See code comments for details.
|
||||
#
|
||||
############################################################################
|
||||
#
|
||||
# Import list
|
||||
|
||||
import os
|
||||
import socket
|
||||
import collectd
|
||||
|
||||
# This plugin name
PLUGIN = 'degrade notifier'

# collectd severity definitions ;
# Note: can't seem to pull them in symbolically with a header
NOTIF_FAILURE = 1
NOTIF_WARNING = 2
NOTIF_OKAY = 4

# generic return codes
PASS = 0
FAIL = 1

# default mtce port ; with configuration override
MTCE_CMD_RX_PORT = 2101

# same state message throttle count ;
# while the assert/clear state is unchanged, only message
# maintenance every ONE_EVERY notifications.
ONE_EVERY = 10

# plugin names this notifier recognizes
PLUGIN__DF = 'df'
PLUGIN__MEM = 'memory'
PLUGIN__CPU = 'cpu'
PLUGIN_INTERFACE = 'interface'
PLUGIN__EXAMPLE = 'example'


# The collectd Maintenance Notifier Object
class collectdMtceNotifierObject:
    """State for the collectd maintenance degrade notifier."""

    def __init__(self, port):
        """collectdMtceNotifierObject Class constructor"""
        # default maintenance port
        self.port = port
        # active controller address ; resolved lazily on first send
        self.addr = None

        # protocol family used when messaging maintenance ;
        # switched to AF_INET6 automatically on an IPv6 system
        self.protocol = socket.AF_INET

        # plugins whose FAILURE severity causes host degrade
        self.degrade_list__failure = [PLUGIN__DF,
                                      PLUGIN__MEM,
                                      PLUGIN__CPU,
                                      PLUGIN_INTERFACE,
                                      PLUGIN__EXAMPLE]
        # plugins whose WARNING severity causes host degrade
        self.degrade_list__warning = []

        # the running list of resources that require degrade ;
        # empty => send 'clear', non-empty => send 'assert'
        self.degrade_list = []

        # duplicate assert/clear message throttling state
        self.last_state = "undef"
        self.msg_throttle = 0


# Instantiate the mtce_notifier object ;
# this object persists from notification to notification
obj = collectdMtceNotifierObject(MTCE_CMD_RX_PORT)
|
||||
|
||||
|
||||
def _get_active_controller_ip():
    """Resolve the 'controller' hostname and cache its IP in obj.addr."""
    try:
        info = socket.getaddrinfo('controller', None)
        obj.addr = info[0][4][0]
        collectd.info("%s controller ip: %s" % (PLUGIN, obj.addr))
    except Exception as ex:
        # leave addr as None so the caller retries on the next notification
        obj.addr = None
        collectd.error("%s failed to get controller ip ; %s" %
                       (PLUGIN, str(ex)))
    return 0
|
||||
|
||||
|
||||
def _df_instance_to_path(df_inst):
|
||||
"""
|
||||
Convert a df instance name to a mountpoint
|
||||
"""
|
||||
|
||||
# df_root is not a dynamic file system. Ignore that one.
|
||||
if df_inst == 'df_root':
|
||||
return '/'
|
||||
else:
|
||||
# For all others replace all '-' with '/'
|
||||
return('/' + df_inst[3:].replace('-', '/'))
|
||||
|
||||
|
||||
# This function removes degraded file systems that are no longer present.
def _clear_degrade_for_missing_filesystems():
    """
    Remove degraded file systems that are no longer mounted or present.

    Iterates over a snapshot of the degrade list because entries may be
    removed while scanning ; the original iterated the live list while
    calling remove(), which silently skips the element after each removal.
    """
    for df_inst in list(obj.degrade_list):

        # Only file system plugins are looked at.
        # File system plugin instance names are prefixed with 'df_'
        # as the first 3 chars in the instance name.
        if df_inst[0:3] == 'df_':
            path = _df_instance_to_path(df_inst)

            # if the mount point no longer exists then remove
            # this instance from the degrade list.
            if os.path.ismount(path) is False:
                collectd.info("%s clearing degrade for missing %s ; %s" %
                              (PLUGIN, path, obj.degrade_list))
                obj.degrade_list.remove(df_inst)

    return 0
|
||||
|
||||
|
||||
# The collectd configuration interface
#
# Supports a single 'port' option that overrides the default
# maintenance command receive port.
def config_func(config):
    """Configure the maintenance degrade notifier plugin."""
    collectd.info('%s config function' % PLUGIN)
    for child in config.children:
        if child.key.lower() == 'port':
            obj.port = int(child.values[0])
            collectd.info("%s configured mtce port: %d" %
                          (PLUGIN, obj.port))
            return 0

    # fall back to the default port when none is configured
    obj.port = MTCE_CMD_RX_PORT
    collectd.error("%s no mtce port provided ; defaulting to %d" %
                   (PLUGIN, obj.port))
|
||||
|
||||
|
||||
# Collectd calls this function on startup.
def init_func():
    """Collectd Mtce Notifier Initialization Function"""
    collectd.debug("%s init function" % PLUGIN)
|
||||
|
||||
|
||||
# Helper for notifier_func ; factors the duplicated failure/warning
# degrade-list insertion logic into one place.
def _add_degrade_candidate(plugin_name, resource):
    """
    Add resource to the degrade list unless it is a df instance whose
    mountpoint has disappeared (transient over a swact or unmount).
    """
    if resource not in obj.degrade_list:
        add = True
        if plugin_name == PLUGIN__DF:
            # don't degrade for a filesystem that is no longer mounted
            add = os.path.ismount(_df_instance_to_path(resource))
        if add is True:
            collectd.debug("%s %s added to degrade list" %
                           (PLUGIN, resource))
            obj.degrade_list.append(resource)


# This is the Notifier function that is called by collectd.
#
# Handling steps are
#
#  1. build resource name from notification object.
#  2. check resource against severity lists.
#  3. manage this instance's degrade state.
#  4. send mtcAgent the degrade state message.
#
def notifier_func(nObject):
    """Collectd Mtce Notifier Handler Function"""

    # Create the resource name from the notifier object.
    # format: <plugin name>_<plugin_instance_name>
    resource = nObject.plugin
    if nObject.plugin_instance:
        resource += "_" + nObject.plugin_instance

    # Manage the degrade_list based on this notification's severity.
    # OKAY removes the resource ; FAILURE/WARNING add it when the plugin
    # is listed in the corresponding degrade list.
    if nObject.severity is NOTIF_OKAY:
        if obj.degrade_list and resource in obj.degrade_list:
            obj.degrade_list.remove(resource)

    elif nObject.severity is NOTIF_FAILURE:
        if obj.degrade_list__failure:
            if nObject.plugin in obj.degrade_list__failure:
                _add_degrade_candidate(nObject.plugin, resource)
        else:
            # If severity is failure and no failures cause degrade
            # then make sure this plugin is not in the degrade list.
            # Should never occur.
            if resource in obj.degrade_list:
                obj.degrade_list.remove(resource)

    elif nObject.severity is NOTIF_WARNING:
        if obj.degrade_list__warning:
            if nObject.plugin in obj.degrade_list__warning:
                _add_degrade_candidate(nObject.plugin, resource)
        else:
            # If severity is warning and no warnings cause degrade
            # then make sure this plugin is not in the degrade list.
            # Should never occur.
            if resource in obj.degrade_list:
                obj.degrade_list.remove(resource)
    else:
        collectd.info("%s unsupported severity %d" %
                      (PLUGIN, nObject.severity))
        return FAIL

    # running counter of notifications toward the throttle limit
    obj.msg_throttle += 1

    # Support for Dynamic File Systems
    # --------------------------------
    # Mounted filesystems can disappear over a swact ; audit the degrade
    # list for missing mounts whenever the root filesystem notification
    # arrives (root is always present).
    if nObject.plugin == 'df' \
            and nObject.plugin_instance == 'root' \
            and len(obj.degrade_list):
        _clear_degrade_for_missing_filesystems()

    # Empty degrade list => send 'clear' ; non-empty => send 'assert'.
    # Include up to 5 degraded resource instances in the message so
    # mtcAgent can optionally log them.
    resources = ""
    if obj.degrade_list:
        # fix: original slice was [0:1:5] which only ever sent ONE
        # resource ; the intent was to cap the list at 5 entries.
        resources = ','.join(obj.degrade_list[0:5])
        state = "assert"
    else:
        state = "clear"

    # Message throttling ....
    # Avoid sending the same last state message for up to ONE_EVERY count.
    # Just reduce load on mtcAgent.
    if obj.last_state == state and obj.msg_throttle < ONE_EVERY:
        return 0

    # if the degrade state has changed then log it and proceed
    if obj.last_state != state:
        if obj.last_state != "undef":
            collectd.info("%s degrade %s %s" %
                          (PLUGIN, state, obj.degrade_list))

    # Save state for next time
    obj.last_state = state

    # Clear the message throttle counter
    obj.msg_throttle = 0

    # Send the degrade state ; assert or clear message to mtcAgent.
    # On send failure the addr is reset to None which forces a refresh
    # of the controller address on the next notification.
    try:
        mtce_socket = socket.socket(obj.protocol, socket.SOCK_DGRAM)
        if mtce_socket:
            if obj.addr is None:
                _get_active_controller_ip()
                if obj.addr is None:
                    return 0

            # Create the Maintenance message.
            message = "{\"service\":\"collectd_notifier\","
            message += "\"hostname\":\"" + nObject.host + "\","
            message += "\"degrade\":\"" + state + "\","
            message += "\"resource\":\"" + resources + "\"}"
            collectd.debug("%s: %s" % (PLUGIN, message))

            mtce_socket.settimeout(1.0)
            mtce_socket.sendto(message, (obj.addr, obj.port))
            mtce_socket.close()
        else:
            collectd.error("%s %s failed to open socket (%s)" %
                           (PLUGIN, resource, obj.addr))
    except socket.error as e:
        if e.args[0] == socket.EAI_ADDRFAMILY:
            # Handle IPV4 to IPV6 switchover:
            obj.protocol = socket.AF_INET6
            collectd.info("%s %s ipv6 addressing (%s)" %
                          (PLUGIN, resource, obj.addr))
        else:
            collectd.error("%s %s socket error (%s) ; %s" %
                           (PLUGIN, resource, obj.addr, str(e)))
            # try self correction
            obj.addr = None
            obj.protocol = socket.AF_INET

    return 0


collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_notification(notifier_func)
|
17
monitoring/collectd-extensions/src/ntpq.conf
Normal file
17
monitoring/collectd-extensions/src/ntpq.conf
Normal file
@ -0,0 +1,17 @@
|
||||
#<Plugin "ntpq">
|
||||
# Interval 60
|
||||
#</Plugin>
|
||||
|
||||
<Plugin "threshold">
|
||||
<Plugin "ntpq">
|
||||
<Type "absolute">
|
||||
Instance "state"
|
||||
Persist true
|
||||
PersistOK true
|
||||
WarningMin 1
|
||||
FailureMin 0
|
||||
# Hits 2
|
||||
Invert false
|
||||
</Type>
|
||||
</Plugin>
|
||||
</Plugin>
|
195
monitoring/collectd-extensions/src/ntpq.py
Executable file
195
monitoring/collectd-extensions/src/ntpq.py
Executable file
@ -0,0 +1,195 @@
|
||||
|
||||
# Copyright (c) 2018 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
#
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import uuid
|
||||
import collectd
|
||||
from fm_api import constants as fm_constants
|
||||
from fm_api import fm_api
|
||||
import tsconfig.tsconfig as tsc
|
||||
|
||||
api = fm_api.FaultAPIs()
|
||||
|
||||
PLUGIN = 'NTP query plugin'
|
||||
|
||||
PLUGIN_SCRIPT = '/etc/rmonfiles.d/query_ntp_servers.sh'
|
||||
PLUGIN_RESULT = '/tmp/ntpq_server_info'
|
||||
|
||||
# static variables
|
||||
ALARM_ID__NTPQ = "100.114"
|
||||
|
||||
|
||||
# define a class here that will persist over read calls
class NtpqObject:
    # host this plugin is running on
    hostname = ''
    # alarm entity id prefix ; host=<hostname>.ntp
    base_eid = ''
    # current alarm severity
    severity = 'clear'
    suppression = True
    service_affecting = False
    # last query script exit status
    status = 0
    last_result = ''
    this_result = ''
    # fault management alarm attributes
    id = ALARM_ID__NTPQ
    name = "NTP"
    alarm_type = fm_constants.FM_ALARM_TYPE_1
    cause = fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN
    repair = "Monitor and if condition persists, "
    repair += "contact next level of support."


obj = NtpqObject()
|
||||
|
||||
|
||||
def is_uuid_like(val):
    """Return True when val round-trips through uuid.UUID unchanged."""
    try:
        parsed = uuid.UUID(val)
    except (TypeError, ValueError, AttributeError):
        return False
    return str(parsed) == val
|
||||
|
||||
|
||||
# The config function - called once on collectd process startup
def config_func(config):
    """Configure the plugin (no options are supported yet)."""
    collectd.debug('%s config function' % PLUGIN)
    return 0
|
||||
|
||||
|
||||
# The init function - called once on collectd process startup
def init_func():
    """Initialize the plugin ; controller hosts only."""

    # ntp query is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    # get current hostname
    obj.hostname = os.uname()[1]
    obj.base_eid = 'host=' + obj.hostname + '.ntp'
    # fix: the format arguments must be a tuple ; the original applied
    # '%' to PLUGIN alone (TypeError: not enough arguments) and passed
    # the remaining values as extra collectd.info arguments.
    collectd.info("%s on %s with entity id '%s'" %
                  (PLUGIN, obj.hostname, obj.base_eid))
    return 0
|
||||
|
||||
|
||||
# The sample read function - called on every audit interval
def read_func():
    """
    Query NTP server reachability and manage the 100.114 alarm.

    Runs the query script, dispatches an ntpq state sample and then
    raises/clears the NTP alarm based on the computed severity.
    """

    # ntp query is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    result = int(0)
    # Query ntp via the helper script
    try:
        result = os.system(PLUGIN_SCRIPT)
    except Exception as e:
        # fix: original format expected 3 values but was given 2
        # (TypeError) ; include the script path being run.
        collectd.error("%s Could not run '%s' (%s)" %
                       (PLUGIN, PLUGIN_SCRIPT, str(e)))
        return 0

    # os.system encodes the exit status in the high byte
    obj.status = int(result) / 0x100

    collectd.info("%s Query Result: %s" % (PLUGIN, obj.status))

    if os.path.exists(PLUGIN_RESULT) is False:
        collectd.error("%s produced no result file '%s'" %
                       (PLUGIN, PLUGIN_RESULT))
        return 0

    # read the query result file.
    # format is in the PLUGIN_SCRIPT file.
    # It contains the list of unreachable ntp servers that need
    # alarm management.
    count = 0
    with open(PLUGIN_RESULT, 'r') as infile:
        for line in infile:
            count += 1
            collectd.info("%s Query Result: %s" % (PLUGIN, line))
    if count == 0:
        collectd.error("%s produced empty result file '%s'" %
                       (PLUGIN, PLUGIN_RESULT))
        return 0

    sample = 1

    # Dispatch state value to collectd
    val = collectd.Values(host=obj.hostname)
    val.plugin = 'ntpq'
    val.plugin_instance = 'some.ntp.server.ip'
    val.type = 'absolute'
    val.type_instance = 'state'
    val.dispatch(values=[sample])

    severity = 'clear'
    obj.severity = 'clear'

    # if there is no severity change then consider exiting
    if obj.severity == severity:
        # unless the current severity is 'minor'
        if severity == 'minor':
            # TODO: check to see if the failing IP address has changed
            collectd.info("%s NEED TO CHECK IP ADDRESSES" % (PLUGIN))
        else:
            return 0

    # if current severity is clear but previous severity is not then
    # prepare to clear the alarms
    if severity == 'clear':
        _alarm_state = fm_constants.FM_ALARM_STATE_CLEAR

        # TODO: loop over all raised alarms and clear them
        collectd.info("%s NEED CLEAR ALL ALARMS" % (PLUGIN))
        if api.clear_fault(obj.id, obj.base_eid) is False:
            collectd.error("%s %s:%s clear_fault failed" %
                           (PLUGIN, obj.id, obj.base_eid))
        return 0

    elif severity == 'major':
        reason = "NTP configuration does not contain any valid "
        reason += "or reachable NTP servers."
        eid = obj.base_eid
        fm_severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
    else:
        # TODO: there can be up to 3 inaccessible servers
        ip = 'some.server.ip.addr'
        reason = "NTP address "
        reason += ip
        reason += " is not a valid or a reachable NTP server."
        eid = obj.base_eid + '=' + ip
        fm_severity = fm_constants.FM_ALARM_SEVERITY_MINOR

    fault = fm_api.Fault(
        alarm_id=obj.id,
        alarm_state=fm_constants.FM_ALARM_STATE_SET,
        entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
        entity_instance_id=eid,
        severity=fm_severity,
        reason_text=reason,
        alarm_type=obj.alarm_type,
        probable_cause=obj.cause,
        proposed_repair_action=obj.repair,
        service_affecting=obj.service_affecting,
        suppression=obj.suppression)

    alarm_uuid = api.set_fault(fault)
    if is_uuid_like(alarm_uuid) is False:
        collectd.error("%s %s:%s set_fault failed:%s" %
                       (PLUGIN, obj.id, eid, alarm_uuid))
        return 0

    # TODO: clear the object alarm state

    return 0


# register the config, init and read functions
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)
|
20
monitoring/collectd-extensions/src/python_plugins.conf
Normal file
20
monitoring/collectd-extensions/src/python_plugins.conf
Normal file
@ -0,0 +1,20 @@
|
||||
LoadPlugin python
|
||||
<Plugin python>
|
||||
ModulePath "/opt/collectd/extensions/python"
|
||||
Import "cpu"
|
||||
<Module "cpu">
|
||||
Path "/proc/cpuinfo"
|
||||
</Module>
|
||||
Import "memory"
|
||||
<Module "memory">
|
||||
Path "/proc/meminfo"
|
||||
</Module>
|
||||
# Import "example"
|
||||
# <Module "example">
|
||||
# Data "1 50"
|
||||
# </Module>
|
||||
# Import "interface"
|
||||
# Import "ntpq"
|
||||
LogTraces = true
|
||||
Encoding "utf-8"
|
||||
</Plugin>
|
10
monitoring/influxdb-extensions/PKG-INFO
Normal file
10
monitoring/influxdb-extensions/PKG-INFO
Normal file
@ -0,0 +1,10 @@
|
||||
Metadata-Version: 1.1
|
||||
Name: influxdb-extensions
|
||||
Version: 1.0
|
||||
Summary: influxdb-extensions
|
||||
Home-page:
|
||||
Author: Windriver
|
||||
Author-email: info@windriver.com
|
||||
License: windriver
|
||||
Description: Titanium Cloud influxdb extensions.
|
||||
Platform: UNKNOWN
|
7
monitoring/influxdb-extensions/centos/build_srpm.data
Normal file
7
monitoring/influxdb-extensions/centos/build_srpm.data
Normal file
@ -0,0 +1,7 @@
|
||||
SRC_DIR="$PKG_BASE"
|
||||
|
||||
COPY_LIST="$PKG_BASE/src/LICENSE \
|
||||
$PKG_BASE/src/influxdb.conf.pmon \
|
||||
$PKG_BASE/src/influxdb.service"
|
||||
|
||||
TIS_PATCH_VER=1
|
@ -0,0 +1,46 @@
|
||||
Summary: Titanium Server influxdb Extensions Package
|
||||
Name: influxdb-extensions
|
||||
Version: 1.0
|
||||
Release: 0%{?_tis_dist}.%{tis_patch_ver}
|
||||
License: windriver
|
||||
Group: base
|
||||
Packager: Wind River <info@windriver.com>
|
||||
URL: unknown
|
||||
|
||||
# create the files tarball
|
||||
Source0: %{name}-%{version}.tar.gz
|
||||
|
||||
Source1: influxdb.service
|
||||
Source2: influxdb.conf.pmon
|
||||
|
||||
Requires: systemd
|
||||
Requires: influxdb
|
||||
Requires: /bin/systemctl
|
||||
|
||||
%description
|
||||
Titanium Cloud influxdb extensions
|
||||
|
||||
%define debug_package %{nil}
|
||||
%define local_unit_dir %{_sysconfdir}/systemd/system
|
||||
|
||||
%prep
|
||||
%setup
|
||||
|
||||
%build
|
||||
|
||||
%install
|
||||
install -m 755 -d %{buildroot}%{_sysconfdir}
|
||||
install -m 755 -d %{buildroot}%{_sysconfdir}/influxdb
|
||||
install -m 755 -d %{buildroot}%{local_unit_dir}
|
||||
|
||||
install -m 644 %{SOURCE1} %{buildroot}%{local_unit_dir}
|
||||
install -m 600 %{SOURCE2} %{buildroot}%{_sysconfdir}/influxdb
|
||||
|
||||
|
||||
%clean
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
|
||||
%files
|
||||
%defattr(-,root,root,-)
|
||||
%config(noreplace) %{local_unit_dir}/influxdb.service
|
||||
%{_sysconfdir}/influxdb/*
|
202
monitoring/influxdb-extensions/src/LICENSE
Normal file
202
monitoring/influxdb-extensions/src/LICENSE
Normal file
@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
322
monitoring/influxdb-extensions/src/influxdb.conf
Normal file
322
monitoring/influxdb-extensions/src/influxdb.conf
Normal file
@ -0,0 +1,322 @@
|
||||
### Welcome to the InfluxDB configuration file.
|
||||
|
||||
# Once every 24 hours InfluxDB will report anonymous data to m.influxdb.com
|
||||
# The data includes raft id (random 8 bytes), os, arch, version, and metadata.
|
||||
# We don't track ip addresses of servers reporting. This is only used
|
||||
# to track the number of instances running and the versions, which
|
||||
# is very helpful for us.
|
||||
# Change this option to true to disable reporting.
|
||||
reporting-disabled = false
|
||||
|
||||
###
|
||||
### Enterprise registration control
|
||||
###
|
||||
|
||||
[registration]
|
||||
# enabled = true
|
||||
# url = "https://enterprise.influxdata.com" # The Enterprise server URL
|
||||
# token = "" # Registration token for Enterprise server
|
||||
|
||||
###
|
||||
### [meta]
|
||||
###
|
||||
### Controls the parameters for the Raft consensus group that stores metadata
|
||||
### about the InfluxDB cluster.
|
||||
###
|
||||
|
||||
[meta]
|
||||
dir = "/var/lib/influxdb/meta"
|
||||
hostname = "localhost"
|
||||
bind-address = ":8088"
|
||||
retention-autocreate = true
|
||||
election-timeout = "1s"
|
||||
heartbeat-timeout = "1s"
|
||||
leader-lease-timeout = "500ms"
|
||||
commit-timeout = "50ms"
|
||||
cluster-tracing = false
|
||||
|
||||
# If enabled, when a Raft cluster loses a peer due to a `DROP SERVER` command,
|
||||
# the leader will automatically ask a non-raft peer node to promote to a raft
|
||||
# peer. This only happens if there is a non-raft peer node available to promote.
|
||||
# This setting only affects the local node, so to ensure it operates correctly, be sure to set
|
||||
# it in the config of every node.
|
||||
raft-promotion-enabled = true
|
||||
|
||||
###
|
||||
### [data]
|
||||
###
|
||||
### Controls where the actual shard data for InfluxDB lives and how it is
|
||||
### flushed from the WAL. "dir" may need to be changed to a suitable place
|
||||
### for your system, but the WAL settings are an advanced configuration. The
|
||||
### defaults should work for most systems.
|
||||
###
|
||||
|
||||
[data]
|
||||
dir = "/var/lib/influxdb/data"
|
||||
|
||||
# Controls the engine type for new shards. Options are b1, bz1, or tsm1.
|
||||
# b1 is the 0.9.2 storage engine, bz1 is the 0.9.3 and 0.9.4 engine.
|
||||
# tsm1 is the 0.9.5 engine and is currently EXPERIMENTAL. Until 0.9.5 is
|
||||
# actually released data written into a tsm1 engine may need to be wiped
|
||||
# between upgrades.
|
||||
# engine ="bz1"
|
||||
|
||||
# The following WAL settings are for the b1 storage engine used in 0.9.2. They won't
|
||||
# apply to any new shards created after upgrading to a version > 0.9.3.
|
||||
max-wal-size = 104857600 # Maximum size the WAL can reach before a flush. Defaults to 100MB.
|
||||
wal-flush-interval = "10m" # Maximum time data can sit in WAL before a flush.
|
||||
wal-partition-flush-delay = "2s" # The delay time between each WAL partition being flushed.
|
||||
|
||||
# These are the WAL settings for the storage engine >= 0.9.3
|
||||
wal-dir = "/var/lib/influxdb/wal"
|
||||
wal-enable-logging = true
|
||||
|
||||
# When a series in the WAL in-memory cache reaches this size in bytes it is marked as ready to
|
||||
# flush to the index
|
||||
# wal-ready-series-size = 25600
|
||||
|
||||
# Flush and compact a partition once this ratio of series are over the ready size
|
||||
# wal-compaction-threshold = 0.6
|
||||
|
||||
# Force a flush and compaction if any series in a partition gets above this size in bytes
|
||||
# wal-max-series-size = 2097152
|
||||
|
||||
# Force a flush of all series and full compaction if there have been no writes in this
|
||||
# amount of time. This is useful for ensuring that shards that are cold for writes don't
|
||||
# keep a bunch of data cached in memory and in the WAL.
|
||||
# wal-flush-cold-interval = "10m"
|
||||
|
||||
# Force a partition to flush its largest series if it reaches this approximate size in
|
||||
# bytes. Remember there are 5 partitions so you'll need at least 5x this amount of memory.
|
||||
# The more memory you have, the bigger this can be.
|
||||
# wal-partition-size-threshold = 20971520
|
||||
|
||||
# Whether queries should be logged before execution. Very useful for troubleshooting, but will
|
||||
# log any sensitive data contained within a query.
|
||||
# query-log-enabled = true
|
||||
|
||||
###
|
||||
### [hinted-handoff]
|
||||
###
|
||||
### Controls the hinted handoff feature, which allows nodes to temporarily
|
||||
### store queued data when one node of a cluster is down for a short period
|
||||
### of time.
|
||||
###
|
||||
|
||||
[hinted-handoff]
|
||||
enabled = true
|
||||
dir = "/var/lib/influxdb/hh"
|
||||
max-size = 1073741824
|
||||
max-age = "168h"
|
||||
retry-rate-limit = 0
|
||||
|
||||
# Hinted handoff will start retrying writes to down nodes at a rate of once per second.
|
||||
# If any error occurs, it will backoff in an exponential manner, until the interval
|
||||
# reaches retry-max-interval. Once writes to all nodes are successfully completed the
|
||||
# interval will reset to retry-interval.
|
||||
retry-interval = "1s"
|
||||
retry-max-interval = "1m"
|
||||
|
||||
# Interval between running checks for data that should be purged. Data is purged from
|
||||
# hinted-handoff queues for two reasons. 1) The data is older than the max age, or
|
||||
# 2) the target node has been dropped from the cluster. Data is never dropped until
|
||||
# it has reached max-age however, for a dropped node or not.
|
||||
purge-interval = "1h"
|
||||
|
||||
###
|
||||
### [cluster]
|
||||
###
|
||||
### Controls non-Raft cluster behavior, which generally includes how data is
|
||||
### shared across shards.
|
||||
###
|
||||
|
||||
[cluster]
|
||||
shard-writer-timeout = "10s" # The time within which a shard must respond to write.
|
||||
write-timeout = "5s" # The time within which a write operation must complete on the cluster.
|
||||
|
||||
###
|
||||
### [retention]
|
||||
###
|
||||
### Controls the enforcement of retention policies for evicting old data.
|
||||
###
|
||||
|
||||
[retention]
|
||||
enabled = true
|
||||
check-interval = "30m"
|
||||
|
||||
###
|
||||
### [shard-precreation]
|
||||
###
|
||||
### Controls the precreation of shards, so they are created before data arrives.
|
||||
### Only shards that will exist in the future, at time of creation, are precreated.
|
||||
|
||||
[shard-precreation]
|
||||
enabled = true
|
||||
check-interval = "10m"
|
||||
advance-period = "30m"
|
||||
|
||||
###
|
||||
### Controls the system self-monitoring, statistics and diagnostics.
|
||||
###
|
||||
### The internal database for monitoring data is created automatically if
|
||||
### it does not already exist. The target retention within this database
|
||||
### is called 'monitor' and is also created with a retention period of 7 days
|
||||
### and a replication factor of 1, if it does not exist. In all cases
|
||||
### this retention policy is configured as the default for the database.
|
||||
|
||||
[monitor]
|
||||
store-enabled = true # Whether to record statistics internally.
|
||||
store-database = "_internal" # The destination database for recorded statistics
|
||||
store-interval = "10s" # The interval at which to record statistics
|
||||
|
||||
###
|
||||
### [admin]
|
||||
###
|
||||
### Controls the availability of the built-in, web-based admin interface. If HTTPS is
|
||||
### enabled for the admin interface, HTTPS must also be enabled on the [http] service.
|
||||
###
|
||||
|
||||
[admin]
|
||||
enabled = true
|
||||
bind-address = ":8083"
|
||||
https-enabled = false
|
||||
https-certificate = "/etc/ssl/influxdb.pem"
|
||||
|
||||
###
|
||||
### [http]
|
||||
###
|
||||
### Controls how the HTTP endpoints are configured. These are the primary
|
||||
### mechanism for getting data into and out of InfluxDB.
|
||||
###
|
||||
|
||||
[http]
|
||||
enabled = true
|
||||
bind-address = ":8086"
|
||||
auth-enabled = false
|
||||
log-enabled = true
|
||||
write-tracing = false
|
||||
pprof-enabled = false
|
||||
https-enabled = false
|
||||
https-certificate = "/etc/ssl/influxdb.pem"
|
||||
|
||||
###
|
||||
### [[graphite]]
|
||||
###
|
||||
### Controls one or many listeners for Graphite data.
|
||||
###
|
||||
|
||||
[[graphite]]
|
||||
enabled = false
|
||||
# database = "graphite"
|
||||
# bind-address = ":2003"
|
||||
# protocol = "tcp"
|
||||
# consistency-level = "one"
|
||||
# name-separator = "."
|
||||
|
||||
# These next lines control how batching works. You should have this enabled
|
||||
# otherwise you could get dropped metrics or poor performance. Batching
|
||||
# will buffer points in memory if you have many coming in.
|
||||
|
||||
# batch-size = 1000 # will flush if this many points get buffered
|
||||
# batch-pending = 5 # number of batches that may be pending in memory
|
||||
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
|
||||
# udp-read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
|
||||
|
||||
## "name-schema" configures tag names for parsing the metric name from graphite protocol;
|
||||
## separated by `name-separator`.
|
||||
## The "measurement" tag is special and the corresponding field will become
|
||||
## the name of the metric.
|
||||
## e.g. "type.host.measurement.device" will parse "server.localhost.cpu.cpu0" as
|
||||
## {
|
||||
## measurement: "cpu",
|
||||
## tags: {
|
||||
## "type": "server",
|
||||
## "host": "localhost,
|
||||
## "device": "cpu0"
|
||||
## }
|
||||
## }
|
||||
# name-schema = "type.host.measurement.device"
|
||||
|
||||
## If set to true, when the input metric name has more fields than `name-schema` specified,
|
||||
## the extra fields will be ignored.
|
||||
## Otherwise an error will be logged and the metric rejected.
|
||||
# ignore-unnamed = true
|
||||
|
||||
###
|
||||
### [collectd]
|
||||
###
|
||||
### Controls the listener for collectd data.
|
||||
###
|
||||
|
||||
[collectd]
|
||||
enabled = true
|
||||
bind-address = "127.0.0.1:25826"
|
||||
database = "collectd"
|
||||
typesdb = "/usr/share/collectd/types.db"
|
||||
|
||||
# These next lines control how batching works. You should have this enabled
|
||||
# otherwise you could get dropped metrics or poor performance. Batching
|
||||
# will buffer points in memory if you have many coming in.
|
||||
|
||||
# batch-size = 1000 # will flush if this many points get buffered
|
||||
# batch-pending = 5 # number of batches that may be pending in memory
|
||||
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
|
||||
# read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
|
||||
|
||||
###
|
||||
### [opentsdb]
|
||||
###
|
||||
### Controls the listener for OpenTSDB data.
|
||||
###
|
||||
|
||||
[opentsdb]
|
||||
enabled = false
|
||||
# bind-address = ":4242"
|
||||
# database = "opentsdb"
|
||||
# retention-policy = ""
|
||||
# consistency-level = "one"
|
||||
# tls-enabled = false
|
||||
# certificate= ""
|
||||
|
||||
# These next lines control how batching works. You should have this enabled
|
||||
# otherwise you could get dropped metrics or poor performance. Only points
|
||||
# metrics received over the telnet protocol undergo batching.
|
||||
|
||||
# batch-size = 1000 # will flush if this many points get buffered
|
||||
# batch-pending = 5 # number of batches that may be pending in memory
|
||||
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
|
||||
|
||||
###
|
||||
### [[udp]]
|
||||
###
|
||||
### Controls the listeners for InfluxDB line protocol data via UDP.
|
||||
###
|
||||
|
||||
[[udp]]
|
||||
enabled = false
|
||||
# bind-address = ""
|
||||
# database = "udp"
|
||||
# retention-policy = ""
|
||||
|
||||
# These next lines control how batching works. You should have this enabled
|
||||
# otherwise you could get dropped metrics or poor performance. Batching
|
||||
# will buffer points in memory if you have many coming in.
|
||||
|
||||
# batch-size = 1000 # will flush if this many points get buffered
|
||||
# batch-pending = 5 # number of batches that may be pending in memory
|
||||
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
|
||||
# read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
|
||||
|
||||
###
|
||||
### [continuous_queries]
|
||||
###
|
||||
### Controls how continuous queries are run within InfluxDB.
|
||||
###
|
||||
|
||||
[continuous_queries]
|
||||
log-enabled = true
|
||||
enabled = true
|
||||
recompute-previous-n = 2
|
||||
recompute-no-older-than = "10m"
|
||||
compute-runs-per-interval = 10
|
||||
compute-no-more-than = "2m"
|
17
monitoring/influxdb-extensions/src/influxdb.conf.pmon
Normal file
17
monitoring/influxdb-extensions/src/influxdb.conf.pmon
Normal file
@ -0,0 +1,17 @@
|
||||
[process]
|
||||
process = influxdb
|
||||
service = influxdb
|
||||
style = lsb
|
||||
pidfile = /var/run/influxdb/influxdb.pid
|
||||
severity = major ; minor, major, critical
|
||||
restarts = 3 ; restart retries before error assertion
|
||||
interval = 5 ; number of seconds to wait between restarts
|
||||
debounce = 10 ; number of seconds that a process needs to remain
|
||||
; running before degrade is removed and retry count
|
||||
; is cleared.
|
||||
startuptime = 3 ; Seconds to wait after process start before starting the debounce monitor
|
||||
mode = passive ; Monitoring mode: passive (default) or active
|
||||
; passive: process death monitoring (default: always)
|
||||
; active : heartbeat monitoring, i.e. request / response messaging
|
||||
; ignore : do not monitor or stop monitoring
|
||||
quorum = 0 ; process is in the host watchdog quorum
|
16
monitoring/influxdb-extensions/src/influxdb.logrotate
Normal file
16
monitoring/influxdb-extensions/src/influxdb.logrotate
Normal file
@ -0,0 +1,16 @@
|
||||
#daily
|
||||
nodateext
|
||||
|
||||
/var/log/influxdb/influxdb.log
|
||||
{
|
||||
size 20M
|
||||
start 1
|
||||
missingok
|
||||
rotate 20
|
||||
compress
|
||||
sharedscripts
|
||||
postrotate
|
||||
systemctl reload syslog-ng > /dev/null 2>&1 || true
|
||||
endscript
|
||||
}
|
||||
|
20
monitoring/influxdb-extensions/src/influxdb.service
Normal file
20
monitoring/influxdb-extensions/src/influxdb.service
Normal file
@ -0,0 +1,20 @@
|
||||
[Unit]
|
||||
Description=InfluxDB open-source, distributed, time series database
|
||||
Documentation=https://influxdb.com/docs/
|
||||
After=local-fs.target network.target
|
||||
Before=collectd.service
|
||||
|
||||
[Service]
|
||||
User=influxdb
|
||||
Group=influxdb
|
||||
LimitNOFILE=65536
|
||||
Environment='STDOUT=/dev/null'
|
||||
Environment='STDERR=/var/log/influxdb/influxd.log'
|
||||
EnvironmentFile=-/etc/default/influxdb
|
||||
ExecStart=/bin/sh -c "/usr/bin/influxd -config /etc/influxdb/influxdb.conf -pidfile /var/run/influxdb/influxdb.pid ${INFLUXD_OPTS} >> ${STDOUT} 2>> ${STDERR}"
|
||||
ExecStopPost=/bin/bash -c 'rm /var/run/influxdb/influxdb.pid'
|
||||
KillMode=control-group
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
Alias=influxd.service
|
Loading…
Reference in New Issue
Block a user