browbeat/ansible/install/roles/collectd-openstack/templates/compute.collectd.conf.j2
venkata anil b132b62274 Add OSP resource metrics
This patch adds 'OSP Resources' dashbaord with 'OSP DB resources'
and 'OSP REST API' panels which displays neutron and nova resources
by tracking DB and REST api calls.

A new dashbaord 'OVN Metrics' will collect ovn metrics.

Collectd is enhanced to issue mysql statements and capture OSP
resources count as metrics. Collectd also monitors REST API calls
on logs and adds monitors.

'Unreasonably long 1014ms poll interval' message in OVN processes
indicates how busy OVN is and capturing these values for grafana
plotting will be very helpful in performance analysis.

We usually avoid enabling collectd on computes as this will
saturate graphite. So we need to have a light weight config to
enable OVN analysis on compute nodes. This patch adds
ovn_compute.collectd.conf.j2 to collect only ovn metrics on
compute nodes. We can enhance it later to include other important
metrics if needed.

Change-Id: If6cc162f5017c81ac62a7c7f5bd4f92be717ea6f
2021-09-02 17:45:31 +05:30

385 lines
9.2 KiB
Django/Jinja

# Installed by Browbeat Ansible Installer
# Config type: {{config_type}}
# Interval default is 10s
Interval {{collectd_interval}}
# Hostname for this machine, if not defined, use gethostname(2) system call
{% if not collectd_prometheus %}
Hostname "{{inventory_hostname}}"
{% endif %}
{% if collectd_prometheus %}
Hostname "{{prometheus_prefix}}_{{inventory_hostname}}"
{% endif %}
# Loaded Plugins:
LoadPlugin "logfile"
<Plugin "logfile">
File "/var/log/collectd.log"
LogLevel "info"
PrintSeverity true
Timestamp true
</Plugin>
LoadPlugin write_graphite
LoadPlugin cpu
{% if not collectd_prometheus %}
LoadPlugin conntrack
{% endif %}
{% if collectd_prometheus %}
LoadPlugin network
{% endif %}
LoadPlugin df
LoadPlugin disk
LoadPlugin exec
LoadPlugin interface
LoadPlugin irq
LoadPlugin load
LoadPlugin match_regex
LoadPlugin memory
LoadPlugin numa
LoadPlugin processes
{%if iostat_compute_collectd_plugin %}
<LoadPlugin python>
Globals true
</LoadPlugin>
{% endif %}
LoadPlugin swap
LoadPlugin tail
LoadPlugin turbostat
LoadPlugin unixsock
LoadPlugin uptime
# Open unix domain socket for collectdctl
<Plugin unixsock>
SocketFile "/var/run/collectd-unixsock"
SocketGroup "collectd"
SocketPerms "0770"
DeleteSocket true
</Plugin>
PreCacheChain "PreCache"
<Chain "PreCache">
<Rule "ignore_tap">
<Match "regex">
Plugin "^interface$"
PluginInstance "^tap*"
</Match>
Target "stop"
</Rule>
<Rule "ignore_interfaces_q">
<Match "regex">
Plugin "^interface$"
PluginInstance "^q.*"
</Match>
Target "stop"
</Rule>
Target "return"
</Chain>
# Graphite Host Configuration
{% if collectd_graphite %}
<Plugin write_graphite>
<Carbon>
Host "{{graphite_host}}"
Port "{{collectd_write_graphite_port}}"
Prefix "{{graphite_prefix}}."
Protocol "tcp"
LogSendErrors true
StoreRates true
AlwaysAppendDS false
EscapeCharacter "_"
</Carbon>
</Plugin>
{% endif %}
# Prometheus Host Configuration
{% if collectd_prometheus %}
<Plugin network>
Server "{{prometheus_host}}" "{{prometheus_port}}"
</Plugin>
{% endif %}
<Plugin df>
FSType anon_inodefs
FSType bdev
FSType cgroup
FSType cpuset
FSType debugfs
FSType devpts
FSType devtmpfs
FSType ecryptfs
FSType fuse
FSType fusectl
FSType hugetlbfs
FSType mqueue
FSType nfs
FSType nfs4
FSType nfsd
FSType overlay
FSType pipefs
FSType proc
FSType pstore
FSType ramfs
#FSType rootfs
FSType rpc_pipefs
FSType securityfs
FSType sockfs
FSType sysfs
FSType tmpfs
FSType vboxsf
MountPoint "/^/var/lib/docker/.*/"
IgnoreSelected true
ValuesPercentage true
ReportInodes true
</Plugin>
{%if disk_compute_collectd_plugin %}
<Plugin disk>
Disk "/^[hsv]d[a-z]+[0-9]?$/"
Disk "/^nvm/"
IgnoreSelected false
</Plugin>
{% endif %}
{% if ovs_flows_monitoring %}
<Plugin exec>
Exec "heat-admin:heat-admin" "/usr/local/bin/ovs_flows.sh"
</Plugin>
{% endif %}
{%if iostat_compute_collectd_plugin %}
<Plugin python>
ModulePath "/usr/local/bin/"
Import "collectd_iostat_python"
<Module collectd_iostat_python>
Path "/usr/bin/iostat"
Interval {{iostat_compute_collectd_interval}}
IostatInterval 2
Count 2
Verbose false
NiceNames false
PluginName collectd_iostat_python
</Module>
</Plugin>
{% endif %}
# (akrzos) Including the version of OpenStack that the process was verified as running after
# OpenStack Installation with a comment at the end of each Process/ProcessMatch statement.
# A Minus before the version means the process was not found in that version. (Ex -10)
<Plugin processes>
# Ceilometer (OpenStack Installed)
ProcessMatch "ceilometer-polling" "ceilometer-polling" # 10,11,12,13
# Collectd (Browbeat Installed)
ProcessMatch "collectd" "/usr/sbin/collectd"
# Conmon (OpenStack Installed)
ProcessMatch "conmon" "/usr/bin/conmon" # 16
# Neutron (OpenStack Installed)
ProcessMatch "neutron-l3-agent" "python.+neutron-l3-agent" # 10 with DVR
ProcessMatch "neutron-ns-metadata-proxy" "python.+neutron-ns-metadata-proxy" # 10 with DVR
ProcessMatch "neutron-metadata-agent" "python.+neutron-metadata-agent" # 10 with DVR
ProcessMatch "neutron-openvswitch-agent" "python.+neutron-openvswitch-agent" # 10,11,12,13
# Nova (OpenStack Installed)
ProcessMatch "nova-compute" "python.+nova-compute" # 10,11,12,13
ProcessMatch "privsep-helper" "python.+/bin/privsep-helper" # 11,12,13
# OVS (OpenStack Installed)
ProcessMatch "ovs-vswitchd" "ovs-vswitchd.+openvswitch" # 10,11,12,13
ProcessMatch "ovsdb-client" "ovsdb-client" # 10,11,12,13
ProcessMatch "ovsdb-server" "ovsdb-server.+openvswitch" # 10,11,12,13
ProcessMatch "ovn-controller" "ovn-controller.+openvswitch" # 9,10
ProcessMatch "ovn-controller-vtep" "ovn-controller-vtep.+openvswitch" # 9,10
# QEMU / libvirt (OpenStack Installed)
ProcessMatch "qemu-kvm" "/usr/libexec/qemu-kvm" # 10,11,12,13
ProcessMatch "libvirtd" "/usr/sbin/libvirtd" # 10,11,12,13
ProcessMatch "virtlockd" "/usr/sbin/virtlockd" # 10,11,-12,-13
ProcessMatch "virtlogd" "/usr/sbin/virtlogd" # 10,11,12,13
</Plugin>
<Plugin swap>
ReportBytes true
ValuesPercentage true
</Plugin>
# Tail plugin configuration
<Plugin "tail">
<File "/var/log/containers/openvswitch/ovn-controller.log">
Instance "ovncontroller"
<Match>
Regex "Unreasonably long ([0-9]*)ms poll interval"
DSType "GaugeLast"
Type "count"
Instance "poll"
</Match>
</File>
{%if 'Queens' in osp_version['content'] | b64decode or 'Pike' in osp_version['content'] | b64decode %}
<File "/var/log/containers/ceilometer/compute.log">
{% else %}
<File "/var/log/ceilometer/compute.log">
{% endif %}
Instance "ceilometer-compute"
<Match>
Regex " ERROR "
DSType "CounterInc"
Type "counter"
Instance "error"
</Match>
{%if regex_warn %}
<Match>
Regex " WARNING "
DSType "CounterInc"
Type "counter"
Instance "warn"
</Match>
{% endif %}
{%if regex_info %}
<Match>
Regex " INFO "
DSType "CounterInc"
Type "counter"
Instance "info"
</Match>
{% endif %}
</File>
<File "/var/log/neutron/l3-agent.log">
Instance "neutron-l3-agent"
<Match>
Regex " ERROR "
DSType "CounterInc"
Type "counter"
Instance "error"
</Match>
{%if regex_warn %}
<Match>
Regex " WARNING "
DSType "CounterInc"
Type "counter"
Instance "warn"
</Match>
{% endif %}
{%if regex_info %}
<Match>
Regex " INFO "
DSType "CounterInc"
Type "counter"
Instance "info"
</Match>
{% endif %}
</File>
<File "/var/log/neutron/metadata-agent.log">
Instance "neutron-metadata-agent"
<Match>
Regex " ERROR "
DSType "CounterInc"
Type "counter"
Instance "error"
</Match>
{%if regex_warn %}
<Match>
Regex " WARNING "
DSType "CounterInc"
Type "counter"
Instance "warn"
</Match>
{% endif %}
{%if regex_info %}
<Match>
Regex " INFO "
DSType "CounterInc"
Type "counter"
Instance "info"
</Match>
{% endif %}
</File>
{%if 'Queens' in osp_version['content'] | b64decode %}
<File "/var/log/containers/neutron/openvswitch-agent.log">
{% else %}
<File "/var/log/neutron/openvswitch-agent.log">
{% endif %}
Instance "neutron-openvswitch-agent"
<Match>
Regex " ERROR "
DSType "CounterInc"
Type "counter"
Instance "error"
</Match>
{%if regex_warn %}
<Match>
Regex " WARNING "
DSType "CounterInc"
Type "counter"
Instance "warn"
</Match>
{% endif %}
{%if regex_info %}
<Match>
Regex " INFO "
DSType "CounterInc"
Type "counter"
Instance "info"
</Match>
{% endif %}
</File>
{%if 'Queens' in osp_version['content'] | b64decode or 'Pike' in osp_version['content'] | b64decode %}
<File "/var/log/containers/nova/nova-compute.log">
{% else %}
<File "/var/log/nova/nova-compute.log">
{% endif %}
Instance "nova-compute"
<Match>
Regex " ERROR "
DSType "CounterInc"
Type "counter"
Instance "error"
</Match>
{%if regex_warn %}
<Match>
Regex " WARNING "
DSType "CounterInc"
Type "counter"
Instance "warn"
</Match>
{% endif %}
{%if regex_info %}
<Match>
Regex " INFO "
DSType "CounterInc"
Type "counter"
Instance "info"
</Match>
{% endif %}
</File>
</Plugin>
{% if ovsagent_compute_monitor %}
<Plugin python>
ModulePath "/usr/local/bin/"
LogTraces true
Interactive false
Import "collectd_ovsagent"
<Module collectd_ovsagent>
prefix ovsagent
interval 10
interfaces {% for int in compute_monitored_ints %} {{int}} {% endfor %}
</Module>
</Plugin>
{% endif %}
# Include other collectd configuration files
Include "/etc/collectd.d"