Add OSP resource metrics

This patch adds an 'OSP Resources' dashboard with 'OSP DB resources'
and 'OSP REST API' panels, which display neutron and nova resources
by tracking DB and REST API calls.

A new dashboard 'OVN Metrics' will display the collected OVN metrics.

Collectd is enhanced to issue MySQL statements and capture OSP
resource counts as metrics. Collectd also tails the API logs to count
REST API calls and publishes those counts as metrics.

The 'Unreasonably long 1014ms poll interval' message logged by OVN
processes indicates how busy OVN is, and capturing these values for
Grafana plotting will be very helpful in performance analysis.

We usually avoid enabling collectd on computes, as this would
saturate graphite, so we need a lightweight config to
enable OVN analysis on compute nodes. This patch adds
ovn_compute.collectd.conf.j2 to collect only ovn metrics on
compute nodes. We can enhance it later to include other important
metrics if needed.

Change-Id: If6cc162f5017c81ac62a7c7f5bd4f92be717ea6f
This commit is contained in:
venkata anil 2021-08-31 19:18:16 +05:30
parent 9381dd4156
commit b132b62274
12 changed files with 941 additions and 2 deletions

View File

@ -347,6 +347,23 @@ regex_info: false
ping_plugin: false
ping_interval: 1
########################################################
# OSP resources collectd plugin on controllers
########################################################
# When true, collectd reports counts of OSP resources
# (networks, instances, ...) read from the database.
osp_resources_collectd_plugin: true
########################################################
# OVN compute Plugin for OVN monitoring on computes
########################################################
# We usually avoid running collectd on computes: a cluster
# has a large number of compute nodes, and the metrics
# from all of them would saturate the graphite host.
# When true, compute nodes get a reduced collectd.conf that
# collects only OVN statistics. Leave it false to deploy the
# full compute configuration and collect all metrics on
# all compute nodes.
ovn_compute_collectd_plugin: false
############################################
# Prometheus/Collectd-Exporter Configuration
############################################

View File

@ -344,6 +344,27 @@
group: root
mode: 0644
become: true
when: "config_type != 'compute'"
# Compute nodes get exactly one of the two templates below, selected by
# ovn_compute_collectd_plugin. The flag is passed through "| bool" so a
# string value (e.g. "-e ovn_compute_collectd_plugin=false") is not
# treated as truthy.
- name: Configure compute collectd.conf
  template:
    src: "{{config_type}}.collectd.conf.j2"
    dest: /etc/collectd.conf
    owner: root
    group: root
    # Quoted so YAML does not re-interpret the octal literal as an integer.
    mode: "0644"
  become: true
  when: "config_type == 'compute' and not (ovn_compute_collectd_plugin | bool)"

# Reduced configuration that only collects OVN statistics on computes.
- name: Configure OVN-only compute collectd.conf
  template:
    src: "ovn_compute.collectd.conf.j2"
    dest: /etc/collectd.conf
    owner: root
    group: root
    mode: "0644"
  become: true
  when: "config_type == 'compute' and (ovn_compute_collectd_plugin | bool)"
#
# Configure selinux bits

View File

@ -212,6 +212,16 @@ PreCacheChain "PreCache"
# Tail plugin configuration
<Plugin "tail">
# OVN controller: capture the millisecond value from each
# "Unreasonably long <N>ms poll interval" log line as a gauge
# (GaugeLast keeps the last value matched in the interval).
<File "/var/log/containers/openvswitch/ovn-controller.log">
Instance "ovncontroller"
<Match>
Regex "Unreasonably long ([0-9]*)ms poll interval"
DSType "GaugeLast"
Type "count"
Instance "poll"
</Match>
</File>
{%if 'Queens' in osp_version['content'] | b64decode or 'Pike' in osp_version['content'] | b64decode %}
<File "/var/log/containers/ceilometer/compute.log">
{% else %}

View File

@ -38,7 +38,7 @@ LoadPlugin conntrack
{% if collectd_prometheus %}
LoadPlugin network
{% endif %}
{% if keystone_overcloud_collectd_plugin %}
{% if keystone_overcloud_collectd_plugin or osp_resources_collectd_plugin %}
{%if inventory_hostname == groups['Controller'][0] %}
LoadPlugin dbi
{% endif %}
@ -168,6 +168,213 @@ PreCacheChain "PreCache"
</Plugin>
{% endif %}
{% endif %}
{% if osp_resources_collectd_plugin %}
{%if inventory_hostname == groups['Controller'][0] %}
# OSP resource counts via the dbi plugin. Each <Query> runs a single
# "select count(*)" statement and reports the named column as a gauge.
# Rendered only on the first host of the Controller group; the other
# controllers get the one-line note in the else branch below.
<Plugin dbi>
# --- nova database: non-deleted instances and instance faults ---
<Query instances_count>
Statement "select count(*) as instances from instances where deleted=0"
<Result>
Type gauge
InstancePrefix "instances"
ValuesFrom "instances"
</Result>
</Query>
<Query instance_faults_count>
Statement "select count(*) as instance_faults from instance_faults"
<Result>
Type gauge
InstancePrefix "instance_faults"
ValuesFrom "instance_faults"
</Result>
</Query>
# Connects as root through the local MySQL unix socket.
<Database "nova">
Driver "mysql"
DriverOption "host" "localhost"
DriverOption "dbname" "nova"
DriverOption "username" "root"
DriverOption "password" "{{mysql_root_password.stdout}}"
DriverOption "mysql_unix_socket" "/var/lib/mysql/mysql.sock"
Query instances_count
Query instance_faults_count
</Database>
# --- ovs_neutron database: networks, subnets, ports (split by
# device_owner), floating IPs, routers, router ports (split by port_type),
# security groups and rules, trunks and subports ---
<Query network_count>
Statement "select count(*) as network from networks"
<Result>
Type gauge
InstancePrefix "network"
ValuesFrom "network"
</Result>
</Query>
<Query subnet_count>
Statement "select count(*) as subnet from subnets"
<Result>
Type gauge
InstancePrefix "subnet"
ValuesFrom "subnet"
</Result>
</Query>
# Ports that are neither DHCP ports nor nova compute ports.
<Query port_others_count>
Statement "select count(*) as port_others from ports where device_owner not in ('network:dhcp','compute:nova')"
<Result>
Type gauge
InstancePrefix "port_others"
ValuesFrom "port_others"
</Result>
</Query>
<Query port_dhcp_count>
Statement "select count(*) as port_dhcp from ports where device_owner='network:dhcp'"
<Result>
Type gauge
InstancePrefix "port_dhcp"
ValuesFrom "port_dhcp"
</Result>
</Query>
<Query port_compute_count>
Statement "select count(*) as port_compute from ports where device_owner='compute:nova'"
<Result>
Type gauge
InstancePrefix "port_compute"
ValuesFrom "port_compute"
</Result>
</Query>
<Query floatingips_count>
Statement "select count(*) as floatingips from floatingips"
<Result>
Type gauge
InstancePrefix "floatingips"
ValuesFrom "floatingips"
</Result>
</Query>
<Query router_count>
Statement "select count(*) as router from routers"
<Result>
Type gauge
InstancePrefix "router"
ValuesFrom "router"
</Result>
</Query>
<Query router_iface_count>
Statement "select count(*) as router_iface from routerports where port_type='network:router_interface'"
<Result>
Type gauge
InstancePrefix "router_iface"
ValuesFrom "router_iface"
</Result>
</Query>
<Query router_gw_count>
Statement "select count(*) as router_gw from routerports where port_type='network:router_gateway'"
<Result>
Type gauge
InstancePrefix "router_gw"
ValuesFrom "router_gw"
</Result>
</Query>
<Query sg_count>
Statement "select count(*) as sg from securitygroups"
<Result>
Type gauge
InstancePrefix "sg"
ValuesFrom "sg"
</Result>
</Query>
<Query sgr_count>
Statement "select count(*) as sgr from securitygrouprules"
<Result>
Type gauge
InstancePrefix "sgr"
ValuesFrom "sgr"
</Result>
</Query>
<Query trunk_count>
Statement "select count(*) as trunk from trunks"
<Result>
Type gauge
InstancePrefix "trunk"
ValuesFrom "trunk"
</Result>
</Query>
<Query subports_count>
Statement "select count(*) as subports from subports"
<Result>
Type gauge
InstancePrefix "subports"
ValuesFrom "subports"
</Result>
</Query>
# Connects as root through the local MySQL unix socket.
<Database "ovs_neutron">
Driver "mysql"
DriverOption "host" "localhost"
DriverOption "dbname" "ovs_neutron"
DriverOption "username" "root"
DriverOption "password" "{{mysql_root_password.stdout}}"
DriverOption "mysql_unix_socket" "/var/lib/mysql/mysql.sock"
Query network_count
Query port_others_count
Query subnet_count
Query port_dhcp_count
Query port_compute_count
Query floatingips_count
Query router_count
Query router_iface_count
Query router_gw_count
Query sg_count
Query sgr_count
Query trunk_count
Query subports_count
</Database>
# --- octavia database: load balancers, amphorae, pools and members ---
<Query load_balancer_count>
Statement "select count(*) as load_balancer from load_balancer"
<Result>
Type gauge
InstancePrefix "load_balancer"
ValuesFrom "load_balancer"
</Result>
</Query>
<Query amphora_count>
Statement "select count(*) as amphora from amphora"
<Result>
Type gauge
InstancePrefix "amphora"
ValuesFrom "amphora"
</Result>
</Query>
<Query pool_count>
Statement "select count(*) as pool from pool"
<Result>
Type gauge
InstancePrefix "pool"
ValuesFrom "pool"
</Result>
</Query>
<Query member_count>
Statement "select count(*) as member from member"
<Result>
Type gauge
InstancePrefix "member"
ValuesFrom "member"
</Result>
</Query>
# Connects as root through the local MySQL unix socket.
<Database "octavia">
Driver "mysql"
DriverOption "host" "localhost"
DriverOption "dbname" "octavia"
DriverOption "username" "root"
DriverOption "password" "{{mysql_root_password.stdout}}"
DriverOption "mysql_unix_socket" "/var/lib/mysql/mysql.sock"
Query load_balancer_count
Query amphora_count
Query pool_count
Query member_count
</Database>
</Plugin>
{% else %}
# Neutron Nova Octavia Count plugin installed and enabled on {{groups['Controller'][0]}}
{% endif %}
{% endif %}
{% if keystone_overcloud_collectd_plugin %}
{%if inventory_hostname == groups['Controller'][0] %}
@ -503,6 +710,102 @@ PreCacheChain "PreCache"
# Tail plugin configuration
<Plugin "tail">
{%if inventory_hostname == groups['Controller'][0] or inventory_hostname == groups['Controller'][1] or inventory_hostname == groups['Controller'][2] %}
# OVN poll-interval tracking, rendered on the first three hosts of the
# Controller group. For the NB DB, SB DB, northd and ovn-controller logs,
# the millisecond value of each "Unreasonably long <N>ms poll interval"
# line is captured as a gauge (GaugeLast keeps the last value matched).
<File "/var/log/containers/openvswitch/ovsdb-server-nb.log">
Instance "ovnnbdb"
<Match>
Regex "Unreasonably long ([0-9]*)ms poll interval"
DSType "GaugeLast"
Type "count"
Instance "poll"
</Match>
</File>
<File "/var/log/containers/openvswitch/ovsdb-server-sb.log">
Instance "ovnsbdb"
<Match>
Regex "Unreasonably long ([0-9]*)ms poll interval"
DSType "GaugeLast"
Type "count"
Instance "poll"
</Match>
</File>
<File "/var/log/containers/openvswitch/ovn-northd.log">
Instance "ovnnorthd"
<Match>
Regex "Unreasonably long ([0-9]*)ms poll interval"
DSType "GaugeLast"
Type "count"
Instance "poll"
</Match>
</File>
<File "/var/log/containers/openvswitch/ovn-controller.log">
Instance "ovncontroller"
<Match>
Regex "Unreasonably long ([0-9]*)ms poll interval"
DSType "GaugeLast"
Type "count"
Instance "poll"
</Match>
</File>
{% endif %}
{%if inventory_hostname == groups['Controller'][0] or inventory_hostname == groups['Controller'][1] or inventory_hostname == groups['Controller'][2] %}
# REST API call counters, rendered on the first three hosts of the
# Controller group. Each log line matching "<METHOD> /v2.x" increments the
# counter for that HTTP method (CounterInc adds one per matching line).
# nova API (v2.1) requests, read from the httpd WSGI access log.
<File "/var/log/containers/httpd/nova-api/nova_api_wsgi_access.log">
Instance "nova_resources"
<Match>
Regex "POST /v2.1"
DSType "CounterInc"
Type "counter"
Instance "post"
</Match>
<Match>
Regex "PUT /v2.1"
DSType "CounterInc"
Type "counter"
Instance "put"
</Match>
<Match>
Regex "GET /v2.1"
DSType "CounterInc"
Type "counter"
Instance "get"
</Match>
<Match>
Regex "DELETE /v2.1"
DSType "CounterInc"
Type "counter"
Instance "delete"
</Match>
</File>
# neutron API (v2.0) requests, read from the neutron server log.
<File "/var/log/containers/neutron/server.log">
Instance "neutron_resources"
<Match>
Regex "POST /v2.0"
DSType "CounterInc"
Type "counter"
Instance "post"
</Match>
<Match>
Regex "PUT /v2.0"
DSType "CounterInc"
Type "counter"
Instance "put"
</Match>
<Match>
Regex "GET /v2.0"
DSType "CounterInc"
Type "counter"
Instance "get"
</Match>
<Match>
Regex "DELETE /v2.0"
DSType "CounterInc"
Type "counter"
Instance "delete"
</Match>
</File>
{% endif %}
# Tail httpd request time
{%if apache_controller_collectd_request_time %}

View File

@ -0,0 +1,82 @@
# Installed by Browbeat Ansible Installer
# Config type: {{config_type}}
#
# Reduced compute-node configuration: only the OVN controller poll-interval
# metric is collected (plus the optional OVS flows script), so that a large
# number of compute nodes does not saturate the metrics backend.

# Interval default is 10s
Interval {{collectd_interval}}

# Hostname for this machine, if not defined, use gethostname(2) system call
{% if not collectd_prometheus %}
Hostname "{{inventory_hostname}}"
{% endif %}
{% if collectd_prometheus %}
Hostname "{{prometheus_prefix}}_{{inventory_hostname}}"
{% endif %}

# Loaded Plugins:
LoadPlugin "logfile"
<Plugin "logfile">
  File "/var/log/collectd.log"
  LogLevel "info"
  PrintSeverity true
  Timestamp true
</Plugin>

LoadPlugin write_graphite
LoadPlugin tail
LoadPlugin unixsock
# The network and exec plugins are configured further down; collectd ignores
# a <Plugin> block whose plugin has not been loaded, so load them whenever
# the matching feature is enabled.
{% if collectd_prometheus %}
LoadPlugin network
{% endif %}
{% if ovs_flows_monitoring %}
LoadPlugin exec
{% endif %}

# Open unix domain socket for collectdctl
<Plugin unixsock>
  SocketFile "/var/run/collectd-unixsock"
  SocketGroup "collectd"
  SocketPerms "0770"
  DeleteSocket true
</Plugin>

# Graphite Host Configuration
{% if collectd_graphite %}
<Plugin write_graphite>
  <Carbon>
    Host "{{graphite_host}}"
    Port "{{collectd_write_graphite_port}}"
    Prefix "{{graphite_prefix}}."
    Protocol "tcp"
    LogSendErrors true
    StoreRates true
    AlwaysAppendDS false
    EscapeCharacter "_"
  </Carbon>
</Plugin>
{% endif %}

# Prometheus Host Configuration
{% if collectd_prometheus %}
<Plugin network>
  Server "{{prometheus_host}}" "{{prometheus_port}}"
</Plugin>
{% endif %}

{% if ovs_flows_monitoring %}
<Plugin exec>
  Exec "heat-admin:heat-admin" "/usr/local/bin/ovs_flows.sh"
</Plugin>
{% endif %}

# Tail plugin configuration
<Plugin "tail">
  # Capture the millisecond value from each
  # "Unreasonably long <N>ms poll interval" line as a gauge
  # (GaugeLast keeps the last value matched in the interval).
  <File "/var/log/containers/openvswitch/ovn-controller.log">
    Instance "ovncontroller"
    <Match>
      Regex "Unreasonably long ([0-9]*)ms poll interval"
      DSType "GaugeLast"
      Type "count"
      Instance "poll"
    </Match>
  </File>
</Plugin>

# Include other collectd configuration files
Include "/etc/collectd.d"

View File

@ -69,6 +69,19 @@
template:
src: "{{ config_type }}.collectd.conf.j2"
dest: "{{ browbeat_containers_path }}/collectd-openstack/config/collectd.conf"
when: "config_type != 'compute'"
# Compute nodes get exactly one of the two templates below, selected by
# ovn_compute_collectd_plugin. The flag is passed through "| bool" so a
# string value (e.g. "-e ovn_compute_collectd_plugin=false") is not
# treated as truthy.
- name: Configure compute collectd.conf
  template:
    src: "{{ config_type }}.collectd.conf.j2"
    dest: "{{ browbeat_containers_path }}/collectd-openstack/config/collectd.conf"
  when: "config_type == 'compute' and not (ovn_compute_collectd_plugin | bool)"

# Reduced configuration that only collects OVN statistics on computes.
# It must render the dedicated ovn_compute template; using
# "{{ config_type }}.collectd.conf.j2" here would deploy the full compute
# configuration and make the flag a no-op.
- name: Configure OVN-only compute collectd.conf
  template:
    src: "ovn_compute.collectd.conf.j2"
    dest: "{{ browbeat_containers_path }}/collectd-openstack/config/collectd.conf"
  when: "config_type == 'compute' and (ovn_compute_collectd_plugin | bool)"
- name: Build and Run container
block:

View File

@ -204,6 +204,15 @@ PreCacheChain "PreCache"
# Tail plugin configuration
<Plugin "tail">
# OVN controller: capture the millisecond value from each
# "Unreasonably long <N>ms poll interval" log line as a gauge
# (GaugeLast keeps the last value matched in the interval).
<File "/var/log/containers/openvswitch/ovn-controller.log">
Instance "ovncontroller"
<Match>
Regex "Unreasonably long ([0-9]*)ms poll interval"
DSType "GaugeLast"
Type "count"
Instance "poll"
</Match>
</File>
{% for item in collectd_logs[config_type] %}
<File "{{ item.log_path }}">

View File

@ -38,7 +38,7 @@ LoadPlugin cpu
{% if not collectd_prometheus %}
LoadPlugin conntrack
{% endif %}
{% if keystone_overcloud_collectd_plugin %}
{% if keystone_overcloud_collectd_plugin or osp_resources_collectd_plugin %}
{%if inventory_hostname == groups['Controller'][0] %}
LoadPlugin dbi
{% endif %}
@ -152,6 +152,214 @@ PreCacheChain "PreCache"
</Plugin>
{% endif %}
{% endif %}
{% if osp_resources_collectd_plugin %}
{%if inventory_hostname == groups['Controller'][0] %}
# OSP resource counts via the dbi plugin. Each <Query> runs a single
# "select count(*)" statement and reports the named column as a gauge.
# Rendered only on the first host of the Controller group; the other
# controllers get the one-line note in the else branch below.
<Plugin dbi>
# --- nova database: non-deleted instances and instance faults ---
<Query instances_count>
Statement "select count(*) as instances from instances where deleted=0"
<Result>
Type gauge
InstancePrefix "instances"
ValuesFrom "instances"
</Result>
</Query>
<Query instance_faults_count>
Statement "select count(*) as instance_faults from instance_faults"
<Result>
Type gauge
InstancePrefix "instance_faults"
ValuesFrom "instance_faults"
</Result>
</Query>
# Connects as root through the local MySQL unix socket.
<Database "nova">
Driver "mysql"
DriverOption "host" "localhost"
DriverOption "dbname" "nova"
DriverOption "username" "root"
DriverOption "password" "{{mysql_root_password.stdout}}"
DriverOption "mysql_unix_socket" "/var/lib/mysql/mysql.sock"
Query instances_count
Query instance_faults_count
</Database>
# --- ovs_neutron database: networks, subnets, ports (split by
# device_owner), floating IPs, routers, router ports (split by port_type),
# security groups and rules, trunks and subports ---
<Query network_count>
Statement "select count(*) as network from networks"
<Result>
Type gauge
InstancePrefix "network"
ValuesFrom "network"
</Result>
</Query>
<Query subnet_count>
Statement "select count(*) as subnet from subnets"
<Result>
Type gauge
InstancePrefix "subnet"
ValuesFrom "subnet"
</Result>
</Query>
# Ports that are neither DHCP ports nor nova compute ports.
<Query port_others_count>
Statement "select count(*) as port_others from ports where device_owner not in ('network:dhcp','compute:nova')"
<Result>
Type gauge
InstancePrefix "port_others"
ValuesFrom "port_others"
</Result>
</Query>
<Query port_dhcp_count>
Statement "select count(*) as port_dhcp from ports where device_owner='network:dhcp'"
<Result>
Type gauge
InstancePrefix "port_dhcp"
ValuesFrom "port_dhcp"
</Result>
</Query>
<Query port_compute_count>
Statement "select count(*) as port_compute from ports where device_owner='compute:nova'"
<Result>
Type gauge
InstancePrefix "port_compute"
ValuesFrom "port_compute"
</Result>
</Query>
<Query floatingips_count>
Statement "select count(*) as floatingips from floatingips"
<Result>
Type gauge
InstancePrefix "floatingips"
ValuesFrom "floatingips"
</Result>
</Query>
<Query router_count>
Statement "select count(*) as router from routers"
<Result>
Type gauge
InstancePrefix "router"
ValuesFrom "router"
</Result>
</Query>
<Query router_iface_count>
Statement "select count(*) as router_iface from routerports where port_type='network:router_interface'"
<Result>
Type gauge
InstancePrefix "router_iface"
ValuesFrom "router_iface"
</Result>
</Query>
<Query router_gw_count>
Statement "select count(*) as router_gw from routerports where port_type='network:router_gateway'"
<Result>
Type gauge
InstancePrefix "router_gw"
ValuesFrom "router_gw"
</Result>
</Query>
<Query sg_count>
Statement "select count(*) as sg from securitygroups"
<Result>
Type gauge
InstancePrefix "sg"
ValuesFrom "sg"
</Result>
</Query>
<Query sgr_count>
Statement "select count(*) as sgr from securitygrouprules"
<Result>
Type gauge
InstancePrefix "sgr"
ValuesFrom "sgr"
</Result>
</Query>
<Query trunk_count>
Statement "select count(*) as trunk from trunks"
<Result>
Type gauge
InstancePrefix "trunk"
ValuesFrom "trunk"
</Result>
</Query>
<Query subports_count>
Statement "select count(*) as subports from subports"
<Result>
Type gauge
InstancePrefix "subports"
ValuesFrom "subports"
</Result>
</Query>
# Connects as root through the local MySQL unix socket.
<Database "ovs_neutron">
Driver "mysql"
DriverOption "host" "localhost"
DriverOption "dbname" "ovs_neutron"
DriverOption "username" "root"
DriverOption "password" "{{mysql_root_password.stdout}}"
DriverOption "mysql_unix_socket" "/var/lib/mysql/mysql.sock"
Query network_count
Query port_others_count
Query subnet_count
Query port_dhcp_count
Query port_compute_count
Query floatingips_count
Query router_count
Query router_iface_count
Query router_gw_count
Query sg_count
Query sgr_count
Query trunk_count
Query subports_count
</Database>
# --- octavia database: load balancers, amphorae, pools and members ---
<Query load_balancer_count>
Statement "select count(*) as load_balancer from load_balancer"
<Result>
Type gauge
InstancePrefix "load_balancer"
ValuesFrom "load_balancer"
</Result>
</Query>
<Query amphora_count>
Statement "select count(*) as amphora from amphora"
<Result>
Type gauge
InstancePrefix "amphora"
ValuesFrom "amphora"
</Result>
</Query>
<Query pool_count>
Statement "select count(*) as pool from pool"
<Result>
Type gauge
InstancePrefix "pool"
ValuesFrom "pool"
</Result>
</Query>
<Query member_count>
Statement "select count(*) as member from member"
<Result>
Type gauge
InstancePrefix "member"
ValuesFrom "member"
</Result>
</Query>
# Connects as root through the local MySQL unix socket.
<Database "octavia">
Driver "mysql"
DriverOption "host" "localhost"
DriverOption "dbname" "octavia"
DriverOption "username" "root"
DriverOption "password" "{{mysql_root_password.stdout}}"
DriverOption "mysql_unix_socket" "/var/lib/mysql/mysql.sock"
Query load_balancer_count
Query amphora_count
Query pool_count
Query member_count
</Database>
</Plugin>
{% else %}
# Neutron Nova Octavia Count plugin installed and enabled on {{groups['Controller'][0]}}
{% endif %}
{% endif %}
{% if keystone_overcloud_collectd_plugin %}
{%if inventory_hostname == groups['Controller'][0] %}
@ -497,6 +705,104 @@ PreCacheChain "PreCache"
# Tail plugin configuration
<Plugin "tail">
{%if inventory_hostname == groups['Controller'][0] or inventory_hostname == groups['Controller'][1] or inventory_hostname == groups['Controller'][2] %}
# OVN poll-interval tracking, rendered on the first three hosts of the
# Controller group. For the NB DB, SB DB, northd and ovn-controller logs,
# the millisecond value of each "Unreasonably long <N>ms poll interval"
# line is captured as a gauge (GaugeLast keeps the last value matched).
<File "/var/log/containers/openvswitch/ovsdb-server-nb.log">
Instance "ovnnbdb"
<Match>
Regex "Unreasonably long ([0-9]*)ms poll interval"
DSType "GaugeLast"
Type "count"
Instance "poll"
</Match>
</File>
<File "/var/log/containers/openvswitch/ovsdb-server-sb.log">
Instance "ovnsbdb"
<Match>
Regex "Unreasonably long ([0-9]*)ms poll interval"
DSType "GaugeLast"
Type "count"
Instance "poll"
</Match>
</File>
<File "/var/log/containers/openvswitch/ovn-northd.log">
Instance "ovnnorthd"
<Match>
Regex "Unreasonably long ([0-9]*)ms poll interval"
DSType "GaugeLast"
Type "count"
Instance "poll"
</Match>
</File>
<File "/var/log/containers/openvswitch/ovn-controller.log">
Instance "ovncontroller"
<Match>
Regex "Unreasonably long ([0-9]*)ms poll interval"
DSType "GaugeLast"
Type "count"
Instance "poll"
</Match>
</File>
{% endif %}
{%if inventory_hostname == groups['Controller'][0] or inventory_hostname == groups['Controller'][1] or inventory_hostname == groups['Controller'][2] %}
# REST API call counters, rendered on the first three hosts of the
# Controller group. Each log line matching "<METHOD> /v2.x" increments the
# counter for that HTTP method (CounterInc adds one per matching line).
# nova API (v2.1) requests, read from the httpd WSGI access log.
<File "/var/log/containers/httpd/nova-api/nova_api_wsgi_access.log">
Instance "nova_resources"
<Match>
Regex "POST /v2.1"
DSType "CounterInc"
Type "counter"
Instance "post"
</Match>
<Match>
Regex "PUT /v2.1"
DSType "CounterInc"
Type "counter"
Instance "put"
</Match>
<Match>
Regex "GET /v2.1"
DSType "CounterInc"
Type "counter"
Instance "get"
</Match>
<Match>
Regex "DELETE /v2.1"
DSType "CounterInc"
Type "counter"
Instance "delete"
</Match>
</File>
# neutron API (v2.0) requests, read from the neutron server log.
<File "/var/log/containers/neutron/server.log">
Instance "neutron_resources"
<Match>
Regex "POST /v2.0"
DSType "CounterInc"
Type "counter"
Instance "post"
</Match>
<Match>
Regex "PUT /v2.0"
DSType "CounterInc"
Type "counter"
Instance "put"
</Match>
<Match>
Regex "GET /v2.0"
DSType "CounterInc"
Type "counter"
Instance "get"
</Match>
<Match>
Regex "DELETE /v2.0"
DSType "CounterInc"
Type "counter"
Instance "delete"
</Match>
</File>
{% endif %}
{% if 'rabbitmq-server' in collectd_logs[config_type] | items2dict(key_name='instance',value_name='log_path') %}
<File "{{ collectd_logs[config_type] | items2dict(key_name='instance',value_name='log_path') | json_query('"rabbitmq-server"') }}">
Instance "rabbitmq"

View File

@ -0,0 +1,72 @@
# Installed by Browbeat Ansible Installer
# Config type: {{config_type}}
#
# Reduced compute-node configuration: only the OVN controller poll-interval
# metric is collected (plus the optional OVS flows script), so that a large
# number of compute nodes does not saturate the metrics backend.

# Interval default is 10s
Interval {{collectd_interval}}

# Hostname for this machine, if not defined, use gethostname(2) system call
{% if not collectd_prometheus %}
Hostname "{{inventory_hostname}}"
{% endif %}
{% if collectd_prometheus %}
Hostname "{{prometheus_prefix}}_{{inventory_hostname}}"
{% endif %}

# Loaded Plugins:
LoadPlugin "logfile"
<Plugin "logfile">
  File "/var/log/collectd.log"
  LogLevel "info"
  PrintSeverity true
  Timestamp true
</Plugin>

LoadPlugin write_graphite
LoadPlugin tail
# The network and exec plugins are configured further down; collectd ignores
# a <Plugin> block whose plugin has not been loaded, so load them whenever
# the matching feature is enabled.
{% if collectd_prometheus %}
LoadPlugin network
{% endif %}
{% if ovs_flows_monitoring %}
LoadPlugin exec
{% endif %}

# Graphite Host Configuration
{% if collectd_graphite %}
<Plugin write_graphite>
  <Carbon>
    Host "{{graphite_host}}"
    Port "{{collectd_write_graphite_port}}"
    Prefix "{{graphite_prefix}}."
    Protocol "tcp"
    LogSendErrors true
    StoreRates true
    AlwaysAppendDS false
    EscapeCharacter "_"
  </Carbon>
</Plugin>
{% endif %}

# Prometheus Host Configuration
{% if collectd_prometheus %}
<Plugin network>
  Server "{{prometheus_host}}" "{{prometheus_port}}"
</Plugin>
{% endif %}

{% if ovs_flows_monitoring %}
<Plugin exec>
  Exec stack "/usr/local/bin/ovs_flows.sh"
</Plugin>
{% endif %}

# Tail plugin configuration
<Plugin "tail">
  # Capture the millisecond value from each
  # "Unreasonably long <N>ms poll interval" line as a gauge
  # (GaugeLast keeps the last value matched in the interval).
  <File "/var/log/containers/openvswitch/ovn-controller.log">
    Instance "ovncontroller"
    <Match>
      Regex "Unreasonably long ([0-9]*)ms poll interval"
      DSType "GaugeLast"
      Type "count"
      Instance "poll"
    </Match>
  </File>
</Plugin>

# Include other collectd configuration files
Include "/etc/collectd.d"

View File

@ -9,6 +9,7 @@
{% set ovsagent_groups = ['controller', 'compute', '*'] %}
{% set rabbitmq_groups = ['undercloud', 'controller', '*'] %}
{% set swift_stat_groups = ['controller', '*'] %}
{% set controller_groups = ['controller', '*'] %}
---
dashboard:
title: {{item.process_list_name}} General System Performance
@ -159,4 +160,10 @@ dashboard:
{% include 'partials/neutron_resources.yaml' %}
{% endif %}
{% if item.template_node_type in controller_groups %}
{% include 'partials/osp_resources.yaml' %}
{% endif %}
{% include 'partials/ovn_metrics.yaml' %}
{% include 'partials/tail.yaml' %}

View File

@ -0,0 +1,76 @@
# 'OSP Resources' dashboard row: plots the resource counts published by the
# collectd dbi plugin (dbi-<database>.gauge-<name>) and the REST API request
# counters published by the collectd tail plugin
# (tail-<instance>.counter-<method>).
- title: OSP Resources
  collapse: true
  height: 200px
  showTitle: true
  panels:
    # Every target below corresponds to a dbi query that the controller
    # collectd configuration defines. The former 'virtual_interfaces' target
    # was dropped because no dbi query publishes that series.
    - title: $Cloud - $Node - OSP DB resources
      type: graph
      legend:
        alignAsTable: true
        avg: false
        current: true
        max: true
        min: true
        rightSide: true
        show: true
        total: false
        values: true
      nullPointMode: 'null'
      targets:
        - target: alias($Cloud.$Node.dbi-nova.gauge-instances, 'instances')
        - target: alias($Cloud.$Node.dbi-nova.gauge-instance_faults, 'instance_faults')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-network, 'network')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-subnet, 'subnet')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-port_dhcp, 'port_dhcp')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-port_compute, 'port_compute')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-port_others, 'port_others')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-floatingips, 'floatingips')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-router, 'router')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-router_iface, 'router_iface')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-router_gw, 'router_gw')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-sg, 'sg')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-sgr, 'sgr')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-trunk, 'trunk')
        - target: alias($Cloud.$Node.dbi-ovs_neutron.gauge-subports, 'subports')
        - target: alias($Cloud.$Node.dbi-octavia.gauge-load_balancer, 'load_balancer')
        - target: alias($Cloud.$Node.dbi-octavia.gauge-amphora, 'amphora')
        - target: alias($Cloud.$Node.dbi-octavia.gauge-pool, 'pool')
        - target: alias($Cloud.$Node.dbi-octavia.gauge-member, 'member')
    # Instance faults on their own graph.
    - title: $Cloud - $Node - OSP Additional DB resources
      type: graph
      legend:
        alignAsTable: true
        avg: false
        current: true
        max: true
        min: true
        rightSide: true
        show: true
        total: false
        values: true
      nullPointMode: 'null'
      targets:
        - target: alias($Cloud.$Node.dbi-nova.gauge-instance_faults, 'instance_faults')
    # Per-method request counters for the nova and neutron REST APIs.
    - title: $Cloud - $Node - OSP REST API
      type: graph
      legend:
        alignAsTable: true
        avg: false
        current: true
        max: true
        min: true
        rightSide: true
        show: true
        total: false
        values: true
      nullPointMode: 'null'
      targets:
        - target: alias($Cloud.$Node.tail-nova_resources.counter-get, 'nova_get')
        - target: alias($Cloud.$Node.tail-nova_resources.counter-post, 'nova_post')
        - target: alias($Cloud.$Node.tail-nova_resources.counter-put, 'nova_put')
        - target: alias($Cloud.$Node.tail-nova_resources.counter-delete, 'nova_delete')
        - target: alias($Cloud.$Node.tail-neutron_resources.counter-get, 'neutron_get')
        - target: alias($Cloud.$Node.tail-neutron_resources.counter-post, 'neutron_post')
        - target: alias($Cloud.$Node.tail-neutron_resources.counter-put, 'neutron_put')
        - target: alias($Cloud.$Node.tail-neutron_resources.counter-delete, 'neutron_delete')

View File

@ -0,0 +1,23 @@
# 'OVN Metrics' dashboard row: plots the poll-interval values that the
# collectd tail plugin publishes as tail-<instance>.count-poll for the OVN
# NB DB, SB DB, northd and controller logs.
- title: OVN Metrics
collapse: true
height: 200px
showTitle: true
panels:
- title: $Cloud - $Node - OVN Metrics
type: graph
legend:
alignAsTable: true
avg: false
current: true
max: true
min: true
rightSide: true
show: true
total: false
values: true
nullPointMode: 'null'
targets:
- target: alias($Cloud.$Node.tail-ovnnbdb.count-poll, 'ovnnbdb_poll')
- target: alias($Cloud.$Node.tail-ovnsbdb.count-poll, 'ovnsbdb_poll')
- target: alias($Cloud.$Node.tail-ovnnorthd.count-poll, 'ovnnorthd_poll')
- target: alias($Cloud.$Node.tail-ovncontroller.count-poll, 'ovncontroller_poll')