Cleanup collectd-generic graphite playbook

* No need for a custom SELinux policy if the collectd_t domain is set to permissive
* Graphite dashboard defaults to graphite "cloud name"
* Added a step to the Monitor Host docs for monitoring the monitor host itself
* collectd.conf for Graphite fixes (collectd, carbon-relay, carbon-aggregator)
* collectd.conf now ships to localhost automatically and uses "graphite" as the prefix
* Allow the collectd write_graphite port to be configured from playbooks,
  thus allowing for future use of carbon-relay
* Improve Graphite Carbon-cache metrics to expose multiple carbon-cache metrics
* Update Graphite/Baremetal/Guest Dashboards Disk/DF portion of Dashboard

Change-Id: I9286bee506045fa89d5c9919327d3e4f77fb07f3
This commit is contained in:
akrzos 2017-06-05 11:49:32 -04:00
parent c30e8b950c
commit 03c7976685
18 changed files with 401 additions and 88 deletions

View File

@ -95,6 +95,8 @@ connmon_host: 192.0.2.1
collectd_from_epel: true
# Interval in seconds
collectd_interval: 10
# Typically 2003 (carbon-cache) or 2013 (Graphite with carbon-relay)
collectd_write_graphite_port: 2003
# Run collectd on specific openstack nodes:
collectd_undercloud: true
collectd_controller: true

View File

@ -1,29 +1,30 @@
---
#
# Install/run collectd for browbeat (Generic)
# Install/run Collectd for Browbeat (Generic)
#
#
# (akrzos) The yum module works at this point because the EPEL repo already exists;
# the EPEL rpm has been installed by this point.
#
- name: Install collectd rpms
yum: name={{ item }} state=present
yum:
name: "{{ item }}"
state: present
become: true
with_items: "{{collectd_packages[config_type]}}"
- name: Configure collectd.conf
template:
src={{config_type}}.collectd.conf.j2
dest=/etc/collectd.conf
owner=root
group=root
mode="0644"
- name: Install package that provides semanage
yum:
name: policycoreutils-python
state: present
become: true
- name: Configure collectd.conf
template:
src: "{{config_type}}.collectd.conf.j2"
dest: /etc/collectd.conf
owner: root
group: root
mode: 0644
become: true
#
# Configure selinux bits
#
- name: Check for collectd permissive
shell: semodule -l | grep -q permissive_collectd_t
become: true
@ -31,41 +32,15 @@
ignore_errors: true
changed_when: false
# This command is not always found?
- name: Set permissive for collectd
command: semanage permissive -a collectd_t
become: true
when: collectd_permissive.rc != 0
ignore_errors: true
#
# Additional policy bits may be needed for exec
#
- name: Collectd policy customization
copy:
src=custom-collectd.pp
dest=/root/custom-collectd.pp
owner=root
group=root
mode="0644"
become: true
- name: Check for collectd custom
command: semodule -l | grep -q custom-collectd
become: true
register: collectd_custom
ignore_errors: true
changed_when: false
- name: Set custom policy for collectd
command: semodule -i /root/custom-collectd.pp
become: true
when: collectd_custom.rc != 0
#
# Start collectd service
#
- name: Setup collectd service
service: name=collectd state=restarted enabled=true
service:
name: collectd
state: restarted
enabled: true
become: true

View File

@ -42,7 +42,7 @@ LoadPlugin uptime
<Plugin write_graphite>
<Carbon>
Host "{{graphite_host}}"
Port "2003"
Port "{{collectd_write_graphite_port}}"
Prefix "{{graphite_prefix}}."
Protocol "tcp"
LogSendErrors true
@ -53,11 +53,40 @@ LoadPlugin uptime
</Plugin>
<Plugin df>
FSType anon_inodefs
FSType bdev
FSType cgroup
FSType cpuset
FSType debugfs
FSType devpts
FSType devtmpfs
FSType ecryptfs
FSType fuse
FSType fusectl
FSType hugetlbfs
FSType mqueue
FSType nfs
FSType nfs4
FSType nfsd
FSType pipefs
FSType proc
FSType pstore
FSType ramfs
#FSType rootfs
FSType rpc_pipefs
FSType securityfs
FSType sockfs
FSType sysfs
FSType tmpfs
FSType vboxsf
IgnoreSelected true
ValuesPercentage true
ReportInodes true
</Plugin>
<Plugin disk>
Disk "/^[hsv]d[a-z]+[0-9]?$/"
Disk "/^nvm/"
IgnoreSelected false
</Plugin>

View File

@ -4,9 +4,6 @@
# Interval default is 10s
Interval {{collectd_interval}}
# Hostname for this machine, if not defined, use gethostname(2) system call
Hostname "{{inventory_hostname}}"
# Loaded Plugins:
LoadPlugin "logfile"
<Plugin "logfile">
@ -41,9 +38,9 @@ LoadPlugin uptime
# Graphite Host Configuration
<Plugin write_graphite>
<Carbon>
Host "{{graphite_host}}"
Port "2003"
Prefix "{{graphite_prefix}}."
Host "localhost"
Port "{{collectd_write_graphite_port}}"
Prefix "graphite."
Protocol "tcp"
LogSendErrors true
StoreRates true
@ -53,20 +50,51 @@ LoadPlugin uptime
</Plugin>
<Plugin df>
FSType anon_inodefs
FSType bdev
FSType cgroup
FSType cpuset
FSType debugfs
FSType devpts
FSType devtmpfs
FSType ecryptfs
FSType fuse
FSType fusectl
FSType hugetlbfs
FSType mqueue
FSType nfs
FSType nfs4
FSType nfsd
FSType pipefs
FSType proc
FSType pstore
FSType ramfs
#FSType rootfs
FSType rpc_pipefs
FSType securityfs
FSType sockfs
FSType sysfs
FSType tmpfs
FSType vboxsf
IgnoreSelected true
ValuesPercentage true
ReportInodes true
</Plugin>
<Plugin disk>
Disk "/^[hsv]d[a-z]+[0-9]?$/"
Disk "/^nvm/"
IgnoreSelected false
</Plugin>
<Plugin processes>
ProcessMatch "carbon-cache" "python.+carbon-cache"
ProcessMatch "carbon-relay" "python.+carbon-relay"
ProcessMatch "carbon-aggregator" "python.+carbon-aggregator"
Process "grafana-server"
Process "httpd"
# Collect on collectd process
ProcessMatch "collectd" "/usr/sbin/collectd.+-C.+/etc/collectd.conf"
Process "collectd"
</Plugin>
<Plugin swap>

View File

@ -40,7 +40,7 @@ LoadPlugin uptime
<Plugin write_graphite>
<Carbon>
Host "{{graphite_host}}"
Port "2003"
Port "{{collectd_write_graphite_port}}"
Prefix "{{graphite_prefix}}."
Protocol "tcp"
LogSendErrors true
@ -51,11 +51,40 @@ LoadPlugin uptime
</Plugin>
<Plugin df>
FSType anon_inodefs
FSType bdev
FSType cgroup
FSType cpuset
FSType debugfs
FSType devpts
FSType devtmpfs
FSType ecryptfs
FSType fuse
FSType fusectl
FSType hugetlbfs
FSType mqueue
FSType nfs
FSType nfs4
FSType nfsd
FSType pipefs
FSType proc
FSType pstore
FSType ramfs
#FSType rootfs
FSType rpc_pipefs
FSType securityfs
FSType sockfs
FSType sysfs
FSType tmpfs
FSType vboxsf
IgnoreSelected true
ValuesPercentage true
ReportInodes true
</Plugin>
<Plugin disk>
Disk "/^[hsv]d[a-z]+[0-9]?$/"
Disk "/^nvm/"
IgnoreSelected false
</Plugin>

View File

@ -5,6 +5,8 @@
collectd_from_epel: true
# Interval in seconds
collectd_interval: 10
# Typically 2003 (carbon-cache) or 2013 (Graphite with carbon-relay)
collectd_write_graphite_port: 2003
# Run collectd on specific openstack nodes:
collectd_undercloud: true
collectd_controller: true

View File

@ -71,7 +71,7 @@ PreCacheChain "PreCache"
<Plugin write_graphite>
<Carbon>
Host "{{graphite_host}}"
Port "2003"
Port "{{collectd_write_graphite_port}}"
Prefix "{{graphite_prefix}}."
Protocol "tcp"
LogSendErrors true

View File

@ -74,7 +74,7 @@ PreCacheChain "PreCache"
<Plugin write_graphite>
<Carbon>
Host "{{graphite_host}}"
Port "2003"
Port "{{collectd_write_graphite_port}}"
Prefix "{{graphite_prefix}}."
Protocol "tcp"
LogSendErrors true

View File

@ -71,7 +71,7 @@ PreCacheChain "PreCache"
<Plugin write_graphite>
<Carbon>
Host "{{graphite_host}}"
Port "2003"
Port "{{collectd_write_graphite_port}}"
Prefix "{{graphite_prefix}}."
Protocol "tcp"
LogSendErrors true

View File

@ -85,7 +85,7 @@ PreCacheChain "PreCache"
<Plugin write_graphite>
<Carbon>
Host "{{graphite_host}}"
Port "2003"
Port "{{collectd_write_graphite_port}}"
Prefix "{{graphite_prefix}}."
Protocol "tcp"
LogSendErrors true

View File

@ -71,7 +71,7 @@ PreCacheChain "PreCache"
<Plugin write_graphite>
<Carbon>
Host "{{graphite_host}}"
Port "2003"
Port "{{collectd_write_graphite_port}}"
Prefix "{{graphite_prefix}}."
Protocol "tcp"
LogSendErrors true

View File

@ -87,7 +87,7 @@ PreCacheChain "PreCache"
<Plugin write_graphite>
<Carbon>
Host "{{graphite_host}}"
Port "2003"
Port "{{collectd_write_graphite_port}}"
Prefix "{{graphite_prefix}}."
Protocol "tcp"
LogSendErrors true

View File

@ -1243,7 +1243,16 @@
"percent",
"ms"
]
},
}
],
"showTitle": true,
"title": "Disk"
},
{
"collapse": true,
"editable": true,
"height": "200px",
"panels": [
{
"aliasColors": {},
"bars": false,
@ -1263,7 +1272,7 @@
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
{% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %}
{% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %}
"id": {{vars.panel_idx}},
"isNew": true,
"legend": {
@ -1297,8 +1306,7 @@
"targets": [
{
"refId": "A",
"target": "aliasByNode(aliasSub($Cloud.$Node.df-*.percent_bytes-used, 'df-', ''), 2)",
"textEditor": false
"target": "aliasByNode(aliasSub($Cloud.$Node.df-*.percent_bytes-used, 'df-', ''), 2)"
}
],
"timeFrom": null,
@ -1315,10 +1323,81 @@
"percent",
"short"
]
},
{
"aliasColors": {},
"bars": false,
"datasource": null,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"leftLogBase": 1,
"leftMax": 100,
"leftMin": 0,
"rightLogBase": 1,
"rightMax": null,
"rightMin": null,
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
{% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %}
"id": {{vars.panel_idx}},
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "Write",
"transform": "negative-Y"
}
],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"refId": "A",
"target": "aliasByNode(aliasSub($Cloud.$Node.df-*.percent_inodes-used, 'df-', ''), 2)"
}
],
"timeFrom": null,
"timeShift": null,
"title": "$Cloud - $Node - Inodes % Used",
"tooltip": {
"shared": true,
"value_type": "individual"
},
"type": "graph",
"x-axis": true,
"y-axis": true,
"y_formats": [
"percent",
"short"
]
}
],
"showTitle": true,
"title": "Disk"
"title": "DF"
},
{
"collapse": true,

View File

@ -1243,7 +1243,16 @@
"percent",
"ms"
]
},
}
],
"showTitle": true,
"title": "Disk"
},
{
"collapse": true,
"editable": true,
"height": "200px",
"panels": [
{
"aliasColors": {},
"bars": false,
@ -1263,7 +1272,7 @@
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
{% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %}
{% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %}
"id": {{vars.panel_idx}},
"isNew": true,
"legend": {
@ -1297,8 +1306,7 @@
"targets": [
{
"refId": "A",
"target": "aliasByNode(aliasSub($Cloud.$Node.df-*.percent_bytes-used, 'df-', ''), 2)",
"textEditor": false
"target": "aliasByNode(aliasSub($Cloud.$Node.df-*.percent_bytes-used, 'df-', ''), 2)"
}
],
"timeFrom": null,
@ -1315,10 +1323,81 @@
"percent",
"short"
]
},
{
"aliasColors": {},
"bars": false,
"datasource": null,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"leftLogBase": 1,
"leftMax": 100,
"leftMin": 0,
"rightLogBase": 1,
"rightMax": null,
"rightMin": null,
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
{% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %}
"id": {{vars.panel_idx}},
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "Write",
"transform": "negative-Y"
}
],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"refId": "A",
"target": "aliasByNode(aliasSub($Cloud.$Node.df-*.percent_inodes-used, 'df-', ''), 2)"
}
],
"timeFrom": null,
"timeShift": null,
"title": "$Cloud - $Node - Inodes % Used",
"tooltip": {
"shared": true,
"value_type": "individual"
},
"type": "graph",
"x-axis": true,
"y-axis": true,
"y_formats": [
"percent",
"short"
]
}
],
"showTitle": true,
"title": "Disk"
"title": "DF"
},
{
"collapse": true,
@ -2504,7 +2583,7 @@
{% endfor %}
{# End Loop over per-process options here #}
{
"title": "Carbon Metrics",
"title": "Carbon Cache Metrics",
"height": "250px",
"editable": true,
"collapse": true,
@ -2568,15 +2647,15 @@
"targets": [
{
"refId": "A",
"target": "aliasByNode(carbon.agents.*.metricsReceived, 3)"
"target": "aliasByNode(aliasSub(carbon.agents.*.metricsReceived, '[a-zA-Z0-9_]+-', ''), 2, 3)"
},
{
"refId": "B",
"target": "aliasByNode(carbon.agents.*.committedPoints, 3)"
"target": "aliasByNode(aliasSub(carbon.agents.*.committedPoints, '[a-zA-Z0-9_]+-', ''), 2, 3)"
},
{
"refId": "C",
"target": "aliasByNode(carbon.agents.*.updateOperations, 3)"
"target": "aliasByNode(aliasSub(carbon.agents.*.updateOperations, '[a-zA-Z0-9_]+-', ''), 2, 3)"
}
],
"aliasColors": {},
@ -2643,7 +2722,7 @@
"targets": [
{
"refId": "A",
"target": "aliasByNode(carbon.agents.*.avgUpdateTime, 3)"
"target": "aliasByNode(aliasSub(carbon.agents.*.avgUpdateTime, '[a-zA-Z0-9_]+-', ''), 2, 3)"
}
],
"aliasColors": {},
@ -2710,7 +2789,7 @@
"targets": [
{
"refId": "A",
"target": "aliasByNode(carbon.agents.*.creates, 3)"
"target": "aliasByNode(aliasSub(carbon.agents.*.creates, '[a-zA-Z0-9_]+-', ''), 2, 3)"
}
],
"aliasColors": {},
@ -2777,7 +2856,7 @@
"targets": [
{
"refId": "A",
"target": "aliasByNode(carbon.agents.*.cache.*, 4)"
"target": "aliasByNode(aliasSub(carbon.agents.*.cache.*, '[a-zA-Z0-9_]+-', ''), 2, 3)"
}
],
"aliasColors": {},
@ -2844,11 +2923,11 @@
"targets": [
{
"refId": "A",
"target": "aliasByNode(carbon.agents.*.blacklistMatches, 3)"
"target": "aliasByNode(aliasSub(carbon.agents.*.blacklistMatches, '[a-zA-Z0-9_]+-', ''), 2, 3)"
},
{
"refId": "B",
"target": "aliasByNode(carbon.agents.*.whitelistRejects, 3)"
"target": "aliasByNode(aliasSub(carbon.agents.*.whitelistRejects, '[a-zA-Z0-9_]+-', ''), 2, 3)"
}
],
"aliasColors": {},
@ -2915,7 +2994,7 @@
"targets": [
{
"refId": "C",
"target": "aliasByNode(carbon.agents.*.errors, 3)"
"target": "aliasByNode(aliasSub(carbon.agents.*.errors, '[a-zA-Z0-9_]+-', ''), 2, 3)"
}
],
"aliasColors": {},
@ -3011,8 +3090,8 @@
{
"allFormat": "glob",
"current": {
"text": "None",
"value": "None"
"text": "graphite",
"value": "graphite"
},
"datasource": null,
"includeAll": false,

View File

@ -1243,7 +1243,16 @@
"percent",
"ms"
]
},
}
],
"showTitle": true,
"title": "Disk"
},
{
"collapse": true,
"editable": true,
"height": "200px",
"panels": [
{
"aliasColors": {},
"bars": false,
@ -1263,7 +1272,7 @@
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
{% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %}
{% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %}
"id": {{vars.panel_idx}},
"isNew": true,
"legend": {
@ -1297,8 +1306,7 @@
"targets": [
{
"refId": "A",
"target": "aliasByNode(aliasSub($Cloud.$Node.df-*.percent_bytes-used, 'df-', ''), 2)",
"textEditor": false
"target": "aliasByNode(aliasSub($Cloud.$Node.df-*.percent_bytes-used, 'df-', ''), 2)"
}
],
"timeFrom": null,
@ -1315,10 +1323,81 @@
"percent",
"short"
]
},
{
"aliasColors": {},
"bars": false,
"datasource": null,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"leftLogBase": 1,
"leftMax": 100,
"leftMin": 0,
"rightLogBase": 1,
"rightMax": null,
"rightMin": null,
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
{% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %}
"id": {{vars.panel_idx}},
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "Write",
"transform": "negative-Y"
}
],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"refId": "A",
"target": "aliasByNode(aliasSub($Cloud.$Node.df-*.percent_inodes-used, 'df-', ''), 2)"
}
],
"timeFrom": null,
"timeShift": null,
"title": "$Cloud - $Node - Inodes % Used",
"tooltip": {
"shared": true,
"value_type": "individual"
},
"type": "graph",
"x-axis": true,
"y-axis": true,
"y_formats": [
"percent",
"short"
]
}
],
"showTitle": true,
"title": "Disk"
"title": "DF"
},
{
"collapse": true,

View File

@ -553,11 +553,15 @@ per_process_panels:
- name: "Summerized"
processes:
- carbon-cache
- carbon-relay
- carbon-aggregator
- grafana-server
- httpd
- name: "Carbon"
processes:
- carbon-cache
- carbon-relay
- carbon-aggregator
- name: "Grafana"
processes:
- grafana-server

View File

@ -492,6 +492,13 @@ variables - graphite_host and grafana_host
[root@dhcp23-93 ansible]# ansible-playbook -i hosts install/grafana-dashboards.yml -e 'cloud_dashboards=false'
...
10. (Optional) Monitor the Monitor Host
::
[root@dhcp23-93 ansible]# ansible-playbook -i hosts install/collectd-generic.yml --tags graphite
...
Now navigate to http://monitoring-host-address:3000 to verify Grafana is
installed, the Graphite data source exists and custom dashboards are uploaded.