From 32b7c23c36a2519863e38146d80ae1626a2b5e26 Mon Sep 17 00:00:00 2001 From: akrzos Date: Wed, 21 Jun 2017 10:37:40 -0400 Subject: [PATCH] Collectd Ceph-Storage Python Plugin Multiple collectd read callbacks in a single python plugin to grab the following stats: collectd-ceph-storage-cluster/gauge-total_avail collectd-ceph-storage-cluster/gauge-total_space collectd-ceph-storage-cluster/gauge-total_used collectd-ceph-storage-mon/gauge-number collectd-ceph-storage-mon/gauge-quorum collectd-ceph-storage-osd-(*)/gauge-apply_latency_ms collectd-ceph-storage-osd-(*)/gauge-commit_latency_ms collectd-ceph-storage-osd-(*)/gauge-kb_total collectd-ceph-storage-osd-(*)/gauge-kb_used collectd-ceph-storage-osd-(*)/gauge-num_snap_trimming collectd-ceph-storage-osd-(*)/gauge-snap_trim_queue_len collectd-ceph-storage-osd/gauge-down collectd-ceph-storage-osd/gauge-in collectd-ceph-storage-osd/gauge-out collectd-ceph-storage-osd/gauge-up collectd-ceph-storage-pg/gauge-active collectd-ceph-storage-pg/gauge-clean collectd-ceph-storage-pg/gauge-scrubbing collectd-ceph-storage-pool-(pool name)/gauge-bytes_used collectd-ceph-storage-pool-(pool name)/gauge-kb_used collectd-ceph-storage-pool-(pool name)/gauge-objects collectd-ceph-storage-pool-(pool name)/gauge-pg_num collectd-ceph-storage-pool-(pool name)/gauge-pgp_num collectd-ceph-storage-pool-(pool name)/gauge-read_bytes_sec collectd-ceph-storage-pool-(pool name)/gauge-read_op_per_sec collectd-ceph-storage-pool-(pool name)/gauge-size collectd-ceph-storage-pool-(pool name)/gauge-write_bytes_sec collectd-ceph-storage-pool-(pool name)/gauge-write_op_per_sec collectd-ceph-storage-pool/gauge-number Change-Id: Ie61bb79650d96aee1420d0e29f5bbd180ed2a4b5 --- ansible/install/group_vars/all.yml | 12 + .../collectd-openstack/defaults/main.yml | 12 + .../files/collectd_ceph_storage.py | 324 +++++ .../roles/collectd-openstack/tasks/main.yml | 23 +- .../templates/controller.collectd.conf.j2 | 27 +- .../files/cloud_ceph_monitoring.json | 1130 
++++++++++++++++- ...enstack_general_system_performance.json.j2 | 1110 ++++++++++++++++ .../graphite/files/storage-aggregation.conf | 22 + 8 files changed, 2637 insertions(+), 23 deletions(-) create mode 100644 ansible/install/roles/collectd-openstack/files/collectd_ceph_storage.py diff --git a/ansible/install/group_vars/all.yml b/ansible/install/group_vars/all.yml index ec51e4ff2..fef865958 100644 --- a/ansible/install/group_vars/all.yml +++ b/ansible/install/group_vars/all.yml @@ -141,6 +141,18 @@ apache_controller_collectd_request_time: false # Ceph plugin ######################## # Overcloud Controller +# Python plugin is preferred (At the Current Moment) +ceph_controller_collectd_radosbench_plugin: false +ceph_controller_collectd_radosbench_interval: 30 +ceph_controller_collectd_mon_plugin: false +ceph_controller_collectd_mon_interval: 10 +ceph_controller_collectd_osd_plugin: false +ceph_controller_collectd_osd_interval: 10 +ceph_controller_collectd_pg_plugin: false +ceph_controller_collectd_pg_interval: 10 +ceph_controller_collectd_pool_plugin: false +ceph_controller_collectd_pool_interval: 10 +# Collectd provided Ceph plugins ceph_controller_collectd_plugin: false ceph_storage_collectd_plugin: false diff --git a/ansible/install/roles/collectd-openstack/defaults/main.yml b/ansible/install/roles/collectd-openstack/defaults/main.yml index 45b54331d..80afa63b3 100644 --- a/ansible/install/roles/collectd-openstack/defaults/main.yml +++ b/ansible/install/roles/collectd-openstack/defaults/main.yml @@ -38,6 +38,18 @@ apache_controller_collectd_request_time: false # Ceph plugin ######################## # Overcloud Controller +# Python plugin is preferred (At the Current Moment) +ceph_controller_collectd_radosbench_plugin: false +ceph_controller_collectd_radosbench_interval: 30 +ceph_controller_collectd_mon_plugin: false +ceph_controller_collectd_mon_interval: 10 +ceph_controller_collectd_osd_plugin: false +ceph_controller_collectd_osd_interval: 10 
+ceph_controller_collectd_pg_plugin: false +ceph_controller_collectd_pg_interval: 10 +ceph_controller_collectd_pool_plugin: false +ceph_controller_collectd_pool_interval: 10 +# Collectd provided Ceph plugins ceph_controller_collectd_plugin: false ceph_storage_collectd_plugin: false diff --git a/ansible/install/roles/collectd-openstack/files/collectd_ceph_storage.py b/ansible/install/roles/collectd-openstack/files/collectd_ceph_storage.py new file mode 100644 index 000000000..0235b07fd --- /dev/null +++ b/ansible/install/roles/collectd-openstack/files/collectd_ceph_storage.py @@ -0,0 +1,324 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Collectd python plugin to read ceph storage stats from ceph command line for +an OpenStack Cloud. 
class CollectdCephStorage(object):
    """Collectd python plugin that reports Ceph stats read from the CLI.

    One instance holds the parsed plugin configuration and provides one read
    callback per stat family (rados bench, mon, osd, pg, pool). Every metric
    is dispatched as a ``gauge`` under the ``collectd-ceph-storage`` plugin.
    """

    # Config key -> attribute holding the on/off switch of a read callback.
    _FLAG_KEYS = {
        'CephRadosBench': 'ceph_rados_bench',
        'CephMONStats': 'ceph_mon_stats',
        'CephOSDStats': 'ceph_osd_stats',
        'CephPGStats': 'ceph_pg_stats',
        'CephPoolStats': 'ceph_pool_stats',
    }

    # Config key -> attribute holding the interval of a read callback.
    _INTERVAL_KEYS = {
        'CephRadosBenchInterval': 'ceph_rados_bench_interval',
        'CephMONStatsInterval': 'ceph_mon_stats_interval',
        'CephOSDStatsInterval': 'ceph_osd_stats_interval',
        'CephPGStatsInterval': 'ceph_pg_stats_interval',
        'CephPoolStatsInterval': 'ceph_pool_stats_interval',
    }

    def __init__(self):
        """Set defaults: every callback disabled, no cluster name."""
        self.ceph_cluster = None
        self.ceph_rados_bench = False
        self.ceph_rados_bench_interval = 60
        self.ceph_mon_stats = False
        self.ceph_mon_stats_interval = 10
        self.ceph_osd_stats = False
        self.ceph_osd_stats_interval = 10
        self.ceph_pg_stats = False
        self.ceph_pg_stats_interval = 10
        self.ceph_pool_stats = False
        self.ceph_pool_stats_interval = 10

    def configure_callback(self, config):
        """Parse the plugin config and register the enabled read callbacks.

        Args:
            config: collectd config node; each child carries a ``key`` and a
                ``values`` sequence whose first item is the setting.
        """
        for node in config.children:
            val = str(node.values[0])
            if node.key in self._FLAG_KEYS:
                # Case-insensitive so 'True', 'true' and 'TRUE' all enable.
                setattr(self, self._FLAG_KEYS[node.key],
                        val.lower() == 'true')
            elif node.key in self._INTERVAL_KEYS:
                # int(float()) also accepts values rendered as "10.0".
                setattr(self, self._INTERVAL_KEYS[node.key],
                        int(float(val)))
            elif node.key == 'CephCluster':
                self.ceph_cluster = val
            else:
                collectd.warning(
                    'collectd-ceph-storage: Unknown config key: {}'
                    .format(node.key))

        if not self.ceph_cluster:
            collectd.warning('collectd-ceph-storage: CephCluster Undefined')

        # (enabled, log message, callback, interval, registration name)
        callbacks = (
            (self.ceph_rados_bench, 'Registered Ceph Rados Bench',
             self.read_ceph_rados_bench, self.ceph_rados_bench_interval,
             'ceph-rados-bench'),
            (self.ceph_mon_stats, 'Registered Ceph Mon',
             self.read_ceph_mon, self.ceph_mon_stats_interval,
             'ceph-monitor'),
            (self.ceph_osd_stats, 'Registered Ceph OSD',
             self.read_ceph_osd, self.ceph_osd_stats_interval,
             'ceph-osd'),
            (self.ceph_pg_stats, 'Registered Ceph PG',
             self.read_ceph_pg, self.ceph_pg_stats_interval,
             'ceph-pg'),
            (self.ceph_pool_stats, 'Registered Ceph Pool',
             self.read_ceph_pool, self.ceph_pool_stats_interval,
             'ceph-pool'),
        )
        for enabled, message, callback, interval, name in callbacks:
            if enabled:
                collectd.info(message)
                collectd.register_read(callback, interval, name=name)

    def dispatch_value(self, plugin_instance, type_instance, value, interval):
        """Dispatch a single gauge value to collectd.

        Args:
            plugin_instance: e.g. ``'mon'``, ``'osd-3'`` or ``'pool-rbd'``.
            type_instance: name of the metric within the plugin instance.
            value: numeric value to report.
            interval: interval attached to the dispatched metric.
        """
        metric = collectd.Values()
        metric.plugin = 'collectd-ceph-storage'
        metric.interval = interval
        metric.type = 'gauge'
        metric.plugin_instance = plugin_instance
        metric.type_instance = type_instance
        metric.values = [value]
        metric.dispatch()

    def _ceph_command(self, *args):
        """Return the argv for a JSON-formatted ``ceph`` sub-command.

        ``--cluster`` is appended only when a cluster name is configured, so
        an unset CephCluster falls back to the CLI default instead of
        placing ``None`` in the argument list.
        """
        command = ['ceph'] + list(args) + ['-f', 'json']
        if self.ceph_cluster:
            command += ['--cluster', self.ceph_cluster]
        return command

    def _read_json(self, command):
        """Run ``command`` and return its decoded JSON output, or None.

        None is returned when the command fails or its output is not valid
        JSON; the failure is logged so the read callback can simply skip
        this cycle.
        """
        ran, output = self.run_command(command)
        if not ran:
            return None
        try:
            return json.loads(output)
        except ValueError as exc:
            collectd.error(
                'collectd-ceph-storage: {} returned invalid JSON: {}'
                .format(command, exc))
            return None

    def read_ceph_rados_bench(self):
        """Runs "rados bench" and collects latencies reported."""
        rados_bench_ran, output = self.run_command(
            ['timeout', '30s', 'rados', '-p', 'rbd', 'bench', '10',
             'write', '-t', '1', '-b', '65536', '2>/dev/null', '|',
             'grep', '-i', 'latency', '|', 'awk',
             '\'{print 1000*$3}\''], False)
        if not rados_bench_ran:
            return

        # The pipeline is expected to print four numbers, one per line, in
        # the order avg, stddev, max, min. Anything else (for example empty
        # output after a timeout) is skipped instead of raising.
        try:
            latencies = [float(line) for line in output.split('\n')[:4]]
        except ValueError:
            latencies = []
        if len(latencies) != 4:
            collectd.warning(
                'collectd-ceph-storage: unexpected rados bench output: {!r}'
                .format(output))
            return

        names = ('avg_latency', 'stddev_latency', 'max_latency',
                 'min_latency')
        for type_instance, value in zip(names, latencies):
            self.dispatch_value(
                'cluster', type_instance, value,
                self.ceph_rados_bench_interval)

    def read_ceph_mon(self):
        """Reads stats from "ceph mon dump" command."""
        json_data = self._read_json(self._ceph_command('mon', 'dump'))
        if json_data is None:
            return

        self.dispatch_value(
            'mon', 'number', len(json_data['mons']),
            self.ceph_mon_stats_interval)
        self.dispatch_value(
            'mon', 'quorum', len(json_data['quorum']),
            self.ceph_mon_stats_interval)

    def read_ceph_osd(self):
        """Reads stats from "ceph osd dump" command."""
        json_data = self._read_json(self._ceph_command('osd', 'dump'))
        if json_data is None:
            return

        interval = self.ceph_osd_stats_interval
        pools = json_data['pools']
        self.dispatch_value('pool', 'number', len(pools), interval)

        for pool in pools:
            pool_name = 'pool-{}'.format(pool['pool_name'])
            self.dispatch_value(pool_name, 'size', pool['size'], interval)
            self.dispatch_value(
                pool_name, 'pg_num', pool['pg_num'], interval)
            self.dispatch_value(
                pool_name, 'pgp_num', pool['pg_placement_num'], interval)

        osds = json_data['osds']
        osds_up = sum(1 for osd in osds if osd['up'] == 1)
        osds_in = sum(1 for osd in osds if osd['in'] == 1)

        self.dispatch_value('osd', 'up', osds_up, interval)
        self.dispatch_value('osd', 'down', len(osds) - osds_up, interval)
        self.dispatch_value('osd', 'in', osds_in, interval)
        self.dispatch_value('osd', 'out', len(osds) - osds_in, interval)

    def read_ceph_pg(self):
        """Reads stats from "ceph pg dump" command."""
        json_data = self._read_json(self._ceph_command('pg', 'dump'))
        if json_data is None:
            return

        interval = self.ceph_pg_stats_interval

        # A PG state such as "active+clean" counts once for each component.
        pgs = {}
        for pg in json_data['pg_stats']:
            for state in pg['state'].split('+'):
                pgs[state] = pgs.get(state, 0) + 1

        for state in pgs:
            self.dispatch_value('pg', state, pgs[state], interval)

        for osd in json_data['osd_stats']:
            osd_id = 'osd-{}'.format(osd['osd'])
            self.dispatch_value(osd_id, 'kb_used', osd['kb_used'], interval)
            self.dispatch_value(osd_id, 'kb_total', osd['kb'], interval)
            self.dispatch_value(
                osd_id, 'snap_trim_queue_len', osd['snap_trim_queue_len'],
                interval)
            self.dispatch_value(
                osd_id, 'num_snap_trimming', osd['num_snap_trimming'],
                interval)
            self.dispatch_value(
                osd_id, 'apply_latency_ms',
                osd['fs_perf_stat']['apply_latency_ms'], interval)
            self.dispatch_value(
                osd_id, 'commit_latency_ms',
                osd['fs_perf_stat']['commit_latency_ms'], interval)

    def read_ceph_pool(self):
        """Reads stats from "ceph osd pool" and "ceph df" commands."""
        interval = self.ceph_pool_stats_interval
        # Both commands go through _ceph_command so the configured cluster
        # is honoured here exactly as in the mon/osd/pg readers.
        json_stats_data = self._read_json(
            self._ceph_command('osd', 'pool', 'stats'))
        json_df_data = self._read_json(self._ceph_command('df'))

        if json_stats_data is not None:
            for pool in json_stats_data:
                pool_key = 'pool-{}'.format(pool['pool_name'])
                io_rate = pool['client_io_rate']
                # Rates missing from the output are reported as 0.
                for stat in (
                        'read_bytes_sec', 'write_bytes_sec',
                        'read_op_per_sec', 'write_op_per_sec'):
                    self.dispatch_value(
                        pool_key, stat, io_rate.get(stat, 0), interval)

        if json_df_data is not None:
            for pool in json_df_data['pools']:
                pool_key = 'pool-{}'.format(pool['name'])
                for stat in ('bytes_used', 'kb_used', 'objects'):
                    self.dispatch_value(
                        pool_key, stat, pool['stats'].get(stat, 0),
                        interval)

            stats = json_df_data['stats']
            if 'total_bytes' in stats:
                # ceph 0.84+
                self.dispatch_value(
                    'cluster', 'total_space', int(stats['total_bytes']),
                    interval)
                self.dispatch_value(
                    'cluster', 'total_used', int(stats['total_used_bytes']),
                    interval)
                self.dispatch_value(
                    'cluster', 'total_avail',
                    int(stats['total_avail_bytes']), interval)
            else:
                # ceph < 0.84: values are multiplied by 1024 to get bytes.
                self.dispatch_value(
                    'cluster', 'total_space',
                    int(stats['total_space']) * 1024.0, interval)
                self.dispatch_value(
                    'cluster', 'total_used',
                    int(stats['total_used']) * 1024.0, interval)
                self.dispatch_value(
                    'cluster', 'total_avail',
                    int(stats['total_avail']) * 1024.0, interval)

    def run_command(self, command, check_output=True):
        """Run a command for this collectd plugin.

        Args:
            command: list of command words.
            check_output: when True the list is executed directly and a
                non-zero exit status counts as a failure. When False the
                words are joined with spaces and run through the shell, so
                pipes and redirections inside ``command`` work; the exit
                status is ignored and whatever was written to stdout is
                returned.

        Returns:
            ``(True, output)`` with the output as text on success, or
            ``(False, None)`` when the command could not be run.
        """
        output = None
        try:
            if check_output:
                output = subprocess.check_output(
                    command, universal_newlines=True)
            else:
                # Replaces os.popen3, which no longer exists in Python 3.
                # The shell string is built only from words supplied by
                # this plugin. communicate() drains and closes both pipes.
                process = subprocess.Popen(
                    ' '.join(command), shell=True,
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    universal_newlines=True)
                output, _ = process.communicate()
        except Exception as exc:
            # Boundary with external tools: log and report failure rather
            # than letting the exception escape into collectd.
            collectd.error(
                'collectd-ceph-storage: {} exception: {}'.format(command, exc))
            collectd.error(
                'collectd-ceph-storage: {} traceback: {}'
                .format(command, traceback.format_exc()))
            return False, None

        if output is None:
            collectd.error(
                'collectd-ceph-storage: failed to {}: output is None'
                .format(command))
            return False, None
        return True, output
# - name: Install collectd rpms - yum: name={{ item }} state=present + yum: + name: "{{ item }}" + state: present become: true with_items: - collectd @@ -159,7 +161,19 @@ become: true when: "((gnocchi_status_controller_collectd_plugin == true and inventory_hostname == groups['controller'][0]) or ('undercloud' in group_names and {{gnocchi_status_undercloud_collectd_plugin}} == true))" -- name: Copy python plugins +- name: (All Nodes) Copy python plugins + copy: + src: "{{item.src}}" + dest: "{{item.dest}}" + owner: root + group: root + mode: 0755 + become: true + with_items: + - src: collectd_iostat_python.py + dest: /usr/local/bin/collectd_iostat_python.py + +- name: (Undercloud/Controller-0) Copy python plugins copy: src: "{{item.src}}" dest: "{{item.dest}}" @@ -172,8 +186,9 @@ dest: /usr/local/bin/collectd_gnocchi_status.py - src: collectd_rabbitmq_monitoring.py dest: /usr/local/bin/collectd_rabbitmq_monitoring.py - - src: collectd_iostat_python.py - dest: /usr/local/bin/collectd_iostat_python.py + - src: collectd_ceph_storage.py + dest: /usr/local/bin/collectd_ceph_storage.py + when: "('controller' in group_names and inventory_hostname == groups['controller'][0]) or ('undercloud' in group_names)" # Rabbitmq monitoring - name: Install pyrabbit diff --git a/ansible/install/roles/collectd-openstack/templates/controller.collectd.conf.j2 b/ansible/install/roles/collectd-openstack/templates/controller.collectd.conf.j2 index b7b9539c1..9d02c93bf 100644 --- a/ansible/install/roles/collectd-openstack/templates/controller.collectd.conf.j2 +++ b/ansible/install/roles/collectd-openstack/templates/controller.collectd.conf.j2 @@ -43,11 +43,9 @@ LoadPlugin memory LoadPlugin mysql LoadPlugin numa LoadPlugin processes -{%if gnocchi_status_controller_collectd_plugin or iostat_controller_collectd_plugin or rabbitmq_controller_collectd_plugin %} Globals true -{% endif %} LoadPlugin swap LoadPlugin tail LoadPlugin turbostat @@ -113,6 +111,31 @@ PreCacheChain "PreCache" +{% endif %} 
+{% endif %} +{% if ceph_controller_collectd_radosbench_plugin or ceph_controller_collectd_mon_plugin or ceph_controller_collectd_osd_plugin or ceph_controller_collectd_pg_plugin or ceph_controller_collectd_pool_plugin %} +{% if inventory_hostname == groups['controller'][0] %} + + + LogTraces true + Interactive false + ModulePath "/usr/local/bin/" + Import "collectd_ceph_storage" + + CephCluster "ceph" + + CephRadosBench {{ceph_controller_collectd_radosbench_plugin}} + CephRadosBenchInterval {{ceph_controller_collectd_radosbench_interval}} + CephMONStats {{ceph_controller_collectd_mon_plugin}} + CephMONStatsInterval {{ceph_controller_collectd_mon_interval}} + CephOSDStats {{ceph_controller_collectd_osd_plugin}} + CephOSDStatsInterval {{ceph_controller_collectd_osd_interval}} + CephPGStats {{ceph_controller_collectd_pg_plugin}} + CephPGStatsInterval {{ceph_controller_collectd_pg_interval}} + CephPoolStats {{ceph_controller_collectd_pool_plugin}} + CephPoolStatsInterval {{ceph_controller_collectd_pool_interval}} + + {% endif %} {% endif %} {% if keystone_overcloud_collectd_plugin %} diff --git a/ansible/install/roles/grafana-dashboards/files/cloud_ceph_monitoring.json b/ansible/install/roles/grafana-dashboards/files/cloud_ceph_monitoring.json index bd78bc772..788bd5528 100644 --- a/ansible/install/roles/grafana-dashboards/files/cloud_ceph_monitoring.json +++ b/ansible/install/roles/grafana-dashboards/files/cloud_ceph_monitoring.json @@ -525,6 +525,1038 @@ "title": "Ceph Process Metrics", "titleSize": "h6" }, + { + "collapse": true, + "height": "200", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.collectd-ceph-storage-mon.*, 'gauge-', ''), 3)" + }, + { + "refId": "B", + "target": "alias($Cloud.$Node.collectd-ceph-storage-pool.gauge-number, 'Pools')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Mons & Pool Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.collectd-ceph-storage-osd.*, 'gauge-', ''), 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + 
] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.collectd-ceph-storage-pg.*, 'gauge-', ''), 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "PGs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.collectd-ceph-storage-cluster.gauge-total_*, 'gauge-', ''), 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Space", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": 
"graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bits", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 0, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias($Cloud.$Node.collectd-ceph-storage-cluster.gauge-min_latency, 'Min')" + }, + { + "refId": "B", + "target": "alias($Cloud.$Node.collectd-ceph-storage-cluster.gauge-avg_latency, 'Avg')" + }, + { + "refId": "C", + "target": "alias($Cloud.$Node.collectd-ceph-storage-cluster.gauge-max_latency, 'Max')" + }, + { + "refId": "D", + "target": "alias($Cloud.$Node.collectd-ceph-storage-cluster.gauge-stddev_latency, 'stddev')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": 
true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.$Pool.gauge-objects, 'collectd-ceph-storage-pool-', ''), 2)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool Objects", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-read_op_per_sec, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(scale(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-write_op_per_sec, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), -1), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool iops", + "tooltip": { + "shared": true, + 
"sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "iops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-read_bytes_sec, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(scale(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-write_bytes_sec, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), -1), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + 
"total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-pg_num, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), '_num', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(aliasSub(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-pgp_num, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), '_num', ''), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool PG/PGP Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.$Pool.gauge-size, 'collectd-ceph-storage-pool-', ''), 2)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + 
"type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-kb_total, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-kb_used, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Space", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bits", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 2, + 
"links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-apply_latency_ms, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-commit_latency_ms, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-num_snap_trimming, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-snap_trim_queue_len, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": 
"OSD Snap Trim", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + "id": 20, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.$Pool.gauge-bytes_used, 'collectd-ceph-storage-pool-', ''), 2)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool Bytes Used", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ceph Storage Python Plugins", + "titleSize": "h6" + }, { "collapse": true, "editable": true, @@ -535,7 +1567,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 7, + "id": 21, "legend": { "alignAsTable": true, "avg": false, @@ -625,7 +1657,7 @@ "bars": false, "datasource": 
null, "fill": 0, - "id": 8, + "id": 22, "legend": { "alignAsTable": true, "avg": false, @@ -707,7 +1739,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 9, + "id": 23, "legend": { "alignAsTable": true, "avg": false, @@ -789,7 +1821,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 10, + "id": 24, "legend": { "alignAsTable": true, "avg": false, @@ -871,7 +1903,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 11, + "id": 25, "legend": { "alignAsTable": true, "avg": false, @@ -953,7 +1985,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 12, + "id": 26, "legend": { "alignAsTable": true, "avg": false, @@ -1035,7 +2067,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 13, + "id": 27, "legend": { "alignAsTable": true, "avg": false, @@ -1105,7 +2137,7 @@ "bars": false, "datasource": null, "fill": 1, - "id": 14, + "id": 28, "legend": { "alignAsTable": true, "avg": false, @@ -1192,7 +2224,7 @@ "bars": false, "datasource": null, "fill": 1, - "id": 15, + "id": 29, "legend": { "alignAsTable": true, "avg": false, @@ -1262,7 +2294,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 16, + "id": 30, "legend": { "alignAsTable": true, "avg": false, @@ -1342,7 +2374,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 17, + "id": 31, "legend": { "alignAsTable": true, "avg": false, @@ -1412,7 +2444,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 18, + "id": 32, "legend": { "alignAsTable": true, "avg": false, @@ -1497,7 +2529,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 19, + "id": 33, "legend": { "alignAsTable": true, "avg": false, @@ -1567,7 +2599,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 20, + "id": 34, "legend": { "alignAsTable": true, "avg": false, @@ -1637,7 +2669,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 21, + "id": 35, "legend": { "alignAsTable": true, "avg": false, @@ -1724,7 +2756,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 22, + "id": 36, 
"legend": { "alignAsTable": true, "avg": false, @@ -1811,7 +2843,7 @@ "bars": false, "datasource": null, "fill": 0, - "id": 23, + "id": 37, "legend": { "alignAsTable": true, "avg": false, @@ -1952,6 +2984,70 @@ "tagsQuery": "", "type": "query", "useTags": false + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "OSD", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + } + ], + "query": "$Cloud.$Node.collectd-ceph-storage-osd-*", + "refresh": 1, + "regex": "", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "Pool", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + } + ], + "query": "$Cloud.$Node.collectd-ceph-storage-pool-*", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false } ] }, diff --git a/ansible/install/roles/grafana-dashboards/templates/openstack_general_system_performance.json.j2 b/ansible/install/roles/grafana-dashboards/templates/openstack_general_system_performance.json.j2 index ecec2c94d..93da73100 100644 --- a/ansible/install/roles/grafana-dashboards/templates/openstack_general_system_performance.json.j2 +++ b/ansible/install/roles/grafana-dashboards/templates/openstack_general_system_performance.json.j2 @@ -3898,6 +3898,1052 @@ }, {% endif %} {% if item.template_node_type in cephmon_groups %} + { + "collapse": true, + "height": "200", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif 
%} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.collectd-ceph-storage-mon.*, 'gauge-', ''), 3)" + }, + { + "refId": "B", + "target": "alias($Cloud.$Node.collectd-ceph-storage-pool.gauge-number, 'Pools')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Mons & Pool Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.collectd-ceph-storage-osd.*, 'gauge-', ''), 3)" + } + ], + "thresholds": [], + "timeFrom": null, + 
"timeShift": null, + "title": "OSDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.collectd-ceph-storage-pg.*, 'gauge-', ''), 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "PGs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + 
"show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.collectd-ceph-storage-cluster.gauge-total_*, 'gauge-', ''), 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Space", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bits", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 0, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias($Cloud.$Node.collectd-ceph-storage-cluster.gauge-min_latency, 'Min')" + }, + { + "refId": "B", + "target": "alias($Cloud.$Node.collectd-ceph-storage-cluster.gauge-avg_latency, 'Avg')" + }, + { + "refId": "C", + "target": "alias($Cloud.$Node.collectd-ceph-storage-cluster.gauge-max_latency, 'Max')" + }, + { + "refId": "D", + "target": 
"alias($Cloud.$Node.collectd-ceph-storage-cluster.gauge-stddev_latency, 'stddev')" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.$Pool.gauge-objects, 'collectd-ceph-storage-pool-', ''), 2)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool Objects", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% 
endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-read_op_per_sec, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(scale(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-write_op_per_sec, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), -1), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool iops", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "iops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": 
"aliasByNode(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-read_bytes_sec, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(scale(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-write_bytes_sec, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), -1), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-pg_num, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), '_num', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(aliasSub(aliasSub(aliasSub($Cloud.$Node.$Pool.gauge-pgp_num, 'collectd-ceph-storage-pool-', ''), 'gauge-', ''), '_num', ''), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool PG/PGP Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", 
+ "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.$Pool.gauge-size, 'collectd-ceph-storage-pool-', ''), 2)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": 
"null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-kb_total, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-kb_used, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Space", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bits", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-apply_latency_ms, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-commit_latency_ms, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": 
null, + "timeShift": null, + "title": "OSD Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + "fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-num_snap_trimming, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + }, + { + "refId": "B", + "target": "aliasByNode(aliasSub(aliasSub($Cloud.$Node.$OSD.gauge-snap_trim_queue_len, 'collectd-ceph-storage-osd-', ''), 'gauge-', ''), 2, 3)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Snap Trim", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": null, + 
"fill": 0, + {% if vars.update({'panel_idx': (vars.panel_idx + 1)}) %} {% endif %} + "id": {{vars.panel_idx}}, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "aliasByNode(aliasSub($Cloud.$Node.$Pool.gauge-bytes_used, 'collectd-ceph-storage-pool-', ''), 2)" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool Bytes Used", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ceph Storage Python Plugins", + "titleSize": "h6" + }, { "title": "Ceph Mon", "height": "200px", @@ -7945,6 +8991,70 @@ "refresh": true, "type": "query" }, + { + "allValue": null, + "current": { + "tags": [], + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "OSD", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + } + ], + "query": "$Cloud.$Node.collectd-ceph-storage-osd-*", + "refresh": 1, + "regex": "", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + 
"tags": [], + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "Pool", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + } + ], + "query": "$Cloud.$Node.collectd-ceph-storage-pool-*", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, { "allFormat": "glob", "current": { diff --git a/ansible/install/roles/graphite/files/storage-aggregation.conf b/ansible/install/roles/graphite/files/storage-aggregation.conf index 2b6d7260d..a1b70c2de 100644 --- a/ansible/install/roles/graphite/files/storage-aggregation.conf +++ b/ansible/install/roles/graphite/files/storage-aggregation.conf @@ -10,3 +10,25 @@ pattern = ^.*\.gnocchi_status\. xFilesFactor = 0.0 aggregationMethod = max + +# Similiar to above, the expectation is to run Ceph rados benchmarks at a +# rate slower than the first retention period. +[collectd-ceph-storage-cluster_gauge_max_latency] +pattern = ^.*\.collectd-ceph-storage-cluster\.gauge-max_latency +xFilesFactor = 0.0 +aggregationMethod = max + +[collectd-ceph-storage-cluster_gauge_avg_latency] +pattern = ^.*\.collectd-ceph-storage-cluster\.gauge-avg_latency +xFilesFactor = 0.0 +aggregationMethod = average + +[collectd-ceph-storage-cluster_gauge_min_latency] +pattern = ^.*\.collectd-ceph-storage-cluster\.gauge-min_latency +xFilesFactor = 0.0 +aggregationMethod = min + +[collectd-ceph-storage-cluster_gauge_stddev_latency] +pattern = ^.*\.collectd-ceph-storage-cluster\.gauge-stddev_latency +xFilesFactor = 0.0 +aggregationMethod = average