Grafana: Support multiple Ceph clusters with dashboards

This updates the Grafana Ceph dashboards to use templating to
determine which ceph-mgr to use for displaying Ceph-related
metrics. This required setting the appropriate labels on the
ceph-mgr service so that releases can be distinguished.

Change-Id: Id2eceacadc5b6366d7bc6668bc16ccf5ba878e4a
This commit is contained in:
Steve Wilkerson 2018-10-16 09:50:54 -05:00
parent a4111037b0
commit f3d8bda9d6
2 changed files with 81 additions and 49 deletions

View File

@ -22,6 +22,8 @@ apiVersion: v1
kind: Service kind: Service
metadata: metadata:
name: ceph-mgr name: ceph-mgr
labels:
{{ tuple $envAll "ceph" "manager" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
annotations: annotations:
{{- if .Values.monitoring.prometheus.enabled }} {{- if .Values.monitoring.prometheus.enabled }}
{{ tuple $prometheus_annotations | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }} {{ tuple $prometheus_annotations | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }}

View File

@ -3289,7 +3289,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: false show: false
targets: targets:
- expr: count(ceph_health_status) - expr: count(ceph_health_status{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
refId: A refId: A
@ -3355,7 +3355,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: false show: false
targets: targets:
- expr: ceph_mon_quorum_count - expr: ceph_mon_quorum_count{application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: '' legendFormat: ''
@ -3416,7 +3416,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: true show: true
targets: targets:
- expr: count(ceph_pool_max_avail) - expr: count(ceph_pool_max_avail{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: '' legendFormat: ''
@ -3477,7 +3477,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: true show: true
targets: targets:
- expr: ceph_cluster_total_bytes - expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: '' legendFormat: ''
@ -3538,7 +3538,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: true show: true
targets: targets:
- expr: ceph_cluster_total_used_bytes - expr: ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: '' legendFormat: ''
@ -3599,7 +3599,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: false show: false
targets: targets:
- expr: ceph_cluster_total_used_bytes/ceph_cluster_total_bytes - expr: ceph_cluster_total_used_bytes/ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: '' legendFormat: ''
@ -3665,7 +3665,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: false show: false
targets: targets:
- expr: count(ceph_osd_in) - expr: count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: '' legendFormat: ''
@ -3725,7 +3725,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: false show: false
targets: targets:
- expr: count(ceph_osd_metadata) - count(ceph_osd_in) - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: '' legendFormat: ''
@ -3785,7 +3785,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: false show: false
targets: targets:
- expr: sum(ceph_osd_up) - expr: sum(ceph_osd_up{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: '' legendFormat: ''
@ -3845,7 +3845,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: false show: false
targets: targets:
- expr: count(ceph_osd_metadata) - count(ceph_osd_up) - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_up{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: '' legendFormat: ''
@ -3905,7 +3905,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: true show: true
targets: targets:
- expr: avg(ceph_osd_numpg) - expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: '' legendFormat: ''
@ -3973,7 +3973,7 @@ conf:
stack: true stack: true
steppedLine: false steppedLine: false
targets: targets:
- expr: ceph_cluster_total_bytes - ceph_cluster_total_used_bytes - expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} - ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Available legendFormat: Available
@ -4060,13 +4060,13 @@ conf:
stack: true stack: true
steppedLine: false steppedLine: false
targets: targets:
- expr: sum(ceph_osd_op_w) - expr: sum(ceph_osd_op_w{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Write legendFormat: Write
refId: A refId: A
step: 60 step: 60
- expr: sum(ceph_osd_op_r) - expr: sum(ceph_osd_op_r{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Read legendFormat: Read
@ -4133,13 +4133,13 @@ conf:
stack: true stack: true
steppedLine: false steppedLine: false
targets: targets:
- expr: sum(ceph_osd_op_in_bytes) - expr: sum(ceph_osd_op_in_bytes{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Write legendFormat: Write
refId: A refId: A
step: 60 step: 60
- expr: sum(ceph_osd_op_out_bytes) - expr: sum(ceph_osd_op_out_bytes{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Read legendFormat: Read
@ -4214,7 +4214,7 @@ conf:
stack: true stack: true
steppedLine: false steppedLine: false
targets: targets:
- expr: ceph_cluster_total_objects - expr: ceph_cluster_total_objects{application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Total legendFormat: Total
@ -4282,37 +4282,37 @@ conf:
stack: true stack: true
steppedLine: false steppedLine: false
targets: targets:
- expr: sum(ceph_osd_numpg) - expr: sum(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Total legendFormat: Total
refId: A refId: A
step: 60 step: 60
- expr: sum(ceph_pg_active) - expr: sum(ceph_pg_active{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Active legendFormat: Active
refId: B refId: B
step: 60 step: 60
- expr: sum(ceph_pg_inconsistent) - expr: sum(ceph_pg_inconsistent{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Inconsistent legendFormat: Inconsistent
refId: C refId: C
step: 60 step: 60
- expr: sum(ceph_pg_creating) - expr: sum(ceph_pg_creating{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Creating legendFormat: Creating
refId: D refId: D
step: 60 step: 60
- expr: sum(ceph_pg_recovering) - expr: sum(ceph_pg_recovering{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Recovering legendFormat: Recovering
refId: E refId: E
step: 60 step: 60
- expr: sum(ceph_pg_down) - expr: sum(ceph_pg_down{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Down legendFormat: Down
@ -4380,19 +4380,19 @@ conf:
stack: true stack: true
steppedLine: false steppedLine: false
targets: targets:
- expr: sum(ceph_pg_degraded) - expr: sum(ceph_pg_degraded{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Degraded legendFormat: Degraded
refId: A refId: A
step: 60 step: 60
- expr: sum(ceph_pg_stale) - expr: sum(ceph_pg_stale{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Stale legendFormat: Stale
refId: B refId: B
step: 60 step: 60
- expr: sum(ceph_pg_undersized) - expr: sum(ceph_pg_undersized{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Undersized legendFormat: Undersized
@ -4450,6 +4450,16 @@ conf:
- 30d - 30d
templating: templating:
list: list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true - auto: true
auto_count: 10 auto_count: 10
auto_min: 1m auto_min: 1m
@ -4599,7 +4609,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: false show: false
targets: targets:
- expr: ceph_osd_up{ceph_daemon="osd.$osd"} - expr: ceph_osd_up{ceph_daemon="osd.$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
refId: A refId: A
@ -4672,7 +4682,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: false show: false
targets: targets:
- expr: ceph_osd_in{ceph_daemon="osd.$osd"} - expr: ceph_osd_in{ceph_daemon="osd.$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
refId: A refId: A
@ -4739,7 +4749,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: false show: false
targets: targets:
- expr: count(ceph_osd_metadata) - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
refId: A refId: A
@ -4807,13 +4817,13 @@ conf:
stack: true stack: true
steppedLine: false steppedLine: false
targets: targets:
- expr: ceph_osd_numpg{ceph_daemon=~"osd.$osd"} - expr: ceph_osd_numpg{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Number of PGs - {{ osd.$osd }} legendFormat: Number of PGs - {{ osd.$osd }}
refId: A refId: A
step: 60 step: 60
- expr: avg(ceph_osd_numpg) - expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Average Number of PGs in the Cluster legendFormat: Average Number of PGs in the Cluster
@ -4888,7 +4898,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: true show: true
targets: targets:
- expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd"})*100 - expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"})*100
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: '' legendFormat: ''
@ -4948,14 +4958,14 @@ conf:
stack: true stack: true
steppedLine: false steppedLine: false
targets: targets:
- expr: ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"} - expr: ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Used - {{ osd.$osd }} legendFormat: Used - {{ osd.$osd }}
metric: ceph_osd_used_bytes metric: ceph_osd_used_bytes
refId: A refId: A
step: 60 step: 60
- expr: ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd"} - ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"} - expr: ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"} - ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}
hide: false hide: false
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
@ -5024,7 +5034,7 @@ conf:
stack: false stack: false
steppedLine: false steppedLine: false
targets: targets:
- expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd"}) - expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Available - {{ osd.$osd }} legendFormat: Available - {{ osd.$osd }}
@ -5082,6 +5092,16 @@ conf:
- 30d - 30d
templating: templating:
list: list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true - auto: true
auto_count: 10 auto_count: 10
auto_min: 1m auto_min: 1m
@ -5140,7 +5160,7 @@ conf:
multi: false multi: false
name: osd name: osd
options: [] options: []
query: label_values(ceph_osd_metadata, id) query: label_values(ceph_osd_metadata{release_group="$ceph_cluster"}, id)
refresh: 1 refresh: 1
regex: '' regex: ''
type: query type: query
@ -5239,25 +5259,25 @@ conf:
stack: true stack: true
steppedLine: false steppedLine: false
targets: targets:
- expr: ceph_pool_max_avail{pool_id=~"$pool"} - expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Total - {{ $pool }} legendFormat: Total - {{ $pool }}
refId: A refId: A
step: 60 step: 60
- expr: ceph_pool_bytes_used{pool_id=~"$pool"} - expr: ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Used - {{ $pool }} legendFormat: Used - {{ $pool }}
refId: B refId: B
step: 60 step: 60
- expr: ceph_pool_max_avail{pool_id=~"$pool"} - ceph_pool_bytes_used{pool_id=~"$pool"} - expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} - ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Available - {{ $pool }} legendFormat: Available - {{ $pool }}
refId: C refId: C
step: 60 step: 60
- expr: ceph_pool_raw_bytes_used{pool_id=~"$pool"} - expr: ceph_pool_raw_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Raw - {{ $pool }} legendFormat: Raw - {{ $pool }}
@ -5333,7 +5353,7 @@ conf:
lineColor: rgb(31, 120, 193) lineColor: rgb(31, 120, 193)
show: false show: false
targets: targets:
- expr: (ceph_pool_bytes_used{pool_id=~"$pool"} / ceph_pool_max_avail{pool_id=~"$pool"}) - expr: (ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} / ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"})
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
refId: A refId: A
@ -5388,13 +5408,13 @@ conf:
stack: false stack: false
steppedLine: false steppedLine: false
targets: targets:
- expr: ceph_pool_objects{pool_id=~"$pool"} - expr: ceph_pool_objects{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Objects - {{ $pool_name }} legendFormat: Objects - {{ $pool_name }}
refId: A refId: A
step: 60 step: 60
- expr: ceph_pool_dirty{pool_id=~"$pool"} - expr: ceph_pool_dirty{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Dirty Objects - {{ $pool_name }} legendFormat: Dirty Objects - {{ $pool_name }}
@ -5462,13 +5482,13 @@ conf:
stack: true stack: true
steppedLine: false steppedLine: false
targets: targets:
- expr: irate(ceph_pool_rd{pool_id=~"$pool"}[3m]) - expr: irate(ceph_pool_rd{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Read - {{ $pool_name}} legendFormat: Read - {{ $pool_name}}
refId: B refId: B
step: 60 step: 60
- expr: irate(ceph_pool_wr{pool_id=~"$pool"}[3m]) - expr: irate(ceph_pool_wr{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Write - {{ $pool_name }} legendFormat: Write - {{ $pool_name }}
@ -5535,13 +5555,13 @@ conf:
stack: true stack: true
steppedLine: false steppedLine: false
targets: targets:
- expr: irate(ceph_pool_rd_bytes{pool_id="$pool"}[3m]) - expr: irate(ceph_pool_rd_bytes{pool_id="$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Read Bytes - {{ $pool_name }} legendFormat: Read Bytes - {{ $pool_name }}
refId: A refId: A
step: 60 step: 60
- expr: irate(ceph_pool_wr_bytes{pool_id="$pool"}[3m]) - expr: irate(ceph_pool_wr_bytes{pool_id="$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval" interval: "$interval"
intervalFactor: 1 intervalFactor: 1
legendFormat: Written Bytes - {{ $pool_name }} legendFormat: Written Bytes - {{ $pool_name }}
@ -5599,6 +5619,16 @@ conf:
- 30d - 30d
templating: templating:
list: list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true - auto: true
auto_count: 10 auto_count: 10
auto_min: 1m auto_min: 1m
@ -5657,7 +5687,7 @@ conf:
multi: false multi: false
name: pool name: pool
options: [] options: []
query: label_values(ceph_pool_objects, pool_id) query: label_values(ceph_pool_objects{release_group="$ceph_cluster"}, pool_id)
refresh: 1 refresh: 1
regex: '' regex: ''
type: query type: query
@ -5669,7 +5699,7 @@ conf:
multi: false multi: false
name: pool_name name: pool_name
options: [] options: []
query: label_values(ceph_pool_metadata{pool_id="[[pool]]" }, name) query: label_values(ceph_pool_metadata{release_group="$ceph_cluster",pool_id="[[pool]]" }, name)
refresh: 1 refresh: 1
regex: '' regex: ''
type: query type: query