Steve Wilkerson cd4ec0b4b2 Grafana: Update Ceph dashboards for Mimic release
This updates the Ceph dashboards for Grafana, as some of the ceph
metrics have changed with the Mimic release.  This fixes issues
with the ceph OSD metrics that broke some Grafana panels, and also
removes the Ceph panel for displaying the number of monitors in
quorum, as that metric has been removed in Mimic

Change-Id: If6cbbfa7d2972ddd0e44b29a6c8277188d2d9ff0
2019-01-21 09:25:57 -06:00

16700 lines
428 KiB
YAML

# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for grafana
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
images:
tags:
grafana: docker.io/grafana/grafana:5.0.0
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.1
db_init: docker.io/openstackhelm/heat:newton
grafana_db_session_sync: docker.io/openstackhelm/heat:newton
helm_tests: docker.io/openstackhelm/heat:newton
image_repo_sync: docker.io/docker:17.07.0
pull_policy: IfNotPresent
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
labels:
grafana:
node_selector_key: openstack-control-plane
node_selector_value: enabled
job:
node_selector_key: openstack-control-plane
node_selector_value: enabled
test:
node_selector_key: openstack-control-plane
node_selector_value: enabled
pod:
user:
grafana:
uid: 104
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
mounts:
grafana:
init_container: null
grafana:
replicas:
grafana: 1
lifecycle:
upgrades:
deployments:
revision_history: 3
pod_replacement_strategy: RollingUpdate
rolling_update:
max_unavailable: 1
max_surge: 3
termination_grace_period:
grafana:
timeout: 600
resources:
enabled: false
jobs:
image_repo_sync:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
bootstrap:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
db_init:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
db_init_session:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
grafana_db_session_sync:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
tests:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
grafana:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
oslo_db:
namespace: null
auth:
admin:
username: root
password: password
user:
username: grafana
password: password
hosts:
default: mariadb
host_fqdn_override:
default: null
path: /grafana
scheme: mysql+pymysql
port:
mysql:
default: 3306
oslo_db_session:
namespace: null
auth:
admin:
username: root
password: password
user:
username: grafana_session
password: password
hosts:
default: mariadb
host_fqdn_override:
default: null
path: /grafana_session
scheme: mysql+pymysql
port:
mysql:
default: 3306
grafana:
name: grafana
namespace: null
auth:
admin:
username: admin
password: password
hosts:
default: grafana-dashboard
public: grafana
host_fqdn_override:
default: null
# NOTE(srwilkers): this chart supports TLS for fqdn over-ridden public
# endpoints using the following format:
# public:
# host: null
# tls:
# crt: null
# key: null
path:
default: null
scheme:
default: http
port:
grafana:
default: 3000
public: 80
monitoring:
name: prometheus
namespace: null
auth:
user:
username: admin
password: changeme
hosts:
default: prom-metrics
public: prometheus
host_fqdn_override:
default: null
path:
default: null
scheme:
default: http
port:
api:
default: 80
public: 80
ldap:
hosts:
default: ldap
auth:
admin:
bind_dn: "cn=admin,dc=cluster,dc=local"
password: password
host_fqdn_override:
default: null
path:
default: "ou=People,dc=cluster,dc=local"
scheme:
default: ldap
port:
ldap:
default: 389
dependencies:
dynamic:
common:
local_image_registry:
jobs:
- grafana-image-repo-sync
services:
- endpoint: node
service: local_image_registry
static:
db_init:
services:
- endpoint: internal
service: oslo_db
db_init_session:
services:
- endpoint: internal
service: oslo_db
db_session_sync:
jobs:
- grafana-db-init-session
services:
- endpoint: internal
service: oslo_db
grafana:
jobs:
- grafana-db-init
- grafana-db-session-sync
services:
- endpoint: internal
service: oslo_db
image_repo_sync:
services:
- endpoint: internal
service: local_image_registry
tests:
services:
- endpoint: internal
service: grafana
network:
grafana:
node_port:
enabled: false
port: 30902
ingress:
public: true
classes:
namespace: "nginx"
cluster: "nginx-cluster"
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
secrets:
oslo_db:
admin: grafana-db-admin
user: grafana-db-user
oslo_db_session:
admin: grafana-session-db-admin
user: grafana-session-db-user
tls:
grafana:
grafana:
public: grafana-tls-public
prometheus:
user: prometheus-user-creds
manifests:
configmap_bin: true
configmap_dashboards: true
configmap_etc: true
deployment: true
ingress: true
helm_tests: true
job_db_init: true
job_db_init_session: true
job_db_session_sync: true
job_image_repo_sync: true
network_policy: false
secret_db: true
secret_db_session: true
secret_admin_creds: true
secret_ingress_tls: true
secret_prom_creds: true
service: true
service_ingress: true
conf:
ldap:
config:
base_dns:
search: "dc=cluster,dc=local"
group_search: "ou=Groups,dc=cluster,dc=local"
filters:
search: "(uid=%s)"
group_search: "(&(objectclass=posixGroup)(memberUID=uid=%s,ou=People,dc=cluster,dc=local))"
template: |
verbose_logging = false
[[servers]]
host = "{{ tuple "ldap" "internal" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}"
port = {{ tuple "ldap" "internal" "ldap" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
use_ssl = false
start_tls = false
ssl_skip_verify = false
bind_dn = "{{ .Values.endpoints.ldap.auth.admin.bind_dn }}"
bind_password = '{{ .Values.endpoints.ldap.auth.admin.password }}'
search_filter = "{{ .Values.conf.ldap.config.filters.search }}"
search_base_dns = ["{{ .Values.conf.ldap.config.base_dns.search }}"]
group_search_filter = "{{ .Values.conf.ldap.config.filters.group_search }}"
group_search_base_dns = ["{{ .Values.conf.ldap.config.base_dns.group_search }}"]
[servers.attributes]
username = "uid"
surname = "sn"
member_of = "cn"
email = "mail"
[[servers.group_mappings]]
group_dn = "{{.Values.endpoints.ldap.auth.admin.bind_dn }}"
org_role = "Admin"
[[servers.group_mappings]]
group_dn = "*"
org_role = "Viewer"
provisioning:
dashboards:
apiVersion: 1
providers:
- name: 'osh-infra-dashboards'
orgId: 1
folder: ''
type: file
disableDeletion: false
editable: false
options:
path: /var/lib/grafana/dashboards
datasources:
#NOTE(srwilkers): The top key for each datasource (eg: monitoring) must
# map to the key name for the datasource's endpoint entry in the endpoints
# tree
monitoring:
name: prometheus
type: prometheus
access: proxy
orgId: 1
editable: true
basicAuth: true
grafana:
auth.ldap:
enabled: true
config_file: /etc/grafana/ldap.toml
paths:
data: /var/lib/grafana/data
plugins: /var/lib/grafana/plugins
provisioning: /var/lib/grafana/provisioning
server:
protocol: http
http_port: 3000
database:
type: mysql
session:
provider: mysql
provider_config: null
cookie_name: grafana_sess
cookie_secure: false
session_life_time: 86400
security:
admin_user: ${GF_SECURITY_ADMIN_USER}
admin_password: ${GF_SECURITY_ADMIN_PASSWORD}
cookie_username: grafana_user
cookie_remember_name: grafana_remember
login_remember_days: 7
users:
allow_sign_up: false
allow_org_create: false
auto_assign_org: true
default_theme: dark
log:
mode: console
level: info
grafana_net:
url: https://grafana.net
dashboards:
prometheus:
__inputs:
- name: prometheus
label: Prometheus
description: Prometheus which you want to monitor
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.6.0
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: panel
id: text
name: Text
version: ''
annotations:
list:
- builtIn: 1
datasource: "-- Grafana --"
enable: true
hide: true
iconColor: rgba(0, 211, 255, 1)
name: Annotations & Alerts
type: dashboard
- datasource: "$datasource"
enable: true
expr: count(sum(up{instance="$instance"}) by (instance) < 1)
hide: false
iconColor: rgb(250, 44, 18)
limit: 100
name: downage
showIn: 0
step: 30s
tagKeys: instance
textFormat: prometheus down
titleFormat: Downage
type: alert
- datasource: "$datasource"
enable: true
expr: sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m]))
by (instance)
hide: false
iconColor: "#fceaca"
limit: 100
name: Reload
showIn: 0
step: 5m
tagKeys: instance
tags: []
titleFormat: Reload
type: tags
description: Dashboard for monitoring of Prometheus v2.x.x
editable: true
gnetId: 3681
graphTooltip: 1
hideControls: false
id:
links:
- icon: info
tags: []
targetBlank: true
title: 'Dashboard''s Github '
tooltip: Github repo of this dashboard
type: link
url: https://github.com/FUSAKLA/Prometheus2-grafana-dashboard
- icon: doc
tags: []
targetBlank: true
title: Prometheus Docs
tooltip: ''
type: link
url: http://prometheus.io/docs/introduction/overview/
refresh: 5m
rows:
- collapse: false
height: 161
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- "#299c46"
- rgba(237, 129, 40, 0.89)
- "#bf1b00"
datasource: prometheus
decimals: 1
format: s
gauge:
maxValue: 1000000
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 41
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: time() - process_start_time_seconds{instance="$instance"}
format: time_series
instant: false
intervalFactor: 2
refId: A
thresholds: ''
title: Uptime
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- "#299c46"
- rgba(237, 129, 40, 0.89)
- "#bf1b00"
datasource: prometheus
format: short
gauge:
maxValue: 1000000
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 42
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: prometheus_tsdb_head_series{instance="$instance"}
format: time_series
instant: false
intervalFactor: 2
refId: A
thresholds: '500000,800000,1000000'
title: Total count of time series
type: singlestat
valueFontSize: 150%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- "#299c46"
- rgba(237, 129, 40, 0.89)
- "#d44a3a"
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 48
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: version
targets:
- expr: prometheus_build_info{instance="$instance"}
format: table
instant: true
intervalFactor: 2
refId: A
thresholds: ''
title: Version
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- "#299c46"
- rgba(237, 129, 40, 0.89)
- "#d44a3a"
datasource: prometheus
decimals: 2
format: ms
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 49
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: prometheus_tsdb_head_max_time{instance="$instance"} - prometheus_tsdb_head_min_time{instance="$instance"}
format: time_series
instant: true
intervalFactor: 2
refId: A
thresholds: ''
title: Actual head block length
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- content: <img src="https://cdn.worldvectorlogo.com/logos/prometheus.svg"/ height="140px">
height: ''
id: 50
links: []
mode: html
span: 1
title: ''
transparent: true
type: text
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- "#e6522c"
- rgba(237, 129, 40, 0.89)
- "#299c46"
datasource: prometheus
decimals: 1
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 52
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: '2'
format: time_series
intervalFactor: 2
refId: A
thresholds: '10,20'
title: ''
transparent: true
type: singlestat
valueFontSize: 200%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Header instance info
titleSize: h6
- collapse: false
height: '250'
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 15
legend:
avg: true
current: false
max: false
min: false
show: false
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: true
steppedLine: false
targets:
- expr: max(prometheus_engine_query_duration_seconds{instance="$instance"}) by
(instance, slice)
format: time_series
intervalFactor: 1
legendFormat: max duration for {{slice}}
metric: prometheus_local_storage_rushed_mode
refId: A
step: 900
thresholds: []
timeFrom:
timeShift:
title: Query elapsed time
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label: ''
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 17
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_tsdb_head_series_created_total{instance="$instance"}[$aggregation_interval]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: created on {{ instance }}
metric: prometheus_local_storage_maintain_series_duration_seconds_count
refId: A
step: 1800
- expr: sum(increase(prometheus_tsdb_head_series_removed_total{instance="$instance"}[$aggregation_interval]))
by (instance) * -1
format: time_series
intervalFactor: 2
legendFormat: removed on {{ instance }}
refId: B
thresholds: []
timeFrom:
timeShift:
title: Head series created/deleted
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 13
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: exceeded_sample_limit on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: A
step: 1800
- expr: sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: duplicate_timestamp on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: B
step: 1800
- expr: sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: out_of_bounds on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: C
step: 1800
- expr: sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: out_of_order on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: D
step: 1800
- expr: sum(increase(prometheus_rule_evaluation_failures_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: rule_evaluation_failure on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: G
step: 1800
- expr: sum(increase(prometheus_tsdb_compactions_failed_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: tsdb_compactions_failed on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: K
step: 1800
- expr: sum(increase(prometheus_tsdb_reloads_failures_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: tsdb_reloads_failures on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: L
step: 1800
- expr: sum(increase(prometheus_tsdb_head_series_not_found{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: head_series_not_found on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: N
step: 1800
- expr: sum(increase(prometheus_evaluator_iterations_missed_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: evaluator_iterations_missed on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: O
step: 1800
- expr: sum(increase(prometheus_evaluator_iterations_skipped_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: evaluator_iterations_skipped on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: P
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Prometheus errors
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Main info
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
description: ''
editable: true
error: false
fill: 1
grid: {}
id: 25
legend:
alignAsTable: true
avg: true
current: true
max: true
min: false
show: false
sort: max
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: prometheus_target_interval_length_seconds{instance="$instance",quantile="0.99"}
- 60
format: time_series
interval: 2m
intervalFactor: 1
legendFormat: "{{instance}}"
metric: ''
refId: A
step: 300
thresholds: []
timeFrom:
timeShift:
title: Scrape delay (counts with 1m scrape interval)
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
logBase: 1
max:
min:
show: true
- format: short
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 14
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: Queue length
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_evaluator_duration_seconds{instance="$instance"}) by (instance,
quantile)
format: time_series
intervalFactor: 2
legendFormat: Queue length
metric: prometheus_local_storage_indexing_queue_length
refId: B
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Rule evaulation duration
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min: '0'
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Scrape & rule duration
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 18
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(increase(http_requests_total{instance="$instance"}[$aggregation_interval]))
by (instance, handler) > 0
format: time_series
intervalFactor: 2
legendFormat: "{{ handler }} on {{ instance }}"
metric: ''
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Request count
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: none
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 16
legend:
avg: false
current: false
hideEmpty: true
hideZero: true
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: max(sum(http_request_duration_microseconds{instance="$instance"}) by (instance,
handler, quantile)) by (instance, handler) > 0
format: time_series
hide: false
intervalFactor: 2
legendFormat: "{{ handler }} on {{ instance }}"
refId: B
thresholds: []
timeFrom:
timeShift:
title: Request duration per handler
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: µs
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 19
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(increase(http_request_size_bytes{instance="$instance", quantile="0.99"}[$aggregation_interval]))
by (instance, handler) > 0
format: time_series
hide: false
intervalFactor: 2
legendFormat: "{{ handler }} in {{ instance }}"
refId: B
thresholds: []
timeFrom:
timeShift:
title: Request size by handler
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Allocated bytes: "#F9BA8F"
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max count collector: "#bf1b00"
Max count harvester: "#bf1b00"
Max to persist: "#3F6833"
RSS: "#890F02"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 8
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/Max.*/"
fill: 0
linewidth: 2
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_engine_queries{instance="$instance"}) by (instance, handler)
format: time_series
intervalFactor: 2
legendFormat: 'Current count '
metric: last
refId: A
step: 1800
- expr: sum(prometheus_engine_queries_concurrent_max{instance="$instance"}) by
(instance, handler)
format: time_series
intervalFactor: 2
legendFormat: Max count
metric: last
refId: B
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Cont of concurent queries
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Requests & queries
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors:
Alert queue capacity on o collector: "#bf1b00"
Alert queue capacity on o harvester: "#bf1b00"
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 20
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/.*capacity.*/"
fill: 0
linewidth: 2
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_notifications_queue_capacity{instance="$instance"})by (instance)
format: time_series
intervalFactor: 2
legendFormat: 'Alert queue capacity '
metric: prometheus_local_storage_checkpoint_last_size_bytes
refId: A
step: 1800
- expr: sum(prometheus_notifications_queue_length{instance="$instance"})by (instance)
format: time_series
intervalFactor: 2
legendFormat: 'Alert queue size on '
metric: prometheus_local_storage_checkpoint_last_size_bytes
refId: B
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Alert queue size
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 21
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_notifications_alertmanagers_discovered{instance="$instance"})
by (instance)
format: time_series
intervalFactor: 2
legendFormat: Checkpoint chunks written/s
metric: prometheus_local_storage_checkpoint_series_chunks_written_sum
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Count of discovered alertmanagers
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: none
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 39
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_notifications_dropped_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: notifications_dropped on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: F
step: 1800
- expr: sum(increase(prometheus_rule_evaluation_failures_total{rule_type="alerting",instance="$instance"}[$aggregation_interval]))
by (rule_type,instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: rule_evaluation_failures on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Alerting errors
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Alerting
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 45
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: increase(prometheus_target_sync_length_seconds_count{scrape_job="kubernetes-service-endpoints"}[$aggregation_interval])
format: time_series
intervalFactor: 2
legendFormat: Count of target synces
refId: A
step: 240
thresholds: []
timeFrom:
timeShift:
title: Kubernetes SD sync count
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 46
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: exceeded_sample_limit on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: A
step: 1800
- expr: sum(increase(prometheus_sd_file_read_errors_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: sd_file_read_error on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: E
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Service discovery errors
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Service discovery
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 36
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_tsdb_reloads_total{instance="$instance"}[30m]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: "{{ instance }}"
refId: A
thresholds: []
timeFrom:
timeShift:
title: Reloaded block from disk
tooltip:
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 5
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_tsdb_blocks_loaded{instance="$instance"}) by (instance)
format: time_series
intervalFactor: 2
legendFormat: Loaded data blocks
metric: prometheus_local_storage_memory_chunkdescs
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Loaded data blocks
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 3
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: prometheus_tsdb_head_series{instance="$instance"}
format: time_series
intervalFactor: 2
legendFormat: Time series count
metric: prometheus_local_storage_memory_series
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Time series total count
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 1
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(rate(prometheus_tsdb_head_samples_appended_total{instance="$instance"}[$aggregation_interval]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: samples/s {{instance}}
metric: prometheus_local_storage_ingested_samples_total
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Samples Appended per second
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: ''
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: TSDB stats
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
To persist: "#9AC48A"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 2
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/Max.*/"
fill: 0
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_tsdb_head_chunks{instance="$instance"}) by (instance)
format: time_series
hide: false
intervalFactor: 2
legendFormat: Head chunk count
metric: prometheus_local_storage_memory_chunks
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Head chunks count
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 35
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: max(prometheus_tsdb_head_max_time{instance="$instance"}) by (instance)
- min(prometheus_tsdb_head_min_time{instance="$instance"}) by (instance)
format: time_series
intervalFactor: 2
legendFormat: "{{ instance }}"
refId: A
thresholds: []
timeFrom:
timeShift:
title: Length of head block
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: ms
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 4
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(rate(prometheus_tsdb_head_chunks_created_total{instance="$instance"}[$aggregation_interval]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: created on {{ instance }}
refId: B
- expr: sum(rate(prometheus_tsdb_head_chunks_removed_total{instance="$instance"}[$aggregation_interval]))
by (instance) * -1
format: time_series
intervalFactor: 2
legendFormat: deleted on {{ instance }}
refId: C
thresholds: []
timeFrom:
timeShift:
title: Head Chunks Created/Deleted per second
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Head block stats
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 33
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_tsdb_compaction_duration_sum{instance="$instance"}[30m])
/ increase(prometheus_tsdb_compaction_duration_count{instance="$instance"}[30m]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: "{{ instance }}"
refId: B
thresholds: []
timeFrom:
timeShift:
title: Compaction duration
tooltip:
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 34
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_tsdb_head_gc_duration_seconds{instance="$instance"}) by
(instance, quantile)
format: time_series
intervalFactor: 2
legendFormat: "{{ quantile }} on {{ instance }}"
refId: A
thresholds: []
timeFrom:
timeShift:
title: Go Garbage collection duration
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 37
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_tsdb_wal_truncate_duration_seconds{instance="$instance"})
by (instance, quantile)
format: time_series
intervalFactor: 2
legendFormat: "{{ quantile }} on {{ instance }}"
refId: A
thresholds: []
timeFrom:
timeShift:
title: WAL truncate duration seconds
tooltip:
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 38
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(tsdb_wal_fsync_duration_seconds{instance="$instance"}) by (instance,
quantile)
format: time_series
intervalFactor: 2
legendFormat: "{{ quantile }} {{ instance }}"
refId: A
thresholds: []
timeFrom:
timeShift:
title: WAL fsync duration seconds
tooltip:
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Data maintenance
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors:
Allocated bytes: "#7EB26D"
Allocated bytes - 1m max: "#BF1B00"
Allocated bytes - 1m min: "#BF1B00"
Allocated bytes - 5m max: "#BF1B00"
Allocated bytes - 5m min: "#BF1B00"
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
RSS: "#447EBC"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
decimals:
editable: true
error: false
fill: 1
id: 6
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/-/"
fill: 0
- alias: collector heap size
color: "#E0752D"
fill: 0
linewidth: 2
- alias: collector kubernetes memory limit
color: "#BF1B00"
fill: 0
linewidth: 3
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(process_resident_memory_bytes{instance="$instance"}) by (instance)
format: time_series
hide: false
intervalFactor: 2
legendFormat: Total resident memory - {{instance}}
metric: process_resident_memory_bytes
refId: B
step: 1800
- expr: sum(go_memstats_alloc_bytes{instance="$instance"}) by (instance)
format: time_series
hide: false
intervalFactor: 2
legendFormat: Total llocated bytes - {{instance}}
metric: go_memstats_alloc_bytes
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Memory
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Allocated bytes: "#F9BA8F"
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
RSS: "#890F02"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 7
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: rate(go_memstats_alloc_bytes_total{instance="$instance"}[$aggregation_interval])
format: time_series
intervalFactor: 2
legendFormat: Allocated Bytes/s
metric: go_memstats_alloc_bytes
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Allocations per second
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
decimals: 2
editable: true
error: false
fill: 1
id: 9
legend:
alignAsTable: false
avg: false
current: false
hideEmpty: false
max: false
min: false
rightSide: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(rate(process_cpu_seconds_total{instance="$instance"}[$aggregation_interval]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: CPU/s
metric: prometheus_local_storage_ingested_samples_total
refId: B
step: 1800
thresholds: []
timeFrom:
timeShift:
title: CPU per second
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values:
- avg
yaxes:
- format: none
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: RAM&CPU
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 47
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 12
stack: false
steppedLine: false
targets:
- expr: sum(increase(net_conntrack_dialer_conn_failed_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
hide: false
interval: ''
intervalFactor: 2
legendFormat: conntrack_dialer_conn_failed on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: M
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Net errors
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Contrac errors
titleSize: h6
schemaVersion: 14
style: dark
tags:
- prometheus
templating:
list:
- auto: true
auto_count: 30
auto_min: 2m
current:
text: auto
value: "$__auto_interval"
hide: 0
label: aggregation intarval
name: aggregation_interval
options:
- selected: true
text: auto
value: "$__auto_interval"
- selected: false
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 2
type: interval
- allValue:
current: {}
datasource: "$datasource"
hide: 0
includeAll: false
label: Instance
multi: false
name: instance
options: []
query: label_values(prometheus_build_info, instance)
refresh: 2
regex: ''
sort: 2
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
- current:
text: Prometheus
value: Prometheus
hide: 0
label: Prometheus datasource
name: datasource
options: []
query: prometheus
refresh: 1
regex: ''
type: datasource
- current:
text: influxdb(heapster) - kokura
value: influxdb(heapster) - kokura
hide: 0
label: InfluxDB datasource
name: influx_datasource
options: []
query: influxdb
refresh: 1
regex: ''
type: datasource
time:
from: now-7d
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Prometheus2.0 (v1.0.0 by FUSAKLA)
version: 8
ceph_cluster:
__inputs:
- name: prometheus
label: Prometheus
description: Prometheus.IO
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: panel
id: graph
name: Graph
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
id:
title: Ceph - Cluster
tags:
- ceph
- cluster
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 150px
panels:
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 21
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_health_status{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: '0,1'
title: Status
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
- op: "="
text: WARNING
value: '0'
- op: "="
text: HEALTHY
value: '1'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 22
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: count(ceph_pool_max_avail{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: ''
title: Pools
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 33
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: 0.025,0.1
title: Cluster Capacity
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 34
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: 0.025,0.1
title: Used Capacity
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percentunit
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 23
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_cluster_total_used_bytes/ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '70,80'
title: Current Utilization
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
title: New row
- collapse: false
editable: true
height: 100px
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 26
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: ''
title: OSDs IN
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 40, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 27
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '1,1'
title: OSDs OUT
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 28
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum(ceph_osd_up{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: ''
title: OSDs UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 40, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 29
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_up{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '1,1'
title: OSDs DOWN
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 30
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '250,300'
title: Average PGs per OSD
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
title: New row
- collapse: false
editable: true
height: 250px
panels:
- aliasColors:
Available: "#EAB839"
Total Capacity: "#447EBC"
Used: "#BF1B00"
total_avail: "#6ED0E0"
total_space: "#7EB26D"
total_used: "#890F02"
bars: false
datasource: prometheus
editable: true
error: false
fill: 4
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: '300'
id: 1
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 0
links: []
minSpan:
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: Total Capacity
fill: 0
linewidth: 3
stack: false
span: 4
stack: true
steppedLine: false
targets:
- expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} - ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Available
refId: A
step: 60
- expr: ceph_cluster_total_used_bytes
interval: "$interval"
intervalFactor: 1
legendFormat: Used
refId: B
step: 60
- expr: ceph_cluster_total_bytes
interval: "$interval"
intervalFactor: 1
legendFormat: Total Capacity
refId: C
step: 60
timeFrom:
timeShift:
title: Capacity
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Total Capacity: "#7EB26D"
Used: "#BF1B00"
total_avail: "#6ED0E0"
total_space: "#7EB26D"
total_used: "#890F02"
bars: false
datasource: prometheus
decimals: 0
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: false
height: '300'
id: 3
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
minSpan:
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: true
steppedLine: false
targets:
- expr: sum(ceph_osd_op_w{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Write
refId: A
step: 60
- expr: sum(ceph_osd_op_r{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Read
refId: B
step: 60
timeFrom:
timeShift:
title: IOPS
tooltip:
msResolution: true
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: none
label: ''
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: '300'
id: 7
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: true
steppedLine: false
targets:
- expr: sum(ceph_osd_op_in_bytes{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Write
refId: A
step: 60
- expr: sum(ceph_osd_op_out_bytes{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Read
refId: B
step: 60
timeFrom:
timeShift:
title: Throughput
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
repeat:
showTitle: true
title: CLUSTER
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 18
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
stack: false
span: 12
stack: true
steppedLine: false
targets:
- expr: ceph_cluster_total_objects{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Total
refId: A
step: 60
timeFrom:
timeShift:
title: Objects in the Cluster
tooltip:
msResolution: false
shared: true
sort: 1
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 19
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
stack: false
span: 6
stack: true
steppedLine: false
targets:
- expr: sum(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Total
refId: A
step: 60
- expr: sum(ceph_pg_active{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Active
refId: B
step: 60
- expr: sum(ceph_pg_inconsistent{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Inconsistent
refId: C
step: 60
- expr: sum(ceph_pg_creating{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Creating
refId: D
step: 60
- expr: sum(ceph_pg_recovering{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Recovering
refId: E
step: 60
- expr: sum(ceph_pg_down{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Down
refId: F
step: 60
timeFrom:
timeShift:
title: PGs
tooltip:
msResolution: false
shared: true
sort: 1
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 20
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
stack: false
span: 6
stack: true
steppedLine: false
targets:
- expr: sum(ceph_pg_degraded{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Degraded
refId: A
step: 60
- expr: sum(ceph_pg_stale{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Stale
refId: B
step: 60
- expr: sum(ceph_pg_undersized{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Undersized
refId: C
step: 60
timeFrom:
timeShift:
title: Stuck PGs
tooltip:
msResolution: false
shared: true
sort: 1
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
title: New row
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
templating:
list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true
auto_count: 10
auto_min: 1m
current:
tags: []
text: 1m
value: 1m
datasource:
hide: 0
includeAll: false
label: Interval
multi: false
name: interval
options:
- selected: false
text: auto
value: "$__auto_interval"
- selected: true
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 0
type: interval
annotations:
list: []
refresh: 5m
schemaVersion: 12
version: 26
links: []
gnetId: 917
description: "Ceph Cluster overview.\r\n"
ceph_osd:
__inputs:
- name: prometheus
label: Prometheus
description: Prometheus.IO
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: panel
id: graph
name: Graph
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
id:
title: Ceph - OSD
tags:
- ceph
- osd
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 100px
panels:
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 40, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 6
interval:
isNew: true
links: []
mappingType: 2
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
- from: '0'
text: DOWN
to: '0.99'
- from: '0.99'
text: UP
to: '1'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osd_up{ceph_daemon="$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: '0,1'
timeFrom:
title: Status
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: DOWN
value: '0'
- op: "="
text: UP
value: '1'
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 40, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 8
interval:
isNew: true
links: []
mappingType: 2
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
- from: '0'
text: OUT
to: '0.99'
- from: '0.99'
text: IN
to: '1'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osd_in{ceph_daemon="$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: '0,1'
timeFrom:
title: Available
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: DOWN
value: '0'
- op: "="
text: UP
value: '1'
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 10
interval:
isNew: true
links: []
mappingType: 2
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: '0,1'
timeFrom:
title: Total OSDs
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: DOWN
value: '0'
- op: "="
text: UP
value: '1'
- op: "="
text: N/A
value: 'null'
valueName: current
title: New row
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1: 250
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2: 300
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: true
id: 5
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Average.*/"
fill: 0
stack: false
span: 10
stack: true
steppedLine: false
targets:
- expr: ceph_osd_numpg{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Number of PGs - {{ $osd }}
refId: A
step: 60
- expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Average Number of PGs in the Cluster
refId: B
step: 60
timeFrom:
timeShift:
title: PGs
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"})*100
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '60,80'
timeFrom:
title: Utilization
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
showTitle: true
title: 'OSD: $osd'
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 2
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: true
steppedLine: false
targets:
- expr: ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Used - {{ osd.$osd }}
metric: ceph_osd_used_bytes
refId: A
step: 60
- expr: ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"} - ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}
hide: false
interval: "$interval"
intervalFactor: 1
legendFormat: Available - {{ $osd }}
metric: ceph_osd_avail_bytes
refId: B
step: 60
timeFrom:
timeShift:
title: OSD Storage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 5
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 9
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: false
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 2
points: true
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Available - {{ $osd }}
metric: ceph_osd_avail_bytes
refId: A
step: 60
timeFrom:
timeShift:
title: Utilization Variance
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label:
logBase: 1
max:
min:
show: true
- format: none
label:
logBase: 1
max:
min:
show: true
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
templating:
list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true
auto_count: 10
auto_min: 1m
current:
selected: true
text: 1m
value: 1m
datasource:
hide: 0
includeAll: false
label: Interval
multi: false
name: interval
options:
- selected: false
text: auto
value: "$__auto_interval"
- selected: true
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 0
type: interval
- current: {}
datasource: prometheus
hide: 0
includeAll: false
label: OSD
multi: false
name: osd
options: []
query: label_values(ceph_osd_metadata{release_group="$ceph_cluster"}, ceph_daemon)
refresh: 1
regex: ''
type: query
annotations:
list: []
refresh: 15m
schemaVersion: 12
version: 18
links: []
gnetId: 923
description: CEPH OSD Status.
ceph_pool:
__inputs:
- name: prometheus
label: Prometheus
description: Prometheus.IO
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: graph
name: Graph
version: ''
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
id:
title: Ceph - Pools
tags:
- ceph
- pools
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 4
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 2
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 0
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
fill: 0
linewidth: 4
stack: false
- alias: "/^Raw.*$/"
color: "#BF1B00"
fill: 0
linewidth: 4
span: 10
stack: true
steppedLine: false
targets:
- expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Total - {{ $pool }}
refId: A
step: 60
- expr: ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Used - {{ $pool }}
refId: B
step: 60
- expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} - ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Available - {{ $pool }}
refId: C
step: 60
- expr: ceph_pool_raw_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Raw - {{ $pool }}
refId: D
step: 60
timeFrom:
timeShift:
title: "[[pool_name]] Pool Storage"
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals: 2
editable: true
error: false
format: percentunit
gauge:
maxValue: 1
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 10
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: (ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} / ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: ''
title: "[[pool_name]] Pool Usage"
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
showTitle: true
title: 'Pool: $pool'
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 7
isNew: true
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: ceph_pool_objects{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Objects - {{ $pool_name }}
refId: A
step: 60
- expr: ceph_pool_dirty{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Dirty Objects - {{ $pool_name }}
refId: B
step: 60
timeFrom:
timeShift:
title: Objects in Pool [[pool_name]]
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: false
id: 4
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: true
steppedLine: false
targets:
- expr: irate(ceph_pool_rd{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Read - {{ $pool_name }}
refId: B
step: 60
- expr: irate(ceph_pool_wr{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Write - {{ $pool_name }}
refId: A
step: 60
timeFrom:
timeShift:
title: "[[pool_name]] Pool IOPS"
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: none
label: IOPS
logBase: 1
max:
min: 0
show: true
- format: short
label: IOPS
logBase: 1
max:
min: 0
show: false
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 5
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: true
steppedLine: false
targets:
- expr: irate(ceph_pool_rd_bytes{pool_id="$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Read Bytes - {{ $pool_name }}
refId: A
step: 60
- expr: irate(ceph_pool_wr_bytes{pool_id="$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Written Bytes - {{ $pool_name }}
refId: B
step: 60
timeFrom:
timeShift:
title: "[[pool_name]] Pool Throughput"
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min: 0
show: true
- format: Bps
label:
logBase: 1
max:
min: 0
show: true
title: New row
time:
from: now-3h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
templating:
list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true
auto_count: 10
auto_min: 1m
current:
selected: true
text: 1m
value: 1m
datasource:
hide: 0
includeAll: false
label: Interval
multi: false
name: interval
options:
- selected: false
text: auto
value: "$__auto_interval"
- selected: true
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 0
type: interval
- current: {}
datasource: prometheus
hide: 0
includeAll: false
label: Pool
multi: false
name: pool
options: []
query: label_values(ceph_pool_objects{release_group="$ceph_cluster"}, pool_id)
refresh: 1
regex: ''
type: query
- current: {}
datasource: prometheus
hide: 0
includeAll: false
label: Pool
multi: false
name: pool_name
options: []
query: label_values(ceph_pool_metadata{release_group="$ceph_cluster",pool_id="[[pool]]" }, name)
refresh: 1
regex: ''
type: query
annotations:
list: []
refresh: 5m
schemaVersion: 12
version: 22
links: []
gnetId: 926
description: Ceph Pools dashboard.
elasticsearch:
__inputs:
- name: prometheus
label: Prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.6.3
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list:
- builtIn: 1
datasource: "-- Grafana --"
enable: true
hide: true
iconColor: rgba(0, 211, 255, 1)
name: Annotations & Alerts
type: dashboard
editable: true
gnetId: 4358
graphTooltip: 1
hideControls: false
id:
links: []
refresh: 5m
rows:
- collapse: false
height:
panels:
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(178, 49, 13, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 8
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 5
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: (sum(elasticsearch_cluster_health_status{cluster=~"$cluster",color="green"})*2)+sum(elasticsearch_cluster_health_status{cluster=~"$cluster",color="yellow"})
format: time_series
intervalFactor: 3
legendFormat: ''
metric: ''
refId: A
step: 40
thresholds: '0,1,2'
title: Cluster health status
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: GREEN
value: '2'
- op: "="
text: YELLOW
value: '1'
- op: "="
text: RED
value: '0'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 10
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: sum(elasticsearch_cluster_health_number_of_nodes{cluster=~"$cluster"})
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
metric: ''
refId: A
step: 40
thresholds: ''
title: Nodes
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 9
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_number_of_data_nodes{cluster="$cluster"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
metric: ''
refId: A
step: 40
thresholds: ''
title: Data nodes
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
hideTimeOverride: true
id: 16
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_number_of_pending_tasks{cluster="$cluster"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
metric: ''
refId: A
step: 40
thresholds: ''
title: Pending tasks
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Cluster
titleSize: h6
- collapse: false
height: ''
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 11
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
minSpan: 2
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
repeat: shard_type
span: 2.4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_active_primary_shards{cluster="$cluster"}
intervalFactor: 2
legendFormat: ''
refId: A
step: 40
thresholds: ''
title: active primary shards
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 39
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
minSpan: 2
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2.4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_active_shards{cluster="$cluster"}
intervalFactor: 2
legendFormat: ''
refId: A
step: 40
thresholds: ''
title: active shards
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 40
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
minSpan: 2
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2.4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_initializing_shards{cluster="$cluster"}
intervalFactor: 2
legendFormat: ''
refId: A
step: 40
thresholds: ''
title: initializing shards
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 41
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
minSpan: 2
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2.4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_relocating_shards{cluster="$cluster"}
intervalFactor: 2
legendFormat: ''
refId: A
step: 40
thresholds: ''
title: relocating shards
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 42
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
minSpan: 2
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2.4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_delayed_unassigned_shards{cluster="$cluster"}
intervalFactor: 2
legendFormat: ''
refId: A
step: 40
thresholds: ''
title: unassigned shards
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Shards
titleSize: h6
- collapse: false
height:
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 30
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: elasticsearch_process_cpu_percent{cluster="$cluster",es_master_node="true",name=~"$node"}
format: time_series
instant: false
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - master"
metric: ''
refId: A
step: 10
- expr: elasticsearch_process_cpu_percent{cluster="$cluster",es_data_node="true",name=~"$node"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - data"
metric: ''
refId: B
step: 10
thresholds: []
timeFrom:
timeShift:
title: CPU usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percent
label: CPU usage
logBase: 1
max: 100
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 0
grid: {}
height: '400'
id: 31
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: elasticsearch_jvm_memory_used_bytes{cluster="$cluster",name=~"$node",name=~"$node"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - used: {{area}}"
metric: ''
refId: A
step: 10
- expr: elasticsearch_jvm_memory_committed_bytes{cluster="$cluster",name=~"$node",name=~"$node"}
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - committed: {{area}}"
refId: B
step: 10
- expr: elasticsearch_jvm_memory_max_bytes{cluster="$cluster",name=~"$node",name=~"$node"}
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - max: {{area}}"
refId: C
step: 10
thresholds: []
timeFrom:
timeShift:
title: JVM memory usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label: Memory
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 32
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: 1-(elasticsearch_filesystem_data_available_bytes{cluster="$cluster"}/elasticsearch_filesystem_data_size_bytes{cluster="$cluster",name=~"$node"})
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - {{path}}"
metric: ''
refId: A
step: 10
thresholds:
- colorMode: custom
fill: true
fillColor: rgba(216, 200, 27, 0.27)
op: gt
value: 0.8
- colorMode: custom
fill: true
fillColor: rgba(234, 112, 112, 0.22)
op: gt
value: 0.9
timeFrom:
timeShift:
title: Disk usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percentunit
label: Disk Usage %
logBase: 1
max: 1
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 47
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
sort: max
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: sent
transform: negative-Y
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: irate(elasticsearch_transport_tx_size_bytes_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} -sent"
refId: D
step: 10
- expr: irate(elasticsearch_transport_rx_size_bytes_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} -received"
refId: C
step: 10
thresholds: []
timeFrom:
timeShift:
title: Network usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: Bps
label: Bytes/sec
logBase: 1
max:
min:
show: true
- format: pps
label: ''
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: System
titleSize: h6
- collapse: false
height: ''
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 1
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: elasticsearch_indices_docs{cluster="$cluster",name=~"$node"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Documents count
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Documents
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 24
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: irate(elasticsearch_indices_indexing_index_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Documents indexed rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: index calls/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 25
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: rate(elasticsearch_indices_docs_deleted{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Documents deleted rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Documents/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 26
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: rate(elasticsearch_indices_merges_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Documents merged rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Documents/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Documents
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 48
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: irate(elasticsearch_indices_indexing_index_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - indexing"
metric: ''
refId: A
step: 4
- expr: irate(elasticsearch_indices_search_query_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - query"
refId: B
step: 4
- expr: irate(elasticsearch_indices_search_fetch_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - fetch"
refId: C
step: 4
- expr: irate(elasticsearch_indices_merges_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - merges"
refId: D
step: 4
- expr: irate(elasticsearch_indices_refresh_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - refresh"
refId: E
step: 4
- expr: irate(elasticsearch_indices_flush_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - flush"
refId: F
step: 4
thresholds: []
timeFrom:
timeShift:
title: Total Operations rate
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Operations/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 49
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: irate(elasticsearch_indices_indexing_index_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - indexing"
metric: ''
refId: A
step: 4
- expr: irate(elasticsearch_indices_search_query_time_ms_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - query"
refId: B
step: 4
- expr: irate(elasticsearch_indices_search_fetch_time_ms_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - fetch"
refId: C
step: 4
- expr: irate(elasticsearch_indices_merges_total_time_ms_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - merges"
refId: D
step: 4
- expr: irate(elasticsearch_indices_refresh_total_time_ms_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - refresh"
refId: E
step: 4
- expr: irate(elasticsearch_indices_flush_time_ms_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - flush"
refId: F
step: 4
thresholds: []
timeFrom:
timeShift:
title: Total Operations time
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: ms
label: Time
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Total Operations stats
titleSize: h6
- collapse: false
height: ''
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 33
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: 'rate(elasticsearch_indices_search_query_time_seconds{cluster="$cluster",name=~"$node"}[$interval]) '
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: Query time
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: ms
label: Time
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 5
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: rate(elasticsearch_indices_indexing_index_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: Indexing time
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: ms
label: Time
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 3
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: rate(elasticsearch_indices_merges_total_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: Merging time
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label: Time
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Times
titleSize: h6
- collapse: false
height:
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 4
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: elasticsearch_indices_fielddata_memory_size_bytes{cluster="$cluster",name=~"$node"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Field data memory size
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label: Memory
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 34
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: rate(elasticsearch_indices_fielddata_evictions{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Field data evictions
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Evictions/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 35
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: elasticsearch_indices_query_cache_memory_size_bytes{cluster="$cluster",name=~"$node"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Query cache size
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label: Size
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 36
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: rate(elasticsearch_indices_query_cache_evictions{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Query cache evictions
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Evictions/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Caches
titleSize: h6
- collapse: false
height: 728
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
id: 45
legend:
alignAsTable: true
avg: true
current: false
max: true
min: true
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: ' irate(elasticsearch_thread_pool_rejected_count{cluster="$cluster",name=~"$node"}[$interval])'
format: time_series
intervalFactor: 2
legendFormat: "{{name}} - {{ type }}"
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Thread Pool operations rejected
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
id: 46
legend:
alignAsTable: true
avg: true
current: false
max: true
min: true
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: elasticsearch_thread_pool_active_count{cluster="$cluster",name=~"$node"}
format: time_series
intervalFactor: 2
legendFormat: "{{name}} - {{ type }}"
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Thread Pool operations queued
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
height: ''
id: 43
legend:
alignAsTable: true
avg: true
current: false
max: true
min: true
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: elasticsearch_thread_pool_active_count{cluster="$cluster",name=~"$node"}
format: time_series
intervalFactor: 2
legendFormat: "{{name}} - {{ type }}"
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Thread Pool threads active
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
id: 44
legend:
alignAsTable: true
avg: true
current: false
max: true
min: true
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: irate(elasticsearch_thread_pool_completed_count{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{name}} - {{ type }}"
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Thread Pool operations completed
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Thread Pool
titleSize: h6
- collapse: false
height:
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 7
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: true
steppedLine: false
targets:
- expr: rate(elasticsearch_jvm_gc_collection_seconds_count{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}} - {{gc}}"
metric: ''
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: GC count
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: GCs
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 27
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: rate(elasticsearch_jvm_gc_collection_seconds_count{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}} - {{gc}}"
metric: ''
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: GC time
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label: Time
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: JVM Garbage Collection
titleSize: h6
schemaVersion: 14
style: dark
tags:
- elasticsearch
- App
templating:
list:
- auto: true
auto_count: 30
auto_min: 10s
current:
text: auto
value: "$__auto_interval"
hide: 0
label: Interval
name: interval
options:
- selected: true
text: auto
value: "$__auto_interval"
- selected: false
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 2
type: interval
- allValue:
current: {}
datasource: "prometheus"
hide: 0
includeAll: false
label: Instance
multi: false
name: cluster
options: []
query: label_values(elasticsearch_cluster_health_status,cluster)
refresh: 1
regex: ''
sort: 1
tagValuesQuery:
tags: []
tagsQuery:
type: query
useTags: false
- allValue:
current: {}
datasource: "prometheus"
hide: 0
includeAll: true
label: node
multi: true
name: node
options: []
query: label_values(elasticsearch_process_cpu_percent,name)
refresh: 1
regex: ''
sort: 1
tagValuesQuery:
tags: []
tagsQuery:
type: query
useTags: false
time:
from: now-12h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Elasticsearch
version: 1
description: Elasticsearch detailed dashboard
hosts_containers:
__inputs:
- name: prometheus
label: Prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: graph
name: Graph
version: ''
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.3.0
id:
title: Container Metrics (cAdvisor)
description: Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU
/ Memory / Filesystem usage as well as individual pod, containers, systemd services
statistics. Uses cAdvisor metrics only.
tags:
- kubernetes
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 200px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: false
height: 200px
id: 32
isNew: true
legend:
alignAsTable: false
avg: true
current: true
max: false
min: false
rightSide: false
show: false
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
targets:
- expr: sum (rate (container_network_receive_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m]))
interval: 10s
intervalFactor: 1
legendFormat: Received
metric: network
refId: A
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m]))'
interval: 10s
intervalFactor: 1
legendFormat: Sent
metric: network
refId: B
step: 10
timeFrom:
timeShift:
title: Network I/O pressure
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: Bps
label:
logBase: 1
max:
min:
show: false
title: Network I/O pressure
- collapse: false
editable: true
height: 250px
panels:
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
height: 180px
id: 4
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"})
/ sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) * 100
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: 65, 90
title: Cluster memory usage
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
height: 180px
id: 6
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
/ sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) * 100
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: 65, 90
title: Cluster CPU usage (5m avg)
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
height: 180px
id: 7
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
/ sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
* 100
interval: 10s
intervalFactor: 1
legendFormat: ''
metric: ''
refId: A
step: 10
thresholds: 65, 90
title: Cluster filesystem usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 9
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 20%
prefix: ''
prefixFontSize: 20%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Used
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 10
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Total
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 11
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: " cores"
postfixFontSize: 30%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Used
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 12
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: " cores"
postfixFontSize: 30%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Total
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 13
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Used
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 14
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Total
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
showTitle: false
title: Total usage
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 17
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (pod_name)
interval: 10s
intervalFactor: 1
legendFormat: "{{ pod_name }}"
metric: container_cpu
refId: A
step: 10
timeFrom:
timeShift:
title: Pods CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
transparent: false
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
showTitle: false
title: Pods CPU usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 23
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (systemd_service_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "{{ systemd_service_name }}"
metric: container_cpu
refId: A
step: 10
timeFrom:
timeShift:
title: System services CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: System services CPU usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 24
isNew: true
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: false
min: false
rightSide: true
show: true
sideWidth:
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (container_name, pod_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: 'pod: {{ pod_name }} | {{ container_name }}'
metric: container_cpu
refId: A
step: 10
- expr: sum (rate (container_cpu_usage_seconds_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, name, image)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})'
metric: container_cpu
refId: B
step: 10
- expr: sum (rate (container_cpu_usage_seconds_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, rkt_container_name)
interval: 10s
intervalFactor: 1
legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}'
metric: container_cpu
refId: C
step: 10
timeFrom:
timeShift:
title: Containers CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Containers CPU usage
- collapse: true
editable: true
height: 500px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 20
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: false
show: true
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (id)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "{{ id }}"
metric: container_cpu
refId: A
step: 10
timeFrom:
timeShift:
title: All processes CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
showTitle: false
title: All processes CPU usage
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 25
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"})
by (pod_name)
interval: 10s
intervalFactor: 1
legendFormat: "{{ pod_name }}"
metric: container_memory_usage:sort_desc
refId: A
step: 10
timeFrom:
timeShift:
title: Pods memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Pods memory usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 26
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"})
by (systemd_service_name)
interval: 10s
intervalFactor: 1
legendFormat: "{{ systemd_service_name }}"
metric: container_memory_usage:sort_desc
refId: A
step: 10
timeFrom:
timeShift:
title: System services memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: System services memory usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 27
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"})
by (container_name, pod_name)
interval: 10s
intervalFactor: 1
legendFormat: 'pod: {{ pod_name }} | {{ container_name }}'
metric: container_memory_usage:sort_desc
refId: A
step: 10
- expr: sum (container_memory_working_set_bytes{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"})
by (kubernetes_io_hostname, name, image)
interval: 10s
intervalFactor: 1
legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})'
metric: container_memory_usage:sort_desc
refId: B
step: 10
- expr: sum (container_memory_working_set_bytes{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"})
by (kubernetes_io_hostname, rkt_container_name)
interval: 10s
intervalFactor: 1
legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}'
metric: container_memory_usage:sort_desc
refId: C
step: 10
timeFrom:
timeShift:
title: Containers memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Containers memory usage
- collapse: true
editable: true
height: 500px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 28
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: false
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{id!="/",kubernetes_io_hostname=~"^$Node$"})
by (id)
interval: 10s
intervalFactor: 1
legendFormat: "{{ id }}"
metric: container_memory_usage:sort_desc
refId: A
step: 10
timeFrom:
timeShift:
title: All processes memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: All processes memory usage
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 16
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
targets:
- expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (pod_name)
interval: 10s
intervalFactor: 1
legendFormat: "-> {{ pod_name }}"
metric: network
refId: A
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (pod_name)'
interval: 10s
intervalFactor: 1
legendFormat: "<- {{ pod_name }}"
metric: network
refId: B
step: 10
timeFrom:
timeShift:
title: Pods network I/O (5m avg)
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Pods network I/O
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 30
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
targets:
- expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (container_name, pod_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "-> pod: {{ pod_name }} | {{ container_name }}"
metric: network
refId: B
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (container_name, pod_name)'
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "<- pod: {{ pod_name }} | {{ container_name }}"
metric: network
refId: D
step: 10
- expr: sum (rate (container_network_receive_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, name, image)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name
}})"
metric: network
refId: A
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, name, image)'
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name
}})"
metric: network
refId: C
step: 10
- expr: sum (rate (container_network_transmit_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, rkt_container_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name
}}"
metric: network
refId: E
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, rkt_container_name)'
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name
}}"
metric: network
refId: F
step: 10
timeFrom:
timeShift:
title: Containers network I/O (5m avg)
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Containers network I/O
- collapse: true
editable: true
height: 500px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 29
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: false
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
targets:
- expr: sum (rate (container_network_receive_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (id)
interval: 10s
intervalFactor: 1
legendFormat: "-> {{ id }}"
metric: network
refId: A
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (id)'
interval: 10s
intervalFactor: 1
legendFormat: "<- {{ id }}"
metric: network
refId: B
step: 10
timeFrom:
timeShift:
title: All processes network I/O (5m avg)
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: All processes network I/O
time:
from: now-5m
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
templating:
list:
- allValue: ".*"
current: {}
datasource: prometheus
hide: 0
includeAll: true
multi: false
name: Node
options: []
query: label_values(kubernetes_io_hostname)
refresh: 1
type: query
annotations:
list: []
refresh: 5m
schemaVersion: 12
version: 13
links: []
gnetId: 315
rabbitmq:
__inputs:
- name: prometheus
label: Prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.2.0
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
editable: true
gnetId: 2121
graphTooltip: 0
hideControls: false
id:
links: []
refresh: 5m
rows:
- collapse: false
height: 266
panels:
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 13
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
metric: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
refId: A
step: 2
thresholds: Up,Down
timeFrom: 30s
title: RabbitMQ Server
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
- op: "="
text: Down
value: '0'
- op: "="
text: Up
value: '1'
valueName: current
- alert:
conditions:
- evaluator:
params:
- 1
type: lt
operator:
type: and
query:
params:
- A
- 10s
- now
reducer:
params: []
type: last
type: query
- evaluator:
params: []
type: no_value
operator:
type: and
query:
params:
- A
- 10s
- now
reducer:
params: []
type: last
type: query
executionErrorState: alerting
frequency: 60s
handler: 1
message: Some of the RabbitMQ node is down
name: Node Stats alert
noDataState: no_data
notifications: []
aliasColors: {}
bars: true
datasource: prometheus
decimals: 0
fill: 1
id: 12
legend:
alignAsTable: true
avg: false
current: true
max: false
min: false
show: true
total: false
values: true
lines: false
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 9
stack: false
steppedLine: false
targets:
- expr: rabbitmq_running{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}"
metric: rabbitmq_running
refId: A
step: 2
thresholds:
- colorMode: critical
fill: true
line: true
op: lt
value: 1
timeFrom: 30s
timeShift:
title: Node up Stats
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 6
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: rabbitmq_exchangesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{instance}}:exchanges"
metric: rabbitmq_exchangesTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Exchanges
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 4
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: rabbitmq_channelsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{instance}}:channels"
metric: rabbitmq_channelsTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Channels
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 3
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: rabbitmq_consumersTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{instance}}:consumers"
metric: rabbitmq_consumersTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Consumers
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 5
legend:
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: rabbitmq_connectionsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{instance}}:connections"
metric: rabbitmq_connectionsTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Connections
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 7
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: rabbitmq_queuesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{instance}}:queues"
metric: rabbitmq_queuesTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Queues
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 8
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: sum by (vhost)(rabbitmq_queue_messages_ready{application="prometheus_rabbitmq_exporter",release_group="$rabbit"})
intervalFactor: 2
legendFormat: "{{vhost}}:ready"
metric: rabbitmq_queue_messages_ready
refId: A
step: 2
- expr: sum by (vhost)(rabbitmq_queue_messages_published_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"})
intervalFactor: 2
legendFormat: "{{vhost}}:published"
metric: rabbitmq_queue_messages_published_total
refId: B
step: 2
- expr: sum by (vhost)(rabbitmq_queue_messages_delivered_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"})
intervalFactor: 2
legendFormat: "{{vhost}}:delivered"
metric: rabbitmq_queue_messages_delivered_total
refId: C
step: 2
- expr: sum by (vhost)(rabbitmq_queue_messages_unacknowledged{application="prometheus_rabbitmq_exporter",release_group="$rabbit"})
intervalFactor: 2
legendFormat: "{{vhost}}:unack"
metric: ack
refId: D
step: 2
thresholds: []
timeFrom:
timeShift:
title: Messages/host
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 2
legend:
alignAsTable: true
avg: false
current: true
max: false
min: false
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: rabbitmq_queue_messages{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{queue}}:{{durable}}"
metric: rabbitmq_queue_messages
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Messages / Queue
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 9
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: rabbitmq_node_mem_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:used"
metric: rabbitmq_node_mem_used
refId: A
step: 2
- expr: rabbitmq_node_mem_limit{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:limit"
metric: node_mem
refId: B
step: 2
thresholds: []
timeFrom:
timeShift:
title: Memory
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: decbytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 10
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: rabbitmq_fd_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:used"
metric: ''
refId: A
step: 2
- expr: rabbitmq_fd_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:total"
metric: node_mem
refId: B
step: 2
thresholds: []
timeFrom:
timeShift:
title: FIle descriptors
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 11
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: rabbitmq_sockets_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:used"
metric: ''
refId: A
step: 2
- expr: rabbitmq_sockets_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:total"
metric: ''
refId: B
step: 2
thresholds: []
timeFrom:
timeShift:
title: Sockets
tooltip:
shared: true
sort: 0
value_type: individual
transparent: false
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list:
- current:
tags: []
text: Prometheus
value: Prometheus
hide: 0
label:
name: datasource
options: []
query: prometheus
refresh: 1
regex: ''
type: datasource
- current: {}
hide: 0
label: null
name: rabbit
options: []
type: query
query: label_values(rabbitmq_up, release_group)
refresh: 1
sort: 1
datasource: prometheus
time:
from: now-5m
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: RabbitMQ Metrics
version: 17
description: 'Basic rabbitmq host stats: Node Stats, Exchanges, Channels, Consumers, Connections,
Queues, Messages, Messages per Queue, Memory, File Descriptors, Sockets.'
kubernetes_capacity_planning:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
description: ''
editable: true
gnetId: 22
graphTooltip: 0
hideControls: false
id:
links: []
refresh: false
rows:
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 3
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(node_cpu{mode="idle"}[2m])) * 100
hide: false
intervalFactor: 10
legendFormat: ''
refId: A
step: 50
thresholds: []
timeFrom:
timeShift:
title: Idle cpu
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percent
label: cpu usage
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 9
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(node_load1)
intervalFactor: 4
legendFormat: load 1m
refId: A
step: 20
target: ''
- expr: sum(node_load5)
intervalFactor: 4
legendFormat: load 5m
refId: B
step: 20
target: ''
- expr: sum(node_load15)
intervalFactor: 4
legendFormat: load 15m
refId: C
step: 20
target: ''
thresholds: []
timeFrom:
timeShift:
title: System load
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percentunit
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 4
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"}
yaxis: 2
spaceLength: 10
span: 9
stack: true
steppedLine: false
targets:
- expr: sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
- sum(node_memory_Cached)
intervalFactor: 2
legendFormat: memory usage
metric: memo
refId: A
step: 10
target: ''
- expr: sum(node_memory_Buffers)
interval: ''
intervalFactor: 2
legendFormat: memory buffers
metric: memo
refId: B
step: 10
target: ''
- expr: sum(node_memory_Cached)
interval: ''
intervalFactor: 2
legendFormat: memory cached
metric: memo
refId: C
step: 10
target: ''
- expr: sum(node_memory_MemFree)
interval: ''
intervalFactor: 2
legendFormat: memory free
metric: memo
refId: D
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Memory usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 5
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
- sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100"
intervalFactor: 2
metric: ''
refId: A
step: 60
target: ''
thresholds: 80, 90
title: Memory usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 246
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 6
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: read
yaxis: 1
- alias: '{instance="172.17.0.1:9100"}'
yaxis: 2
- alias: io time
yaxis: 2
spaceLength: 10
span: 9
stack: false
steppedLine: false
targets:
- expr: sum(rate(node_disk_bytes_read[5m]))
hide: false
intervalFactor: 4
legendFormat: read
refId: A
step: 20
target: ''
- expr: sum(rate(node_disk_bytes_written[5m]))
intervalFactor: 4
legendFormat: written
refId: B
step: 20
- expr: sum(rate(node_disk_io_time_ms[5m]))
intervalFactor: 4
legendFormat: io time
refId: C
step: 20
thresholds: []
timeFrom:
timeShift:
title: Disk I/O
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: ms
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percentunit
gauge:
maxValue: 1
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 12
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"}))
/ sum(node_filesystem_size{device!="rootfs"})
intervalFactor: 2
refId: A
step: 60
target: ''
thresholds: 0.75, 0.9
title: Disk space usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 8
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: 'transmitted '
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(node_network_receive_bytes{device!~"lo"}[5m]))
hide: false
intervalFactor: 2
legendFormat: ''
refId: A
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network received
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: bytes
label:
logBase: 1
max:
min:
show: true
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 10
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: 'transmitted '
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(node_network_transmit_bytes{device!~"lo"}[5m]))
hide: false
intervalFactor: 2
legendFormat: ''
refId: B
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network transmitted
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: bytes
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 276
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 11
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 9
stack: false
steppedLine: false
targets:
- expr: sum(kube_pod_info)
format: time_series
intervalFactor: 2
legendFormat: Current number of Pods
refId: A
step: 10
- expr: sum(kube_node_status_capacity_pods)
format: time_series
intervalFactor: 2
legendFormat: Maximum capacity of pods
refId: B
step: 10
thresholds: []
timeFrom:
timeShift:
title: Cluster Pod Utilization
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods)
* 100
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 60
target: ''
thresholds: '80,90'
title: Pod Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list: []
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Kubernetes Capacity Planning
version: 4
inputs:
- name: prometheus
pluginId: prometheus
type: datasource
value: prometheus
overwrite: true
kubernetes_cluster_status:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
editable: true
gnetId:
graphTooltip: 0
hideControls: false
id:
links: []
rows:
- collapse: false
height: 129
panels:
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 5
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 6
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: sum(up{job=~"apiserver|kube-scheduler|kube-controller-manager"} == 0)
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Control Plane UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: UP
value: 'null'
valueName: total
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 6
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 6
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"})
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '3,5'
title: Alerts Firing
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Cluster Health
titleSize: h6
- collapse: false
height: 168
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals:
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 1
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '50,80'
title: API Servers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals:
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 2
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"}))
* 100
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '50,80'
title: Controller Managers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals:
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 3
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(up{job="kube-scheduler-discovery"} == 1) / count(up{job="kube-scheduler-discovery"}))
* 100
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '50,80'
title: Schedulers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals:
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
hideTimeOverride: false
id: 4
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: count(increase(kube_pod_container_status_restarts{namespace=~"kube-system|tectonic-system"}[1h])
> 5)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Crashlooping Control Plane Pods
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Control Plane Status
titleSize: h6
- collapse: false
height: 158
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 8
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: sum(100 - (avg by (instance) (rate(node_cpu{job="node-exporter",mode="idle"}[5m]))
* 100)) / count(node_cpu{job="node-exporter",mode="idle"})
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: CPU Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
- sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100"
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: Memory Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 9
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"}))
/ sum(node_filesystem_size{device!="rootfs"})
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: Filesystem Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 10
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods)
* 100
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: Pod Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Capacity Planing
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list: []
time:
from: now-6h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: ''
title: Kubernetes Cluster Status
version: 3
inputs:
- name: prometheus
pluginId: prometheus
type: datasource
value: prometheus
overwrite: true
nodes:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
description: Dashboard to get an overview of one server
editable: true
gnetId: 22
graphTooltip: 0
hideControls: false
id:
links: []
refresh: false
rows:
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 3
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: 100 - (avg by (cpu) (irate(node_cpu{mode="idle", instance="$server"}[5m]))
* 100)
hide: false
intervalFactor: 10
legendFormat: "{{cpu}}"
refId: A
step: 50
thresholds: []
timeFrom:
timeShift:
title: Idle cpu
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percent
label: cpu usage
logBase: 1
max: 100
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 9
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: node_load1{instance="$server"}
intervalFactor: 4
legendFormat: load 1m
refId: A
step: 20
target: ''
- expr: node_load5{instance="$server"}
intervalFactor: 4
legendFormat: load 5m
refId: B
step: 20
target: ''
- expr: node_load15{instance="$server"}
intervalFactor: 4
legendFormat: load 15m
refId: C
step: 20
target: ''
thresholds: []
timeFrom:
timeShift:
title: System load
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percentunit
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 4
legend:
alignAsTable: false
avg: false
current: false
hideEmpty: false
hideZero: false
max: false
min: false
rightSide: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"}
yaxis: 2
spaceLength: 10
span: 9
stack: true
steppedLine: false
targets:
- expr: node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"}
- node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"}
hide: false
interval: ''
intervalFactor: 2
legendFormat: memory used
metric: ''
refId: C
step: 10
- expr: node_memory_Buffers{instance="$server"}
interval: ''
intervalFactor: 2
legendFormat: memory buffers
metric: ''
refId: E
step: 10
- expr: node_memory_Cached{instance="$server"}
intervalFactor: 2
legendFormat: memory cached
metric: ''
refId: F
step: 10
- expr: node_memory_MemFree{instance="$server"}
intervalFactor: 2
legendFormat: memory free
metric: ''
refId: D
step: 10
thresholds: []
timeFrom:
timeShift:
title: Memory usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 5
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: ((node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} -
node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"})
/ node_memory_MemTotal{instance="$server"}) * 100
intervalFactor: 2
refId: A
step: 60
target: ''
thresholds: 80, 90
title: Memory usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 6
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: read
yaxis: 1
- alias: '{instance="172.17.0.1:9100"}'
yaxis: 2
- alias: io time
yaxis: 2
spaceLength: 10
span: 9
stack: false
steppedLine: false
targets:
- expr: sum by (instance) (rate(node_disk_bytes_read{instance="$server"}[2m]))
hide: false
intervalFactor: 4
legendFormat: read
refId: A
step: 20
target: ''
- expr: sum by (instance) (rate(node_disk_bytes_written{instance="$server"}[2m]))
intervalFactor: 4
legendFormat: written
refId: B
step: 20
- expr: sum by (instance) (rate(node_disk_io_time_ms{instance="$server"}[2m]))
intervalFactor: 4
legendFormat: io time
refId: C
step: 20
thresholds: []
timeFrom:
timeShift:
title: Disk I/O
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: ms
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percentunit
gauge:
maxValue: 1
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(node_filesystem_size{device!="rootfs",instance="$server"}) - sum(node_filesystem_free{device!="rootfs",instance="$server"}))
/ sum(node_filesystem_size{device!="rootfs",instance="$server"})
intervalFactor: 2
refId: A
step: 60
target: ''
thresholds: 0.75, 0.9
title: Disk space usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 8
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: 'transmitted '
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: rate(node_network_receive_bytes{instance="$server",device!~"lo"}[5m])
hide: false
intervalFactor: 2
legendFormat: "{{device}}"
refId: A
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network received
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: bytes
label:
logBase: 1
max:
min:
show: true
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 10
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: 'transmitted '
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: rate(node_network_transmit_bytes{instance="$server",device!~"lo"}[5m])
hide: false
intervalFactor: 2
legendFormat: "{{device}}"
refId: B
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network transmitted
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: bytes
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list:
- allValue:
current: {}
datasource: prometheus
hide: 0
includeAll: false
label:
multi: false
name: server
options: []
query: label_values(node_boot_time, instance)
refresh: 1
regex: ''
sort: 0
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Nodes
version: 2
inputs:
- name: prometheus
pluginId: prometheus
type: datasource
value: prometheus
overwrite: true
openstack_control_plane:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.5.2
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: panel
id: text
name: Text
version: ''
annotations:
list: []
editable: true
gnetId:
graphTooltip: 1
hideControls: false
id:
links: []
refresh: 5m
rows:
- collapse: false
height: 250px
panels:
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 24
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=keystone
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_keystone_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Keystone
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 23
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=glance
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_glance_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Glance
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(202, 58, 40, 0.86)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 22
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=heat
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_heat_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Heat
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 21
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=neutron
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_neutron_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Neutron
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(208, 53, 34, 0.82)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 20
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=nova
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_nova_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Nova
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 19
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=swift
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_swift_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Ceph
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 18
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=cinder
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_cinder_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Cinder
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 17
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=placement
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_placement_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Placement
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(208, 53, 34, 0.82)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 16
interval: "> 60s"
links:
- dashboard: RabbitMQ Metrics
name: Drilldown dashboard
title: RabbitMQ Metrics
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: min(rabbitmq_up)
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: RabbitMQ
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(208, 53, 34, 0.82)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 15
interval: "> 60s"
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: min(mysql_global_status_wsrep_ready)
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: MariaDB
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(225, 177, 40, 0.59)
- rgba(208, 53, 34, 0.82)
- rgba(118, 245, 40, 0.73)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 14
interval: "> 60s"
links:
- dashboard: Nginx Stats
name: Drilldown dashboard
title: Nginx Stats
type: dashboard
mappingType: 2
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: '1'
text: OK
to: '99999999999999'
- from: '0'
text: CRIT
to: '0'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: sum_over_time(nginx_connections_total{type="active", namespace="openstack"}[5m])
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '0,1'
title: Nginx
type: singlestat
valueFontSize: 50%
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(208, 53, 34, 0.82)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 13
interval: "> 60s"
links:
- dashboard: Memcached
name: Drilldown dashboard
title: Memcached
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: min(memcached_up)
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Memcached
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: OpenStack Services
titleSize: h6
- collapse: false
height: 250px
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 11
interval: "> 60s"
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 3
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- alias: free
column: value
expr: openstack_total_used_vcpus{job="openstack-metrics", region="$region"} + openstack_total_free_vcpus{job="openstack-metrics",
region="$region"}
format: time_series
function: min
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
- alias: used
column: value
expr: openstack_total_used_vcpus{job="openstack-metrics", region="$region"}
format: time_series
function: max
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
policy: default
rawQuery: false
refId: B
resultFormat: time_series
step: 120
thresholds: []
timeFrom:
timeShift:
title: VCPUs (total vs used)
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 12
interval: "> 60s"
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 3
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- alias: free
column: value
expr: openstack_total_used_ram_MB{job="openstack-metrics", region="$region"} + openstack_total_free_ram_MB{job="openstack-metrics",
region="$region"}
format: time_series
function: mean
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
- alias: used
column: value
expr: openstack_total_used_ram_MB{job="openstack-metrics", region="$region"}
format: time_series
function: mean
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: B
resultFormat: time_series
step: 120
thresholds: []
timeFrom:
timeShift:
title: RAM (total vs used)
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: mbytes
label: ''
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 13
interval: "> 60s"
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 3
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- alias: free
column: value
expr: openstack_total_used_disk_GB{job="openstack-metrics", region="$region"} + openstack_total_free_disk_GB{job="openstack-metrics",
region="$region"}
format: time_series
function: mean
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
- alias: used
column: value
expr: openstack_total_used_disk_GB{job="openstack-metrics", region="$region"}
format: time_series
function: mean
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
policy: default
rawQuery: false
refId: B
resultFormat: time_series
step: 120
thresholds: []
timeFrom:
timeShift:
title: Disk (used vs total)
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: gbytes
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Virtual resources
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
enable: true
list:
- allValue:
current: {}
datasource: prometheus
hide: 0
includeAll: false
label:
multi: false
name: region
options: []
query: label_values(openstack_exporter_cache_refresh_duration_seconds, region)
refresh: 1
regex: ''
sort: 0
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
time:
from: now-1h
to: now
timepicker:
collapse: false
enable: true
notice: false
now: true
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
status: Stable
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
type: timepicker
timezone: browser
title: OpenStack Metrics
version: 2
nginx_stats:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.5.2
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
annotations:
list: []
description: Show stats from the hnlq715/nginx-vts-exporter.
editable: true
gnetId: 2949
graphTooltip: 0
hideControls: false
id:
links: []
refresh: 5m
rows:
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 7
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 12
stack: false
steppedLine: false
targets:
- expr: sum(nginx_upstream_responses_total{upstream=~"^$Upstream$"}) by (status_code,
upstream)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ status_code }}.{{ upstream }}"
metric: nginx_upstream_response
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: HTTP Response Codes by Upstream
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 6
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_upstream_requests_total{upstream=~"^$Upstream$"}[5m]))
by (upstream)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ upstream }}"
metric: nginx_upstream_requests
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Upstream Requests rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 5
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_upstream_bytes_total{upstream=~"^$Upstream$"}[5m])) by
(direction, upstream)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ direction }}.{{ upstream }}"
metric: nginx_upstream_bytes
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Upstream Bytes Transfer rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250px
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 1
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_connections_total[5m])) by (type)
format: time_series
intervalFactor: 2
legendFormat: "{{ type }}"
metric: nginx_server_connections
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Overall Connections rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 4
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_cache_total{ server_zone=~"$ingress"}[5m])) by (server_zone,
type)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ type }}.{{ server_zone }}"
metric: nginx_server_cache
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Cache Action rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 3
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_requests_total{ server_zone=~"$ingress" }[5m])) by (server_zone)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ server_zone }}"
metric: nginx_server_requests
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Overall Requests rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 2
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_bytes_total{ server_zone=~"$ingress" }[5m])) by (direction,
server_zone)
format: time_series
intervalFactor: 2
legendFormat: "{{ direction }}.{{ server_zone }}"
metric: nginx_server_bytes
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Overall Bytes Transferred rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
schemaVersion: 14
style: dark
tags:
- prometheus
- nginx
templating:
list:
- allValue: ".*"
current: {}
datasource: prometheus
hide: 0
includeAll: false
label:
multi: true
name: Upstream
options: []
query: label_values(nginx_upstream_bytes_total, upstream)
refresh: 1
regex: ''
sort: 1
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
- allValue:
current: {}
datasource: prometheus
hide: 0
includeAll: false
label:
multi: true
name: ingress
options: []
query: label_values(nginx_bytes_total, server_zone)
refresh: 1
regex: "/^[^\\*_]+$/"
sort: 1
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Nginx Stats
version: 13
openstack-service:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.5.2
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
enable: true
list: []
editable: true
gnetId:
graphTooltip: 1
hideControls: false
id:
links: []
refresh: 5m
rows:
- collapse: false
height: 250px
panels:
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(225, 177, 40, 0.59)
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 6
interval: "> 60s"
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_[[Service]]_api{job="openstack-metrics"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '0,1'
title: ''
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: CRITICAL
value: '0'
- op: "="
text: OK
value: '1'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 13
interval: "> 60s"
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- column: value
condition: ''
expr: sum(nginx_responses_total{server_zone=~"[[Service]].*", status_code="5xx"})
fill: ''
format: time_series
function: count
groupBy:
- interval: auto
params:
- auto
type: time
- params:
- '0'
type: fill
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
tags: []
thresholds: ''
title: HTTP 5xx errors
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: current
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 0
grid: {}
id: 7
interval: ">60s"
legend:
alignAsTable: true
avg: true
current: false
max: true
min: true
show: true
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 8
stack: false
steppedLine: false
targets:
- expr: sum(nginx_upstream_response_msecs_avg{upstream=~"openstack-[[Service]].*"})
by (upstream)
format: time_series
intervalFactor: 2
refId: A
step: 120
thresholds: []
timeFrom:
timeShift:
title: HTTP response time
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
id: 9
interval: "> 60s"
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: true
targets:
- alias: healthy
column: value
expr: openstack_check_[[Service]]_api
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
select: []
step: 120
tags: []
thresholds: []
timeFrom:
timeShift:
title: API Availability
tooltip:
msResolution: false
shared: false
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: none
label: ''
logBase: 1
max: 1
min: 0
show: false
- format: short
logBase: 1
max:
min:
show: false
- aliasColors:
'{status_code="2xx"}': "#629E51"
'{status_code="5xx"}': "#BF1B00"
bars: true
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 0
grid: {}
id: 8
interval: "> 60s"
legend:
alignAsTable: false
avg: false
current: false
hideEmpty: false
max: false
min: false
rightSide: false
show: true
total: false
values: false
lines: false
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 8
stack: true
steppedLine: false
targets:
- expr: sum(nginx_responses_total{server_zone=~"[[Service]].*"}) by (status_code)
format: time_series
intervalFactor: 2
refId: A
step: 120
thresholds: []
timeFrom:
timeShift:
title: Number of HTTP responses
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Service Status
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
enable: true
list:
- allValue:
current:
tags: []
text: cinder
value: cinder
hide: 0
includeAll: false
label:
multi: false
name: Service
options:
- selected: false
text: nova
value: nova
- selected: false
text: glance
value: glance
- selected: false
text: keystone
value: keystone
- selected: true
text: cinder
value: cinder
- selected: false
text: heat
value: heat
- selected: false
text: placement
value: placement
- selected: false
text: neutron
value: neutron
query: nova,glance,keystone,cinder,heat,placement,neutron
type: custom
time:
from: now-1h
to: now
timepicker:
collapse: false
enable: true
notice: false
now: true
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
status: Stable
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
type: timepicker
timezone: browser
title: Openstack Service
version: 4
coredns:
__inputs:
- name: prometheus
label: Prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.3
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
annotations:
list: []
editable: true
gnetId: 5926
graphTooltip: 0
hideControls: false
id:
links: []
rows:
- collapse: false
height: 250px
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 1
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: total
yaxis: 2
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m]))
by (proto)
format: time_series
intervalFactor: 2
legendFormat: "{{proto}}"
refId: A
step: 60
- expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m]))
format: time_series
intervalFactor: 2
legendFormat: total
refId: B
step: 60
thresholds: []
timeFrom:
timeShift:
title: Requests (total)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: pps
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 12
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: total
yaxis: 2
- alias: other
yaxis: 2
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(rate(coredns_dns_request_type_count_total{instance=~"$instance"}[5m]))
by (type)
intervalFactor: 2
legendFormat: "{{type}}"
refId: A
step: 60
thresholds: []
timeFrom:
timeShift:
title: Requests (by qtype)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: pps
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 2
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: total
yaxis: 2
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m]))
by (zone)
intervalFactor: 2
legendFormat: "{{zone}}"
refId: A
step: 60
- expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m]))
intervalFactor: 2
legendFormat: total
refId: B
step: 60
thresholds: []
timeFrom:
timeShift:
title: Requests (by zone)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: pps
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 10
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: total
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(coredns_dns_request_do_count_total{instance=~"$instance"}[5m]))
intervalFactor: 2
legendFormat: DO
refId: A
step: 40
- expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m]))
intervalFactor: 2
legendFormat: total
refId: B
step: 40
thresholds: []
timeFrom:
timeShift:
title: Requests (DO bit)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: pps
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 9
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: tcp:90
yaxis: 2
- alias: 'tcp:99 '
yaxis: 2
- alias: tcp:50
yaxis: 2
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m]))
by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:99 "
refId: A
step: 60
- expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m]))
by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:90"
refId: B
step: 60
- expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m]))
by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:50"
refId: C
step: 60
thresholds: []
timeFrom:
timeShift:
title: Requests (size, udp)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 14
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: tcp:90
yaxis: 1
- alias: 'tcp:99 '
yaxis: 1
- alias: tcp:50
yaxis: 1
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:99 "
refId: A
step: 60
- expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:90"
refId: B
step: 60
- expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:50"
refId: C
step: 60
thresholds: []
timeFrom:
timeShift:
title: Requests (size,tcp)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min: 0
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Row
titleSize: h6
- collapse: false
height: 250px
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 5
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(coredns_dns_response_rcode_count_total{instance=~"$instance"}[5m]))
by (rcode)
intervalFactor: 2
legendFormat: "{{rcode}}"
refId: A
step: 40
thresholds: []
timeFrom:
timeShift:
title: Responses (by rcode)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 3
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m]))
by (le, job))
intervalFactor: 2
legendFormat: 99%
refId: A
step: 40
- expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m]))
by (le))
intervalFactor: 2
legendFormat: 90%
refId: B
step: 40
- expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m]))
by (le))
intervalFactor: 2
legendFormat: 50%
refId: C
step: 40
thresholds: []
timeFrom:
timeShift:
title: Responses (duration)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 8
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: udp:50%
yaxis: 1
- alias: tcp:50%
yaxis: 2
- alias: tcp:90%
yaxis: 2
- alias: tcp:99%
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: 'histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m]))
by (le,proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:99%"
refId: A
step: 40
- expr: 'histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance="$instance",proto="udp"}[5m]))
by (le,proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:90%"
refId: B
step: 40
- expr: 'histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m]))
by (le,proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:50%"
metric: ''
refId: C
step: 40
thresholds: []
timeFrom:
timeShift:
title: Responses (size, udp)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 13
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: udp:50%
yaxis: 1
- alias: tcp:50%
yaxis: 1
- alias: tcp:90%
yaxis: 1
- alias: tcp:99%
yaxis: 1
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: 'histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
by (le,proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:99%"
refId: A
step: 40
- expr: 'histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
by (le,proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:90%"
refId: B
step: 40
- expr: 'histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
by (le, proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:50%"
metric: ''
refId: C
step: 40
thresholds: []
timeFrom:
timeShift:
title: Responses (size, tcp)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min: 0
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 15
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(coredns_cache_size{instance=~"$instance"}) by (type)
intervalFactor: 2
legendFormat: "{{type}}"
refId: A
step: 40
thresholds: []
timeFrom:
timeShift:
title: Cache (size)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 16
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: misses
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(coredns_cache_hits_total{instance=~"$instance"}[5m])) by (type)
intervalFactor: 2
legendFormat: hits:{{type}}
refId: A
step: 40
- expr: sum(rate(coredns_cache_misses_total{instance=~"$instance"}[5m])) by (type)
intervalFactor: 2
legendFormat: misses
refId: B
step: 40
thresholds: []
timeFrom:
timeShift:
title: Cache (hitrate)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: pps
logBase: 1
max:
min: 0
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
schemaVersion: 14
style: dark
tags:
- dns
- coredns
templating:
list:
- allValue: ".*"
current: {}
datasource: prometheus
hide: 0
includeAll: true
label: Instance
multi: false
name: instance
options: []
query: up{job="coredns"}
refresh: 1
regex: .*instance="(.*?)".*
sort: 0
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
time:
from: now-3h
to: now
timepicker:
now: true
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: utc
title: CoreDNS
version: 3
description: A dashboard for the CoreDNS DNS server.