# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Default values for grafana
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Container images used by the chart, keyed by the component that runs them.
images:
  tags:
    grafana: docker.io/grafana/grafana:5.0.0
    dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.1
    db_init: docker.io/openstackhelm/heat:newton
    grafana_db_session_sync: docker.io/openstackhelm/heat:newton
    helm_tests: docker.io/openstackhelm/heat:newton
    image_repo_sync: docker.io/docker:17.07.0
  pull_policy: IfNotPresent
  local_registry:
    active: false
    # Image tags listed here are excluded from the local registry handling.
    exclude:
      - dep_check
      - image_repo_sync

# Node selector key/value pairs for the grafana, job and test pods.
labels:
  grafana:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
  job:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
  test:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled

# Pod-level settings: user id, anti-affinity, extra mounts, replica count,
# upgrade lifecycle and resource requests/limits.
pod:
  user:
    grafana:
      uid: 104
  affinity:
    anti:
      type:
        default: preferredDuringSchedulingIgnoredDuringExecution
      topologyKey:
        default: kubernetes.io/hostname
  mounts:
    grafana:
      init_container: null
      grafana: null
  replicas:
    grafana: 1
  lifecycle:
    upgrades:
      deployments:
        revision_history: 3
        pod_replacement_strategy: RollingUpdate
        rolling_update:
          max_unavailable: 1
          max_surge: 3
    termination_grace_period:
      grafana:
        timeout: 600
  resources:
    enabled: false
    jobs:
      image_repo_sync:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      bootstrap:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      db_init:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      db_init_session:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      grafana_db_session_sync:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      tests:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
    # NOTE(review): placed as a sibling of `jobs` (deployment resources, not a
    # job) - confirm against the deployment template.
    grafana:
      requests:
        memory: "128Mi"
        cpu: "100m"
      limits:
        memory: "1024Mi"
        cpu: "2000m"

# Endpoint definitions (hosts, ports, schemes, paths, credentials) for the
# services this chart references. The ldap template under conf.ldap resolves
# its host and port from this tree via the helm-toolkit endpoint lookups.
# NOTE(review): the passwords below look like placeholders - override them
# per deployment.
endpoints:
  cluster_domain_suffix: cluster.local
  local_image_registry:
    name: docker-registry
    namespace: docker-registry
    hosts:
      default: localhost
      internal: docker-registry
      node: localhost
    host_fqdn_override:
      default: null
    port:
      registry:
        node: 5000
  oslo_db:
    namespace: null
    auth:
      admin:
        username: root
        password: password
      user:
        username: grafana
        password: password
    hosts:
      default: mariadb
    host_fqdn_override:
      default: null
    path: /grafana
    scheme: mysql+pymysql
    port:
      mysql:
        default: 3306
  oslo_db_session:
    namespace: null
    auth:
      admin:
        username: root
        password: password
      user:
        username: grafana_session
        password: password
    hosts:
      default: mariadb
    host_fqdn_override:
      default: null
    path: /grafana_session
    scheme: mysql+pymysql
    port:
      mysql:
        default: 3306
  grafana:
    name: grafana
    namespace: null
    auth:
      admin:
        username: admin
        password: password
    hosts:
      default: grafana-dashboard
      public: grafana
    host_fqdn_override:
      default: null
      # NOTE(srwilkers): this chart supports TLS for fqdn over-ridden public
      # endpoints using the following format:
      # public:
      #   host: null
      #   tls:
      #     crt: null
      #     key: null
    path:
      default: null
    scheme:
      default: http
    port:
      grafana:
        default: 3000
        public: 80
  monitoring:
    name: prometheus
    namespace: null
    auth:
      user:
        username: admin
        password: changeme
    hosts:
      default: prom-metrics
      public: prometheus
    host_fqdn_override:
      default: null
    path:
      default: null
    scheme:
      default: http
    port:
      api:
        default: 80
        public: 80
  ldap:
    hosts:
      default: ldap
    auth:
      admin:
        bind_dn: "cn=admin,dc=cluster,dc=local"
        password: password
    host_fqdn_override:
      default: null
    path:
      default: "ou=People,dc=cluster,dc=local"
    scheme:
      default: ldap
    port:
      ldap:
        default: 389

# Jobs and service endpoints each component lists as its dependencies.
# `service` values name entries in the endpoints tree above.
dependencies:
  dynamic:
    common:
      local_image_registry:
        jobs:
          - grafana-image-repo-sync
        services:
          - endpoint: node
            service: local_image_registry
  static:
    db_init:
      services:
        - endpoint: internal
          service: oslo_db
    db_init_session:
      services:
        - endpoint: internal
          service: oslo_db
    db_session_sync:
      jobs:
        - grafana-db-init-session
      services:
        - endpoint: internal
          service: oslo_db
    grafana:
      jobs:
        - grafana-db-init
        - grafana-db-session-sync
      services:
        - endpoint: internal
          service: oslo_db
    image_repo_sync:
      services:
        - endpoint: internal
          service: local_image_registry
    tests:
      services:
        - endpoint: internal
          service: grafana

# Service exposure: optional node port and ingress settings.
network:
  grafana:
    node_port:
      enabled: false
      port: 30902
    ingress:
      public: true
      classes:
        namespace: "nginx"
        cluster: "nginx-cluster"
      annotations:
        nginx.ingress.kubernetes.io/rewrite-target: /

# Names of the secrets associated with each credential set.
secrets:
  oslo_db:
    admin: grafana-db-admin
    user: grafana-db-user
  oslo_db_session:
    admin: grafana-session-db-admin
    user: grafana-session-db-user
  tls:
    grafana:
      grafana:
        public: grafana-tls-public
  prometheus:
    user: prometheus-user-creds

# One boolean per manifest name.
# NOTE(review): presumably each flag gates rendering of the matching
# template - confirm against templates/.
manifests:
  configmap_bin: true
  configmap_dashboards: true
  configmap_etc: true
  deployment: true
  ingress: true
  helm_tests: true
  job_db_init: true
  job_db_init_session: true
  job_db_session_sync: true
  job_image_repo_sync: true
  network_policy: false
  secret_db: true
  secret_db_session: true
  secret_admin_creds: true
  secret_ingress_tls: true
  secret_prom_creds: true
  service: true
  service_ingress: true

# Configuration content: LDAP settings and the ldap.toml template, followed
# by further configuration sections.
conf:
  ldap:
    config:
      base_dns:
        search: "dc=cluster,dc=local"
        group_search: "ou=Groups,dc=cluster,dc=local"
      filters:
        search: "(uid=%s)"
        group_search: "(&(objectclass=posixGroup)(memberUID=uid=%s,ou=People,dc=cluster,dc=local))"
    template: |
      verbose_logging = false
      [[servers]]
      host = "{{ tuple "ldap" "internal" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}"
      port = {{ tuple "ldap" "internal" "ldap" .
| include "helm-toolkit.endpoints.endpoint_port_lookup" }} use_ssl = false start_tls = false ssl_skip_verify = false bind_dn = "{{ .Values.endpoints.ldap.auth.admin.bind_dn }}" bind_password = '{{ .Values.endpoints.ldap.auth.admin.password }}' search_filter = "{{ .Values.conf.ldap.config.filters.search }}" search_base_dns = ["{{ .Values.conf.ldap.config.base_dns.search }}"] group_search_filter = "{{ .Values.conf.ldap.config.filters.group_search }}" group_search_base_dns = ["{{ .Values.conf.ldap.config.base_dns.group_search }}"] [servers.attributes] username = "uid" surname = "sn" member_of = "cn" email = "mail" [[servers.group_mappings]] group_dn = "{{.Values.endpoints.ldap.auth.admin.bind_dn }}" org_role = "Admin" [[servers.group_mappings]] group_dn = "*" org_role = "Viewer" provisioning: dashboards: apiVersion: 1 providers: - name: 'osh-infra-dashboards' orgId: 1 folder: '' type: file disableDeletion: false editable: false options: path: /var/lib/grafana/dashboards datasources: #NOTE(srwilkers): The top key for each datasource (eg: monitoring) must # map to the key name for the datasource's endpoint entry in the endpoints # tree monitoring: name: prometheus type: prometheus access: proxy orgId: 1 editable: true basicAuth: true grafana: auth.ldap: enabled: true config_file: /etc/grafana/ldap.toml paths: data: /var/lib/grafana/data plugins: /var/lib/grafana/plugins provisioning: /var/lib/grafana/provisioning server: protocol: http http_port: 3000 database: type: mysql session: provider: mysql provider_config: null cookie_name: grafana_sess cookie_secure: false session_life_time: 86400 security: admin_user: ${GF_SECURITY_ADMIN_USER} admin_password: ${GF_SECURITY_ADMIN_PASSWORD} cookie_username: grafana_user cookie_remember_name: grafana_remember login_remember_days: 7 users: allow_sign_up: false allow_org_create: false auto_assign_org: true default_theme: dark log: mode: console level: info grafana_net: url: https://grafana.net dashboards: prometheus: __inputs: - 
name: prometheus label: Prometheus description: Prometheus which you want to monitor type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.6.0 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' - type: panel id: text name: Text version: '' annotations: list: - builtIn: 1 datasource: "-- Grafana --" enable: true hide: true iconColor: rgba(0, 211, 255, 1) name: Annotations & Alerts type: dashboard - datasource: "$datasource" enable: true expr: count(sum(up{instance="$instance"}) by (instance) < 1) hide: false iconColor: rgb(250, 44, 18) limit: 100 name: downage showIn: 0 step: 30s tagKeys: instance textFormat: prometheus down titleFormat: Downage type: alert - datasource: "$datasource" enable: true expr: sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) by (instance) hide: false iconColor: "#fceaca" limit: 100 name: Reload showIn: 0 step: 5m tagKeys: instance tags: [] titleFormat: Reload type: tags description: Dashboard for monitoring of Prometheus v2.x.x editable: true gnetId: 3681 graphTooltip: 1 hideControls: false id: links: - icon: info tags: [] targetBlank: true title: 'Dashboard''s Github ' tooltip: Github repo of this dashboard type: link url: https://github.com/FUSAKLA/Prometheus2-grafana-dashboard - icon: doc tags: [] targetBlank: true title: Prometheus Docs tooltip: '' type: link url: http://prometheus.io/docs/introduction/overview/ refresh: 5m rows: - collapse: false height: 161 panels: - cacheTimeout: colorBackground: false colorValue: false colors: - "#299c46" - rgba(237, 129, 40, 0.89) - "#bf1b00" datasource: prometheus decimals: 1 format: s gauge: maxValue: 1000000 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 41 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to 
text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: time() - process_start_time_seconds{instance="$instance"} format: time_series instant: false intervalFactor: 2 refId: A thresholds: '' title: Uptime type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - "#299c46" - rgba(237, 129, 40, 0.89) - "#bf1b00" datasource: prometheus format: short gauge: maxValue: 1000000 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 42 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 4 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: true tableColumn: '' targets: - expr: prometheus_tsdb_head_series{instance="$instance"} format: time_series instant: false intervalFactor: 2 refId: A thresholds: '500000,800000,1000000' title: Total count of time series type: singlestat valueFontSize: 150% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - "#299c46" - rgba(237, 129, 40, 0.89) - "#d44a3a" datasource: prometheus format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 48 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 
50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: version targets: - expr: prometheus_build_info{instance="$instance"} format: table instant: true intervalFactor: 2 refId: A thresholds: '' title: Version type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg - cacheTimeout: colorBackground: false colorValue: false colors: - "#299c46" - rgba(237, 129, 40, 0.89) - "#d44a3a" datasource: prometheus decimals: 2 format: ms gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 49 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: prometheus_tsdb_head_max_time{instance="$instance"} - prometheus_tsdb_head_min_time{instance="$instance"} format: time_series instant: true intervalFactor: 2 refId: A thresholds: '' title: Actual head block length type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - content: height: '' id: 50 links: [] mode: html span: 1 title: '' transparent: true type: text - cacheTimeout: colorBackground: false colorValue: true colors: - "#e6522c" - rgba(237, 129, 40, 0.89) - "#299c46" datasource: prometheus decimals: 1 format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 52 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% 
rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: '2' format: time_series intervalFactor: 2 refId: A thresholds: '10,20' title: '' transparent: true type: singlestat valueFontSize: 200% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg repeat: repeatIteration: repeatRowId: showTitle: false title: Header instance info titleSize: h6 - collapse: false height: '250' panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 15 legend: avg: true current: false max: false min: false show: false total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: true steppedLine: false targets: - expr: max(prometheus_engine_query_duration_seconds{instance="$instance"}) by (instance, slice) format: time_series intervalFactor: 1 legendFormat: max duration for {{slice}} metric: prometheus_local_storage_rushed_mode refId: A step: 900 thresholds: [] timeFrom: timeShift: title: Query elapsed time tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: s label: '' logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 17 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false 
steppedLine: false targets: - expr: sum(increase(prometheus_tsdb_head_series_created_total{instance="$instance"}[$aggregation_interval])) by (instance) format: time_series intervalFactor: 2 legendFormat: created on {{ instance }} metric: prometheus_local_storage_maintain_series_duration_seconds_count refId: A step: 1800 - expr: sum(increase(prometheus_tsdb_head_series_removed_total{instance="$instance"}[$aggregation_interval])) by (instance) * -1 format: time_series intervalFactor: 2 legendFormat: removed on {{ instance }} refId: B thresholds: [] timeFrom: timeShift: title: Head series created/deleted tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 13 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: exceeded_sample_limit on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: A step: 1800 - expr: sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: duplicate_timestamp on {{ instance }} metric: prometheus_local_storage_chunk_ops_total 
refId: B step: 1800 - expr: sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: out_of_bounds on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: C step: 1800 - expr: sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: out_of_order on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: D step: 1800 - expr: sum(increase(prometheus_rule_evaluation_failures_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: rule_evaluation_failure on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: G step: 1800 - expr: sum(increase(prometheus_tsdb_compactions_failed_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: tsdb_compactions_failed on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: K step: 1800 - expr: sum(increase(prometheus_tsdb_reloads_failures_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: tsdb_reloads_failures on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: L step: 1800 - expr: sum(increase(prometheus_tsdb_head_series_not_found{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: head_series_not_found on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: N step: 1800 - expr: sum(increase(prometheus_evaluator_iterations_missed_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' 
intervalFactor: 2 legendFormat: evaluator_iterations_missed on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: O step: 1800 - expr: sum(increase(prometheus_evaluator_iterations_skipped_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: evaluator_iterations_skipped on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: P step: 1800 thresholds: [] timeFrom: timeShift: title: Prometheus errors tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: Main info titleSize: h6 - collapse: false height: 250 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "$datasource" description: '' editable: true error: false fill: 1 grid: {} id: 25 legend: alignAsTable: true avg: true current: true max: true min: false show: false sort: max sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: prometheus_target_interval_length_seconds{instance="$instance",quantile="0.99"} - 60 format: time_series interval: 2m intervalFactor: 1 legendFormat: "{{instance}}" metric: '' refId: A step: 300 thresholds: [] timeFrom: timeShift: title: Scrape delay (counts with 1m scrape interval) tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: s logBase: 1 max: min: show: true - format: short logBase: 1 max: min: show: true - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max 
chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 14 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: Queue length yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(prometheus_evaluator_duration_seconds{instance="$instance"}) by (instance, quantile) format: time_series intervalFactor: 2 legendFormat: Queue length metric: prometheus_local_storage_indexing_queue_length refId: B step: 1800 thresholds: [] timeFrom: timeShift: title: Rule evaulation duration tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: s label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: '0' show: true repeat: repeatIteration: repeatRowId: showTitle: true title: Scrape & rule duration titleSize: h6 - collapse: false height: 250 panels: - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 18 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: sum(increase(http_requests_total{instance="$instance"}[$aggregation_interval])) by (instance, handler) > 0 format: time_series intervalFactor: 2 legendFormat: "{{ handler }} on {{ instance }}" metric: '' refId: A step: 1800 thresholds: [] timeFrom: timeShift: title: Request count tooltip: 
msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: none label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 16 legend: avg: false current: false hideEmpty: true hideZero: true max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: max(sum(http_request_duration_microseconds{instance="$instance"}) by (instance, handler, quantile)) by (instance, handler) > 0 format: time_series hide: false intervalFactor: 2 legendFormat: "{{ handler }} on {{ instance }}" refId: B thresholds: [] timeFrom: timeShift: title: Request duration per handler tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: µs label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 19 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: sum(increase(http_request_size_bytes{instance="$instance", quantile="0.99"}[$aggregation_interval])) by 
(instance, handler) > 0 format: time_series hide: false intervalFactor: 2 legendFormat: "{{ handler }} in {{ instance }}" refId: B thresholds: [] timeFrom: timeShift: title: Request size by handler tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Allocated bytes: "#F9BA8F" Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max count collector: "#bf1b00" Max count harvester: "#bf1b00" Max to persist: "#3F6833" RSS: "#890F02" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 8 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/Max.*/" fill: 0 linewidth: 2 spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: sum(prometheus_engine_queries{instance="$instance"}) by (instance, handler) format: time_series intervalFactor: 2 legendFormat: 'Current count ' metric: last refId: A step: 1800 - expr: sum(prometheus_engine_queries_concurrent_max{instance="$instance"}) by (instance, handler) format: time_series intervalFactor: 2 legendFormat: Max count metric: last refId: B step: 1800 thresholds: [] timeFrom: timeShift: title: Cont of concurent queries tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: true title: Requests & queries titleSize: h6 - collapse: false height: 250 panels: - aliasColors: Alert queue capacity on 
o collector: "#bf1b00" Alert queue capacity on o harvester: "#bf1b00" Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 20 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/.*capacity.*/" fill: 0 linewidth: 2 spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: sum(prometheus_notifications_queue_capacity{instance="$instance"})by (instance) format: time_series intervalFactor: 2 legendFormat: 'Alert queue capacity ' metric: prometheus_local_storage_checkpoint_last_size_bytes refId: A step: 1800 - expr: sum(prometheus_notifications_queue_length{instance="$instance"})by (instance) format: time_series intervalFactor: 2 legendFormat: 'Alert queue size on ' metric: prometheus_local_storage_checkpoint_last_size_bytes refId: B step: 1800 thresholds: [] timeFrom: timeShift: title: Alert queue size tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 21 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: 
sum(prometheus_notifications_alertmanagers_discovered{instance="$instance"}) by (instance) format: time_series intervalFactor: 2 legendFormat: Checkpoint chunks written/s metric: prometheus_local_storage_checkpoint_series_chunks_written_sum refId: A step: 1800 thresholds: [] timeFrom: timeShift: title: Count of discovered alertmanagers tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: none label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 39 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: sum(increase(prometheus_notifications_dropped_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: notifications_dropped on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: F step: 1800 - expr: sum(increase(prometheus_rule_evaluation_failures_total{rule_type="alerting",instance="$instance"}[$aggregation_interval])) by (rule_type,instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: rule_evaluation_failures on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: A step: 1800 thresholds: [] timeFrom: timeShift: title: Alerting errors tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: 
min: '0' show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: true title: Alerting titleSize: h6 - collapse: false height: 250 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 45 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: increase(prometheus_target_sync_length_seconds_count{scrape_job="kubernetes-service-endpoints"}[$aggregation_interval]) format: time_series intervalFactor: 2 legendFormat: Count of target synces refId: A step: 240 thresholds: [] timeFrom: timeShift: title: Kubernetes SD sync count tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 46 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: exceeded_sample_limit on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: A step: 1800 - expr: 
sum(increase(prometheus_sd_file_read_errors_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series interval: '' intervalFactor: 2 legendFormat: sd_file_read_error on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: E step: 1800 thresholds: [] timeFrom: timeShift: title: Service discovery errors tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: true title: Service discovery titleSize: h6 - collapse: false height: 250 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 36 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: sum(increase(prometheus_tsdb_reloads_total{instance="$instance"}[30m])) by (instance) format: time_series intervalFactor: 2 legendFormat: "{{ instance }}" refId: A thresholds: [] timeFrom: timeShift: title: Reloaded block from disk tooltip: shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 5 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' 
percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: sum(prometheus_tsdb_blocks_loaded{instance="$instance"}) by (instance) format: time_series intervalFactor: 2 legendFormat: Loaded data blocks metric: prometheus_local_storage_memory_chunkdescs refId: A step: 1800 thresholds: [] timeFrom: timeShift: title: Loaded data blocks tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 3 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: prometheus_tsdb_head_series{instance="$instance"} format: time_series intervalFactor: 2 legendFormat: Time series count metric: prometheus_local_storage_memory_series refId: A step: 1800 thresholds: [] timeFrom: timeShift: title: Time series total count tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 1 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] 
nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: sum(rate(prometheus_tsdb_head_samples_appended_total{instance="$instance"}[$aggregation_interval])) by (instance) format: time_series intervalFactor: 2 legendFormat: samples/s {{instance}} metric: prometheus_local_storage_ingested_samples_total refId: A step: 1800 thresholds: [] timeFrom: timeShift: title: Samples Appended per second tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: '' logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: true title: TSDB stats titleSize: h6 - collapse: false height: 250 panels: - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" To persist: "#9AC48A" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 2 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/Max.*/" fill: 0 spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: sum(prometheus_tsdb_head_chunks{instance="$instance"}) by (instance) format: time_series hide: false intervalFactor: 2 legendFormat: Head chunk count metric: prometheus_local_storage_memory_chunks refId: A step: 1800 thresholds: [] timeFrom: timeShift: title: Head chunks count tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: 
min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 35 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: max(prometheus_tsdb_head_max_time{instance="$instance"}) by (instance) - min(prometheus_tsdb_head_min_time{instance="$instance"}) by (instance) format: time_series intervalFactor: 2 legendFormat: "{{ instance }}" refId: A thresholds: [] timeFrom: timeShift: title: Length of head block tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: ms label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 4 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: sum(rate(prometheus_tsdb_head_chunks_created_total{instance="$instance"}[$aggregation_interval])) by (instance) format: time_series intervalFactor: 2 legendFormat: created on {{ instance }} refId: B - expr: sum(rate(prometheus_tsdb_head_chunks_removed_total{instance="$instance"}[$aggregation_interval])) by (instance) * -1 format: time_series intervalFactor: 2 legendFormat: deleted on {{ instance }} refId: C thresholds: [] timeFrom: timeShift: title: Head Chunks Created/Deleted per second tooltip: msResolution: false shared: 
true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: true title: Head block stats titleSize: h6 - collapse: false height: 250 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 33 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: sum(increase(prometheus_tsdb_compaction_duration_sum{instance="$instance"}[30m]) / increase(prometheus_tsdb_compaction_duration_count{instance="$instance"}[30m])) by (instance) format: time_series intervalFactor: 2 legendFormat: "{{ instance }}" refId: B thresholds: [] timeFrom: timeShift: title: Compaction duration tooltip: shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: s label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 34 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: sum(prometheus_tsdb_head_gc_duration_seconds{instance="$instance"}) by (instance, quantile) format: time_series intervalFactor: 2 legendFormat: "{{ quantile }} on {{ instance }}" refId: A thresholds: [] timeFrom: timeShift: title: Go Garbage collection duration tooltip: shared: true 
sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: s label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 37 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: sum(prometheus_tsdb_wal_truncate_duration_seconds{instance="$instance"}) by (instance, quantile) format: time_series intervalFactor: 2 legendFormat: "{{ quantile }} on {{ instance }}" refId: A thresholds: [] timeFrom: timeShift: title: WAL truncate duration seconds tooltip: shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 38 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: sum(tsdb_wal_fsync_duration_seconds{instance="$instance"}) by (instance, quantile) format: time_series intervalFactor: 2 legendFormat: "{{ quantile }} {{ instance }}" refId: A thresholds: [] timeFrom: timeShift: title: WAL fsync duration seconds tooltip: shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: s label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: 
show: true repeat: repeatIteration: repeatRowId: showTitle: true title: Data maintenance titleSize: h6 - collapse: false height: 250 panels: - aliasColors: Allocated bytes: "#7EB26D" Allocated bytes - 1m max: "#BF1B00" Allocated bytes - 1m min: "#BF1B00" Allocated bytes - 5m max: "#BF1B00" Allocated bytes - 5m min: "#BF1B00" Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" RSS: "#447EBC" bars: false dashLength: 10 dashes: false datasource: "$datasource" decimals: editable: true error: false fill: 1 id: 6 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/-/" fill: 0 - alias: collector heap size color: "#E0752D" fill: 0 linewidth: 2 - alias: collector kubernetes memory limit color: "#BF1B00" fill: 0 linewidth: 3 spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: sum(process_resident_memory_bytes{instance="$instance"}) by (instance) format: time_series hide: false intervalFactor: 2 legendFormat: Total resident memory - {{instance}} metric: process_resident_memory_bytes refId: B step: 1800 - expr: sum(go_memstats_alloc_bytes{instance="$instance"}) by (instance) format: time_series hide: false intervalFactor: 2 legendFormat: Total allocated bytes - {{instance}} metric: go_memstats_alloc_bytes refId: A step: 1800 thresholds: [] timeFrom: timeShift: title: Memory tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Allocated bytes: "#F9BA8F" Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" RSS: "#890F02" bars: false dashLength: 10 dashes: false
datasource: "$datasource" editable: true error: false fill: 1 id: 7 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: rate(go_memstats_alloc_bytes_total{instance="$instance"}[$aggregation_interval]) format: time_series intervalFactor: 2 legendFormat: Allocated Bytes/s metric: go_memstats_alloc_bytes refId: A step: 1800 thresholds: [] timeFrom: timeShift: title: Allocations per second tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "$datasource" decimals: 2 editable: true error: false fill: 1 id: 9 legend: alignAsTable: false avg: false current: false hideEmpty: false max: false min: false rightSide: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: sum(rate(process_cpu_seconds_total{instance="$instance"}[$aggregation_interval])) by (instance) format: time_series intervalFactor: 2 legendFormat: CPU/s metric: prometheus_local_storage_ingested_samples_total refId: B step: 1800 thresholds: [] timeFrom: timeShift: title: CPU per second tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: - avg yaxes: - format: none label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: 
showTitle: true title: RAM&CPU titleSize: h6 - collapse: false height: 250 panels: - aliasColors: Chunks: "#1F78C1" Chunks to persist: "#508642" Max chunks: "#052B51" Max to persist: "#3F6833" bars: false dashLength: 10 dashes: false datasource: "$datasource" editable: true error: false fill: 1 id: 47 legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 12 stack: false steppedLine: false targets: - expr: sum(increase(net_conntrack_dialer_conn_failed_total{instance="$instance"}[$aggregation_interval])) by (instance) > 0 format: time_series hide: false interval: '' intervalFactor: 2 legendFormat: conntrack_dialer_conn_failed on {{ instance }} metric: prometheus_local_storage_chunk_ops_total refId: M step: 1800 thresholds: [] timeFrom: timeShift: title: Net errors tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: true title: Conntrack errors titleSize: h6 schemaVersion: 14 style: dark tags: - prometheus templating: list: - auto: true auto_count: 30 auto_min: 2m current: text: auto value: "$__auto_interval" hide: 0 label: aggregation interval name: aggregation_interval options: - selected: true text: auto value: "$__auto_interval" - selected: false text: 1m value: 1m - selected: false text: 10m value: 10m - selected: false text: 30m value: 30m - selected: false text: 1h value: 1h - selected: false text: 6h value: 6h - selected: false text: 12h value: 12h - selected: false text: 1d value: 1d - selected: false text: 7d value: 7d - selected: false text: 14d value: 14d - selected: false text: 30d value: 30d query:
1m,10m,30m,1h,6h,12h,1d,7d,14d,30d refresh: 2 type: interval - allValue: current: {} datasource: "$datasource" hide: 0 includeAll: false label: Instance multi: false name: instance options: [] query: label_values(prometheus_build_info, instance) refresh: 2 regex: '' sort: 2 tagValuesQuery: '' tags: [] tagsQuery: '' type: query useTags: false - current: text: Prometheus value: Prometheus hide: 0 label: Prometheus datasource name: datasource options: [] query: prometheus refresh: 1 regex: '' type: datasource - current: text: influxdb(heapster) - kokura value: influxdb(heapster) - kokura hide: 0 label: InfluxDB datasource name: influx_datasource options: [] query: influxdb refresh: 1 regex: '' type: datasource time: from: now-7d to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: browser title: Prometheus2.0 (v1.0.0 by FUSAKLA) version: 8 ceph_cluster: __inputs: - name: prometheus label: Prometheus description: Prometheus.IO type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: panel id: singlestat name: Singlestat version: '' - type: panel id: graph name: Graph version: '' - type: grafana id: grafana name: Grafana version: 3.1.1 - type: datasource id: prometheus name: Prometheus version: 1.0.0 id: title: Ceph - Cluster tags: - ceph - cluster style: dark timezone: browser editable: true hideControls: false sharedCrosshair: false rows: - collapse: false editable: true height: 150px panels: - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 21 interval: 1m isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 
nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: count(ceph_health_status{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 refId: A step: 60 thresholds: '0,1' title: Status transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' - op: "=" text: WARNING value: '0' - op: "=" text: HEALTHY value: '1' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 14 interval: 1m isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: ceph_mon_quorum_count{application="ceph",release_group="$ceph_cluster"} interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '2,3' title: Monitors In Quorum transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 22 interval: 1m isNew: true links: [] 
mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true targets: - expr: count(ceph_pool_max_avail{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '' title: Pools transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 33 interval: 1m isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true targets: - expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: 0.025,0.1 title: Cluster Capacity transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false 
thresholdMarkers: true id: 34 interval: 1m isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true targets: - expr: ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"} interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: 0.025,0.1 title: Used Capacity transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percentunit gauge: maxValue: 1 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 23 interval: 1m isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: false targets: - expr: ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"} / ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '0.7,0.8' title: Current Utilization transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' valueName: current title: New row - collapse: false editable: true height: 100px panels: - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) -
rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 26 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '' title: OSDs IN type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 40, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 27 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '1,1' title: OSDs OUT type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: 
colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 28 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum(ceph_osd_up{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '' title: OSDs UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 40, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 29 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_up{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '1,1' title: OSDs DOWN type: singlestat valueFontSize: 80% valueMaps: - op: 
"=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 30 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true targets: - expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '250,300' title: Average PGs per OSD type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current title: New row - collapse: false editable: true height: 250px panels: - aliasColors: Available: "#EAB839" Total Capacity: "#447EBC" Used: "#BF1B00" total_avail: "#6ED0E0" total_space: "#7EB26D" total_used: "#890F02" bars: false datasource: prometheus editable: true error: false fill: 4 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) height: '300' id: 1 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 0 links: [] minSpan: nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: Total Capacity fill: 0 linewidth: 3 stack: false span: 4 stack: true steppedLine: false targets: - expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} - 
ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"} interval: "$interval" intervalFactor: 1 legendFormat: Available refId: A step: 60 - expr: ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"} interval: "$interval" intervalFactor: 1 legendFormat: Used refId: B step: 60 - expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} interval: "$interval" intervalFactor: 1 legendFormat: Total Capacity refId: C step: 60 timeFrom: timeShift: title: Capacity tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Total Capacity: "#7EB26D" Used: "#BF1B00" total_avail: "#6ED0E0" total_space: "#7EB26D" total_used: "#890F02" bars: false datasource: prometheus decimals: 0 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) thresholdLine: false height: '300' id: 3 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 2 links: [] minSpan: nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: true steppedLine: false targets: - expr: sum(ceph_osd_op_w{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Write refId: A step: 60 - expr: sum(ceph_osd_op_r{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Read refId: B step: 60 timeFrom: timeShift: title: IOPS tooltip: msResolution: true shared: true sort: 2 value_type: individual type: graph xaxis: show: true yaxes: - format: none label: '' logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: 0 show: true - aliasColors: {} bars: false datasource: prometheus editable: true
error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) height: '300' id: 7 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: true steppedLine: false targets: - expr: sum(ceph_osd_op_in_bytes{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Write refId: A step: 60 - expr: sum(ceph_osd_op_out_bytes{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Read refId: B step: 60 timeFrom: timeShift: title: Throughput tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: show: true yaxes: - format: Bps label: logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: 0 show: true repeat: showTitle: true title: CLUSTER - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 18 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/^Total.*$/" stack: false span: 12 stack: true steppedLine: false targets: - expr: ceph_cluster_total_objects{application="ceph",release_group="$ceph_cluster"} interval: "$interval" intervalFactor: 1 legendFormat: Total refId: A step: 60 timeFrom: timeShift: title: Objects in the Cluster tooltip: msResolution: false shared: true 
sort: 1 value_type: individual type: graph xaxis: show: true yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 19 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/^Total.*$/" stack: false span: 6 stack: true steppedLine: false targets: - expr: sum(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Total refId: A step: 60 - expr: sum(ceph_pg_active{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Active refId: B step: 60 - expr: sum(ceph_pg_inconsistent{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Inconsistent refId: C step: 60 - expr: sum(ceph_pg_creating{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Creating refId: D step: 60 - expr: sum(ceph_pg_recovering{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Recovering refId: E step: 60 - expr: sum(ceph_pg_down{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Down refId: F step: 60 timeFrom: timeShift: title: PGs tooltip: msResolution: false shared: true sort: 1 value_type: individual type: graph xaxis: show: true yaxes: - format: short label: logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false 
datasource: prometheus editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 20 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/^Total.*$/" stack: false span: 6 stack: true steppedLine: false targets: - expr: sum(ceph_pg_degraded{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Degraded refId: A step: 60 - expr: sum(ceph_pg_stale{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Stale refId: B step: 60 - expr: sum(ceph_pg_undersized{application="ceph",release_group="$ceph_cluster"}) interval: "$interval" intervalFactor: 1 legendFormat: Undersized refId: C step: 60 timeFrom: timeShift: title: Stuck PGs tooltip: msResolution: false shared: true sort: 1 value_type: individual type: graph xaxis: show: true yaxes: - format: short label: logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: show: true title: New row time: from: now-1h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d templating: list: - current: {} hide: 0 label: Cluster name: ceph_cluster options: [] type: query query: label_values(ceph_health_status, release_group) refresh: 1 sort: 1 datasource: prometheus - auto: true auto_count: 10 auto_min: 1m current: tags: [] text: 1m value: 1m datasource: hide: 0 includeAll: false label: Interval multi: false name: interval options: - selected: false text: auto value: "$__auto_interval" - selected: true text: 1m value: 1m - selected: false text: 10m value: 10m - selected: false text: 30m 
value: 30m - selected: false text: 1h value: 1h - selected: false text: 6h value: 6h - selected: false text: 12h value: 12h - selected: false text: 1d value: 1d - selected: false text: 7d value: 7d - selected: false text: 14d value: 14d - selected: false text: 30d value: 30d query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d refresh: 0 type: interval annotations: list: [] refresh: 5m schemaVersion: 12 version: 26 links: [] gnetId: 917 description: "Ceph Cluster overview.\r\n"
# Grafana dashboard "Ceph - OSD" (gnetId 923).
# Shows status, placement-group count and storage use for the OSD chosen
# through the $osd template variable. Every panel reads from the
# `prometheus` datasource and filters on application="ceph" and
# release_group="$ceph_cluster".
ceph_osd:
  __inputs:
  - name: prometheus
    label: Prometheus
    description: Prometheus.IO
    type: datasource
    pluginId: prometheus
    pluginName: Prometheus
  __requires:
  - type: panel
    id: singlestat
    name: Singlestat
    version: ''
  - type: panel
    id: graph
    name: Graph
    version: ''
  - type: grafana
    id: grafana
    name: Grafana
    version: 3.1.1
  - type: datasource
    id: prometheus
    name: Prometheus
    version: 1.0.0
  id: null
  title: Ceph - OSD
  tags:
  - ceph
  - osd
  style: dark
  timezone: browser
  editable: true
  hideControls: false
  sharedCrosshair: false
  rows:
  # Row 1: three singlestat tiles (Status, Available, Total OSDs).
  - collapse: false
    editable: true
    height: 100px
    panels:
    # Status: ceph_osd_up for the selected OSD. mappingType 2 selects
    # "range to text", so rangeMaps renders 0-0.99 as DOWN and 0.99-1 as UP.
    - cacheTimeout: null
      colorBackground: true
      colorValue: false
      colors:
      - rgba(245, 54, 54, 0.9)
      - rgba(237, 40, 40, 0.89)
      - rgba(50, 172, 45, 0.97)
      datasource: prometheus
      editable: true
      error: false
      format: none
      gauge:
        maxValue: 100
        minValue: 0
        show: false
        thresholdLabels: false
        thresholdMarkers: true
      id: 6
      interval: null
      isNew: true
      links: []
      mappingType: 2
      mappingTypes:
      - name: value to text
        value: 1
      - name: range to text
        value: 2
      maxDataPoints: 100
      nullPointMode: connected
      nullText: null
      postfix: ''
      postfixFontSize: 50%
      prefix: ''
      prefixFontSize: 50%
      rangeMaps:
      - from: 'null'
        text: N/A
        to: 'null'
      - from: '0'
        text: DOWN
        to: '0.99'
      - from: '0.99'
        text: UP
        to: '1'
      span: 1
      sparkline:
        fillColor: rgba(31, 118, 189, 0.18)
        full: false
        lineColor: rgb(31, 120, 193)
        show: false
      targets:
      - expr: ceph_osd_up{ceph_daemon="osd.$osd",application="ceph",release_group="$ceph_cluster"}
        interval: "$interval"
        intervalFactor: 1
        refId: A
        step: 60
      thresholds: '0,1'
      timeFrom: null
      title: Status
      transparent: false
      type: singlestat
      valueFontSize: 80%
      valueMaps:
      - op: "="
        text: DOWN
        value: '0'
      - op: "="
        text: UP
        value: '1'
      - op: "="
        text: N/A
        value: 'null'
      valueName: current
    # Available: ceph_osd_in for the selected OSD, rendered as OUT / IN.
    # valueMaps use the same OUT/IN wording as rangeMaps so the tile reads
    # the same whichever mapping type is active.
    - cacheTimeout: null
      colorBackground: true
      colorValue: false
      colors:
      - rgba(245, 54, 54, 0.9)
      - rgba(237, 40, 40, 0.89)
      - rgba(50, 172, 45, 0.97)
      datasource: prometheus
      editable: true
      error: false
      format: none
      gauge:
        maxValue: 100
        minValue: 0
        show: false
        thresholdLabels: false
        thresholdMarkers: true
      id: 8
      interval: null
      isNew: true
      links: []
      mappingType: 2
      mappingTypes:
      - name: value to text
        value: 1
      - name: range to text
        value: 2
      maxDataPoints: 100
      nullPointMode: connected
      nullText: null
      postfix: ''
      postfixFontSize: 50%
      prefix: ''
      prefixFontSize: 50%
      rangeMaps:
      - from: 'null'
        text: N/A
        to: 'null'
      - from: '0'
        text: OUT
        to: '0.99'
      - from: '0.99'
        text: IN
        to: '1'
      span: 1
      sparkline:
        fillColor: rgba(31, 118, 189, 0.18)
        full: false
        lineColor: rgb(31, 120, 193)
        show: false
      targets:
      - expr: ceph_osd_in{ceph_daemon="osd.$osd",application="ceph",release_group="$ceph_cluster"}
        interval: "$interval"
        intervalFactor: 1
        refId: A
        step: 60
      thresholds: '0,1'
      timeFrom: null
      title: Available
      transparent: false
      type: singlestat
      valueFontSize: 80%
      valueMaps:
      - op: "="
        text: OUT
        value: '0'
      - op: "="
        text: IN
        value: '1'
      - op: "="
        text: N/A
        value: 'null'
      valueName: current
    # Total OSDs: number of ceph_osd_metadata series in the cluster.
    # This is a count, so only the null -> N/A mapping is defined.
    - cacheTimeout: null
      colorBackground: false
      colorValue: false
      colors:
      - rgba(245, 54, 54, 0.9)
      - rgba(237, 129, 40, 0.89)
      - rgba(50, 172, 45, 0.97)
      datasource: prometheus
      editable: true
      error: false
      format: none
      gauge:
        maxValue: 100
        minValue: 0
        show: false
        thresholdLabels: false
        thresholdMarkers: true
      id: 10
      interval: null
      isNew: true
      links: []
      mappingType: 2
      mappingTypes:
      - name: value to text
        value: 1
      - name: range to text
        value: 2
      maxDataPoints: 100
      nullPointMode: connected
      nullText: null
      postfix: ''
      postfixFontSize: 50%
      prefix: ''
      prefixFontSize: 50%
      rangeMaps:
      - from: 'null'
        text: N/A
        to: 'null'
      span: 1
      sparkline:
        fillColor: rgba(31, 118, 189, 0.18)
        full: false
        lineColor: rgb(31, 120, 193)
        show: false
      targets:
      - expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"})
        interval: "$interval"
        intervalFactor: 1
        refId: A
        step: 60
      thresholds: '0,1'
      timeFrom: null
      title: Total OSDs
      transparent: false
      type: singlestat
      valueFontSize: 80%
      valueMaps:
      - op: "="
        text: N/A
        value: 'null'
      valueName: current
    title: New row
  # Row 2 ("OSD: $osd"): PG count graph plus a utilization gauge.
  - collapse: false
    editable: true
    height: 250px
    panels:
    # PGs: PG count of the selected OSD against the cluster-wide average.
    # Grid threshold lines are drawn at 250 and 300.
    - aliasColors: {}
      bars: false
      datasource: prometheus
      decimals: 2
      editable: true
      error: false
      fill: 1
      grid:
        threshold1: 250
        threshold1Color: rgba(216, 200, 27, 0.27)
        threshold2: 300
        threshold2Color: rgba(234, 112, 112, 0.22)
        thresholdLine: true
      id: 5
      interval: "$interval"
      isNew: true
      legend:
        alignAsTable: true
        avg: true
        current: true
        max: true
        min: true
        show: true
        total: false
        values: true
      lines: true
      linewidth: 2
      links: []
      nullPointMode: connected
      percentage: false
      pointradius: 5
      points: false
      renderer: flot
      seriesOverrides:
      # The "Average ..." series is drawn unfilled and kept out of the stack.
      - alias: "/^Average.*/"
        fill: 0
        stack: false
      span: 10
      stack: true
      steppedLine: false
      targets:
      - expr: ceph_osd_numpg{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Number of PGs - {{ osd.$osd }}'
        refId: A
        step: 60
      - expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
        interval: "$interval"
        intervalFactor: 1
        legendFormat: Average Number of PGs in the Cluster
        refId: B
        step: 60
      timeFrom: null
      timeShift: null
      title: PGs
      tooltip:
        msResolution: false
        shared: true
        sort: 0
        value_type: individual
      type: graph
      xaxis:
        show: true
      yaxes:
      - format: short
        label: null
        logBase: 1
        max: null
        min: 0
        show: true
      - format: short
        label: null
        logBase: 1
        max: null
        min: 0
        show: true
    # Utilization: used bytes as a percentage of total bytes for the
    # selected OSD, shown as a gauge with thresholds at 60 and 80.
    - cacheTimeout: null
      colorBackground: false
      colorValue: true
      colors:
      - rgba(50, 172, 45, 0.97)
      - rgba(237, 129, 40, 0.89)
      - rgba(245, 54, 54, 0.9)
      datasource: prometheus
      editable: true
      error: false
      format: percent
      gauge:
        maxValue: 100
        minValue: 0
        show: true
        thresholdLabels: false
        thresholdMarkers: true
      id: 7
      interval: null
      isNew: true
      links: []
      mappingType: 1
      mappingTypes:
      - name: value to text
        value: 1
      - name: range to text
        value: 2
      maxDataPoints: 100
      nullPointMode: connected
      nullText: null
      postfix: ''
      postfixFontSize: 50%
      prefix: ''
      prefixFontSize: 50%
      rangeMaps:
      - from: 'null'
        text: N/A
        to: 'null'
      span: 2
      sparkline:
        fillColor: rgba(31, 118, 189, 0.18)
        full: false
        lineColor: rgb(31, 120, 193)
        show: true
      targets:
      - expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"})*100
        interval: "$interval"
        intervalFactor: 1
        legendFormat: ''
        refId: A
        step: 60
      thresholds: '60,80'
      timeFrom: null
      title: Utilization
      transparent: false
      type: singlestat
      valueFontSize: 80%
      valueMaps:
      - op: "="
        text: N/A
        value: 'null'
      valueName: current
    showTitle: true
    title: 'OSD: $osd'
  # Row 3: storage graphs for the selected OSD.
  - collapse: false
    editable: true
    height: 250px
    panels:
    # OSD Storage: stacked used bytes and available bytes (total - used).
    - aliasColors: {}
      bars: false
      datasource: prometheus
      decimals: 2
      editable: true
      error: false
      fill: 1
      grid:
        threshold1: null
        threshold1Color: rgba(216, 200, 27, 0.27)
        threshold2: null
        threshold2Color: rgba(234, 112, 112, 0.22)
      id: 2
      interval: "$interval"
      isNew: true
      legend:
        alignAsTable: true
        avg: true
        current: true
        max: true
        min: true
        show: true
        total: false
        values: true
      lines: true
      linewidth: 2
      links: []
      nullPointMode: connected
      percentage: false
      pointradius: 5
      points: false
      renderer: flot
      seriesOverrides: []
      span: 6
      stack: true
      steppedLine: false
      targets:
      - expr: ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Used - {{ osd.$osd }}'
        metric: ceph_osd_used_bytes
        refId: A
        step: 60
      - expr: >-
          ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}
          - ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}
        hide: false
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Available - {{ osd.$osd }}'
        metric: ceph_osd_avail_bytes
        refId: B
        step: 60
      timeFrom: null
      timeShift: null
      title: OSD Storage
      tooltip:
        msResolution: false
        shared: true
        sort: 0
        value_type: individual
      type: graph
      xaxis:
        show: true
      yaxes:
      - format: bytes
        label: null
        logBase: 1
        max: null
        min: 0
        show: true
      - format: short
        label: null
        logBase: 1
        max: null
        min: 0
        show: true
    # Utilization Variance: used/total ratio of the selected OSD, drawn as
    # points only (lines: false, points: true).
    - aliasColors: {}
      bars: false
      datasource: prometheus
      decimals: 5
      editable: true
      error: false
      fill: 1
      grid:
        threshold1: null
        threshold1Color: rgba(216, 200, 27, 0.27)
        threshold2: null
        threshold2Color: rgba(234, 112, 112, 0.22)
      id: 9
      interval: "$interval"
      isNew: true
      legend:
        alignAsTable: true
        avg: true
        current: true
        max: true
        min: true
        show: true
        total: false
        values: true
      lines: false
      linewidth: 2
      links: []
      nullPointMode: connected
      percentage: false
      pointradius: 2
      points: true
      renderer: flot
      seriesOverrides: []
      span: 6
      stack: false
      steppedLine: false
      targets:
      # The legend names the plotted quantity (a used/total ratio).
      # NOTE(review): `metric` still names ceph_osd_avail_bytes, which does
      # not appear in expr - confirm whether it should change too.
      - expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"})
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Utilization - {{ osd.$osd }}'
        metric: ceph_osd_avail_bytes
        refId: A
        step: 60
      timeFrom: null
      timeShift: null
      title: Utilization Variance
      tooltip:
        msResolution: false
        shared: true
        sort: 0
        value_type: cumulative
      type: graph
      xaxis:
        show: true
      yaxes:
      - format: none
        label: null
        logBase: 1
        max: null
        min: null
        show: true
      - format: none
        label: null
        logBase: 1
        max: null
        min: null
        show: true
  time:
    from: now-1h
    to: now
  timepicker:
    refresh_intervals: [5s, 10s, 30s, 1m, 5m, 15m, 30m, 1h, 2h, 1d]
    time_options: [5m, 15m, 1h, 6h, 12h, 24h, 2d, 7d, 30d]
  templating:
    list:
    # $ceph_cluster: release_group values found on ceph_health_status.
    - current: {}
      hide: 0
      label: Cluster
      name: ceph_cluster
      options: []
      type: query
      query: label_values(ceph_health_status, release_group)
      refresh: 1
      sort: 1
      datasource: prometheus
    # $interval: query interval, 1m selected by default.
    - auto: true
      auto_count: 10
      auto_min: 1m
      current:
        selected: true
        text: 1m
        value: 1m
      datasource: null
      hide: 0
      includeAll: false
      label: Interval
      multi: false
      name: interval
      options:
      - selected: false
        text: auto
        value: "$__auto_interval"
      - selected: true
        text: 1m
        value: 1m
      - selected: false
        text: 10m
        value: 10m
      - selected: false
        text: 30m
        value: 30m
      - selected: false
        text: 1h
        value: 1h
      - selected: false
        text: 6h
        value: 6h
      - selected: false
        text: 12h
        value: 12h
      - selected: false
        text: 1d
        value: 1d
      - selected: false
        text: 7d
        value: 7d
      - selected: false
        text: 14d
        value: 14d
      - selected: false
        text: 30d
        value: 30d
      query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
      refresh: 0
      type: interval
    # $osd: `id` label values of ceph_osd_metadata in the selected cluster.
    # Panel queries prepend "osd." to it when matching ceph_daemon.
    - current: {}
      datasource: prometheus
      hide: 0
      includeAll: false
      label: OSD
      multi: false
      name: osd
      options: []
      query: label_values(ceph_osd_metadata{release_group="$ceph_cluster"}, id)
      refresh: 1
      regex: ''
      type: query
  annotations:
    list: []
  refresh: 15m
  schemaVersion: 12
  version: 18
  links: []
  gnetId: 923
  description: CEPH OSD Status.
# Grafana dashboard "Ceph - Pools" (gnetId 926).
# Shows storage, object counts, IOPS and throughput for the pool chosen
# through the $pool template variable (a pool_id); $pool_name holds the
# matching pool name for titles and legends. Every panel reads from the
# `prometheus` datasource and filters on application="ceph" and
# release_group="$ceph_cluster".
ceph_pool:
  __inputs:
  - name: prometheus
    label: Prometheus
    description: Prometheus.IO
    type: datasource
    pluginId: prometheus
    pluginName: Prometheus
  __requires:
  - type: panel
    id: graph
    name: Graph
    version: ''
  - type: panel
    id: singlestat
    name: Singlestat
    version: ''
  - type: grafana
    id: grafana
    name: Grafana
    version: 3.1.1
  - type: datasource
    id: prometheus
    name: Prometheus
    version: 1.0.0
  id: null
  title: Ceph - Pools
  tags:
  - ceph
  - pools
  style: dark
  timezone: browser
  editable: true
  hideControls: false
  sharedCrosshair: false
  rows:
  # Row 1 ("Pool: $pool"): storage graph plus a usage gauge.
  - collapse: false
    editable: true
    height: 250px
    panels:
    # Pool Storage: max_avail ("Total"), bytes_used ("Used"),
    # max_avail - bytes_used ("Available") and raw_bytes_used ("Raw").
    - aliasColors: {}
      bars: false
      datasource: prometheus
      decimals: 2
      editable: true
      error: false
      fill: 4
      grid:
        threshold1: null
        threshold1Color: rgba(216, 200, 27, 0.27)
        threshold2: null
        threshold2Color: rgba(234, 112, 112, 0.22)
      height: ''
      id: 2
      interval: "$interval"
      isNew: true
      legend:
        alignAsTable: true
        avg: true
        current: true
        max: true
        min: true
        rightSide: true
        show: true
        total: false
        values: true
      lines: true
      linewidth: 0
      links: []
      nullPointMode: connected
      percentage: false
      pointradius: 5
      points: false
      renderer: flot
      seriesOverrides:
      # "Total ..." and "Raw ..." series are drawn as unfilled 4px lines;
      # "Total ..." is additionally kept out of the stack.
      - alias: "/^Total.*$/"
        fill: 0
        linewidth: 4
        stack: false
      - alias: "/^Raw.*$/"
        color: "#BF1B00"
        fill: 0
        linewidth: 4
      span: 10
      stack: true
      steppedLine: false
      targets:
      - expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Total - {{ $pool }}'
        refId: A
        step: 60
      - expr: ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Used - {{ $pool }}'
        refId: B
        step: 60
      - expr: >-
          ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
          - ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Available - {{ $pool }}'
        refId: C
        step: 60
      - expr: ceph_pool_raw_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Raw - {{ $pool }}'
        refId: D
        step: 60
      timeFrom: null
      timeShift: null
      title: "[[pool_name]] Pool Storage"
      tooltip:
        msResolution: false
        shared: true
        sort: 0
        value_type: individual
      type: graph
      xaxis:
        show: true
      yaxes:
      - format: bytes
        label: null
        logBase: 1
        max: null
        min: 0
        show: true
      - format: short
        label: null
        logBase: 1
        max: null
        min: 0
        show: true
    # Pool Usage: bytes_used / max_avail as a 0-1 ratio (format
    # percentunit, gauge range 0 to 1).
    - cacheTimeout: null
      colorBackground: false
      colorValue: true
      colors:
      - rgba(245, 54, 54, 0.9)
      - rgba(237, 129, 40, 0.89)
      - rgba(50, 172, 45, 0.97)
      datasource: prometheus
      decimals: 2
      editable: true
      error: false
      format: percentunit
      gauge:
        maxValue: 1
        minValue: 0
        show: true
        thresholdLabels: false
        thresholdMarkers: true
      id: 10
      interval: null
      isNew: true
      links: []
      mappingType: 1
      mappingTypes:
      - name: value to text
        value: 1
      - name: range to text
        value: 2
      maxDataPoints: 100
      nullPointMode: connected
      nullText: null
      postfix: ''
      postfixFontSize: 50%
      prefix: ''
      prefixFontSize: 50%
      rangeMaps:
      - from: 'null'
        text: N/A
        to: 'null'
      span: 2
      sparkline:
        fillColor: rgba(31, 118, 189, 0.18)
        full: false
        lineColor: rgb(31, 120, 193)
        show: false
      targets:
      - expr: >-
          (ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
          / ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"})
        interval: "$interval"
        intervalFactor: 1
        refId: A
        step: 60
      thresholds: ''
      title: "[[pool_name]] Pool Usage"
      type: singlestat
      valueFontSize: 80%
      valueMaps:
      - op: "="
        text: N/A
        value: 'null'
      valueName: current
    showTitle: true
    title: 'Pool: $pool'
  # Row 2: object counts, IOPS and throughput for the selected pool.
  - collapse: false
    editable: true
    height: 250px
    panels:
    # Objects in Pool: ceph_pool_objects and ceph_pool_dirty.
    - aliasColors: {}
      bars: false
      datasource: prometheus
      editable: true
      error: false
      fill: 1
      grid:
        threshold1: null
        threshold1Color: rgba(216, 200, 27, 0.27)
        threshold2: null
        threshold2Color: rgba(234, 112, 112, 0.22)
      height: ''
      id: 7
      isNew: true
      legend:
        avg: false
        current: false
        max: false
        min: false
        show: true
        total: false
        values: false
      lines: true
      linewidth: 2
      links: []
      nullPointMode: connected
      percentage: false
      pointradius: 5
      points: false
      renderer: flot
      seriesOverrides: []
      span: 6
      stack: false
      steppedLine: false
      targets:
      - expr: ceph_pool_objects{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Objects - {{ $pool_name }}'
        refId: A
        step: 60
      - expr: ceph_pool_dirty{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Dirty Objects - {{ $pool_name }}'
        refId: B
        step: 60
      timeFrom: null
      timeShift: null
      title: 'Objects in Pool [[pool_name]]'
      tooltip:
        msResolution: false
        shared: true
        sort: 0
        value_type: cumulative
      type: graph
      xaxis:
        show: true
      yaxes:
      - format: short
        label: null
        logBase: 1
        max: null
        min: 0
        show: true
      - format: short
        label: null
        logBase: 1
        max: null
        min: 0
        show: true
    # Pool IOPS: irate over a 3m window of ceph_pool_rd and ceph_pool_wr.
    - aliasColors: {}
      bars: false
      datasource: prometheus
      decimals: 2
      editable: true
      error: false
      fill: 1
      grid:
        threshold1: null
        threshold1Color: rgba(216, 200, 27, 0.27)
        threshold2: null
        threshold2Color: rgba(234, 112, 112, 0.22)
        thresholdLine: false
      id: 4
      interval: "$interval"
      isNew: true
      legend:
        alignAsTable: true
        avg: true
        current: true
        max: true
        min: true
        show: true
        total: false
        values: true
      lines: true
      linewidth: 2
      links: []
      nullPointMode: connected
      percentage: false
      pointradius: 5
      points: false
      renderer: flot
      seriesOverrides: []
      span: 6
      stack: true
      steppedLine: false
      targets:
      - expr: irate(ceph_pool_rd{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Read - {{ $pool_name }}'
        refId: B
        step: 60
      - expr: irate(ceph_pool_wr{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Write - {{ $pool_name }}'
        refId: A
        step: 60
      timeFrom: null
      timeShift: null
      title: "[[pool_name]] Pool IOPS"
      tooltip:
        msResolution: false
        shared: true
        sort: 0
        value_type: individual
      type: graph
      xaxis:
        show: true
      yaxes:
      - format: none
        label: IOPS
        logBase: 1
        max: null
        min: 0
        show: true
      - format: short
        label: IOPS
        logBase: 1
        max: null
        min: 0
        show: false
    # Pool Throughput: irate over a 3m window of ceph_pool_rd_bytes and
    # ceph_pool_wr_bytes. The pool_id matcher is the regex form (=~) used
    # by every other panel on this dashboard.
    - aliasColors: {}
      bars: false
      datasource: prometheus
      decimals: 2
      editable: true
      error: false
      fill: 1
      grid:
        threshold1: null
        threshold1Color: rgba(216, 200, 27, 0.27)
        threshold2: null
        threshold2Color: rgba(234, 112, 112, 0.22)
      id: 5
      interval: "$interval"
      isNew: true
      legend:
        alignAsTable: true
        avg: true
        current: true
        max: true
        min: true
        show: true
        total: false
        values: true
      lines: true
      linewidth: 2
      links: []
      nullPointMode: connected
      percentage: false
      pointradius: 5
      points: false
      renderer: flot
      seriesOverrides: []
      span: 12
      stack: true
      steppedLine: false
      targets:
      - expr: irate(ceph_pool_rd_bytes{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Read Bytes - {{ $pool_name }}'
        refId: A
        step: 60
      - expr: irate(ceph_pool_wr_bytes{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
        interval: "$interval"
        intervalFactor: 1
        legendFormat: 'Written Bytes - {{ $pool_name }}'
        refId: B
        step: 60
      timeFrom: null
      timeShift: null
      title: "[[pool_name]] Pool Throughput"
      tooltip:
        msResolution: false
        shared: true
        sort: 0
        value_type: individual
      type: graph
      xaxis:
        show: true
      yaxes:
      - format: Bps
        label: null
        logBase: 1
        max: null
        min: 0
        show: true
      - format: Bps
        label: null
        logBase: 1
        max: null
        min: 0
        show: true
    title: New row
  time:
    from: now-3h
    to: now
  timepicker:
    refresh_intervals: [5s, 10s, 30s, 1m, 5m, 15m, 30m, 1h, 2h, 1d]
    time_options: [5m, 15m, 1h, 6h, 12h, 24h, 2d, 7d, 30d]
  templating:
    list:
    # $ceph_cluster: release_group values found on ceph_health_status.
    - current: {}
      hide: 0
      label: Cluster
      name: ceph_cluster
      options: []
      type: query
      query: label_values(ceph_health_status, release_group)
      refresh: 1
      sort: 1
      datasource: prometheus
    # $interval: query interval, 1m selected by default.
    - auto: true
      auto_count: 10
      auto_min: 1m
      current:
        selected: true
        text: 1m
        value: 1m
      datasource: null
      hide: 0
      includeAll: false
      label: Interval
      multi: false
      name: interval
      options:
      - selected: false
        text: auto
        value: "$__auto_interval"
      - selected: true
        text: 1m
        value: 1m
      - selected: false
        text: 10m
        value: 10m
      - selected: false
        text: 30m
        value: 30m
      - selected: false
        text: 1h
        value: 1h
      - selected: false
        text: 6h
        value: 6h
      - selected: false
        text: 12h
        value: 12h
      - selected: false
        text: 1d
        value: 1d
      - selected: false
        text: 7d
        value: 7d
      - selected: false
        text: 14d
        value: 14d
      - selected: false
        text: 30d
        value: 30d
      query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
      refresh: 0
      type: interval
    # $pool: pool_id label values of ceph_pool_objects in the selected
    # cluster.
    - current: {}
      datasource: prometheus
      hide: 0
      includeAll: false
      label: Pool
      multi: false
      name: pool
      options: []
      query: label_values(ceph_pool_objects{release_group="$ceph_cluster"}, pool_id)
      refresh: 1
      regex: ''
      type: query
    # $pool_name: `name` label of ceph_pool_metadata for the selected
    # pool_id. Labelled "Pool Name" so it is distinguishable from the
    # $pool selector above.
    - current: {}
      datasource: prometheus
      hide: 0
      includeAll: false
      label: Pool Name
      multi: false
      name: pool_name
      options: []
      query: label_values(ceph_pool_metadata{release_group="$ceph_cluster",pool_id="[[pool]]" }, name)
      refresh: 1
      regex: ''
      type: query
  annotations:
    list: []
  refresh: 5m
  schemaVersion: 12
  version: 22
  links: []
  gnetId: 926
  description: Ceph Pools dashboard.
elasticsearch: __inputs: - name: prometheus label: Prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.6.3 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: list: - builtIn: 1 datasource: "-- Grafana --" enable: true hide: true iconColor: rgba(0, 211, 255, 1) name: Annotations & Alerts type: dashboard editable: true gnetId: 4358 graphTooltip: 1 hideControls: false id: links: [] refresh: 5m rows: - collapse: false height: panels: - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(178, 49, 13, 0.89) - rgba(50, 172, 45, 0.97) datasource: "prometheus" editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: '50' id: 8 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 5 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true tableColumn: '' targets: - expr: sum(elasticsearch_cluster_health_status{cluster=~"$cluster"}) format: time_series intervalFactor: 2 legendFormat: '' metric: '' refId: A step: 40 thresholds: '0,1' title: Cluster health status transparent: false type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: GREEN value: '1' - op: "=" text: RED value: '0' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: "prometheus" editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false 
thresholdLabels: false thresholdMarkers: true height: '50' id: 10 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(elasticsearch_cluster_health_number_of_nodes{cluster=~"$cluster"}) format: time_series interval: '' intervalFactor: 2 legendFormat: '' metric: '' refId: A step: 40 thresholds: '' title: Nodes transparent: false type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: "prometheus" editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: '50' id: 9 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: elasticsearch_cluster_health_number_of_data_nodes{cluster="$cluster"} format: time_series interval: '' intervalFactor: 2 legendFormat: '' metric: '' refId: A step: 40 thresholds: '' title: Data nodes transparent: false type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) 
datasource: "prometheus" editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: '50' hideTimeOverride: true id: 16 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: true tableColumn: '' targets: - expr: elasticsearch_cluster_health_number_of_pending_tasks{cluster="$cluster"} format: time_series interval: '' intervalFactor: 2 legendFormat: '' metric: '' refId: A step: 40 thresholds: '' title: Pending tasks transparent: false type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: true title: Cluster titleSize: h6 - collapse: false height: '' panels: - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: "prometheus" editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: '50' id: 11 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 minSpan: 2 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' repeat: shard_type span: 2.4 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true tableColumn: '' targets: - expr: elasticsearch_cluster_health_active_primary_shards{cluster="$cluster"} intervalFactor: 2 legendFormat: '' refId: A step: 40 thresholds: '' title: active primary shards 
type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: "prometheus" editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: '50' id: 39 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 minSpan: 2 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2.4 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true tableColumn: '' targets: - expr: elasticsearch_cluster_health_active_shards{cluster="$cluster"} intervalFactor: 2 legendFormat: '' refId: A step: 40 thresholds: '' title: active shards type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: "prometheus" editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: '50' id: 40 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 minSpan: 2 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2.4 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true tableColumn: '' targets: - expr: elasticsearch_cluster_health_initializing_shards{cluster="$cluster"} intervalFactor: 2 legendFormat: '' refId: A step: 40 thresholds: '' title: 
initializing shards type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: "prometheus" editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: '50' id: 41 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 minSpan: 2 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2.4 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true tableColumn: '' targets: - expr: elasticsearch_cluster_health_relocating_shards{cluster="$cluster"} intervalFactor: 2 legendFormat: '' refId: A step: 40 thresholds: '' title: relocating shards type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: "prometheus" editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: '50' id: 42 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 minSpan: 2 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2.4 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true tableColumn: '' targets: - expr: elasticsearch_cluster_health_delayed_unassigned_shards{cluster="$cluster"} intervalFactor: 2 legendFormat: '' refId: A 
step: 40 thresholds: '' title: unassigned shards type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: true title: Shards titleSize: h6 - collapse: false height: panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 30 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true sortDesc: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: elasticsearch_process_cpu_percent{cluster="$cluster",es_master_node="true",name=~"$node"} format: time_series instant: false interval: '' intervalFactor: 2 legendFormat: "{{ name }} - master" metric: '' refId: A step: 10 - expr: elasticsearch_process_cpu_percent{cluster="$cluster",es_data_node="true",name=~"$node"} format: time_series interval: '' intervalFactor: 2 legendFormat: "{{ name }} - data" metric: '' refId: B step: 10 thresholds: [] timeFrom: timeShift: title: CPU usage tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: percent label: CPU usage logBase: 1 max: 100 min: 0 show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 0 grid: {} height: '400' id: 31 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true sortDesc: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 
points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: elasticsearch_jvm_memory_used_bytes{cluster="$cluster",name=~"$node"} format: time_series interval: '' intervalFactor: 2 legendFormat: "{{ name }} - used: {{area}}" metric: '' refId: A step: 10 - expr: elasticsearch_jvm_memory_committed_bytes{cluster="$cluster",name=~"$node"} format: time_series intervalFactor: 2 legendFormat: "{{ name }} - committed: {{area}}" refId: B step: 10 - expr: elasticsearch_jvm_memory_max_bytes{cluster="$cluster",name=~"$node"} format: time_series intervalFactor: 2 legendFormat: "{{ name }} - max: {{area}}" refId: C step: 10 thresholds: [] timeFrom: timeShift: title: JVM memory usage tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: Memory logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 32 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: 1-(elasticsearch_filesystem_data_available_bytes{cluster="$cluster"}/elasticsearch_filesystem_data_size_bytes{cluster="$cluster",name=~"$node"}) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{ name }} - {{path}}" metric: '' refId: A step: 10 thresholds: - colorMode: custom fill: true fillColor: rgba(216, 200, 27, 0.27) op: gt value: 0.8 - colorMode: custom fill: true fillColor: 
rgba(234, 112, 112, 0.22) op: gt value: 0.9 timeFrom: timeShift: title: Disk usage tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: percentunit label: Disk Usage % logBase: 1 max: 1 min: 0 show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 47 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true sort: max sortDesc: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: sent transform: negative-Y spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: irate(elasticsearch_transport_tx_size_bytes_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} -sent" refId: D step: 10 - expr: irate(elasticsearch_transport_rx_size_bytes_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} -received" refId: C step: 10 thresholds: [] timeFrom: timeShift: title: Network usage tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: Bps label: Bytes/sec logBase: 1 max: min: show: true - format: pps label: '' logBase: 1 max: min: show: false repeat: repeatIteration: repeatRowId: showTitle: true title: System titleSize: h6 - collapse: false height: '' panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 1 legend: alignAsTable: true avg: true 
current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: true steppedLine: false targets: - expr: elasticsearch_indices_docs{cluster="$cluster",name=~"$node"} format: time_series interval: '' intervalFactor: 2 legendFormat: "{{ name }}" metric: '' refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Documents count tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: Documents logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 24 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: true steppedLine: false targets: - expr: irate(elasticsearch_indices_indexing_index_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{name}}" metric: '' refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Documents indexed rate tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: index calls/s logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: 
"prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 25 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: true steppedLine: false targets: - expr: rate(elasticsearch_indices_docs_deleted{cluster="$cluster",name=~"$node"}[$interval]) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{name}}" metric: '' refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Documents deleted rate tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: Documents/s logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 26 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: true steppedLine: false targets: - expr: rate(elasticsearch_indices_merges_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{name}}" metric: '' refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Documents merged rate tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: Documents/s logBase: 1 max: min: 
show: true - format: short label: logBase: 1 max: min: show: false repeat: repeatIteration: repeatRowId: showTitle: true title: Documents titleSize: h6 - collapse: false height: 250 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 48 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true sort: avg sortDesc: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: irate(elasticsearch_indices_indexing_index_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{ name }} - indexing" metric: '' refId: A step: 4 - expr: irate(elasticsearch_indices_search_query_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} - query" refId: B step: 4 - expr: irate(elasticsearch_indices_search_fetch_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} - fetch" refId: C step: 4 - expr: irate(elasticsearch_indices_merges_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} - merges" refId: D step: 4 - expr: irate(elasticsearch_indices_refresh_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} - refresh" refId: E step: 4 - expr: irate(elasticsearch_indices_flush_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} - flush" refId: F step: 4 thresholds: [] timeFrom: timeShift: title: Total Operations rate tooltip: msResolution: false shared: true 
sort: 2 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: Operations/s logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 49 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true sort: avg sortDesc: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: irate(elasticsearch_indices_indexing_index_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{ name }} - indexing" metric: '' refId: A step: 4 - expr: irate(elasticsearch_indices_search_query_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} - query" refId: B step: 4 - expr: irate(elasticsearch_indices_search_fetch_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} - fetch" refId: C step: 4 - expr: irate(elasticsearch_indices_merges_total_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} - merges" refId: D step: 4 - expr: irate(elasticsearch_indices_refresh_total_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} - refresh" refId: E step: 4 - expr: irate(elasticsearch_indices_flush_time_ms_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{ name }} - flush" 
refId: F step: 4 thresholds: [] timeFrom: timeShift: title: Total Operations time tooltip: msResolution: false shared: true sort: 2 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: ms label: Time logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false repeat: repeatIteration: repeatRowId: showTitle: true title: Total Operations stats titleSize: h6 - collapse: false height: '' panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 33 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: 'rate(elasticsearch_indices_search_query_time_seconds{cluster="$cluster",name=~"$node"}[$interval]) ' format: time_series interval: '' intervalFactor: 2 legendFormat: "{{name}}" metric: '' refId: A step: 4 thresholds: [] timeFrom: timeShift: title: Query time tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: ms label: Time logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 5 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot 
seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: rate(elasticsearch_indices_indexing_index_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{name}}" metric: '' refId: A step: 4 thresholds: [] timeFrom: timeShift: title: Indexing time tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: ms label: Time logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 3 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: rate(elasticsearch_indices_merges_total_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval]) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{name}}" metric: '' refId: A step: 4 thresholds: [] timeFrom: timeShift: title: Merging time tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: s label: Time logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false repeat: repeatIteration: repeatRowId: showTitle: true title: Times titleSize: h6 - collapse: false height: panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 4 legend: alignAsTable: true 
avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: true steppedLine: false targets: - expr: elasticsearch_indices_fielddata_memory_size_bytes{cluster="$cluster",name=~"$node"} format: time_series interval: '' intervalFactor: 2 legendFormat: "{{name}}" metric: '' refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Field data memory size tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: Memory logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 34 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: true steppedLine: false targets: - expr: rate(elasticsearch_indices_fielddata_evictions{cluster="$cluster",name=~"$node"}[$interval]) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{name}}" metric: '' refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Field data evictions tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: Evictions/s logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 
10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 35 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: true steppedLine: false targets: - expr: elasticsearch_indices_query_cache_memory_size_bytes{cluster="$cluster",name=~"$node"} format: time_series interval: '' intervalFactor: 2 legendFormat: "{{name}}" metric: '' refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Query cache size tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: Size logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 36 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: true steppedLine: false targets: - expr: rate(elasticsearch_indices_query_cache_evictions{cluster="$cluster",name=~"$node"}[$interval]) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{name}}" metric: '' refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Query cache evictions tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: 
Evictions/s logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false repeat: repeatIteration: repeatRowId: showTitle: true title: Caches titleSize: h6 - collapse: false height: 728 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 id: 45 legend: alignAsTable: true avg: true current: false max: true min: true show: true sort: avg sortDesc: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: ' irate(elasticsearch_thread_pool_rejected_count{cluster="$cluster",name=~"$node"}[$interval])' format: time_series intervalFactor: 2 legendFormat: "{{name}} - {{ type }}" refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Thread Pool operations rejected tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 id: 46 legend: alignAsTable: true avg: true current: false max: true min: true show: true sort: avg sortDesc: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: elasticsearch_thread_pool_queue_count{cluster="$cluster",name=~"$node"} format: time_series intervalFactor: 2 legendFormat: "{{name}} - {{ type }}" refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Thread Pool operations queued tooltip: msResolution: false shared: true sort: 2 value_type: 
individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 height: '' id: 43 legend: alignAsTable: true avg: true current: false max: true min: true show: true sort: avg sortDesc: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: elasticsearch_thread_pool_active_count{cluster="$cluster",name=~"$node"} format: time_series intervalFactor: 2 legendFormat: "{{name}} - {{ type }}" refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Thread Pool threads active tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 id: 44 legend: alignAsTable: true avg: true current: false max: true min: true show: true sort: avg sortDesc: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: irate(elasticsearch_thread_pool_completed_count{cluster="$cluster",name=~"$node"}[$interval]) format: time_series intervalFactor: 2 legendFormat: "{{name}} - {{ type }}" refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Thread Pool operations completed tooltip: msResolution: false shared: true sort: 2 value_type: individual type: 
graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: true title: Thread Pool titleSize: h6 - collapse: false height: panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 7 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: true steppedLine: false targets: - expr: rate(elasticsearch_jvm_gc_collection_seconds_count{cluster="$cluster",name=~"$node"}[$interval]) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{name}} - {{gc}}" metric: '' refId: A step: 4 thresholds: [] timeFrom: timeShift: title: GC count tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: GCs logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} height: '400' id: 27 legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: true min: true rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: rate(elasticsearch_jvm_gc_collection_seconds_sum{cluster="$cluster",name=~"$node"}[$interval]) format: 
time_series interval: '' intervalFactor: 2 legendFormat: "{{name}} - {{gc}}" metric: '' refId: A step: 4 thresholds: [] timeFrom: timeShift: title: GC time tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: s label: Time logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false repeat: repeatIteration: repeatRowId: showTitle: true title: JVM Garbage Collection titleSize: h6 schemaVersion: 14 style: dark tags: - elasticsearch - App templating: list: - auto: true auto_count: 30 auto_min: 10s current: text: auto value: "$__auto_interval" hide: 0 label: Interval name: interval options: - selected: true text: auto value: "$__auto_interval" - selected: false text: 1m value: 1m - selected: false text: 10m value: 10m - selected: false text: 30m value: 30m - selected: false text: 1h value: 1h - selected: false text: 6h value: 6h - selected: false text: 12h value: 12h - selected: false text: 1d value: 1d - selected: false text: 7d value: 7d - selected: false text: 14d value: 14d - selected: false text: 30d value: 30d query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d refresh: 2 type: interval - allValue: current: {} datasource: "prometheus" hide: 0 includeAll: false label: Instance multi: false name: cluster options: [] query: label_values(elasticsearch_cluster_health_status,cluster) refresh: 1 regex: '' sort: 1 tagValuesQuery: tags: [] tagsQuery: type: query useTags: false - allValue: current: {} datasource: "prometheus" hide: 0 includeAll: true label: node multi: true name: node options: [] query: label_values(elasticsearch_process_cpu_percent,name) refresh: 1 regex: '' sort: 1 tagValuesQuery: tags: [] tagsQuery: type: query useTags: false time: from: now-12h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: browser 
title: Elasticsearch version: 1 description: Elasticsearch detailed dashboard hosts_containers: __inputs: - name: prometheus label: Prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: panel id: graph name: Graph version: '' - type: panel id: singlestat name: Singlestat version: '' - type: grafana id: grafana name: Grafana version: 3.1.1 - type: datasource id: prometheus name: Prometheus version: 1.3.0 id: title: Container Metrics (cAdvisor) description: Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. Uses cAdvisor metrics only. tags: - kubernetes style: dark timezone: browser editable: true hideControls: false sharedCrosshair: false rows: - collapse: false editable: true height: 200px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) thresholdLine: false height: 200px id: 32 isNew: true legend: alignAsTable: false avg: true current: true max: false min: false rightSide: false show: false sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: false targets: - expr: sum (rate (container_network_receive_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m])) interval: 10s intervalFactor: 1 legendFormat: Received metric: network refId: A step: 10 - expr: '- sum (rate (container_network_transmit_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m]))' interval: 10s intervalFactor: 1 legendFormat: Sent metric: network refId: B step: 10 timeFrom: timeShift: title: Network I/O pressure tooltip: msResolution: false shared: true sort: 0 value_type: 
cumulative transparent: false type: graph xaxis: show: true yaxes: - format: Bps label: logBase: 1 max: min: show: true - format: Bps label: logBase: 1 max: min: show: false title: Network I/O pressure - collapse: false editable: true height: 250px panels: - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true height: 180px id: 4 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 4 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) * 100 interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: 65, 90 title: Cluster memory usage transparent: false type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true height: 180px id: 6 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 4 sparkline: 
fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) * 100 interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: 65, 90 title: Cluster CPU usage (5m avg) type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true height: 180px id: 7 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 4 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) / sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) * 100 interval: 10s intervalFactor: 1 legendFormat: '' metric: '' refId: A step: 10 thresholds: 65, 90 title: Cluster filesystem usage type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 9 interval: 
isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 20% prefix: '' prefixFontSize: 20% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"}) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Used type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 10 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Total type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 11 interval: isNew: true 
links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: " cores" postfixFontSize: 30% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m])) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Used type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 12 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: " cores" postfixFontSize: 30% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Total type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 13 interval: isNew: 
true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Used type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 14 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Total type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current showTitle: false title: Total usage - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 3 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 
112, 0.22) height: '' id: 17 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (pod_name) interval: 10s intervalFactor: 1 legendFormat: "{{ pod_name }}" metric: container_cpu refId: A step: 10 timeFrom: timeShift: title: Pods CPU usage (5m avg) tooltip: msResolution: true shared: true sort: 2 value_type: cumulative transparent: false type: graph xaxis: show: true yaxes: - format: none label: cores logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false showTitle: false title: Pods CPU usage - collapse: true editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 3 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) height: '' id: 23 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (rate (container_cpu_usage_seconds_total{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) by (systemd_service_name) hide: false interval: 10s intervalFactor: 1 legendFormat: "{{ systemd_service_name }}" metric: container_cpu refId: A step: 10 timeFrom: timeShift: title: System services CPU usage (5m avg) tooltip: msResolution: true shared: true sort: 2 
value_type: cumulative type: graph xaxis: show: true yaxes: - format: none label: cores logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: System services CPU usage - collapse: true editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 3 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) height: '' id: 24 isNew: true legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: false min: false rightSide: true show: true sideWidth: sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"}[5m])) by (container_name, pod_name) hide: false interval: 10s intervalFactor: 1 legendFormat: 'pod: {{ pod_name }} | {{ container_name }}' metric: container_cpu refId: A step: 10 - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (kubernetes_io_hostname, name, image) hide: false interval: 10s intervalFactor: 1 legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})' metric: container_cpu refId: B step: 10 - expr: sum (rate (container_cpu_usage_seconds_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) by (kubernetes_io_hostname, rkt_container_name) interval: 10s intervalFactor: 1 legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}' metric: container_cpu refId: C step: 10 timeFrom: timeShift: title: Containers CPU usage (5m avg) tooltip: msResolution: true shared: true sort: 2 value_type: cumulative type: graph 
xaxis: show: true yaxes: - format: none label: cores logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: Containers CPU usage - collapse: true editable: true height: 500px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 3 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 20 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: false show: true sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (rate (container_cpu_usage_seconds_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) by (id) hide: false interval: 10s intervalFactor: 1 legendFormat: "{{ id }}" metric: container_cpu refId: A step: 10 timeFrom: timeShift: title: All processes CPU usage (5m avg) tooltip: msResolution: true shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: none label: cores logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false repeat: showTitle: false title: All processes CPU usage - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 25 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false 
steppedLine: true targets: - expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}) by (pod_name) interval: 10s intervalFactor: 1 legendFormat: "{{ pod_name }}" metric: container_memory_usage:sort_desc refId: A step: 10 timeFrom: timeShift: title: Pods memory usage tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: Pods memory usage - collapse: true editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 26 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (container_memory_working_set_bytes{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}) by (systemd_service_name) interval: 10s intervalFactor: 1 legendFormat: "{{ systemd_service_name }}" metric: container_memory_usage:sort_desc refId: A step: 10 timeFrom: timeShift: title: System services memory usage tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: System services memory usage - collapse: true editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 
200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 27 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"}) by (container_name, pod_name) interval: 10s intervalFactor: 1 legendFormat: 'pod: {{ pod_name }} | {{ container_name }}' metric: container_memory_usage:sort_desc refId: A step: 10 - expr: sum (container_memory_working_set_bytes{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}) by (kubernetes_io_hostname, name, image) interval: 10s intervalFactor: 1 legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})' metric: container_memory_usage:sort_desc refId: B step: 10 - expr: sum (container_memory_working_set_bytes{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}) by (kubernetes_io_hostname, rkt_container_name) interval: 10s intervalFactor: 1 legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}' metric: container_memory_usage:sort_desc refId: C step: 10 timeFrom: timeShift: title: Containers memory usage tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: Containers memory usage - collapse: true editable: true height: 500px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 28 isNew: true 
legend: alignAsTable: true avg: true current: true max: false min: false rightSide: false show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (container_memory_working_set_bytes{id!="/",kubernetes_io_hostname=~"^$Node$"}) by (id) interval: 10s intervalFactor: 1 legendFormat: "{{ id }}" metric: container_memory_usage:sort_desc refId: A step: 10 timeFrom: timeShift: title: All processes memory usage tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: All processes memory usage - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 16 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: false targets: - expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (pod_name) interval: 10s intervalFactor: 1 legendFormat: "-> {{ pod_name }}" metric: network refId: A step: 10 - expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (pod_name)' interval: 10s intervalFactor: 1 legendFormat: "<- {{ pod_name }}" metric: 
network refId: B step: 10 timeFrom: timeShift: title: Pods network I/O (5m avg) tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: Bps label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: Pods network I/O - collapse: true editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 30 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: false targets: - expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (container_name, pod_name) hide: false interval: 10s intervalFactor: 1 legendFormat: "-> pod: {{ pod_name }} | {{ container_name }}" metric: network refId: B step: 10 - expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (container_name, pod_name)' hide: false interval: 10s intervalFactor: 1 legendFormat: "<- pod: {{ pod_name }} | {{ container_name }}" metric: network refId: D step: 10 - expr: sum (rate (container_network_receive_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (kubernetes_io_hostname, name, image) hide: false interval: 10s intervalFactor: 1 legendFormat: "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})" metric: network refId: A step: 10 - expr: '- sum (rate 
(container_network_transmit_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (kubernetes_io_hostname, name, image)' hide: false interval: 10s intervalFactor: 1 legendFormat: "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})" metric: network refId: C step: 10 - expr: sum (rate (container_network_receive_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) by (kubernetes_io_hostname, rkt_container_name) hide: false interval: 10s intervalFactor: 1 legendFormat: "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}" metric: network refId: E step: 10 - expr: '- sum (rate (container_network_transmit_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) by (kubernetes_io_hostname, rkt_container_name)' hide: false interval: 10s intervalFactor: 1 legendFormat: "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}" metric: network refId: F step: 10 timeFrom: timeShift: title: Containers network I/O (5m avg) tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: Bps label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: Containers network I/O - collapse: true editable: true height: 500px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 29 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: false show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: false targets: - expr: sum (rate
(container_network_receive_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) by (id) interval: 10s intervalFactor: 1 legendFormat: "-> {{ id }}" metric: network refId: A step: 10 - expr: '- sum (rate (container_network_transmit_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) by (id)' interval: 10s intervalFactor: 1 legendFormat: "<- {{ id }}" metric: network refId: B step: 10 timeFrom: timeShift: title: All processes network I/O (5m avg) tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: Bps label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: All processes network I/O time: from: now-5m to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d templating: list: - allValue: ".*" current: {} datasource: prometheus hide: 0 includeAll: true multi: false name: Node options: [] query: label_values(kubernetes_io_hostname) refresh: 1 type: query annotations: list: [] refresh: 5m schemaVersion: 12 version: 13 links: [] gnetId: 315 rabbitmq: __inputs: - name: prometheus label: Prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.2.0 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: list: [] editable: true gnetId: 2121 graphTooltip: 0 hideControls: false id: links: [] refresh: 5m rows: - collapse: false height: 266 panels: - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 13 interval: 
links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 metric: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} refId: A step: 2 thresholds: Up,Down timeFrom: 30s title: RabbitMQ Server type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' - op: "=" text: Down value: '0' - op: "=" text: Up value: '1' valueName: current - alert: conditions: - evaluator: params: - 1 type: lt operator: type: and query: params: - A - 10s - now reducer: params: [] type: last type: query - evaluator: params: [] type: no_value operator: type: and query: params: - A - 10s - now reducer: params: [] type: last type: query executionErrorState: alerting frequency: 60s handler: 1 message: Some of the RabbitMQ node is down name: Node Stats alert noDataState: no_data notifications: [] aliasColors: {} bars: true datasource: prometheus decimals: 0 fill: 1 id: 12 legend: alignAsTable: true avg: false current: true max: false min: false show: true total: false values: true lines: false linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 9 stack: false steppedLine: false targets: - expr: rabbitmq_running{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}" metric: rabbitmq_running refId: A step: 2 thresholds: - colorMode: critical fill: true line: true op: lt value: 1 timeFrom: 30s timeShift: title: Node up Stats tooltip: shared: true sort: 0 value_type: individual type: graph 
xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 6 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: false steppedLine: false targets: - expr: rabbitmq_exchangesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{instance}}:exchanges" metric: rabbitmq_exchangesTotal refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Exchanges tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 4 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: false steppedLine: false targets: - expr: rabbitmq_channelsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{instance}}:channels" metric: rabbitmq_channelsTotal refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Channels tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 3 legend: alignAsTable: true avg: true 
current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: false steppedLine: false targets: - expr: rabbitmq_consumersTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{instance}}:consumers" metric: rabbitmq_consumersTotal refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Consumers tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 5 legend: avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: false steppedLine: false targets: - expr: rabbitmq_connectionsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{instance}}:connections" metric: rabbitmq_connectionsTotal refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Connections tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus fill: 1 id: 7 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: false steppedLine: false targets: - expr: 
rabbitmq_queuesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{instance}}:queues" metric: rabbitmq_queuesTotal refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Queues tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 8 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: sum by (vhost)(rabbitmq_queue_messages_ready{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) intervalFactor: 2 legendFormat: "{{vhost}}:ready" metric: rabbitmq_queue_messages_ready refId: A step: 2 - expr: sum by (vhost)(rabbitmq_queue_messages_published_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) intervalFactor: 2 legendFormat: "{{vhost}}:published" metric: rabbitmq_queue_messages_published_total refId: B step: 2 - expr: sum by (vhost)(rabbitmq_queue_messages_delivered_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) intervalFactor: 2 legendFormat: "{{vhost}}:delivered" metric: rabbitmq_queue_messages_delivered_total refId: C step: 2 - expr: sum by (vhost)(rabbitmq_queue_messages_unacknowledged{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) intervalFactor: 2 legendFormat: "{{vhost}}:unack" metric: ack refId: D step: 2 thresholds: [] timeFrom: timeShift: title: Messages/host tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: 
show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 2 legend: alignAsTable: true avg: false current: true max: false min: false rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: rabbitmq_queue_messages{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{queue}}:{{durable}}" metric: rabbitmq_queue_messages refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Messages / Queue tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus fill: 1 id: 9 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: rabbitmq_node_mem_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:used" metric: rabbitmq_node_mem_used refId: A step: 2 - expr: rabbitmq_node_mem_limit{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:limit" metric: node_mem refId: B step: 2 thresholds: [] timeFrom: timeShift: title: Memory tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: decbytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: 
false datasource: prometheus fill: 1 id: 10 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: rabbitmq_fd_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:used" metric: '' refId: A step: 2 - expr: rabbitmq_fd_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:total" metric: node_mem refId: B step: 2 thresholds: [] timeFrom: timeShift: title: File descriptors tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus fill: 1 id: 11 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: rabbitmq_sockets_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:used" metric: '' refId: A step: 2 - expr: rabbitmq_sockets_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:total" metric: '' refId: B step: 2 thresholds: [] timeFrom: timeShift: title: Sockets tooltip: shared: true sort: 0 value_type: individual transparent: false type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: 
repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: list: - current: tags: [] text: Prometheus value: Prometheus hide: 0 label: name: datasource options: [] query: prometheus refresh: 1 regex: '' type: datasource - current: {} hide: 0 label: null name: rabbit options: [] type: query query: label_values(rabbitmq_up, release_group) refresh: 1 sort: 1 datasource: prometheus time: from: now-5m to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: browser title: RabbitMQ Metrics version: 17 description: 'Basic rabbitmq host stats: Node Stats, Exchanges, Channels, Consumers, Connections, Queues, Messages, Messages per Queue, Memory, File Descriptors, Sockets.' kubernetes_capacity_planning: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.4.1 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: list: [] description: '' editable: true gnetId: 22 graphTooltip: 0 hideControls: false id: links: [] refresh: false rows: - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 3 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(rate(node_cpu{mode="idle"}[2m])) * 100 hide: false intervalFactor: 10 legendFormat: '' refId: A step: 50 
thresholds: [] timeFrom: timeShift: title: Idle cpu tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: percent label: cpu usage logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: show: true - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 9 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(node_load1) intervalFactor: 4 legendFormat: load 1m refId: A step: 20 target: '' - expr: sum(node_load5) intervalFactor: 4 legendFormat: load 5m refId: B step: 20 target: '' - expr: sum(node_load15) intervalFactor: 4 legendFormat: load 15m refId: C step: 20 target: '' thresholds: [] timeFrom: timeShift: title: System load tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: percentunit label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 4 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"} yaxis: 2 spaceLength: 10 span: 9 stack: true 
steppedLine: false targets: - expr: sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached) intervalFactor: 2 legendFormat: memory usage metric: memo refId: A step: 10 target: '' - expr: sum(node_memory_Buffers) interval: '' intervalFactor: 2 legendFormat: memory buffers metric: memo refId: B step: 10 target: '' - expr: sum(node_memory_Cached) interval: '' intervalFactor: 2 legendFormat: memory cached metric: memo refId: C step: 10 target: '' - expr: sum(node_memory_MemFree) interval: '' intervalFactor: 2 legendFormat: memory free metric: memo refId: D step: 10 target: '' thresholds: [] timeFrom: timeShift: title: Memory usage tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 5 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100" intervalFactor: 2 metric: '' refId: A step: 60 target: '' thresholds: 80, 90 title: Memory usage type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' 
valueName: avg repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 246 panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 6 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: read yaxis: 1 - alias: '{instance="172.17.0.1:9100"}' yaxis: 2 - alias: io time yaxis: 2 spaceLength: 10 span: 9 stack: false steppedLine: false targets: - expr: sum(rate(node_disk_bytes_read[5m])) hide: false intervalFactor: 4 legendFormat: read refId: A step: 20 target: '' - expr: sum(rate(node_disk_bytes_written[5m])) intervalFactor: 4 legendFormat: written refId: B step: 20 - expr: sum(rate(node_disk_io_time_ms[5m])) intervalFactor: 4 legendFormat: io time refId: C step: 20 thresholds: [] timeFrom: timeShift: title: Disk I/O tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: ms label: logBase: 1 max: min: show: true - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percentunit gauge: maxValue: 1 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 12 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) 
show: false tableColumn: '' targets: - expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"})) / sum(node_filesystem_size{device!="rootfs"}) intervalFactor: 2 refId: A step: 60 target: '' thresholds: 0.75, 0.9 title: Disk space usage type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 8 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: 'transmitted ' yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(rate(node_network_receive_bytes{device!~"lo"}[5m])) hide: false intervalFactor: 2 legendFormat: '' refId: A step: 10 target: '' thresholds: [] timeFrom: timeShift: title: Network received tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: bytes label: logBase: 1 max: min: show: true - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 10 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: 'transmitted ' yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(rate(node_network_transmit_bytes{device!~"lo"}[5m])) hide: false 
intervalFactor: 2 legendFormat: '' refId: B step: 10 target: '' thresholds: [] timeFrom: timeShift: title: Network transmitted tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: bytes label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 276 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 11 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 9 stack: false steppedLine: false targets: - expr: sum(kube_pod_info) format: time_series intervalFactor: 2 legendFormat: Current number of Pods refId: A step: 10 - expr: sum(kube_node_status_capacity_pods) format: time_series intervalFactor: 2 legendFormat: Maximum capacity of pods refId: B step: 10 thresholds: [] timeFrom: timeShift: title: Cluster Pod Utilization tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 7 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% 
rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) * 100 format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 60 target: '' thresholds: '80,90' title: Pod Utilization type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: list: [] time: from: now-1h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: browser title: Kubernetes Capacity Planning version: 4 inputs: - name: prometheus pluginId: prometheus type: datasource value: prometheus overwrite: true kubernetes_cluster_status: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.4.1 - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: list: [] editable: true gnetId: graphTooltip: 0 hideControls: false id: links: [] rows: - collapse: false height: 129 panels: - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 5 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% 
rangeMaps: - from: 'null' text: N/A to: 'null' span: 6 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(up{job=~"apiserver|kube-scheduler|kube-controller-manager"} == 0) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '1,3' title: Control Plane UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: UP value: 'null' valueName: total - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 6 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 6 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"}) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '3,5' title: Alerts Firing type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: '0' value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: true title: Cluster Health titleSize: h6 - collapse: false height: 168 panels: - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus decimals: format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 1 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: 
connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100 format: time_series interval: '' intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '50,80' title: API Servers UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus decimals: format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 2 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"})) * 100 format: time_series interval: '' intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '50,80' title: Controller Managers UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus decimals: format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 3 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - 
name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(up{job="kube-scheduler-discovery"} == 1) / count(up{job="kube-scheduler-discovery"})) * 100 format: time_series interval: '' intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '50,80' title: Schedulers UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true hideTimeOverride: false id: 4 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: count(increase(kube_pod_container_status_restarts{namespace=~"kube-system|tectonic-system"}[1h]) > 5) format: time_series interval: '' intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '1,3' title: Crashlooping Control Plane Pods type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: '0' value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: true title: Control Plane Status titleSize: h6 - collapse: false height: 158 panels: - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 
54, 0.9) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 8 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(100 - (avg by (instance) (rate(node_cpu{job="node-exporter",mode="idle"}[5m])) * 100)) / count(node_cpu{job="node-exporter",mode="idle"}) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '80,90' title: CPU Utilization type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 7 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100" format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '80,90' title: Memory Utilization type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg - cacheTimeout: 
colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 9 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"})) / sum(node_filesystem_size{device!="rootfs"}) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '80,90' title: Filesystem Utilization type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 10 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) * 100 format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '80,90' title: Pod Utilization type: singlestat valueFontSize: 80% 
valueMaps: - op: "=" text: N/A value: 'null' valueName: avg repeat: repeatIteration: repeatRowId: showTitle: true title: Capacity Planning titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: list: [] time: from: now-6h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: '' title: Kubernetes Cluster Status version: 3 inputs: - name: prometheus pluginId: prometheus type: datasource value: prometheus overwrite: true nodes: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.4.1 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: list: [] description: Dashboard to get an overview of one server editable: true gnetId: 22 graphTooltip: 0 hideControls: false id: links: [] refresh: false rows: - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 3 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: 100 - (avg by (cpu) (irate(node_cpu{mode="idle", instance="$server"}[5m])) * 100) hide: false intervalFactor: 10 legendFormat: "{{cpu}}" refId: A step: 50 thresholds: [] timeFrom: timeShift: title: Idle cpu tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: percent label: cpu usage logBase: 1 max: 100 min: 
0 show: true - format: short label: logBase: 1 max: min: show: true - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 9 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: node_load1{instance="$server"} intervalFactor: 4 legendFormat: load 1m refId: A step: 20 target: '' - expr: node_load5{instance="$server"} intervalFactor: 4 legendFormat: load 5m refId: B step: 20 target: '' - expr: node_load15{instance="$server"} intervalFactor: 4 legendFormat: load 15m refId: C step: 20 target: '' thresholds: [] timeFrom: timeShift: title: System load tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: percentunit label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 4 legend: alignAsTable: false avg: false current: false hideEmpty: false hideZero: false max: false min: false rightSide: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"} yaxis: 2 spaceLength: 10 span: 9 stack: true steppedLine: false targets: - expr: node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} - 
node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"} hide: false interval: '' intervalFactor: 2 legendFormat: memory used metric: '' refId: C step: 10 - expr: node_memory_Buffers{instance="$server"} interval: '' intervalFactor: 2 legendFormat: memory buffers metric: '' refId: E step: 10 - expr: node_memory_Cached{instance="$server"} intervalFactor: 2 legendFormat: memory cached metric: '' refId: F step: 10 - expr: node_memory_MemFree{instance="$server"} intervalFactor: 2 legendFormat: memory free metric: '' refId: D step: 10 thresholds: [] timeFrom: timeShift: title: Memory usage tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 5 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: ((node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} - node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"}) / node_memory_MemTotal{instance="$server"}) * 100 intervalFactor: 2 refId: A step: 60 target: '' thresholds: 80, 90 title: Memory usage type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg 
repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 6 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: read yaxis: 1 - alias: '{instance="172.17.0.1:9100"}' yaxis: 2 - alias: io time yaxis: 2 spaceLength: 10 span: 9 stack: false steppedLine: false targets: - expr: sum by (instance) (rate(node_disk_bytes_read{instance="$server"}[2m])) hide: false intervalFactor: 4 legendFormat: read refId: A step: 20 target: '' - expr: sum by (instance) (rate(node_disk_bytes_written{instance="$server"}[2m])) intervalFactor: 4 legendFormat: written refId: B step: 20 - expr: sum by (instance) (rate(node_disk_io_time_ms{instance="$server"}[2m])) intervalFactor: 4 legendFormat: io time refId: C step: 20 thresholds: [] timeFrom: timeShift: title: Disk I/O tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: ms label: logBase: 1 max: min: show: true - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percentunit gauge: maxValue: 1 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 7 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 
sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(node_filesystem_size{device!="rootfs",instance="$server"}) - sum(node_filesystem_free{device!="rootfs",instance="$server"})) / sum(node_filesystem_size{device!="rootfs",instance="$server"}) intervalFactor: 2 refId: A step: 60 target: '' thresholds: 0.75, 0.9 title: Disk space usage type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 8 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: 'transmitted ' yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: rate(node_network_receive_bytes{instance="$server",device!~"lo"}[5m]) hide: false intervalFactor: 2 legendFormat: "{{device}}" refId: A step: 10 target: '' thresholds: [] timeFrom: timeShift: title: Network received tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: bytes label: logBase: 1 max: min: show: true - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 10 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: 
'transmitted ' yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: rate(node_network_transmit_bytes{instance="$server",device!~"lo"}[5m]) hide: false intervalFactor: 2 legendFormat: "{{device}}" refId: B step: 10 target: '' thresholds: [] timeFrom: timeShift: title: Network transmitted tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: bytes label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: list: - allValue: current: {} datasource: prometheus hide: 0 includeAll: false label: multi: false name: server options: [] query: label_values(node_boot_time, instance) refresh: 1 regex: '' sort: 0 tagValuesQuery: '' tags: [] tagsQuery: '' type: query useTags: false time: from: now-1h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: browser title: Nodes version: 2 inputs: - name: prometheus pluginId: prometheus type: datasource value: prometheus overwrite: true openstack_control_plane: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.5.2 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' - type: panel id: text name: Text version: '' annotations: list: [] editable: true gnetId: graphTooltip: 1 hideControls: false id: links: [] refresh: 5m rows: - collapse: false height: 250px panels: - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) 
- rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 24 interval: "> 60s" links: - dashboard: Openstack Service name: Drilldown dashboard params: var-Service=keystone title: Openstack Service type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: openstack_check_keystone_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Keystone type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 23 interval: "> 60s" links: - dashboard: Openstack Service name: Drilldown dashboard params: var-Service=glance title: Openstack Service type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% 
rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: openstack_check_glance_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Glance type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(202, 58, 40, 0.86) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 22 interval: "> 60s" links: - dashboard: Openstack Service name: Drilldown dashboard params: var-Service=heat title: Openstack Service type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: openstack_check_heat_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: 
'1,2' title: Heat type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 21 interval: "> 60s" links: - dashboard: Openstack Service name: Drilldown dashboard params: var-Service=neutron title: Openstack Service type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: openstack_check_neutron_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Neutron type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(208, 53, 34, 0.82) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 20 interval: "> 60s" links: - 
dashboard: Openstack Service name: Drilldown dashboard params: var-Service=nova title: Openstack Service type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: openstack_check_nova_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Nova type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 19 interval: "> 60s" links: - dashboard: Openstack Service name: Drilldown dashboard params: var-Service=swift title: Openstack Service type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: 
openstack_check_swift_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Ceph type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 18 interval: "> 60s" links: - dashboard: Openstack Service name: Drilldown dashboard params: var-Service=cinder title: Openstack Service type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: openstack_check_cinder_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Cinder type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - 
cacheTimeout: colorBackground: true colorValue: false colors: - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 17 interval: "> 60s" links: - dashboard: Openstack Service name: Drilldown dashboard params: var-Service=placement title: Openstack Service type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: openstack_check_placement_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Placement type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(208, 53, 34, 0.82) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 16 interval: "> 60s" links: - dashboard: RabbitMQ Metrics name: Drilldown dashboard title: RabbitMQ Metrics type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: 
connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: min(rabbitmq_up) fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: RabbitMQ type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(208, 53, 34, 0.82) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 15 interval: "> 60s" mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: min(mysql_global_status_wsrep_ready) fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: MariaDB type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT 
# NOTE: panel ids must be unique within a dashboard. The Memcached singlestat below uses id 25 because id 13 is already taken by the "Disk (used vs total)" graph panel of this same dashboard.
value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(225, 177, 40, 0.59) - rgba(208, 53, 34, 0.82) - rgba(118, 245, 40, 0.73) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 14 interval: "> 60s" links: - dashboard: Nginx Stats name: Drilldown dashboard title: Nginx Stats type: dashboard mappingType: 2 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: '1' text: OK to: '99999999999999' - from: '0' text: CRIT to: '0' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: sum_over_time(nginx_connections_total{type="active", namespace="openstack"}[5m]) fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '0,1' title: Nginx type: singlestat valueFontSize: 50% valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(208, 53, 34, 0.82) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 25 interval: "> 60s" links: - dashboard: Memcached name: Drilldown dashboard title: Memcached type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: ''
prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: min(memcached_up) fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Memcached type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current repeat: repeatIteration: repeatRowId: showTitle: true title: OpenStack Services titleSize: h6 - collapse: false height: 250px panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 11 interval: "> 60s" legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 3 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - alias: free column: value expr: openstack_total_used_vcpus{job="openstack-metrics", region="$region"} + openstack_total_free_vcpus{job="openstack-metrics", region="$region"} format: time_series function: min groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 - alias: used column: value expr: openstack_total_used_vcpus{job="openstack-metrics", region="$region"} format: time_series function: max groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] intervalFactor: 2 policy: default rawQuery: 
false refId: B resultFormat: time_series step: 120 thresholds: [] timeFrom: timeShift: title: VCPUs (total vs used) tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 12 interval: "> 60s" legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 3 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - alias: free column: value expr: openstack_total_used_ram_MB{job="openstack-metrics", region="$region"} + openstack_total_free_ram_MB{job="openstack-metrics", region="$region"} format: time_series function: mean groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 - alias: used column: value expr: openstack_total_used_ram_MB{job="openstack-metrics", region="$region"} format: time_series function: mean groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] interval: '' intervalFactor: 2 policy: default rawQuery: false refId: B resultFormat: time_series step: 120 thresholds: [] timeFrom: timeShift: title: RAM (total vs used) tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: mbytes label: '' logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 13 
interval: "> 60s" legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 3 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - alias: free column: value expr: openstack_total_used_disk_GB{job="openstack-metrics", region="$region"} + openstack_total_free_disk_GB{job="openstack-metrics", region="$region"} format: time_series function: mean groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 - alias: used column: value expr: openstack_total_used_disk_GB{job="openstack-metrics", region="$region"} format: time_series function: mean groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] intervalFactor: 2 policy: default rawQuery: false refId: B resultFormat: time_series step: 120 thresholds: [] timeFrom: timeShift: title: Disk (used vs total) tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: gbytes logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: true title: Virtual resources titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: enable: true list: - allValue: current: {} datasource: prometheus hide: 0 includeAll: false label: multi: false name: region options: [] query: label_values(openstack_exporter_cache_refresh_duration_seconds, region) refresh: 1 regex: '' sort: 0 tagValuesQuery: '' tags: [] tagsQuery: '' type: query useTags: false time: from: now-1h to: now timepicker: collapse: false enable: true notice: false now: true refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d status: Stable 
time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d type: timepicker timezone: browser title: OpenStack Metrics version: 2 nginx_stats: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.5.2 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 annotations: list: [] description: Show stats from the hnlq715/nginx-vts-exporter. editable: true gnetId: 2949 graphTooltip: 0 hideControls: false id: links: [] refresh: 5m rows: - collapse: false height: 250 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 7 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 12 stack: false steppedLine: false targets: - expr: sum(nginx_upstream_responses_total{upstream=~"^$Upstream$"}) by (status_code, upstream) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{ status_code }}.{{ upstream }}" metric: nginx_upstream_response refId: A step: 4 thresholds: [] timeFrom: timeShift: title: HTTP Response Codes by Upstream tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 - collapse: false height: 250 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 6 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 links: [] 
nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(irate(nginx_upstream_requests_total{upstream=~"^$Upstream$"}[5m])) by (upstream) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{ upstream }}" metric: nginx_upstream_requests refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Upstream Requests rate tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 5 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(irate(nginx_upstream_bytes_total{upstream=~"^$Upstream$"}[5m])) by (direction, upstream) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{ direction }}.{{ upstream }}" metric: nginx_upstream_bytes refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Upstream Bytes Transfer rate tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 - collapse: false height: 250px panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 1 legend: avg: false current: false max: false min: false show: true total: false values: 
false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(irate(nginx_connections_total[5m])) by (type) format: time_series intervalFactor: 2 legendFormat: "{{ type }}" metric: nginx_server_connections refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Overall Connections rate tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 4 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(irate(nginx_cache_total{ server_zone=~"$ingress"}[5m])) by (server_zone, type) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{ type }}.{{ server_zone }}" metric: nginx_server_cache refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Cache Action rate tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 - collapse: false height: 250 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 3 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 
links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(irate(nginx_requests_total{ server_zone=~"$ingress" }[5m])) by (server_zone) format: time_series interval: '' intervalFactor: 2 legendFormat: "{{ server_zone }}" metric: nginx_server_requests refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Overall Requests rate tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 2 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(irate(nginx_bytes_total{ server_zone=~"$ingress" }[5m])) by (direction, server_zone) format: time_series intervalFactor: 2 legendFormat: "{{ direction }}.{{ server_zone }}" metric: nginx_server_bytes refId: A step: 10 thresholds: [] timeFrom: timeShift: title: Overall Bytes Transferred rate tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 schemaVersion: 14 style: dark tags: - prometheus - nginx templating: list: - allValue: ".*" current: {} datasource: prometheus hide: 0 includeAll: false label: multi: true name: Upstream options: [] query: 
label_values(nginx_upstream_bytes_total, upstream) refresh: 1 regex: '' sort: 1 tagValuesQuery: '' tags: [] tagsQuery: '' type: query useTags: false - allValue: current: {} datasource: prometheus hide: 0 includeAll: false label: multi: true name: ingress options: [] query: label_values(nginx_bytes_total, server_zone) refresh: 1 regex: "/^[^\\*_]+$/" sort: 1 tagValuesQuery: '' tags: [] tagsQuery: '' type: query useTags: false time: from: now-1h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: browser title: Nginx Stats version: 13 openstack-service: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.5.2 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: enable: true list: [] editable: true gnetId: graphTooltip: 1 hideControls: false id: links: [] refresh: 5m rows: - collapse: false height: 250px panels: - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(225, 177, 40, 0.59) - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) datasource: "prometheus" editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 6 interval: "> 60s" links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' expr: 
openstack_check_[[Service]]_api{job="openstack-metrics"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '0,1' title: '' type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: CRITICAL value: '0' - op: "=" text: OK value: '1' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: "prometheus" editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 13 interval: "> 60s" links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: true tableColumn: '' targets: - column: value condition: '' expr: sum(nginx_responses_total{server_zone=~"[[Service]].*", status_code="5xx"}) fill: '' format: time_series function: count groupBy: - interval: auto params: - auto type: time - params: - '0' type: fill groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 tags: [] thresholds: '' title: HTTP 5xx errors type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: '0' value: 'null' valueName: current - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 0 grid: {} id: 7 interval: ">60s" legend: alignAsTable: true avg: true current: false max: true min: true show: true sortDesc: true total: false values: true lines: true 
linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 8 stack: false steppedLine: false targets: - expr: sum(nginx_upstream_response_msecs_avg{upstream=~"openstack-[[Service]].*"}) by (upstream) format: time_series intervalFactor: 2 refId: A step: 120 thresholds: [] timeFrom: timeShift: title: HTTP response time tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: ms logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: 0 show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 1 grid: {} id: 9 interval: "> 60s" legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: true targets: - alias: healthy column: value expr: openstack_check_[[Service]]_api format: time_series function: last groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series select: [] step: 120 tags: [] thresholds: [] timeFrom: timeShift: title: API Availability tooltip: msResolution: false shared: false sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: none label: '' logBase: 1 max: 1 min: 0 show: false - format: short logBase: 1 max: min: show: false - aliasColors: '{status_code="2xx"}': "#629E51" '{status_code="5xx"}': "#BF1B00" bars: true dashLength: 10 dashes: false datasource: "prometheus" editable: true error: false fill: 0 grid: {} id: 8 interval: "> 60s" legend: alignAsTable: false avg: false current: 
false hideEmpty: false max: false min: false rightSide: false show: true total: false values: false lines: false linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 8 stack: true steppedLine: false targets: - expr: sum(nginx_responses_total{server_zone=~"[[Service]].*"}) by (status_code) format: time_series intervalFactor: 2 refId: A step: 120 thresholds: [] timeFrom: timeShift: title: Number of HTTP responses tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: true title: Service Status titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: enable: true list: - allValue: current: tags: [] text: cinder value: cinder hide: 0 includeAll: false label: multi: false name: Service options: - selected: false text: nova value: nova - selected: false text: glance value: glance - selected: false text: keystone value: keystone - selected: true text: cinder value: cinder - selected: false text: heat value: heat - selected: false text: placement value: placement - selected: false text: neutron value: neutron query: nova,glance,keystone,cinder,heat,placement,neutron type: custom time: from: now-1h to: now timepicker: collapse: false enable: true notice: false now: true refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d status: Stable time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d type: timepicker timezone: browser title: Openstack Service version: 4 coredns: __inputs: - name: prometheus label: Prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.4.3 - type: panel id: graph name: Graph version: '' - 
type: datasource id: prometheus name: Prometheus version: 1.0.0 annotations: list: [] editable: true gnetId: 5926 graphTooltip: 0 hideControls: false id: links: [] rows: - collapse: false height: 250px panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 1 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: total yaxis: 2 spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) by (proto) format: time_series intervalFactor: 2 legendFormat: "{{proto}}" refId: A step: 60 - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) format: time_series intervalFactor: 2 legendFormat: total refId: B step: 60 thresholds: [] timeFrom: timeShift: title: Requests (total) tooltip: shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: pps logBase: 1 max: min: 0 show: true - format: pps logBase: 1 max: min: 0 show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 12 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: total yaxis: 2 - alias: other yaxis: 2 spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: sum(rate(coredns_dns_request_type_count_total{instance=~"$instance"}[5m])) by (type) intervalFactor: 2 legendFormat: "{{type}}" refId: A step: 60 thresholds: [] timeFrom: timeShift: title: Requests (by qtype) tooltip: shared: true sort: 0 
value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: pps logBase: 1 max: min: 0 show: true - format: pps logBase: 1 max: min: 0 show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 2 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: total yaxis: 2 spaceLength: 10 span: 4 stack: false steppedLine: false targets: - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) by (zone) intervalFactor: 2 legendFormat: "{{zone}}" refId: A step: 60 - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) intervalFactor: 2 legendFormat: total refId: B step: 60 thresholds: [] timeFrom: timeShift: title: Requests (by zone) tooltip: shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: pps logBase: 1 max: min: 0 show: true - format: pps logBase: 1 max: min: 0 show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 10 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: total yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(rate(coredns_dns_request_do_count_total{instance=~"$instance"}[5m])) intervalFactor: 2 legendFormat: DO refId: A step: 40 - expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m])) intervalFactor: 2 legendFormat: total refId: B step: 40 thresholds: [] timeFrom: timeShift: title: Requests (DO 
bit) tooltip: shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: pps logBase: 1 max: min: 0 show: true - format: pps logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 9 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: tcp:90 yaxis: 2 - alias: 'tcp:99 ' yaxis: 2 - alias: tcp:50 yaxis: 2 spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) by (le,proto)) intervalFactor: 2 legendFormat: "{{proto}}:99 " refId: A step: 60 - expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) by (le,proto)) intervalFactor: 2 legendFormat: "{{proto}}:90" refId: B step: 60 - expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) by (le,proto)) intervalFactor: 2 legendFormat: "{{proto}}:50" refId: C step: 60 thresholds: [] timeFrom: timeShift: title: Requests (size, udp) tooltip: shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: 0 show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 14 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - 
alias: tcp:90 yaxis: 1 - alias: 'tcp:99 ' yaxis: 1 - alias: tcp:50 yaxis: 1 spaceLength: 10 span: 3 stack: false steppedLine: false targets: - expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) by (le,proto)) intervalFactor: 2 legendFormat: "{{proto}}:99 " refId: A step: 60 - expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) by (le,proto)) intervalFactor: 2 legendFormat: "{{proto}}:90" refId: B step: 60 - expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) by (le,proto)) intervalFactor: 2 legendFormat: "{{proto}}:50" refId: C step: 60 thresholds: [] timeFrom: timeShift: title: Requests (size,tcp) tooltip: shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: 0 show: true repeat: repeatIteration: repeatRowId: showTitle: false title: Row titleSize: h6 - collapse: false height: 250px panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 5 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(rate(coredns_dns_response_rcode_count_total{instance=~"$instance"}[5m])) by (rcode) intervalFactor: 2 legendFormat: "{{rcode}}" refId: A step: 40 thresholds: [] timeFrom: timeShift: title: Responses (by rcode) tooltip: shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: pps logBase: 1 max: min: 0 show: true - format: 
short logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 3 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m])) by (le)) intervalFactor: 2 legendFormat: 99% refId: A step: 40 - expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m])) by (le)) intervalFactor: 2 legendFormat: 90% refId: B step: 40 - expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m])) by (le)) intervalFactor: 2 legendFormat: 50% refId: C step: 40 thresholds: [] timeFrom: timeShift: title: Responses (duration) tooltip: shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: s logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 8 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: udp:50% yaxis: 1 - alias: tcp:50% yaxis: 2 - alias: tcp:90% yaxis: 2 - alias: tcp:99% yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: 'histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) by (le,proto)) ' intervalFactor: 2 
legendFormat: "{{proto}}:99%" refId: A step: 40 - expr: 'histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) by (le,proto)) ' intervalFactor: 2 legendFormat: "{{proto}}:90%" refId: B step: 40 - expr: 'histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m])) by (le,proto)) ' intervalFactor: 2 legendFormat: "{{proto}}:50%" metric: '' refId: C step: 40 thresholds: [] timeFrom: timeShift: title: Responses (size, udp) tooltip: shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: 0 show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 13 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: udp:50% yaxis: 1 - alias: tcp:50% yaxis: 1 - alias: tcp:90% yaxis: 1 - alias: tcp:99% yaxis: 1 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: 'histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) by (le,proto)) ' intervalFactor: 2 legendFormat: "{{proto}}:99%" refId: A step: 40 - expr: 'histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) by (le,proto)) ' intervalFactor: 2 legendFormat: "{{proto}}:90%" refId: B step: 40 - expr: 'histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m])) by (le, proto)) ' intervalFactor: 2 legendFormat: "{{proto}}:50%" metric: '' refId: C step: 40 thresholds: [] timeFrom: timeShift: title: Responses (size, tcp) 
tooltip: shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: 0 show: true repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 250px panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 15 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(coredns_cache_size{instance=~"$instance"}) by (type) intervalFactor: 2 legendFormat: "{{type}}" refId: A step: 40 thresholds: [] timeFrom: timeShift: title: Cache (size) tooltip: shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: 0 show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 16 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: misses yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(rate(coredns_cache_hits_total{instance=~"$instance"}[5m])) by (type) intervalFactor: 2 legendFormat: hits:{{type}} refId: A step: 40 - expr: sum(rate(coredns_cache_misses_total{instance=~"$instance"}[5m])) by (type) intervalFactor: 2 legendFormat: misses refId: B step: 40 thresholds: [] timeFrom: timeShift: title: Cache (hitrate) 
tooltip: shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: pps logBase: 1 max: min: 0 show: true - format: pps logBase: 1 max: min: 0 show: true repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 schemaVersion: 14 style: dark tags: - dns - coredns templating: list: - allValue: ".*" current: {} datasource: prometheus hide: 0 includeAll: true label: Instance multi: false name: instance options: [] query: up{job="coredns"} refresh: 1 regex: .*instance="(.*?)".* sort: 0 tagValuesQuery: '' tags: [] tagsQuery: '' type: query useTags: false time: from: now-3h to: now timepicker: now: true refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: utc title: CoreDNS version: 3 description: A dashboard for the CoreDNS DNS server.