diff --git a/grafana/values.yaml b/grafana/values.yaml index fa3187dd7..0a573cfde 100644 --- a/grafana/values.yaml +++ b/grafana/values.yaml @@ -2823,7 +2823,7 @@ conf: name: Prometheus version: 1.3.0 id: - title: Kubernetes cluster monitoring (via Prometheus) + title: Container Metrics (cAdvisor) description: Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. Uses cAdvisor metrics only. @@ -4562,7 +4562,7 @@ conf: lineColor: rgb(31, 120, 193) show: false targets: - - expr: rabbitmq_up + - expr: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 metric: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} refId: A @@ -6236,568 +6236,6 @@ conf: type: datasource value: prometheus overwrite: true - kubernetes_cluster_health: - __inputs: - - name: prometheus - label: prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.4.1 - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - - type: panel - id: singlestat - name: Singlestat - version: '' - annotations: - list: [] - editable: true - gnetId: - graphTooltip: 0 - hideControls: false - id: - links: [] - rows: - - collapse: false - height: 254 - panels: - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: prometheus - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 1 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: sum(up{job=~"kube-apiserver|kube-scheduler|kube-controller-manager"} == - 0) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '1,3' - title: Control Plane Components Down - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: Everything UP and healthy - value: 'null' - - op: "=" - text: '' - value: '' - valueName: avg - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: prometheus - decimals: - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 2 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"}) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '3,5' - title: Alerts Firing - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: '0' - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: prometheus - decimals: - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 3 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: sum(ALERTS{alertstate="pending",alertname!="DeadMansSwitch"}) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '3,5' - title: Alerts Pending - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: '0' - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: prometheus - decimals: - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 4 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: count(increase(kube_pod_container_status_restarts[1h]) > 5) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '1,3' - title: Crashlooping Pods - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: '0' - value: 'null' - valueName: current - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Dashboard Row - titleSize: h6 - - collapse: false - height: 250 - panels: - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: prometheus - decimals: - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 5 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: sum(kube_node_status_condition{condition="Ready",status!="true"}) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '1,3' - title: Node Not Ready - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: prometheus - decimals: - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 6 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: sum(kube_node_status_condition{condition="DiskPressure",status="true"}) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '1,3' - title: Node Disk Pressure - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: prometheus - decimals: - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 7 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: sum(kube_node_status_condition{condition="MemoryPressure",status="true"}) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '1,3' - title: Node Memory Pressure - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - - cacheTimeout: - colorBackground: false - colorValue: true - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: prometheus - decimals: - format: none - gauge: - maxValue: 100 - minValue: 0 - show: false - thresholdLabels: false - thresholdMarkers: true - id: 8 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: sum(kube_node_spec_unschedulable) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '1,3' - title: Nodes Unschedulable - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: current - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Dashboard Row - titleSize: h6 - schemaVersion: 14 - style: dark - tags: [] - templating: - list: [] - time: - from: now-6h - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - timezone: '' - title: Kubernetes Cluster Health - version: 9 - inputs: - - name: prometheus - pluginId: prometheus - type: datasource - value: prometheus - overwrite: true kubernetes_cluster_status: __inputs: - name: prometheus @@ -7490,532 +6928,6 @@ conf: type: datasource value: prometheus overwrite: true - kubernetes_control_plane: - __inputs: - - name: prometheus - label: prometheus - description: '' - type: datasource - pluginId: prometheus - pluginName: Prometheus - __requires: - - type: grafana - id: grafana - name: Grafana - version: 4.4.1 - - type: panel - id: graph - name: Graph - version: '' - - type: datasource - id: prometheus - name: Prometheus - version: 1.0.0 - - type: panel - id: singlestat - name: Singlestat - version: '' - annotations: - list: [] - editable: true - gnetId: - graphTooltip: 0 - hideControls: false - id: - links: [] - rows: - - collapse: false - height: 250px - panels: - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: prometheus - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 1 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: (sum(up{job="apiserver"} == 1) / sum(up{job="apiserver"})) * 100 - format: time_series - intervalFactor: 2 - refId: A - step: 600 - thresholds: '50,80' - title: API Servers UP - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: avg - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: prometheus - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 2 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / sum(up{job="kube-controller-manager-discovery"})) - * 100 - format: time_series - intervalFactor: 2 - refId: A - step: 600 - thresholds: '50,80' - title: Controller Managers UP - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: avg - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(245, 54, 54, 0.9) - - rgba(237, 129, 40, 0.89) - - rgba(50, 172, 45, 0.97) - datasource: prometheus - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 3 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: (sum(up{job="kube-scheduler-discovery"} == 1) / sum(up{job="kube-scheduler-discovery"})) - * 100 - format: time_series - intervalFactor: 2 - refId: A - step: 600 - thresholds: '50,80' - title: Schedulers UP - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: N/A - value: 'null' - valueName: avg - - cacheTimeout: - colorBackground: false - colorValue: false - colors: - - rgba(50, 172, 45, 0.97) - - rgba(237, 129, 40, 0.89) - - rgba(245, 54, 54, 0.9) - datasource: prometheus - format: percent - gauge: - maxValue: 100 - minValue: 0 - show: true - thresholdLabels: false - thresholdMarkers: true - id: 4 - interval: - links: [] - mappingType: 1 - mappingTypes: - - name: value to text - value: 1 - - name: range to text - value: 2 - maxDataPoints: 100 - nullPointMode: connected - nullText: - postfix: '' - postfixFontSize: 50% - prefix: '' - prefixFontSize: 50% - rangeMaps: - - from: 'null' - text: N/A - to: 'null' - span: 3 - sparkline: - fillColor: rgba(31, 118, 189, 0.18) - full: false - lineColor: rgb(31, 120, 193) - show: false - tableColumn: '' - targets: - - expr: max(sum by(instance) (rate(apiserver_request_count{code=~"5.."}[5m])) - / sum by(instance) (rate(apiserver_request_count[5m]))) * 100 - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 600 - thresholds: '5,10' - title: API Server Request Error Rate - type: singlestat - valueFontSize: 80% - valueMaps: - - op: "=" - text: '0' - value: 'null' - valueName: avg - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Dashboard Row - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: prometheus - fill: 1 - id: 7 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 12 - stack: false - steppedLine: false - targets: - - expr: sum by(verb) (rate(apiserver_latency_seconds:quantile[5m]) >= 0) - format: time_series - intervalFactor: 2 - legendFormat: '' - refId: A - step: 30 - thresholds: [] - timeFrom: - timeShift: - title: API Server Request Latency - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Dashboard Row - titleSize: h6 - - collapse: false - height: 250 - panels: - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: prometheus - fill: 1 - id: 5 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: cluster:scheduler_e2e_scheduling_latency_seconds:quantile - format: time_series - intervalFactor: 2 - refId: A - step: 60 - thresholds: [] - timeFrom: - timeShift: - title: End to end scheduling latency - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: dtdurations - label: - logBase: 1 - max: - min: - show: true - - aliasColors: {} - bars: false - dashLength: 10 - dashes: false - datasource: prometheus - fill: 1 - id: 6 - legend: - avg: false - current: false - max: false - min: false - show: true - total: false - values: false - lines: true - linewidth: 1 - links: [] - nullPointMode: 'null' - percentage: false - pointradius: 5 - points: false - renderer: flot - seriesOverrides: [] - spaceLength: 10 - span: 6 - stack: false - steppedLine: false - targets: - - expr: sum by(instance) (rate(apiserver_request_count{code!~"2.."}[5m])) - format: time_series - intervalFactor: 2 - legendFormat: Error Rate - refId: A - step: 60 - - expr: sum by(instance) (rate(apiserver_request_count[5m])) - format: time_series - intervalFactor: 2 - legendFormat: Request Rate - refId: B - step: 60 - thresholds: [] - timeFrom: - timeShift: - title: API Server Request Rates - tooltip: - shared: true - sort: 0 - value_type: individual - type: graph - xaxis: - buckets: - mode: time - name: - show: true - values: [] - yaxes: - - format: short - label: - logBase: 1 - max: - min: - show: true - - format: short - label: - logBase: 1 - max: - min: - show: true - repeat: - repeatIteration: - repeatRowId: - showTitle: false - title: Dashboard Row - titleSize: h6 - schemaVersion: 14 - style: dark - tags: [] - templating: - list: [] - time: - from: now-6h - to: now - timepicker: - refresh_intervals: - - 5s - - 10s - - 30s - - 1m - - 5m - - 15m - - 30m - - 1h - - 2h - - 1d - time_options: - - 5m - - 15m - - 1h - - 6h - - 12h - - 24h - - 2d - - 7d - - 30d - timezone: '' - title: Kubernetes Control Plane Status - version: 3 - inputs: - - name: prometheus - pluginId: prometheus - type: datasource - value: prometheus - overwrite: true nodes: __inputs: - name: prometheus @@ -10162,11 +9074,11 @@ conf: - 30d type: timepicker timezone: browser - title: Openstack Main1 + title: OpenStack Metrics version: 2 nginx_stats: __inputs: - - name: DS_PROMETHEUS + - name: prometheus label: prometheus description: '' type: datasource @@ -10203,7 +9115,7 @@ conf: bars: false dashLength: 10 dashes: false - datasource: "${DS_PROMETHEUS}" + datasource: prometheus fill: 1 id: 7 legend: @@ -10279,7 +9191,7 @@ conf: bars: false dashLength: 10 dashes: false - datasource: "${DS_PROMETHEUS}" + datasource: prometheus fill: 1 id: 6 legend: @@ -10346,7 +9258,7 @@ conf: bars: false dashLength: 10 dashes: false - datasource: "${DS_PROMETHEUS}" + datasource: prometheus fill: 1 id: 5 legend: @@ -10422,7 +9334,7 @@ conf: bars: false dashLength: 10 dashes: false - datasource: "${DS_PROMETHEUS}" + datasource: prometheus fill: 1 id: 1 legend: @@ -10487,7 +9399,7 @@ conf: bars: false dashLength: 10 dashes: false - datasource: "${DS_PROMETHEUS}" + datasource: prometheus fill: 1 id: 4 legend: @@ -10563,7 +9475,7 @@ conf: bars: false dashLength: 10 dashes: false - datasource: "${DS_PROMETHEUS}" + datasource: prometheus fill: 1 id: 3 legend: @@ -10629,7 +9541,7 @@ conf: bars: false dashLength: 10 dashes: false - datasource: "${DS_PROMETHEUS}" + datasource: prometheus fill: 1 id: 2 legend: @@ -10706,7 +9618,7 @@ conf: list: - allValue: ".*" current: {} - datasource: "${DS_PROMETHEUS}" + datasource: prometheus hide: 0 includeAll: false label: @@ -10724,7 +9636,7 @@ conf: useTags: false - allValue: current: {} - datasource: "${DS_PROMETHEUS}" + datasource: prometheus hide: 0 includeAll: false label: