diff --git a/charts/rally-exporter/Chart.yaml b/charts/rally-exporter/Chart.yaml
new file mode 100755
index 0000000..a60e9fc
--- /dev/null
+++ b/charts/rally-exporter/Chart.yaml
@@ -0,0 +1,11 @@
+---
+apiVersion: v1
+name: rally-exporter
+version: 0.1.1
+description: Rally exporter for Prometheus
+home: https://github.com/vexxhost/rally-exporter
+maintainers:
+- name: Mohammed Naser
+  email: mnaser@vexxhost.com
+  url: https://github.com/mnaser
+appVersion: "0.0.1"  # quoted so it is always read as a string
diff --git a/charts/rally-exporter/templates/_helpers.tpl b/charts/rally-exporter/templates/_helpers.tpl
new file mode 100755
index 0000000..c6dc359
--- /dev/null
+++ b/charts/rally-exporter/templates/_helpers.tpl
@@ -0,0 +1,50 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "rally-exporter.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "rally-exporter.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Generate basic labels.
+Emits the fixed app.kubernetes.io labels followed by any extra labels from
+.Values.commonLabels. The output starts with a newline and is not indented,
+so callers pipe it through `indent N`.
+Name and instance are quoted so that an all-digit release name or override
+still renders as a YAML string.
+*/}}
+{{- define "rally-exporter.labels" }}
+app.kubernetes.io/name: {{ include "rally-exporter.fullname" . | quote }}
+app.kubernetes.io/instance: {{ .Release.Name | quote }}
+app.kubernetes.io/component: metrics
+app.kubernetes.io/part-of: rally
+{{- if .Values.commonLabels }}
+{{ toYaml .Values.commonLabels }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "rally-exporter.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
diff --git a/charts/rally-exporter/templates/deployment.yaml b/charts/rally-exporter/templates/deployment.yaml
new file mode 100755
index 0000000..e29f40d
--- /dev/null
+++ b/charts/rally-exporter/templates/deployment.yaml
@@ -0,0 +1,64 @@
+---
+# Runs the rally-exporter container with the chart's Secret (rally.conf,
+# clouds.yaml, plan.yaml) mounted at /etc/rally and metrics on port 9355.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: {{ .Release.Namespace }}
+  name: {{ include "rally-exporter.fullname" . }}
+  labels:
+{{- include "rally-exporter.labels" . | indent 4 }}
+spec:
+  # NOTE(review): matchLabels reuses the full label helper, which also emits
+  # .Values.commonLabels. Deployment selectors are immutable, so changing
+  # commonLabels after the first install is expected to break upgrades -
+  # confirm before relying on commonLabels for an existing release.
+  selector:
+    matchLabels:
+{{- include "rally-exporter.labels" . | indent 6 }}
+  template:
+    metadata:
+      annotations:
+        # Hash of the rendered Secret: any configuration change alters the pod
+        # template and therefore rolls the pod.
+        checksum/config: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
+      labels:
+{{- include "rally-exporter.labels" . | indent 8 }}
+    spec:
+      containers:
+        - name: rally-exporter
+          # Optional .Values.image overrides the image; the default keeps the
+          # previous hard-coded reference. Pin a tag or digest for
+          # reproducible deployments instead of the floating `latest`.
+          image: {{ .Values.image | default "vexxhost/rally-exporter:latest" | quote }}
+          # NOTE(review): presumably <cloud name> <task file>; `default` matches
+          # the cloud key in the Secret's clouds.yaml - confirm against the
+          # exporter's CLI.
+          args:
+            - default
+            - /etc/rally/plan.yaml
+          volumeMounts:
+            - name: exporter-config
+              mountPath: /etc/rally
+          ports:
+            - name: metrics
+              containerPort: 9355
+          env:
+            - name: OS_CLIENT_CONFIG_FILE
+              value: /etc/rally/clouds.yaml
+      volumes:
+        - name: exporter-config
+          secret:
+            secretName: {{ include "rally-exporter.fullname" . }}
+      {{- with .Values.hostAliases }}
+      hostAliases:
+{{ toYaml . | indent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+{{ toYaml . | indent 8 }}
+      {{- end }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+{{ toYaml . | indent 8 }}
+      {{- end }}
diff --git a/charts/rally-exporter/templates/podmonitor.yaml b/charts/rally-exporter/templates/podmonitor.yaml
new file mode 100755
index 0000000..f5f9312
--- /dev/null
+++ b/charts/rally-exporter/templates/podmonitor.yaml
@@ -0,0 +1,29 @@
+---
+# Prometheus Operator PodMonitor scraping the exporter's "metrics" port.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: {{ include "rally-exporter.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+{{ include "rally-exporter.labels" . | indent 4 }}
+spec:
+  podMetricsEndpoints:
+    - interval: {{ .Values.podMonitor.interval | quote }}
+      scrapeTimeout: {{ .Values.podMonitor.scrapeTimeout | quote }}
+      port: metrics
+      # Pin the instance label to a constant and drop the per-pod target
+      # labels; presumably so that series identity does not change when the
+      # pod is rescheduled - confirm.
+      relabelings:
+        - action: replace
+          regex: (.*)
+          replacement: default
+          targetLabel: instance
+        - action: labeldrop
+          regex: '^(container|endpoint|namespace|pod)$'
+  namespaceSelector:
+    any: true
+  selector:
+    matchLabels:
+{{ include "rally-exporter.labels" . | indent 6 }}
diff --git a/charts/rally-exporter/templates/prometheusrule.yaml b/charts/rally-exporter/templates/prometheusrule.yaml
new file mode 100755
index 0000000..03dadbf
--- /dev/null
+++ b/charts/rally-exporter/templates/prometheusrule.yaml
@@ -0,0 +1,170 @@
+---
+# Alerting rules for the rally-exporter metrics (Prometheus Operator CRD).
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ include "rally-exporter.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+{{ include "rally-exporter.labels" . | indent 4 }}
+spec:
+  groups:
+    - name: rally
+      rules:
+        # RallyTestFailed is declared several times with a growing `for:`
+        # window so that the severity label changes (P5, P4, P3, P2) the
+        # longer a task keeps failing. The backtick-quoted strings are emitted
+        # verbatim by Helm so that Prometheus expands $labels and $value.
+        - alert: RallyTestFailed
+          expr: |
+            rally_task_passed != 1
+          labels:
+            severity: P5
+          annotations:
+            summary: "[Rally] {{`{{$labels.title}}`}} failed"
+            description: >
+              The Rally task {{`{{$labels.title}}`}} has failed in its most
+              recent run which means the cloud is currently seeing issues.
+        - alert: RallyTestFailed
+          for: 7m
+          expr: |
+            rally_task_passed != 1
+          labels:
+            severity: P4
+          annotations:
+            summary: "[Rally] {{`{{$labels.title}}`}} failed"
+            description: >
+              The Rally task {{`{{$labels.title}}`}} has failed in its most
+              recent run which means the cloud is currently seeing issues.
+        - alert: RallyTestFailed
+          for: 12m
+          expr: |
+            rally_task_passed != 1
+          labels:
+            severity: P3
+          annotations:
+            summary: "[Rally] {{`{{$labels.title}}`}} failed"
+            description: >
+              The Rally task {{`{{$labels.title}}`}} has failed in its most
+              recent run which means the cloud is currently seeing issues.
+        - alert: RallyTestFailed  # same condition, held for 17m
+          for: 17m
+          expr: |
+            rally_task_passed != 1
+          labels:
+            severity: P2
+          annotations:
+            summary: "[Rally] {{`{{$labels.title}}`}} failed"
+            description: >
+              The Rally task {{`{{$labels.title}}`}} has failed in its most
+              recent run which means the cloud is currently seeing issues.
+
+        - alert: RallyNotReporting  # absent(): no rally_task_passed series at all
+          for: 5m
+          expr: |
+            absent(rally_task_passed)
+          labels:
+            severity: P4
+          annotations:
+            summary: "[Rally] Not reporting"
+            description: >
+              Rally is not reporting any details at all for the past 5 minutes
+              which means that the service is down and it could likely be
+              masking issues with the cloud.
+
+        - alert: RallyNotReporting  # same condition, P3 once it has held for 7m
+          for: 7m
+          expr: |
+            absent(rally_task_passed)
+          labels:
+            severity: P3
+          annotations:
+            summary: "[Rally] Not reporting"
+            description: >
+              Rally is not reporting any details at all for the past 7 minutes
+              which means that the service is down and it could likely be
+              masking issues with the cloud.
+
+        - alert: RallyNotRunning  # rally_task_time is more than 900 s behind now
+          expr: |
+            time() - rally_task_time > 900
+          labels:
+            severity: P3
+          annotations:
+            summary: "[Rally] {{`{{$labels.title}}`}} not running"
+            description: >
+              The most recent Rally run for the {{`{{$labels.title}}`}} task
+              was {{`{{$value}}`}} seconds ago which implies that it is not
+              running properly.
+
+        - alert: RallyInstanceStatus  # NOTE(review): expr has no ERROR status filter - confirm
+          for: 15m
+          expr: |
+            count(openstack_nova_server_status{name=~"^s_rally.*"}) > 2
+          labels:
+            severity: P5
+          annotations:  # count() drops every label, so only $value is available
+            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} instances"
+            description: >
+              Rally has failed to cleanup {{`{{$value}}`}} instances which resulted
+              to an ERROR state.
+
+        - alert: RallyInstanceStatus  # NOTE(review): expr has no ERROR status filter - confirm
+          for: 15m
+          expr: |
+            count(openstack_nova_server_status{name=~"^s_rally.*"}) > 5
+          labels:
+            severity: P4
+          annotations:  # count() drops every label, so only $value is available
+            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} instances"
+            description: >
+              Rally has failed to cleanup {{`{{$value}}`}} instances which resulted
+              to an ERROR state.
+
+        - alert: RallyInstanceStatus  # highest tier: 10 or more matching servers
+          for: 15m
+          expr: |
+            count(openstack_nova_server_status{name=~"^s_rally.*"}) >= 10
+          labels:
+            severity: P3
+          annotations:  # count() drops every label, so only $value is available
+            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} instances"
+            description: >
+              Rally has failed to cleanup {{`{{$value}}`}} instances which resulted
+              to an ERROR state.
+
+        - alert: RallyVolumeStatus  # NOTE(review): expr has no ERROR status filter - confirm
+          for: 15m
+          expr: |
+            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) > 2
+          labels:
+            severity: P5
+          annotations:  # count() drops every label, so only $value is available
+            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} volumes"
+            description: >
+              Rally has failed to cleanup {{`{{$value}}`}} volumes which resulted
+              to an ERROR state.
+
+        - alert: RallyVolumeStatus  # same selector, more than 5 matching volumes
+          for: 15m
+          expr: |
+            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) > 5
+          labels:
+            severity: P4
+          annotations:  # count() drops every label, so only $value is available
+            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} volumes"
+            description: >
+              Rally has failed to cleanup {{`{{$value}}`}} volumes which resulted
+              to an ERROR state.
+
+        - alert: RallyVolumeStatus  # highest tier: 10 or more matching volumes
+          for: 15m
+          expr: |
+            count(openstack_cinder_volume_status{name=~"^s_rally.*"}) >= 10
+          labels:
+            severity: P3
+          annotations:  # count() drops every label, so only $value is available
+            summary: "[Rally] Failed to cleanup {{`{{$value}}`}} volumes"
+            description: >
+              Rally has failed to cleanup {{`{{$value}}`}} volumes which resulted
+              to an ERROR state.
diff --git a/charts/rally-exporter/templates/secret.yaml b/charts/rally-exporter/templates/secret.yaml
new file mode 100755
index 0000000..040bdfa
--- /dev/null
+++ b/charts/rally-exporter/templates/secret.yaml
@@ -0,0 +1,73 @@
+---
+# Configuration files for the exporter; the Deployment mounts this Secret at
+# /etc/rally.
+apiVersion: v1
+kind: Secret
+metadata:
+  namespace: {{ .Release.Namespace }}
+  name: {{ include "rally-exporter.fullname" . }}
+  labels:
+{{- include "rally-exporter.labels" . | indent 4 }}
+stringData:
+  rally.conf: |
+    [database]
+    connection=sqlite:////home/rally/data/rally.db
+    [openstack]
+    nova_server_boot_timeout=90
+  # Every templated scalar below is quoted so that values which look like
+  # numbers or booleans, or that contain YAML indicator characters, are still
+  # read back as plain strings. Unset values keep rendering as empty.
+  clouds.yaml: |
+    clouds:
+      default:
+        auth:
+          auth_url: {{ .Values.cloud.auth_url | quote }}
+          project_name: {{ .Values.cloud.project_name | quote }}
+          tenant_name: {{ .Values.cloud.project_name | quote }}
+          username: {{ .Values.cloud.username | quote }}
+          password: {{ .Values.cloud.password | quote }}
+          user_domain_name: {{ .Values.cloud.user_domain_name | quote }}
+          project_domain_name: {{ .Values.cloud.project_domain_name | quote }}
+        region_name: {{ .Values.cloud.region_name | quote }}
+        interface: {{ .Values.cloud.interface | quote }}
+  # Rally task: one Keystone authentication and one boot-from-volume server
+  # create/delete, each run once with no tolerated failures.
+  plan.yaml: |
+    ---
+    version: 2
+    title: OpenStack Cloud Validation
+    description: |
+      This task runs a few synthetic tests to ensure that the cloud is in a
+      functional state.
+    subtasks:
+      - title: keystone
+        scenario:
+          Authenticate.keystone: {}
+        runner:
+          constant:
+            times: 1
+            concurrency: 1
+        sla:
+          max_seconds_per_iteration: 5
+          failure_rate:
+            max: 0
+      - title: nova
+        scenario:
+          NovaServers.boot_server_from_volume_and_delete:
+{{ if .Values.test.network }}
+            nics:
+              - net-name: {{ .Values.test.network | quote }}
+{{- end }}
+            volume_size: 20
+            flavor:
+              name: {{ .Values.test.flavor | quote }}
+            image:
+              name: {{ .Values.test.image | quote }}
+        runner:
+          constant:
+            times: 1
+            concurrency: 1
+        sla:
+          max_seconds_per_iteration: 90
+          failure_rate:
+            max: 0
diff --git a/charts/rally-exporter/values.yaml b/charts/rally-exporter/values.yaml
new file mode 100755
index 0000000..87085bf
--- /dev/null
+++ b/charts/rally-exporter/values.yaml
@@ -0,0 +1,26 @@
+---
+# OpenStack credentials rendered into the Secret's clouds.yaml.
+cloud:
+  auth_url: null  # no default - set to the cloud's authentication URL
+  region_name: null  # no default
+  interface: internal
+  user_domain_name: Default
+  project_domain_name: Default
+  project_name: rally
+  username: rally
+  password: null  # no default - supply at install time, do not commit it
+
+# Resources used by the Nova boot-from-volume scenario in plan.yaml.
+test:
+  flavor: null  # no default - flavor name
+  image: null  # no default - image name
+  network: null  # optional network name; adds a `nics` entry when set
+
+# Scrape settings for the PodMonitor.
+podMonitor:
+  interval: 1m
+  scrapeTimeout: 1m
+
+# Optional keys read by the templates when present (unset by default):
+#   nameOverride, fullnameOverride, commonLabels,
+#   hostAliases, tolerations, nodeSelector