Merge "add support to index ocp metrics to ES"

This commit is contained in:
Zuul 2024-09-04 07:31:02 +00:00 committed by Gerrit Code Review
commit 36165d5896
10 changed files with 189 additions and 0 deletions

View File

@ -6,3 +6,8 @@
roles:
- { role: collectd-rhoso }
environment: "{{proxy_env}}"
# Chain the cron-toggling playbook with cron_state set to "present".
- name: trigger the cronjob to index data from OCP
  vars:
    cron_state: 'present'
  import_playbook: toggle-indexing-cron-job.yml

View File

@ -15,6 +15,8 @@ host_remote_user: heat-admin
is_rhoso_deployment: true
python_interpreter: /usr/bin/python3.6
kubeconfig_path: /home/kni/clusterconfigs/auth/kubeconfig
# URL of the kube-burner release tarball (v1.7.12, linux x86_64).
kube_burner_path: "https://github.com/cloud-bulldozer/kube-burner/releases/download/v1.7.12/kube-burner-V1.7.12-linux-x86_64.tar.gz"
# Path to the metrics query file.
# NOTE(review): this is a relative path - confirm which directory the consumer
# resolves it against.
ocp_metrics_query: "roles/index-ocp-data/templates/metrics.yaml"
# OpenStack Installer
# Tripleo is the only installer supported currently

View File

@ -0,0 +1,5 @@
---
# Runs the index-ocp-data role against localhost with fact gathering enabled.
- hosts: localhost
  gather_facts: true
  roles:
    - role: index-ocp-data

View File

@ -0,0 +1,22 @@
---
# Pre-flight checks for the `oc` CLI.
#
# Two probes are run and each is followed by an explicit `fail` task so the
# play aborts with a readable message:
#   1. `which oc`   - the binary must be resolvable on PATH.
#   2. `oc version` - must exit 0 when run with KUBECONFIG set to
#                     `kubeconfig_path`.
#
# Both probes are read-only, so they are marked `changed_when: false` to keep
# them from reporting "changed" on every run. `ignore_errors` is kept so the
# dedicated `fail` tasks below produce the user-facing error.

- name: Check if oc is installed
  shell: "which oc"
  register: oc_location
  changed_when: false
  ignore_errors: true

- name: Fail if oc is not installed
  fail:
    msg: "oc is not installed"
  when: oc_location.rc != 0

- name: Check if logged into Kubernetes cluster
  shell: "oc version"
  register: oc_version
  changed_when: false
  ignore_errors: true
  environment:
    KUBECONFIG: "{{ kubeconfig_path }}"

- name: Fail if not logged in
  fail:
    msg: "Not logged in to Kubernetes cluster"
  when: oc_version.rc != 0

View File

@ -0,0 +1,62 @@
---
# Index OpenShift Prometheus metrics into Elasticsearch with
# `kube-burner index`.
#
# Variables read: cloud_prefix, es_ip, es_local_port, ocp_metrics_query,
# kube_burner_path, kubeconfig_path, required_vars and, optionally,
# start_time (epoch seconds). When start_time is not supplied, the window
# defaults to the 1800 seconds before end_time.

- name: prepare kube-burner args
  set_fact:
    uuid: "{{ lookup('pipe', 'uuidgen') }}"
    # NOTE(review): Elasticsearch normally rejects index names that contain
    # uppercase characters - confirm this name is accepted by the target
    # cluster before relying on it.
    es_index: "OSP-metrics-from-OCP-{{ ansible_date_time.year }}.{{ ansible_date_time.month }}"
    # end_time is the epoch recorded when facts were gathered.
    end_time: "{{ ansible_date_time.epoch | int }}"
    job_name: "{{ cloud_prefix }}"
    es_server: "http://{{ es_ip }}:{{ es_local_port }}"
    metrics: "{{ ocp_metrics_query }}"

- name: calculate start time from end time
  set_fact:
    start_time: "{{ end_time | int - 1800 }}"
  when: start_time is undefined

# Evaluate every required variable without stopping at the first failure, so
# that all missing names can be reported together in one message.
- name: Check if all variables are defined
  assert:
    that:
      - item in vars
  loop: "{{ required_vars }}"
  loop_control:
    label: "{{ item }}"
  register: var_check_result
  ignore_errors: true

- name: Extract missing variables
  set_fact:
    missing_vars: "{{ var_check_result.results | selectattr('failed', 'equalto', true) | map(attribute='item') | list }}"

- name: Fail if any variable is not defined
  fail:
    msg: "Variable '{{ missing_vars | join(', ') }}' is not defined. Aborting playbook execution."
  when: missing_vars | length > 0

- name: check oc is installed and accessable
  include_tasks: check_oc.yml

- name: Download kube-burner
  ansible.builtin.get_url:
    url: "{{ kube_burner_path }}"
    dest: /tmp/kube-burner.tar.gz

- name: Extract the kube-burner
  ansible.builtin.unarchive:
    src: /tmp/kube-burner.tar.gz
    dest: /tmp
    mode: '0774'

# KUBECONFIG is set on the block so that both `oc` invocations run against
# the same cluster configuration.
- block:
    - name: retrieve the prometheus url
      shell: oc get routes -n openshift-monitoring prometheus-k8s -o=jsonpath='{.spec.host}'
      register: prometheus_url
      # Read-only lookup; never report it as a change.
      changed_when: false

    - name: create token to access prometheus
      shell: oc create token prometheus-k8s -n openshift-monitoring
      register: token
      # The registered result contains a bearer token; keep it out of logs.
      no_log: true
  environment:
    KUBECONFIG: "{{ kubeconfig_path }}"

# The token is handed to the shell through the environment and expanded there,
# so the rendered command line that Ansible reports on failure or with -v
# contains the literal "${PROM_TOKEN}" instead of the secret.
- name: index data from premetheus to elastic
  shell: >-
    /tmp/kube-burner index
    --es-server {{ es_server }}
    --es-index {{ es_index }}
    --uuid={{ uuid }}
    --job-name {{ job_name }}
    --token="${PROM_TOKEN}"
    -m={{ metrics }}
    --start={{ start_time }}
    --end={{ end_time }}
    --log-level debug
    -u https://{{ prometheus_url.stdout }}
  environment:
    PROM_TOKEN: "{{ token.stdout }}"

View File

@ -0,0 +1,41 @@
# Metric queries, each paired with the metricName it is stored under.
# Every query below is restricted to the "openstack" namespace except the
# two version queries.

# Containers & pod metrics (range queries)
- metricName: containerCPU
  query: '(sum(irate(container_cpu_usage_seconds_total{name!="",container!~"POD|",namespace=~"openstack"}[2m]) * 100) by (container, pod, namespace, node)) > 0'

- metricName: containerMemory
  query: 'sum(container_memory_rss{name!="",container!~"POD|",namespace=~"openstack"}) by (container, pod, namespace, node)'

- metricName: containerRecvPackets
  query: 'sum(irate(container_network_receive_packets_total{cluster="",namespace=~"openstack", pod!=""}[2m])) by (pod, namespace, node, interface)'

- metricName: containerTranPackets
  query: 'sum(irate(container_network_transmit_packets_total{cluster="",namespace=~"openstack", pod!=""}[2m])) by (pod, namespace, node, interface)'

# Version information (instant queries)
- metricName: clusterVersion
  query: 'cluster_version{type="completed"}'
  instant: true

- metricName: etcdVersion
  query: 'sum by (cluster_version)(etcd_cluster_version)'
  instant: true

# Object counts in the openstack namespace (instant queries)
- metricName: ospSecretCount
  query: "count(kube_secret_info{namespace='openstack'})"
  instant: true

- metricName: ospDeploymentCount
  query: "count(kube_deployment_labels{namespace='openstack'})"
  instant: true

- metricName: ospConfigmapCount
  query: "count(kube_configmap_info{namespace='openstack'})"
  instant: true

- metricName: ospServiceCount
  query: "count(kube_service_info{namespace='openstack'})"
  instant: true

- metricName: ospStatefulsetCount
  query: "count(kube_statefulset_labels{namespace='openstack'})"
  instant: true

View File

@ -0,0 +1,8 @@
---
# Names of the variables listed as required. The order is kept as-is because
# a consumer iterating the list would see it in this order.
required_vars:
  - es_server
  - es_index
  - uuid
  - job_name
  - metrics
  - start_time
  - end_time

View File

@ -18,3 +18,7 @@
when: is_statefulset_exist.rc == 0
environment:
KUBECONFIG: "{{ kubeconfig_path }}"
# Chain the cron-toggling playbook with cron_state set to "present".
- name: trigger the cronjob to index data from OCP
  vars:
    cron_state: 'present'
  import_playbook: toggle-indexing-cron-job.yml

View File

@ -6,3 +6,8 @@
environment:
KUBECONFIG: "{{ kubeconfig_path }}"
ignore_errors: yes
# Chain the cron-toggling playbook with cron_state set to "absent".
- name: stop the cronjob to stop index data from OCP
  vars:
    cron_state: 'absent'
  import_playbook: toggle-indexing-cron-job.yml

View File

@ -0,0 +1,35 @@
---
# Adds or removes the cron entry that runs index-ocp-data.yml every 30 minutes.
#
# cron_state: "present" (default) installs the entry, "absent" removes it.
# When the entry is being removed, the index-ocp-data role is run once first.
# The modification time of the cron log file is used as the start time of
# that run if the file was modified less than 30 minutes ago; otherwise the
# role is run without a start time.
- hosts: localhost
  vars:
    cron_state: "present"
  tasks:
    - when: cron_state == "absent"
      block:
        - name: Enure the log file exist
          register: log_file
          stat:
            path: "/tmp/ocp_index_cron.log"

        # Age of the log file in minutes, measured from its mtime.
        - name: find the age of file(last modification)
          when: log_file.stat.exists
          set_fact:
            file_age: "{{ ((ansible_date_time.epoch | int) - (log_file.stat.mtime | int)) / 60 }}"

        - name: run the ansible task with the start time
          when: log_file.stat.exists and (file_age | int < 30)
          include_role:
            name: index-ocp-data
          vars:
            start_time: "{{ log_file.stat.mtime | int }}"

        - name: run the ansible task without start time
          when: not log_file.stat.exists or (file_age | int >= 30)
          include_role:
            name: index-ocp-data

    # The job's output is redirected to the same log file inspected above.
    - name: toggle the indexing OCP data cron job
      cron:
        name: "Index ocp data every 30 mins"
        state: "{{ cron_state }}"
        minute: "*/30"
        job: "PATH=/usr/local/bin:/usr/bin:/bin && /usr/bin/ansible-playbook {{ browbeat_path }}/ansible/install/index-ocp-data.yml > /tmp/ocp_index_cron.log 2>&1"