rajeshP524 58f4d9316b Update metrics.yaml and collectd template for RHOSO
1) New Prometheus queries have been added to metrics.yaml
2) The Prometheus port is updated in the template, as it conflicts
   with a port used by some processes on worker nodes
3) Processes plugin is commented out in the collectd template.

Change-Id: I1b489fccd1cde7b2af1d4ec3f08cf7c6c0274eda
2024-10-01 17:23:57 +05:30

181 lines
6.3 KiB
YAML

# Containers & pod metrics
# All selectors below are restricted to the "openstack" namespace.
# CPU: irate of the CPU-seconds counter over a 2m window, scaled by 100,
# summed per (container, pod, namespace, node). The matcher container!~"POD|"
# drops series whose container label is "POD" or empty.
- query: sum(irate(container_cpu_usage_seconds_total{name!="",container!~"POD|",namespace=~"openstack"}[2m]) * 100) by (container, pod, namespace, node)
  metricName: containerCPU
# Memory: RSS summed per (container, pod, namespace, node), same container filter as above.
- query: sum(container_memory_rss{name!="",container!~"POD|",namespace=~"openstack"}) by (container, pod, namespace, node)
  metricName: containerMemory
# Network: received / transmitted packet rates (irate over 2m) per (pod, namespace, node, interface).
- query: sum(irate(container_network_receive_packets_total{cluster="",namespace=~"openstack", pod!=""}[2m])) by (pod, namespace, node, interface)
  metricName: containerRecvPackets
- query: sum(irate(container_network_transmit_packets_total{cluster="",namespace=~"openstack", pod!=""}[2m])) by (pod, namespace, node, interface)
  metricName: containerTranPackets
# OCP Cluster details
# NOTE(review): `instant: true` presumably asks the metrics collector to run the
# query once as an instant query instead of over a time range - confirm against
# the consuming tool's documentation.
- query: cluster_version{type="completed"}
  metricName: clusterVersion
  instant: true
- query: sum by (cluster_version)(etcd_cluster_version)
  metricName: etcdVersion
  instant: true
# Object counts in the "openstack" namespace.
- query: count(kube_secret_info{namespace='openstack'})
  metricName: ospSecretCount
  instant: true
- query: count(kube_deployment_labels{namespace='openstack'})
  metricName: ospDeploymentCount
  instant: true
- query: count(kube_configmap_info{namespace='openstack'})
  metricName: ospConfigmapCount
  instant: true
- query: count(kube_service_info{namespace='openstack'})
  metricName: ospServiceCount
  instant: true
- query: count(kube_statefulset_labels{namespace='openstack'})
  metricName: ospStatefulsetCount
  instant: true
# MySQL InnoDB data
# Each query takes the irate of a collectd MySQL InnoDB counter over a 5m window
# and sums it per `type` label.
- query: sum(irate(collectd_mysql_mysql_innodb_log_total[5m])) by (type)
  metricName: ospMysqlInnodbLog
- query: sum(irate(collectd_mysql_mysql_innodb_data_total[5m])) by (type)
  metricName: ospMysqlInnodbData
- query: sum(irate(collectd_mysql_mysql_innodb_dblwr_total[5m])) by (type)
  metricName: ospMysqlInnodbDoubleWrite
- query: sum(irate(collectd_mysql_mysql_innodb_pages_total[5m])) by (type)
  metricName: ospMysqlInnodbPages
- query: sum(irate(collectd_mysql_mysql_innodb_row_lock_total[5m])) by (type)
  metricName: ospMysqlInnodbRowLock
- query: sum(irate(collectd_mysql_mysql_innodb_rows_total[5m])) by (type)
  metricName: ospMysqlInnodbRows
# OVN DB table row counts: collectd OVN gauge summed per `type` label.
- query: sum(collectd_ovn_gauge) by (type)
  metricName: ospOVNDBTableRowCount
# OVN RAFT monitoring: gauge summed per `ovn_raft_monitoring` label.
- query: sum(collectd_ovn_raft_monitoring_gauge) by (ovn_raft_monitoring)
  metricName: ospOVNRAFTData
# Tail info: increase of the collectd tail counter over 5m, per (tail, type).
- query: sum(increase(collectd_tail_counter_total[5m])) by (tail, type)
  metricName: ospTailInfo
# OSP DB data: collectd dbi gauge summed per (type, dbi).
- query: sum(collectd_dbi_gauge) by (type, dbi)
  metricName: ospDbResources
# OSP resource response time
# The three queries read the same series and differ only in the `tail` label
# regex, which selects the *_avg_, *_min_ or *_max_response_time instances.
- query: sum(collectd_tail_response_time{tail=~".*_avg_response_time"}) by (tail, type)
  metricName: ospResourceAvgRespTime
- query: sum(collectd_tail_response_time{tail=~".*_min_response_time"}) by (tail, type)
  metricName: ospResourceMinRespTime
- query: sum(collectd_tail_response_time{tail=~".*_max_response_time"}) by (tail, type)
  metricName: ospResourceMaxRespTime
# The processes-plugin queries below are commented out because process naming has
# changed in RHOSO. Process metrics are collected from Prometheus for now; these
# queries can be re-enabled once the process-matching regex is corrected.
# # OSP Process and Thread Counts
# - query: sum(collectd_processes_ps_count_processes) by (processes, instance)
#   metricName: ospProcessCount
#
# - query: sum(collectd_processes_ps_count_threads) by (processes, instance)
#   metricName: ospThreadsCount
#
# # OSP process CPU time
# - query: sum(irate(collectd_processes_ps_cputime_syst_total[2m]) / 10000) by (instance, processes)
#   metricName: ospProcessCPUSys
#
# - query: sum(irate(collectd_processes_ps_cputime_user_total[2m]) / 10000) by (instance, processes)
#   metricName: ospProcessCPUUser
#
# # OSP Process Memory
# # NOTE(review): ps_rss / ps_vm look like gauges; irate() is intended for
# # counters - confirm before re-enabling these two queries.
# - query: sum(irate(collectd_processes_ps_rss[5m])) by (instance, processes)
#   metricName: ospProcessMemRSS
#
# - query: sum(irate(collectd_processes_ps_vm[5m])) by (instance, processes)
#   metricName: ospProcessMemVirt
#
# # OSP Process Pagefault
# - query: sum(increase(collectd_processes_ps_pagefaults_majflt_total[5m])) by (instance, processes)
#   metricName: ospProcessPagefaultMax
#
# - query: sum(increase(collectd_processes_ps_pagefaults_minflt_total[5m])) by (instance, processes)
#   metricName: ospProcessPagefaultMin
#
# # OSP process IOPs
# - query: sum(irate(collectd_processes_io_ops_write_total[5m])) by (instance, processes)
#   metricName: ospProcessIOPsWrite
#
# - query: sum(irate(collectd_processes_io_ops_read_total[5m])) by (instance, processes)
#   metricName: ospProcessIOPsRead
#
# # OSP Process IO Throughput
# - query: sum(irate(collectd_processes_io_octets_rx_total[5m])) by (instance, processes)
#   metricName: ospProcessIORx
#
# - query: sum(irate(collectd_processes_io_octets_tx_total[5m])) by (instance, processes)
#   metricName: ospProcessIOTx
#
# # OSP Process Disk IO
# - query: sum(irate(collectd_processes_disk_octets_read_total[5m])) by (instance, processes)
#   metricName: ospProcessDiskRead
#
# - query: sum(irate(collectd_processes_disk_octets_write_total[5m])) by (instance, processes)
#   metricName: ospProcessDiskWrite
# MariaDB
# Thread gauge summed per (name, type).
- query: sum(collectd_mysql_threads) by (name, type)
  metricName: ospMysqlThreads
# Network traffic: per-second rate of the octet counters over 2m, per `name`.
- query: sum(rate(collectd_mysql_mysql_octets_tx_total[2m])) by (name)
  metricName: ospMysqlTrafficTx
- query: sum(rate(collectd_mysql_mysql_octets_rx_total[2m])) by (name)
  metricName: ospMysqlTrafficRx
# Query cache: result-counter increase over 5m and current cache size, per `type`.
- query: sum(increase(collectd_mysql_cache_result_total[5m])) by (type)
  metricName: ospMysqlQueryCache
- query: sum(collectd_mysql_cache_size) by (type)
  metricName: ospMysqlQueryCacheSize
# Buffer pool: byte gauge and counter increase over 5m, per `type`.
- query: sum(collectd_mysql_mysql_bpool_bytes) by (type)
  metricName: ospMysqlBufferPoolData
- query: sum(increase(collectd_mysql_mysql_bpool_counters_total[5m])) by (type)
  metricName: ospMysqlBufferPool
# Statement-level counters: increase over 5m, per `type`.
- query: sum(increase(collectd_mysql_mysql_commands_total[5m])) by (type)
  metricName: ospMysqlCommands
- query: sum(increase(collectd_mysql_mysql_handler_total[5m])) by (type)
  metricName: ospMysqlHandlers
- query: sum(increase(collectd_mysql_mysql_locks_total[5m])) by (type)
  metricName: ospMysqlLocks
- query: sum(increase(collectd_mysql_mysql_select_total[5m])) by (type)
  metricName: ospMysqlSelect
- query: sum(increase(collectd_mysql_mysql_sort_total[5m])) by (type)
  metricName: ospMysqlSort
# RabbitMQ: collectd monitoring gauge summed per `rabbitmq_monitoring` label.
- query: sum(collectd_rabbitmq_monitoring_gauge) by (rabbitmq_monitoring)
  metricName: ospRabbitmqQueueCounts