6f7790e451
This adds support for arbitrary object definitions via the conf key in the Nagios chart. This allows for customizing the definitions required by different deployment targets, instead of assuming that all Nagios deployments monitor and target the same hosts and execute the same service checks and commands. This also adds reference overrides to the chart for the Elasticsearch, PostgreSQL, and OpenStack Nagios objects that are deployed in the single and multinode jobs here. Change-Id: I6475ca980447591b5b691220eb841a2ab958e854 Signed-off-by: Steve Wilkerson <sw5822@att.com>
94 lines
4.6 KiB
YAML
94 lines
4.6 KiB
YAML
conf:
|
|
nagios:
|
|
objects:
|
|
fluent:
|
|
template: |
|
|
define service {
|
|
check_command check_prom_alert!fluentd_not_running!CRITICAL- fluentd is not running on {instance}!OK- Fluentd is working on all nodes
|
|
check_interval 60
|
|
hostgroup_name prometheus-hosts
|
|
service_description Fluentd_status
|
|
use notifying_service
|
|
}
|
|
|
|
define service {
|
|
check_command check_prom_alert!prom_exporter_fluentd_unavailable!CRITICAL- Fluentd exporter is not collecting metrics for alerting!OK- Fluentd exporter metrics are available.
|
|
hostgroup_name prometheus-hosts
|
|
service_description Prometheus-exporter_Fluentd
|
|
use generic-service
|
|
}
|
|
elasticsearch:
|
|
template: |
|
|
define command {
|
|
command_line $USER1$/query_elasticsearch.py $USER9$ '$ARG1$' '$ARG2$' '$ARG3$' '$ARG4$' '$ARG5$' --simple_query '$ARG6$' --simple_query_fields '$ARG7$' --match '$ARG8$' --range '$ARG9$'
|
|
command_name check_es_query
|
|
}
|
|
|
|
define command {
|
|
command_line $USER1$/query_elasticsearch.py $USER9$ '$ARG1$' '$ARG2$' '$ARG3$' '$ARG4$' '$ARG5$' --simple_query '$ARG6$' --simple_query_fields '$ARG7$' --query_file '/opt/nagios/etc/objects/query_es_clauses.json' --query_clause '$ARG8$' --match '$ARG9$' --range '$ARG10$'
|
|
command_name check_es_query_w_file
|
|
}
|
|
|
|
define service {
|
|
check_command check_prom_alert!prom_exporter_elasticsearch_unavailable!CRITICAL- Elasticsearch exporter is not collecting metrics for alerting!OK- Elasticsearch exporter metrics are available.
|
|
hostgroup_name prometheus-hosts
|
|
service_description Prometheus-exporter_Elasticsearch
|
|
use generic-service
|
|
}
|
|
|
|
define service {
|
|
check_command check_prom_alert!es_high_process_open_files_count!CRITICAL- Elasticsearch {host} has high process open file count!OK- Elasticsearch process open file count is normal.
|
|
hostgroup_name prometheus-hosts
|
|
service_description ES_high-process-open-file-count
|
|
use generic-service
|
|
}
|
|
|
|
define service {
|
|
check_command check_prom_alert!es_high_process_cpu_percent!CRITICAL- Elasticsearch {instance} has high process CPU percent!OK- Elasticsearch process cpu usage is normal.
|
|
hostgroup_name prometheus-hosts
|
|
service_description ES_high-process-cpu-percent
|
|
use generic-service
|
|
}
|
|
|
|
define service {
|
|
check_command check_prom_alert!es_fs_usage_high!CRITICAL- Elasticsearch {instance} has high filesystem usage!OK- Elasticsearch filesystem usage is normal.
|
|
hostgroup_name prometheus-hosts
|
|
service_description ES_high-filesystem-usage
|
|
use generic-service
|
|
}
|
|
|
|
define service {
|
|
check_command check_prom_alert!es_unassigned_shards!CRITICAL- Elasticsearch has unassigned shards!OK- Elasticsearch has no unassigned shards.
|
|
hostgroup_name prometheus-hosts
|
|
service_description ES_unassigned-shards
|
|
use generic-service
|
|
}
|
|
|
|
define service {
|
|
check_command check_prom_alert!es_cluster_health_timed_out!CRITICAL- Elasticsearch Cluster health status call timed out!OK- Elasticsearch cluster health is retrievable.
|
|
hostgroup_name prometheus-hosts
|
|
service_description ES_cluster-health-timedout
|
|
use generic-service
|
|
}
|
|
|
|
define service {
|
|
check_command check_prom_alert!es_cluster_health_status_alert!CRITICAL- Elasticsearch cluster health status is not green. One or more shards or replicas are unallocated!OK- Elasticsearch cluster health is green.
|
|
hostgroup_name prometheus-hosts
|
|
service_description ES_cluster-health-status
|
|
use generic-service
|
|
}
|
|
|
|
define service {
|
|
check_command check_prom_alert!es_cluster_health_too_few_nodes_running!CRITICAL- Elasticsearch Cluster has < 3 nodes running!OK- Elasticsearch cluster has 3 or more nodes running.
|
|
hostgroup_name prometheus-hosts
|
|
service_description ES_cluster-running-node-count
|
|
use generic-service
|
|
}
|
|
|
|
define service {
|
|
check_command check_prom_alert!es_cluster_health_too_few_data_nodes_running!CRITICAL- Elasticsearch Cluster has < 3 data nodes running!OK- Elasticsearch cluster has 3 or more data nodes running.
|
|
hostgroup_name prometheus-hosts
|
|
service_description ES_cluster-running-data-node-count
|
|
use generic-service
|
|
}
|