72b4fe097d
Add CDM checks and alarms
349 lines
16 KiB
YAML
349 lines
16 KiB
YAML
---
# Copyright 2014, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
# Cinder API local check: applies the maas_local role to the cinder_api hosts.
# The check file name is derived from check_name and receives the host's
# ansible_ssh_host as its only argument. The alarm returns CRITICAL when the
# cinder_api_local_status metric is not 1.
- hosts: cinder_api
  user: root
  vars:
    check_name: cinder_api_local_check
    check_details: "file={{ check_name }}.py,args={{ ansible_ssh_host }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: cinder_api_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["cinder_api_local_status"] != 1) { return new AlarmStatus(CRITICAL, "API unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# cinder-scheduler service check: applies the maas_local role to the
# cinder_scheduler hosts. cinder_service_check.py is passed --host, the host's
# ansible_hostname and the internal VIP address. The alarm returns CRITICAL
# when the cinder-scheduler_status metric is not 1.
- hosts: cinder_scheduler
  user: root
  vars:
    check_name: cinder_scheduler_check
    check_details: "file=cinder_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: cinder_scheduler_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["cinder-scheduler_status"] != 1) { return new AlarmStatus(CRITICAL, "cinder-scheduler down"); }'
  roles:
    - maas_local
|
|
|
|
# cinder-volume service check: applies the maas_local role to the
# cinder_volume hosts. cinder_service_check.py is passed --host, the host's
# ansible_hostname and the internal VIP address. The alarm returns CRITICAL
# when the cinder-volume_status metric is not 1.
- hosts: cinder_volume
  user: root
  vars:
    check_name: cinder_volume_check
    check_details: "file=cinder_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: cinder_volume_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["cinder-volume_status"] != 1) { return new AlarmStatus(CRITICAL, "cinder-volume down"); }'
  roles:
    - maas_local
|
|
|
|
# Glance API local check: applies the maas_local role to the glance_api hosts.
# The check file name is derived from check_name and receives the host's
# ansible_ssh_host as its only argument. The alarm returns CRITICAL when the
# glance_api_local_status metric is not 1.
- hosts: glance_api
  user: root
  vars:
    check_name: glance_api_local_check
    check_details: "file={{ check_name }}.py,args={{ ansible_ssh_host }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: glance_api_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["glance_api_local_status"] != 1) { return new AlarmStatus(CRITICAL, "API unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# Glance registry local check: applies the maas_local role to the
# glance_registry hosts. The check file name is derived from check_name and
# receives the host's ansible_ssh_host as its only argument. The alarm returns
# CRITICAL when the glance_registry_local_status metric is not 1.
- hosts: glance_registry
  user: root
  vars:
    check_name: glance_registry_local_check
    check_details: "file={{ check_name }}.py,args={{ ansible_ssh_host }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: glance_registry_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["glance_registry_local_status"] != 1) { return new AlarmStatus(CRITICAL, "API unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# Heat API local check: applies the maas_local role to the heat_api hosts.
# The check file name is derived from check_name and receives the host's
# ansible_ssh_host as its only argument. The alarm returns CRITICAL when the
# heat_api_local_status metric is not 1.
- hosts: heat_api
  user: root
  vars:
    check_name: heat_api_local_check
    check_details: "file={{ check_name }}.py,args={{ ansible_ssh_host }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: heat_api_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["heat_api_local_status"] != 1) { return new AlarmStatus(CRITICAL, "API unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# Heat CloudFormation API check: applies the maas_local role to the
# heat_api_cfn hosts. service_api_local_check.py is passed the arguments
# heat_cfn, the host's ansible_ssh_host and 8000. The alarm returns CRITICAL
# when the heat_cfn_api_local_status metric is not 1.
- hosts: heat_api_cfn
  user: root
  vars:
    check_name: heat_cfn_api_check
    check_details: "file=service_api_local_check.py,args=heat_cfn,args={{ ansible_ssh_host }},args=8000"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: heat_cfn_api_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["heat_cfn_api_local_status"] != 1) { return new AlarmStatus(CRITICAL, "API unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# Heat CloudWatch API check: applies the maas_local role to the
# heat_api_cloudwatch hosts. service_api_local_check.py is passed the
# arguments heat_cw, the host's ansible_ssh_host and 8003. The alarm returns
# CRITICAL when the heat_cw_api_local_status metric is not 1.
- hosts: heat_api_cloudwatch
  user: root
  vars:
    check_name: heat_cw_api_check
    check_details: "file=service_api_local_check.py,args=heat_cw,args={{ ansible_ssh_host }},args=8003"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: heat_cw_api_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["heat_cw_api_local_status"] != 1) { return new AlarmStatus(CRITICAL, "API unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# Horizon local check: applies the maas_local role to the horizon hosts.
# horizon_check.py receives the host's ansible_ssh_host as its only argument.
# The alarm returns CRITICAL when the horizon_local_status metric is not 1.
- hosts: horizon
  user: root
  vars:
    check_name: horizon_local_check
    check_details: "file=horizon_check.py,args={{ ansible_ssh_host }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: horizon_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["horizon_local_status"] != 1) { return new AlarmStatus(CRITICAL, "Horizon unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# Keystone API local check: applies the maas_local role to the keystone hosts.
# The check file name is derived from check_name and receives the host's
# ansible_ssh_host as its only argument. The alarm returns CRITICAL when the
# keystone_api_local_status metric is not 1.
- hosts: keystone
  user: root
  vars:
    check_name: keystone_api_local_check
    check_details: "file={{ check_name }}.py,args={{ ansible_ssh_host }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: keystone_api_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["keystone_api_local_status"] != 1) { return new AlarmStatus(CRITICAL, "API unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# Neutron API local check: applies the maas_local role to the neutron_server
# hosts. The check file name is derived from check_name and receives the
# host's ansible_ssh_host as its only argument. The alarm returns CRITICAL
# when the neutron_api_local_status metric is not 1.
- hosts: neutron_server
  user: root
  vars:
    check_name: neutron_api_local_check
    check_details: "file={{ check_name }}.py,args={{ ansible_ssh_host }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: neutron_api_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["neutron_api_local_status"] != 1) { return new AlarmStatus(CRITICAL, "API unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# neutron-dhcp-agent service check: applies the maas_local role to the
# neutron_dhcp_agent hosts. neutron_service_check.py is passed --host, the
# host's ansible_hostname and the internal VIP address. The alarm returns
# CRITICAL when the neutron-dhcp-agent_status metric is not 1.
- hosts: neutron_dhcp_agent
  user: root
  vars:
    check_name: neutron_dhcp_agent_check
    check_details: "file=neutron_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: neutron_dhcp_agent_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["neutron-dhcp-agent_status"] != 1) { return new AlarmStatus(CRITICAL, "neutron-dhcp-agent down"); }'
  roles:
    - maas_local
|
|
|
|
# neutron-l3-agent service check: applies the maas_local role to the
# neutron_l3_agent hosts. neutron_service_check.py is passed --host, the
# host's ansible_hostname and the internal VIP address. The alarm returns
# CRITICAL when the neutron-l3-agent_status metric is not 1.
- hosts: neutron_l3_agent
  user: root
  vars:
    check_name: neutron_l3_agent_check
    check_details: "file=neutron_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: neutron_l3_agent_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["neutron-l3-agent_status"] != 1) { return new AlarmStatus(CRITICAL, "neutron-l3-agent down"); }'
  roles:
    - maas_local
|
|
|
|
# neutron-linuxbridge-agent service check: applies the maas_local role to the
# neutron_linuxbridge_agent hosts. neutron_service_check.py is passed --host,
# the host's ansible_hostname and the internal VIP address. The alarm returns
# CRITICAL when the neutron-linuxbridge-agent_status metric is not 1.
- hosts: neutron_linuxbridge_agent
  user: root
  vars:
    check_name: neutron_linuxbridge_agent_check
    check_details: "file=neutron_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: neutron_linuxbridge_agent_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["neutron-linuxbridge-agent_status"] != 1) { return new AlarmStatus(CRITICAL, "neutron-linuxbridge-agent down"); }'
  roles:
    - maas_local
|
|
|
|
# neutron-metadata-agent service check: applies the maas_local role to the
# neutron_metadata_agent hosts. neutron_service_check.py is passed --host,
# the host's ansible_hostname and the internal VIP address. The alarm returns
# CRITICAL when the neutron-metadata-agent_status metric is not 1.
- hosts: neutron_metadata_agent
  user: root
  vars:
    check_name: neutron_metadata_agent_check
    check_details: "file=neutron_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: neutron_metadata_agent_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["neutron-metadata-agent_status"] != 1) { return new AlarmStatus(CRITICAL, "neutron-metadata-agent down"); }'
  roles:
    - maas_local
|
|
|
|
# neutron-metering-agent service check: applies the maas_local role with
# neutron_service_check.py, which is passed --host, the host's
# ansible_hostname and the internal VIP address. The alarm returns CRITICAL
# when the neutron-metering-agent_status metric is not 1.
#
# This play previously declared only `vars`, with no `user` and no `roles`,
# so it applied nothing to its hosts. `user: root` and the `maas_local` role
# are added so that it matches every other play in this file.
#
# NOTE(review): this play targets the neutron_l3_agent group, the same group
# as the neutron_l3_agent_check play. Presumably the metering agent runs
# alongside the L3 agent; confirm against the inventory, or switch to a
# dedicated metering-agent group if one exists.
- hosts: neutron_l3_agent
  user: root
  vars:
    check_name: neutron_metering_agent_check
    check_details: "file=neutron_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: neutron_metering_agent_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["neutron-metering-agent_status"] != 1) { return new AlarmStatus(CRITICAL, "neutron-metering-agent down"); }'
  roles:
    - maas_local
|
|
|
|
# Nova metadata API local check: applies the maas_local role to the
# nova_api_metadata hosts. The check file name is derived from check_name and
# receives the host's ansible_ssh_host as its only argument. The alarm returns
# CRITICAL when the nova_api_metadata_local_status metric is not 1.
- hosts: nova_api_metadata
  user: root
  vars:
    check_name: nova_api_metadata_local_check
    check_details: "file={{ check_name }}.py,args={{ ansible_ssh_host }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: nova_api_metadata_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["nova_api_metadata_local_status"] != 1) { return new AlarmStatus(CRITICAL, "API (metadata) unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# Nova os-compute API local check: applies the maas_local role to the
# nova_api_os_compute hosts. The check file name is derived from check_name
# and receives the host's ansible_ssh_host as its only argument. The alarm
# returns CRITICAL when the nova_api_local_status metric is not 1.
- hosts: nova_api_os_compute
  user: root
  vars:
    check_name: nova_api_local_check
    check_details: "file={{ check_name }}.py,args={{ ansible_ssh_host }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: nova_api_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["nova_api_local_status"] != 1) { return new AlarmStatus(CRITICAL, "API (os-compute) unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# nova-compute service check: applies the maas_local role to the nova_compute
# hosts. nova_service_check.py is passed --host, the host's ansible_hostname
# and the internal VIP address. The alarm returns CRITICAL when the
# nova-compute_status metric is not 1.
- hosts: nova_compute
  user: root
  vars:
    check_name: nova_compute_check
    check_details: "file=nova_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: nova_compute_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["nova-compute_status"] != 1) { return new AlarmStatus(CRITICAL, "nova-compute down"); }'
  roles:
    - maas_local
|
|
|
|
# nova-cert service check: applies the maas_local role to the nova_cert
# hosts. nova_service_check.py is passed --host, the host's ansible_hostname
# and the internal VIP address. The alarm returns CRITICAL when the
# nova-cert_status metric is not 1.
- hosts: nova_cert
  user: root
  vars:
    check_name: nova_cert_check
    check_details: "file=nova_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: nova_cert_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["nova-cert_status"] != 1) { return new AlarmStatus(CRITICAL, "nova-cert down"); }'
  roles:
    - maas_local
|
|
|
|
# nova-conductor service check: applies the maas_local role to the
# nova_conductor hosts. nova_service_check.py is passed --host, the host's
# ansible_hostname and the internal VIP address. The alarm returns CRITICAL
# when the nova-conductor_status metric is not 1.
- hosts: nova_conductor
  user: root
  vars:
    check_name: nova_conductor_check
    check_details: "file=nova_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: nova_conductor_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["nova-conductor_status"] != 1) { return new AlarmStatus(CRITICAL, "nova-conductor down"); }'
  roles:
    - maas_local
|
|
|
|
# nova-scheduler service check: applies the maas_local role to the
# nova_scheduler hosts. nova_service_check.py is passed --host, the host's
# ansible_hostname and the internal VIP address. The alarm returns CRITICAL
# when the nova-scheduler_status metric is not 1.
- hosts: nova_scheduler
  user: root
  vars:
    check_name: nova_scheduler_check
    check_details: "file=nova_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: nova_scheduler_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["nova-scheduler_status"] != 1) { return new AlarmStatus(CRITICAL, "nova-scheduler down"); }'
  roles:
    - maas_local
|
|
|
|
# nova-consoleauth service check: applies the maas_local role to the
# nova_spice_console hosts. nova_service_check.py is passed --host, the
# host's ansible_hostname and the internal VIP address. The alarm returns
# CRITICAL when the nova-consoleauth_status metric is not 1.
#
# NOTE(review): the consoleauth check targets the nova_spice_console group;
# presumably nova-consoleauth is co-located with the SPICE console - confirm
# against the inventory.
- hosts: nova_spice_console
  user: root
  vars:
    check_name: nova_consoleauth_check
    check_details: "file=nova_service_check.py,args=--host,args={{ ansible_hostname }},args={{ internal_vip_address }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: nova_consoleauth_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["nova-consoleauth_status"] != 1) { return new AlarmStatus(CRITICAL, "nova-consoleauth down"); }'
  roles:
    - maas_local
|
|
|
|
# Nova SPICE console check: applies the maas_local role to the
# nova_spice_console hosts. service_api_local_check.py is passed the
# arguments nova_spice, the host's ansible_ssh_host and 6082. The alarm
# returns CRITICAL when the nova_spice_api_local_status metric is not 1.
- hosts: nova_spice_console
  user: root
  vars:
    check_name: nova_spice_console_check
    check_details: "file=service_api_local_check.py,args=nova_spice,args={{ ansible_ssh_host }},args=6082"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: nova_spice_api_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["nova_spice_api_local_status"] != 1) { return new AlarmStatus(CRITICAL, "API unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# RabbitMQ status check: applies the maas_local role to the rabbit hosts.
# The check file name is derived from check_name; it is passed -H with the
# host's ansible_ssh_host and -n with the first dot-separated component of
# inventory_hostname. Two alarms are defined, each returning CRITICAL when
# its metric (disk_free_alarm, mem_alarm) is not 1.
- hosts: rabbit
  user: root
  vars:
    check_name: rabbitmq_status
    check_details: "file={{ check_name }}.py,args=-H,args={{ ansible_ssh_host }},args=-n,args={{ inventory_hostname.split('.')[0] }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: disk_free_alarm
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["disk_free_alarm"] != 1) { return new AlarmStatus(CRITICAL, "disk_free_alarm triggered"); }'
      - name: mem_alarm
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["mem_alarm"] != 1) { return new AlarmStatus(CRITICAL, "mem_alarm triggered"); }'
  roles:
    - maas_local
|
|
|
|
# Galera check: applies the maas_local role to the galera hosts. The check
# file name is derived from check_name and is passed -H with the host's
# ansible_ssh_host.
#
# NOTE(review): unlike the other plays in this file, no `alarms` variable is
# defined here. Confirm that this is intentional and that the maas_local role
# tolerates `alarms` being undefined.
- hosts: galera
  user: root
  vars:
    check_name: galera_check
    check_details: "file={{ check_name }}.py,args=-H,args={{ ansible_ssh_host }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
  roles:
    - maas_local
|
|
|
|
# Memcached status check: applies the maas_local role to the memcached hosts.
# The check file name is derived from check_name and receives the host's
# ansible_ssh_host as its only argument. The alarm returns CRITICAL when the
# memcache_api_local_status metric is not 1.
- hosts: memcached
  user: root
  vars:
    check_name: memcached_status
    check_details: "file={{ check_name }}.py,args={{ ansible_ssh_host }}"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: memcache_api_local_status
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["memcache_api_local_status"] != 1) { return new AlarmStatus(CRITICAL, "memcached unavailable"); }'
  roles:
    - maas_local
|
|
|
|
# Disk utilisation check: applies the maas_local role to the `hosts` group.
# The check file name is derived from check_name and takes no arguments.
# Three alarms are defined, one each for the disk_utilisation_sda, _sdb and
# _sdc metrics; each returns WARNING when its metric is >= 90.0.
- hosts: hosts
  user: root
  vars:
    check_name: disk_utilisation
    check_details: "file={{ check_name }}.py"
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    alarms:
      - name: percentage_disk_utilisation_sda
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["disk_utilisation_sda"] >= 90.0) { return new AlarmStatus(WARNING, "Disk utilisation for sda >= 90%"); }'
      - name: percentage_disk_utilisation_sdb
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["disk_utilisation_sdb"] >= 90.0) { return new AlarmStatus(WARNING, "Disk utilisation for sdb >= 90%"); }'
      - name: percentage_disk_utilisation_sdc
        criteria: ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["disk_utilisation_sdc"] >= 90.0) { return new AlarmStatus(WARNING, "Disk utilisation for sdc >= 90%"); }'
  roles:
    - maas_local
|