Allow setup of specific MaaS filesystem monitors

By default, this generates a list of mounted ext and xfs filesystems and
sets each of them up to be monitored, using the threshold specified in
user_variables.

If needed, an explicit list of filesystems and thresholds can be set in
user_variables instead, and that list can be overridden per host in
rpc_user_config.yml.

The user_variables.yml sample file and the rpc_user_config.yml files
have been updated to reflect these changes.

Change-Id: I1959a630e2c603a76001f52db6b027bf71124c54
Closes-Bug: #1414249
This commit is contained in:
Andy McCrae 2015-01-27 11:32:54 +00:00 committed by Matt Thompson
parent 413df1e38c
commit 3deb360808
5 changed files with 146 additions and 11 deletions

View File

@ -124,6 +124,13 @@ global_overrides:
infra_hosts:
infra1:
ip: 172.29.236.100
## You can override the maas_filesystem_monitors here
# container_vars:
# maas_filesystem_overrides:
# - filesystem: /
# threshold: 85.0
# - filesystem: /boot
# threshold: 90.0
infra2:
ip: 172.29.236.101
infra3:

View File

@ -124,6 +124,15 @@ maas_monitoring_zones:
# maas_fqdn_extension: .example.com
# Set the following to skip creating alarms for this device
#maas_excluded_devices: ['xvde']
# Threshold (percent used) for filesystem monitoring when no explicit list of filesystems is given.
maas_filesystem_threshold: 90.0
# Explicitly set the filesystems to set up monitors/alerts for.
# NB: These can be overridden per device in rpc_user_config using "maas_filesystem_overrides"
#maas_filesystem_monitors:
# - filesystem: /
# threshold: 90.0
# - filesystem: /boot
# threshold: 90.0
## Neutron Options
neutron_container_mysql_password:

View File

@ -0,0 +1,47 @@
---
# Copyright 2014, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Create one MaaS filesystem check and one "Disk space used" alarm for every
# entry in `drives`. Each entry is a mapping with `filesystem` and `threshold`
# keys.
#
# Variables read by these tasks: drives, check_name, agent_type, check_period,
# check_timeout, maas_notification_plan, maas_alarm_local_consecutive_count and
# (optionally) maas_fqdn_extension.
#
# Every grep pattern is built as one string and passed through `quote` as a
# whole, and grep runs with -F, so hostnames and mount points are matched
# literally rather than as regular expressions.

# Look up the entity whose label is the inventory hostname plus the optional
# FQDN extension. The trailing space in the pattern stops a hostname from
# matching a longer label that merely starts with it.
- name: Get entity ID for physical_host
  shell: >-
    raxmon-entities-list
    | grep -F {{ ('label=' ~ inventory_hostname ~ (maas_fqdn_extension | default('')) ~ ' ') | quote }}
    | sed -e 's/^.* id=\(.*\) label=.*$/\1/g'
  register: entity_id
  changed_when: false

# grep exits non-zero when no check carries the expected label; that return
# code drives the creation task below, so the failure is ignored here.
- name: Validate if check exists
  shell: >-
    raxmon-checks-list --entity-id {{ entity_id.stdout | quote }}
    | grep -F {{ ('label=' ~ check_name ~ '_' ~ item.filesystem ~ '--' ~ inventory_hostname) | quote }}
  register: check_exists
  with_items: drives
  ignore_errors: true
  changed_when: false

# item is a result of the lookup above; item.item is the original drive entry.
- name: Create check if it does not exist
  command: >-
    raxmon-checks-create
    --entity-id {{ entity_id.stdout | quote }}
    --type {{ agent_type | quote }}
    --label {{ (check_name ~ '_' ~ item.item.filesystem ~ '--' ~ inventory_hostname) | quote }}
    --details {{ ('target=' ~ item.item.filesystem) | quote }}
    --period {{ check_period | quote }}
    --timeout {{ check_timeout | quote }}
  with_items: check_exists.results
  when: item.rc != 0

# Runs for every drive, whether its check was just created or already existed.
- name: Get check ID for newly created check
  shell: >-
    raxmon-checks-list --entity-id {{ entity_id.stdout | quote }}
    | grep -F {{ ('label=' ~ check_name ~ '_' ~ item.item.filesystem ~ '--' ~ inventory_hostname) | quote }}
    | sed -e 's/^.* id=\(.*\) label=.*$/\1/g'
  with_items: check_exists.results
  register: check_id
  changed_when: false

- name: Validate if alarm exists
  shell: >-
    raxmon-alarms-list --entity-id {{ entity_id.stdout | quote }}
    | grep -F {{ ('label=Disk space used on ' ~ item.item.filesystem ~ '--' ~ inventory_hostname) | quote }}
  register: alarm_exists
  ignore_errors: true
  changed_when: false
  with_items: check_exists.results

# item[0] is an alarm lookup result and item[1] a check-ID lookup result. The
# nested loop visits every pair; the `when` keeps only pairs that refer to the
# same filesystem and whose alarm was not found.
- name: Create alarm if it does not exist
  shell: >-
    raxmon-alarms-create
    --entity-id {{ entity_id.stdout | quote }}
    --check-id {{ item[1].stdout | quote }}
    --notification-plan {{ maas_notification_plan | quote }}
    --label {{ ('Disk space used on ' ~ item[1].item.item.filesystem ~ '--' ~ inventory_hostname) | quote }}
    --criteria ":set consecutiveCount={{ maas_alarm_local_consecutive_count }}
    if (percentage(metric['used'], metric['total']) >= {{ item[1].item.item.threshold }})
    { return new AlarmStatus(WARNING, '{{ item[1].item.item.filesystem }} filesystem is >= {{ item[1].item.item.threshold }}% full.'); }"
  when: item[0].rc != 0 and item[0].item.item.filesystem == item[1].item.item.filesystem
  with_nested:
    - alarm_exists.results
    - check_id.results

View File

@ -0,0 +1,47 @@
---
# Copyright 2014, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Create one MaaS filesystem check and one "Disk space used" alarm for every
# mount point in `drives`. Each entry is a plain mount-point string; all
# alarms share the single `threshold` value.
#
# Variables read by these tasks: drives, threshold, check_name, agent_type,
# check_period, check_timeout, maas_notification_plan,
# maas_alarm_local_consecutive_count and (optionally) maas_fqdn_extension.
#
# Every grep pattern is built as one string and passed through `quote` as a
# whole, and grep runs with -F, so hostnames and mount points are matched
# literally rather than as regular expressions.

# Look up the entity whose label is the inventory hostname plus the optional
# FQDN extension. The trailing space in the pattern stops a hostname from
# matching a longer label that merely starts with it.
- name: Get entity ID for physical_host
  shell: >-
    raxmon-entities-list
    | grep -F {{ ('label=' ~ inventory_hostname ~ (maas_fqdn_extension | default('')) ~ ' ') | quote }}
    | sed -e 's/^.* id=\(.*\) label=.*$/\1/g'
  register: entity_id
  changed_when: false

# grep exits non-zero when no check carries the expected label; that return
# code drives the creation task below, so the failure is ignored here.
- name: Validate if check exists
  shell: >-
    raxmon-checks-list --entity-id {{ entity_id.stdout | quote }}
    | grep -F {{ ('label=' ~ check_name ~ '_' ~ item ~ '--' ~ inventory_hostname) | quote }}
  register: check_exists
  with_items: drives
  ignore_errors: true
  changed_when: false

# item is a result of the lookup above; item.item is the original mount point.
- name: Create check if it does not exist
  command: >-
    raxmon-checks-create
    --entity-id {{ entity_id.stdout | quote }}
    --type {{ agent_type | quote }}
    --label {{ (check_name ~ '_' ~ item.item ~ '--' ~ inventory_hostname) | quote }}
    --details {{ ('target=' ~ item.item) | quote }}
    --period {{ check_period | quote }}
    --timeout {{ check_timeout | quote }}
  with_items: check_exists.results
  when: item.rc != 0

# Runs for every drive, whether its check was just created or already existed.
- name: Get check ID for newly created check
  shell: >-
    raxmon-checks-list --entity-id {{ entity_id.stdout | quote }}
    | grep -F {{ ('label=' ~ check_name ~ '_' ~ item.item ~ '--' ~ inventory_hostname) | quote }}
    | sed -e 's/^.* id=\(.*\) label=.*$/\1/g'
  with_items: check_exists.results
  register: check_id
  changed_when: false

- name: Validate if alarm exists
  shell: >-
    raxmon-alarms-list --entity-id {{ entity_id.stdout | quote }}
    | grep -F {{ ('label=Disk space used on ' ~ item.item ~ '--' ~ inventory_hostname) | quote }}
  register: alarm_exists
  ignore_errors: true
  changed_when: false
  with_items: check_exists.results

# item[0] is an alarm lookup result and item[1] a check-ID lookup result. The
# nested loop visits every pair; the `when` keeps only pairs that refer to the
# same mount point and whose alarm was not found.
- name: Create alarm if it does not exist
  shell: >-
    raxmon-alarms-create
    --entity-id {{ entity_id.stdout | quote }}
    --check-id {{ item[1].stdout | quote }}
    --notification-plan {{ maas_notification_plan | quote }}
    --label {{ ('Disk space used on ' ~ item[1].item.item ~ '--' ~ inventory_hostname) | quote }}
    --criteria ":set consecutiveCount={{ maas_alarm_local_consecutive_count }}
    if (percentage(metric['used'], metric['total']) >= {{ threshold }})
    { return new AlarmStatus(WARNING, '{{ item[1].item.item }} filesystem is >= {{ threshold }}% full.'); }"
  when: item[0].rc != 0 and item[0].item.item == item[1].item.item
  with_nested:
    - alarm_exists.results
    - check_id.results

View File

@ -24,17 +24,6 @@
- { 'name': 'idle_percent_average', 'criteria': ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (metric["idle_percent_average"] <= 10.0) { return new AlarmStatus(WARNING, "CPU time spent idle has dropped to <= 10%"); }' }
user: root
- include: cdm.yml
vars:
check_name: filesystem
check_details: "target=/"
check_period: "{{ maas_check_period }}"
check_timeout: "{{ maas_check_timeout }}"
agent_type: "agent.filesystem"
alarms:
- { 'name': 'Disk space used on /', 'criteria': ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (percentage(metric["used"], metric["total"]) >= 95.0) { return new AlarmStatus(WARNING, "Root filesystem is >= 95% full."); }' }
user: root
- include: cdm.yml
vars:
check_name: memory
@ -45,3 +34,39 @@
alarms:
- { 'name': 'Memory used', 'criteria': ':set consecutiveCount={{ maas_alarm_local_consecutive_count }} if (percentage(metric["actual_used"], metric["total"]) >= 95.0) { return new AlarmStatus(WARNING, "Memory is 95%+ in use."); }' }
user: root
# Filesystem monitoring. Exactly one of the three includes below applies:
#   1. neither maas_filesystem_overrides nor maas_filesystem_monitors is
#      defined -> monitor every mounted ext/xfs filesystem at
#      maas_filesystem_threshold (filesystem_auto.yml);
#   2. only maas_filesystem_monitors is defined -> monitor that explicit list;
#   3. maas_filesystem_overrides is defined -> it wins over everything else.

# Build a comma-separated string of mount points whose fstype contains "xfs"
# or "ext". The filter is part of the `for` statement so that loop.last refers
# to the last *matching* mount; filtering inside the loop body would leave a
# trailing comma (and therefore an empty drive after split) whenever the final
# entry of ansible_mounts is some other filesystem type.
- name: Gathering facts for mounted drives
  set_fact:
    mounted_drives: "{% for item in ansible_mounts if 'xfs' in item.fstype or 'ext' in item.fstype %}{{ item.mount }}{% if not loop.last %},{% endif %}{% endfor %}"
  when: maas_filesystem_overrides is not defined and maas_filesystem_monitors is not defined

# NOTE(review): `user: root` is placed at include level (sibling of `vars`);
# the original indentation was not visible in this view - confirm.
- include: filesystem_auto.yml
  vars:
    check_name: filesystem
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    agent_type: "agent.filesystem"
    drives: "{{ mounted_drives.split(',') }}"
    threshold: "{{ maas_filesystem_threshold }}"
  user: root
  when: maas_filesystem_overrides is not defined and maas_filesystem_monitors is not defined

- include: filesystem.yml
  vars:
    check_name: filesystem
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    agent_type: "agent.filesystem"
    drives: "{{ maas_filesystem_monitors }}"
  user: root
  when: maas_filesystem_overrides is not defined and maas_filesystem_monitors is defined

- include: filesystem.yml
  vars:
    check_name: filesystem
    check_period: "{{ maas_check_period }}"
    check_timeout: "{{ maas_check_timeout }}"
    agent_type: "agent.filesystem"
    drives: "{{ maas_filesystem_overrides }}"
  user: root
  when: maas_filesystem_overrides is defined