Added playbook to deploy Kapacitor

Added a playbook to deploy Kapacitor, an alerting tool that works
with InfluxDB. Updated the README to show how to deploy Kapacitor.

Kapacitor can be used to trigger alerts when the collected metrics show
anomalous or unexpected events. It subscribes to InfluxDB to receive data.

General Flow:
Telegraf -> InfluxDb -> Grafana
Telegraf -> InfluxDb -> Kapacitor

Change-Id: I5c400cf9efbda43bb5cb7a9bbd890435e74127f3
This commit is contained in:
Nish Patwa 2016-09-20 21:56:39 +00:00 committed by Nish Patwa(nishpatwa_)
parent 6d4557e55f
commit c1f7a5b2fb
4 changed files with 224 additions and 1 deletions

View File

@ -0,0 +1,41 @@
---
# Copyright 2016, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Installs Kapacitor from the InfluxData apt repository on every host in the
# "cluster-metrics" group, renders its configuration from the bundled
# template, and enables/restarts the packaged service. Variables consumed by
# the template are loaded from vars.yml.
- name: Deploy kapacitor
  hosts: "cluster-metrics"
  gather_facts: true
  user: root
  tasks:
    # The repo line is assembled from gathered distribution facts, which is
    # why gather_facts is enabled above.
    - name: Add kapacitor repo
      apt_repository:
        repo: "deb https://repos.influxdata.com/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} stable"
        state: "present"
    - name: Install kapacitor
      apt:
        pkg: "kapacitor"
        state: "latest"
    # NOTE(review): the rendered file embeds the InfluxDB root credentials;
    # consider setting an explicit restrictive mode/owner here.
    - name: Drop kapacitor config file
      template:
        src: templates/kapacitor.conf.j2
        dest: /etc/kapacitor/kapacitor.conf
    # The restart makes the service pick up the freshly rendered config.
    # A former follow-up task that ran "kapacitord ... &" through the shell
    # was removed: it started a second, unmanaged daemon against the same
    # config right after this restart. The log path it passed is already set
    # by the [logging] section of the rendered config.
    - name: Enable and restart kapacitor
      service:
        name: "kapacitor"
        enabled: true
        state: restarted
  vars_files:
    - vars.yml

View File

@ -8,7 +8,8 @@ Gather and visualize cluster wide metrics
About this repository About this repository
--------------------- ---------------------
This set of playbooks will deploy InfluxDB, Telegraf, and Grafana for the purpose of collecting metrics on an OpenStack cluster. This set of playbooks will deploy InfluxDB, Telegraf, Grafana and Kapacitor for the purpose of collecting
metrics on an OpenStack cluster.
Process Process
------- -------
@ -66,3 +67,9 @@ If you're proxy'ing grafana you will need to provide the full ``root_path`` when
openstack-ansible playbook-grafana.yml -e galera_root_user=root -e galera_address='127.0.0.1' openstack-ansible playbook-grafana.yml -e galera_root_user=root -e galera_address='127.0.0.1'
Once that last playbook is completed you will have a functioning InfluxDB, Telegraf, and Grafana metric collection system active and collecting metrics. Grafana will need some setup; however, functional dashboards have been provided in the ``grafana-dashboards`` directory. Once that last playbook is completed you will have a functioning InfluxDB, Telegraf, and Grafana metric collection system active and collecting metrics. Grafana will need some setup; however, functional dashboards have been provided in the ``grafana-dashboards`` directory.
Install Kapacitor
.. code-block:: bash
openstack-ansible playbook-kapacitor.yml

View File

@ -0,0 +1,172 @@
#jinja2:variable_start_string:'[%' , variable_end_string:'%]', trim_blocks: False
# Root-level settings (they precede the first table header).
# NOTE(review): presumably this is the name InfluxDB uses to reach this node;
# "localhost" would then only work when InfluxDB runs on the same machine — confirm.
hostname = "localhost"
# Base directory; the replay, storage and task paths in this file all sit under it.
data_dir = "/var/lib/kapacitor"
# HTTP listener settings.
[http]
# Only a port is given (no host part); it is rendered from the kapacitor_port variable.
bind-address = ":[% kapacitor_port %]"
# Authentication is off and shared-secret below is left empty.
auth-enabled = false
log-enabled = true
write-tracing = false
pprof-enabled = false
# HTTPS is off; the certificate path on the next line is set regardless.
https-enabled = false
https-certificate = "/etc/ssl/kapacitor.pem"
shutdown-timeout = "10s"
shared-secret = ""
# On-disk locations for replays, the BoltDB file and task data; every path
# below is under /var/lib/kapacitor, the same value as the root data_dir key.
[replay]
dir = "/var/lib/kapacitor/replay"
[storage]
boltdb = "/var/lib/kapacitor/kapacitor.db"
[task]
dir = "/var/lib/kapacitor/tasks"
snapshot-interval = "1m0s"
# InfluxDB connection, declared as an array-of-tables entry ([[...]]); this
# file defines exactly one such entry and marks it as the default.
[[influxdb]]
enabled = true
# Rendered from the influxdb_db_name variable.
name = "[% influxdb_db_name %]"
default = true
# Targets the first host of the "cluster-metrics" inventory group, using its
# ansible_ssh_host value and the influxdb_port variable.
urls = ["http://[% hostvars[groups['cluster-metrics'][0]]['ansible_ssh_host'] %]:[% influxdb_port %]"]
# Credentials are rendered from the influxdb_db_root_* variables.
username = "[% influxdb_db_root_name %]"
password = "[% influxdb_db_root_password %]"
# No TLS material is configured; the URL above is plain http.
ssl-ca = ""
ssl-cert = ""
ssl-key = ""
insecure-skip-verify = false
timeout = "0"
# Subscriptions are left enabled and use the http protocol.
disable-subscriptions = false
subscription-protocol = "http"
udp-bind = ""
udp-buffer = 1000
udp-read-buffer = 0
startup-timeout = "5m0s"
subscriptions-sync-interval = "1m0s"
# Sub-tables of the [[influxdb]] entry above; TOML requires them to follow it
# before any further [[influxdb]] entry.
[influxdb.subscriptions]
[influxdb.excluded-subscriptions]
# Same database / retention-policy pair that the [stats] table in this file uses.
_kapacitor = ["autogen"]
# Log destination and verbosity.
[logging]
file = "/var/log/kapacitor/kapacitor.log"
level = "INFO"
# collectd input; disabled (enabled = false).
[collectd]
enabled = false
bind-address = ":25826"
database = "collectd"
retention-policy = ""
batch-size = 1000
batch-pending = 5
batch-timeout = "10s"
read-buffer = 0
typesdb = "/usr/share/collectd/types.db"
# OpenTSDB input; disabled (enabled = false).
[opentsdb]
enabled = false
bind-address = ":4242"
database = "opentsdb"
retention-policy = ""
consistency-level = "one"
tls-enabled = false
# NOTE(review): this path names influxdb.pem, not a Kapacitor-specific file —
# confirm before ever turning tls-enabled on.
certificate = "/etc/ssl/influxdb.pem"
batch-size = 1000
batch-pending = 5
batch-timeout = "1s"
log-point-errors = true
# SMTP settings; disabled, with blank credentials and a blank "from" address.
[smtp]
enabled = false
host = "localhost"
port = 25
username = ""
password = ""
no-verify = false
global = false
state-changes-only = false
from = ""
idle-timeout = "30s"
# Alert integrations, part 1. Each table below has enabled = false, and the
# key/credential fields that exist are left blank.
[opsgenie]
enabled = false
api-key = ""
url = "https://api.opsgenie.com/v1/json/alert"
recovery_url = "https://api.opsgenie.com/v1/json/alert/note"
global = false
[victorops]
enabled = false
api-key = ""
routing-key = ""
url = "https://alert.victorops.com/integrations/generic/20131114/alert"
global = false
[pagerduty]
enabled = false
url = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"
service-key = ""
global = false
[sensu]
enabled = false
addr = "sensu-client:3030"
source = "Kapacitor"
# Alert integrations, part 2. Each table below has enabled = false, and the
# token/channel/room fields are left blank.
[slack]
enabled = false
url = ""
channel = ""
global = false
state-changes-only = false
[telegram]
enabled = false
url = "https://api.telegram.org/bot"
token = ""
chat-id = ""
parse-mode = ""
disable-web-page-preview = false
disable-notification = false
global = false
state-changes-only = false
[hipchat]
enabled = false
# NOTE(review): "subdomain" looks like a placeholder to be replaced with a
# real HipChat subdomain if this table is ever enabled — confirm.
url = "https://subdomain.hipchat.com/v2/room"
token = ""
room = ""
global = false
state-changes-only = false
[alerta]
enabled = false
url = ""
token = ""
environment = ""
origin = "kapacitor"
# Reporting is switched on and points at usage.influxdata.com.
# NOTE(review): confirm that outbound reporting is acceptable for this deployment.
[reporting]
enabled = true
url = "https://usage.influxdata.com"
# Stats are enabled on a 10s interval and target the "_kapacitor" database
# with the "autogen" retention policy — the same pair listed under
# [influxdb.excluded-subscriptions] in this file.
[stats]
enabled = true
stats-interval = "10s"
database = "_kapacitor"
retention-policy = "autogen"
timing-sample-rate = 0.1
timing-movavg-size = 1000
# No user-defined functions are declared; both tables are empty.
[udf]
[udf.functions]
# Deadman settings.
# The {{ ... }} sequences in id/message are written out verbatim by Ansible:
# this template's first line switches the Jinja2 variable delimiters to [% %].
# NOTE(review): NODE_NAME and INTERVAL are presumably placeholders that
# Kapacitor substitutes itself — confirm against the Kapacitor docs.
[deadman]
interval = "10s"
threshold = 0.0
id = "node 'NODE_NAME' in task '{{ .TaskName }}'"
message = "{{ .ID }} is {{ if eq .Level \"OK\" }}alive{{ else }}dead{{ end }}: {{ index .Fields \"collected\" | printf \"%0.3f\" }} points/INTERVAL."
global = false
# Talk (jianliao.com webhook) integration; disabled.
# NOTE(review): the trailing "uuid" in the URL looks like a placeholder — confirm.
[talk]
enabled = false
url = "https://jianliao.com/v2/services/webhook/uuid"
author_name = "Kapacitor"

View File

@ -32,3 +32,6 @@ influxdb_db_root_name: root
influxdb_db_root_password: SuperSecrete influxdb_db_root_password: SuperSecrete
influxdb_db_metric_user: openstack influxdb_db_metric_user: openstack
influxdb_db_metric_password: SuperDuperSecrete influxdb_db_metric_password: SuperDuperSecrete
# Kapacitor Vars
# Port rendered into the [http] bind-address of the Kapacitor config template.
kapacitor_port: 9092