From c1f7a5b2fbc069c66a131f2f456e0ed9e74d116b Mon Sep 17 00:00:00 2001
From: Nish Patwa
Date: Tue, 20 Sep 2016 21:56:39 +0000
Subject: [PATCH] Added playbook to deploy Kapacitor

Added a playbook to deploy an alerting tool, Kapacitor
that can work with influxdb. Updated readme to demonstrate
how to deploy Kapacitor. Kapacitor can be used to trigger
alerts based on some uncertain events. It subscribes to
influxdb to collect data.

General Flow:
Telegraf -> InfluxDb -> Grafana
Telegraf -> InfluxDb -> Kapacitor

Change-Id: I5c400cf9efbda43bb5cb7a9bbd890435e74127f3
---
 cluster_metrics/playbook-kapacitor.yml      |  41 +++++
 cluster_metrics/readme.rst                  |   9 +-
 cluster_metrics/templates/kapacitor.conf.j2 | 172 ++++++++++++++++++++
 cluster_metrics/vars.yml                    |   3 +
 4 files changed, 224 insertions(+), 1 deletion(-)
 create mode 100644 cluster_metrics/playbook-kapacitor.yml
 create mode 100644 cluster_metrics/templates/kapacitor.conf.j2

diff --git a/cluster_metrics/playbook-kapacitor.yml b/cluster_metrics/playbook-kapacitor.yml
new file mode 100644
index 00000000..7646ec17
--- /dev/null
+++ b/cluster_metrics/playbook-kapacitor.yml
@@ -0,0 +1,41 @@
+---
+# Copyright 2016, Rackspace US, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+- name: Deploy kapacitor
+  hosts: "cluster-metrics"
+  gather_facts: true
+  user: root
+  tasks:
+    - name: Add kapacitor repo
+      apt_repository:
+        repo: "deb https://repos.influxdata.com/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} stable"
+        state: "present"
+    - name: Install kapacitor
+      apt:
+        pkg: "kapacitor"
+        state: "latest"
+    - name: Drop kapacitor config file
+      template:
+        src: templates/kapacitor.conf.j2
+        dest: /etc/kapacitor/kapacitor.conf
+    - name: Enable and restart kapacitor
+      service:
+        name: "kapacitor"
+        enabled: true
+        state: restarted
+    - name: Start kapacitor server
+      shell: kapacitord -config /etc/kapacitor/kapacitor.conf -log-file /var/log/kapacitor/kapacitor.log &
+  vars_files:
+    - vars.yml
diff --git a/cluster_metrics/readme.rst b/cluster_metrics/readme.rst
index 7a66663d..da950838 100644
--- a/cluster_metrics/readme.rst
+++ b/cluster_metrics/readme.rst
@@ -8,7 +8,8 @@ Gather and visualize cluster wide metrics
 About this repository
 ---------------------
 
-This set of playbooks will deploy InfluxDB, Telegraf, and Grafana for the purpose of collecting metrics on an OpenStack cluster.
+This set of playbooks will deploy InfluxDB, Telegraf, Grafana and Kapacitor for the purpose of collecting
+metrics on an OpenStack cluster.
 
 Process
 -------
@@ -66,3 +67,9 @@ If you're proxy'ing grafana you will need to provide the full ``root_path`` when
     openstack-ansible playbook-grafana.yml -e galera_root_user=root -e galera_address='127.0.0.1'
 
 Once that last playbook is completed you will have a functioning InfluxDB, Telegraf, and Grafana metric collection system active and collecting metrics. Grafana will need some setup, however functional dash boards have been provided in the ``grafana-dashboards`` directory.
+
+Install Kapacitor
+
+.. code-block:: bash
+
+    openstack-ansible playbook-kapacitor.yml
diff --git a/cluster_metrics/templates/kapacitor.conf.j2 b/cluster_metrics/templates/kapacitor.conf.j2
new file mode 100644
index 00000000..6b0f85e4
--- /dev/null
+++ b/cluster_metrics/templates/kapacitor.conf.j2
@@ -0,0 +1,172 @@
+#jinja2:variable_start_string:'[%' , variable_end_string:'%]', trim_blocks: False
+hostname = "localhost"
+data_dir = "/var/lib/kapacitor"
+
+[http]
+  bind-address = ":[% kapacitor_port %]"
+  auth-enabled = false
+  log-enabled = true
+  write-tracing = false
+  pprof-enabled = false
+  https-enabled = false
+  https-certificate = "/etc/ssl/kapacitor.pem"
+  shutdown-timeout = "10s"
+  shared-secret = ""
+
+[replay]
+  dir = "/var/lib/kapacitor/replay"
+
+[storage]
+  boltdb = "/var/lib/kapacitor/kapacitor.db"
+
+[task]
+  dir = "/var/lib/kapacitor/tasks"
+  snapshot-interval = "1m0s"
+
+[[influxdb]]
+  enabled = true
+  name = "[% influxdb_db_name %]"
+  default = true
+  urls = ["http://[% hostvars[groups['cluster-metrics'][0]]['ansible_ssh_host'] %]:[% influxdb_port %]"]
+  username = "[% influxdb_db_root_name %]"
+  password = "[% influxdb_db_root_password %]"
+  ssl-ca = ""
+  ssl-cert = ""
+  ssl-key = ""
+  insecure-skip-verify = false
+  timeout = "0"
+  disable-subscriptions = false
+  subscription-protocol = "http"
+  udp-bind = ""
+  udp-buffer = 1000
+  udp-read-buffer = 0
+  startup-timeout = "5m0s"
+  subscriptions-sync-interval = "1m0s"
+  [influxdb.subscriptions]
+  [influxdb.excluded-subscriptions]
+    _kapacitor = ["autogen"]
+
+[logging]
+  file = "/var/log/kapacitor/kapacitor.log"
+  level = "INFO"
+
+[collectd]
+  enabled = false
+  bind-address = ":25826"
+  database = "collectd"
+  retention-policy = ""
+  batch-size = 1000
+  batch-pending = 5
+  batch-timeout = "10s"
+  read-buffer = 0
+  typesdb = "/usr/share/collectd/types.db"
+
+[opentsdb]
+  enabled = false
+  bind-address = ":4242"
+  database = "opentsdb"
+  retention-policy = ""
+  consistency-level = "one"
+  tls-enabled = false
+  certificate = "/etc/ssl/influxdb.pem"
+  batch-size = 1000
+  batch-pending = 5
+  batch-timeout = "1s"
+  log-point-errors = true
+
+[smtp]
+  enabled = false
+  host = "localhost"
+  port = 25
+  username = ""
+  password = ""
+  no-verify = false
+  global = false
+  state-changes-only = false
+  from = ""
+  idle-timeout = "30s"
+
+[opsgenie]
+  enabled = false
+  api-key = ""
+  url = "https://api.opsgenie.com/v1/json/alert"
+  recovery_url = "https://api.opsgenie.com/v1/json/alert/note"
+  global = false
+
+[victorops]
+  enabled = false
+  api-key = ""
+  routing-key = ""
+  url = "https://alert.victorops.com/integrations/generic/20131114/alert"
+  global = false
+
+[pagerduty]
+  enabled = false
+  url = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"
+  service-key = ""
+  global = false
+
+[sensu]
+  enabled = false
+  addr = "sensu-client:3030"
+  source = "Kapacitor"
+
+[slack]
+  enabled = false
+  url = ""
+  channel = ""
+  global = false
+  state-changes-only = false
+
+[telegram]
+  enabled = false
+  url = "https://api.telegram.org/bot"
+  token = ""
+  chat-id = ""
+  parse-mode = ""
+  disable-web-page-preview = false
+  disable-notification = false
+  global = false
+  state-changes-only = false
+
+[hipchat]
+  enabled = false
+  url = "https://subdomain.hipchat.com/v2/room"
+  token = ""
+  room = ""
+  global = false
+  state-changes-only = false
+
+[alerta]
+  enabled = false
+  url = ""
+  token = ""
+  environment = ""
+  origin = "kapacitor"
+
+[reporting]
+  enabled = true
+  url = "https://usage.influxdata.com"
+
+[stats]
+  enabled = true
+  stats-interval = "10s"
+  database = "_kapacitor"
+  retention-policy = "autogen"
+  timing-sample-rate = 0.1
+  timing-movavg-size = 1000
+
+[udf]
+  [udf.functions]
+
+[deadman]
+  interval = "10s"
+  threshold = 0.0
+  id = "node 'NODE_NAME' in task '{{ .TaskName }}'"
+  message = "{{ .ID }} is {{ if eq .Level \"OK\" }}alive{{ else }}dead{{ end }}: {{ index .Fields \"collected\" | printf \"%0.3f\" }} points/INTERVAL."
+  global = false
+
+[talk]
+  enabled = false
+  url = "https://jianliao.com/v2/services/webhook/uuid"
+  author_name = "Kapacitor"
diff --git a/cluster_metrics/vars.yml b/cluster_metrics/vars.yml
index 09d8a44c..a9a1604c 100644
--- a/cluster_metrics/vars.yml
+++ b/cluster_metrics/vars.yml
@@ -32,3 +32,6 @@ influxdb_db_root_name: root
 influxdb_db_root_password: SuperSecrete
 influxdb_db_metric_user: openstack
 influxdb_db_metric_password: SuperDuperSecrete
+
+# Kapacitor Vars
+kapacitor_port: 9092