From 17450f35f343a98330e55db57cf8643ec129164c Mon Sep 17 00:00:00 2001 From: Nish Patwa Date: Mon, 24 Oct 2016 00:32:41 +0000 Subject: [PATCH] Adding influx relay to make the existing monitoring stack highly available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added Influxdb relay to make the existing monitoring stack highly available. Relay replicates the data to multiple database instances. Also added configuration in HAProxy that load balances the read queries to influxdb instances and write queries to influxdb relays. ┌─────────────────┐ │writes & queries │ └─────────────────┘ │ ▼ ┌───────────────┐ │ │ ┌────────│ Load Balancer │─────────┐ │ │ │ │ │ └──────┬─┬──────┘ │ │ │ │ │ │ │ │ │ │ ┌──────┘ └────────┐ │ │ │ ┌─────────────┐ │ │┌──────┐ │ │ │/write or UDP│ │ ││/query│ │ ▼ └─────────────┘ ▼ │└──────┘ │ ┌──────────┐ ┌──────────┐ │ │ │ InfluxDB │ │ InfluxDB │ │ │ │ Relay │ │ Relay │ │ │ └──┬────┬──┘ └────┬──┬──┘ │ │ │ | | │ │ │ | ┌─┼──────────────┘ | │ │ │ │ └──────────────┐ │ │ │ ▼ ▼ ▼ ▼ │ │ ┌──────────┐ ┌──────────┐ │ │ │ │ │ │ │ └─▶│ InfluxDB │ │ InfluxDB │◀─┘ │ │ │ │ └──────────┘ └──────────┘ This patch is dependent on this patch: https://review.openstack.org/#/c/392328/ Change-Id: I05bdaa0e2fb251b48df1d26d09ad63942872293a --- cluster_metrics/files/deploy_go.sh | 11 +++++ cluster_metrics/files/deploy_influxdbrelay.sh | 8 ++++ cluster_metrics/playbook-influx-db.yml | 40 ++++++++++++++++++- cluster_metrics/playbook-metrics-lb.yml | 28 ++++++++++++- .../templates/influxdbrelay.conf.j2 | 6 +++ .../templates/influxdbrelay.service.j2 | 17 ++++++++ cluster_metrics/templates/relay.toml.j2 | 10 +++++ cluster_metrics/templates/telegraf.conf.j2 | 2 +- cluster_metrics/vars.yml | 3 ++ 9 files changed, 122 insertions(+), 3 deletions(-) create mode 100755 cluster_metrics/files/deploy_go.sh create mode 100755 cluster_metrics/files/deploy_influxdbrelay.sh create mode 100644 cluster_metrics/templates/influxdbrelay.conf.j2 create 
mode 100644 cluster_metrics/templates/influxdbrelay.service.j2 create mode 100644 cluster_metrics/templates/relay.toml.j2 diff --git a/cluster_metrics/files/deploy_go.sh b/cluster_metrics/files/deploy_go.sh new file mode 100755 index 00000000..0593f573 --- /dev/null +++ b/cluster_metrics/files/deploy_go.sh @@ -0,0 +1,11 @@ +#!/bin/bash +pushd /opt + wget -P /opt/ https://storage.googleapis.com/golang/go1.7.3.linux-amd64.tar.gz + tar -xzf /opt/go1.7.3.linux-amd64.tar.gz -C /opt/ +popd +pushd /usr/local/bin + find /opt/go/bin/ -type f -exec ln -sf {} \; +popd +if ! grep -qw 'GOROOT="/opt/go"' /etc/environment; then + echo 'GOROOT="/opt/go"' | tee -a /etc/environment +fi diff --git a/cluster_metrics/files/deploy_influxdbrelay.sh b/cluster_metrics/files/deploy_influxdbrelay.sh new file mode 100755 index 00000000..8473cf7b --- /dev/null +++ b/cluster_metrics/files/deploy_influxdbrelay.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm -rf /opt/influxdb-relay; + +mkdir /opt/influxdb-relay; + +export GOPATH=/opt/influxdb-relay/; +export GOROOT=/opt/go; +go get -u github.com/influxdata/influxdb-relay diff --git a/cluster_metrics/playbook-influx-db.yml b/cluster_metrics/playbook-influx-db.yml index 306bef96..b2df0117 100644 --- a/cluster_metrics/playbook-influx-db.yml +++ b/cluster_metrics/playbook-influx-db.yml @@ -18,6 +18,17 @@ gather_facts: true user: root tasks: + - name: Check init system + command: cat /proc/1/comm + changed_when: false + register: _pid1_name + tags: + - always + - name: Set the name of pid1 + set_fact: + pid1_name: "{{ _pid1_name.stdout }}" + tags: + - always - name: InfluxDB datapath bind mount lxc_container: name: "{{ inventory_hostname }}" @@ -49,9 +60,10 @@ state: restarted - name: Wait for influxdb to be ready wait_for: - host: "{{ hostvars[groups['cluster-metrics'][0]]['ansible_host'] }}" + host: "{{ hostvars[item]['ansible_host'] }}" port: "{{ influxdb_port }}" delay: 1 + with_items: "{{ groups['cluster-metrics'] }}" - name: Create metrics DB shell: > 
influx -username {{ influxdb_db_root_name }} @@ -62,6 +74,32 @@ - "CREATE RETENTION POLICY {{ influxdb_db_retention_policy }} ON {{ influxdb_db_name }} DURATION {{ influxdb_db_retention }} REPLICATION {{ influxdb_db_replication }}" - "CREATE USER {{ influxdb_db_metric_user }} WITH PASSWORD '{{ influxdb_db_metric_password }}'" - "GRANT ALL ON {{ influxdb_db_name }} TO {{ influxdb_db_metric_user }}" + - name: Install git + apt: + pkg: "git" + state: "latest" + - name: Install GOLang + script: files/deploy_go.sh + - name: Download and install influx-relay + script: files/deploy_influxdbrelay.sh + - name: Drop influx relay toml file + template: + src: templates/relay.toml.j2 + dest: /opt/influxdb-relay/relay.toml + - name: Drop Influx Relay upstart + template: + src: templates/influxdbrelay.conf.j2 + dest: /etc/init/influxdbrelay.conf + when: pid1_name == "init" + - name: Drop Influx Relay service file + template: + src: templates/influxdbrelay.service.j2 + dest: /etc/systemd/system/influxdbrelay.service + when: pid1_name == "systemd" + - name: Restart influxdb relay + service: + name: "influxdbrelay" + state: restarted vars_files: - vars.yml diff --git a/cluster_metrics/playbook-metrics-lb.yml b/cluster_metrics/playbook-metrics-lb.yml index dbd4f692..f5e50ab3 100644 --- a/cluster_metrics/playbook-metrics-lb.yml +++ b/cluster_metrics/playbook-metrics-lb.yml @@ -37,13 +37,39 @@ haproxy_backend_nodes: "{{ groups['cluster-metrics'] | default([]) }}" haproxy_ssl: "{{ haproxy_ssl }}" haproxy_port: 8086 - haproxy_balance_type: tcp + haproxy_backend_port: 8086 + haproxy_balance_type: http + haproxy_backend_options: + - "httpchk HEAD /ping" + haproxy_whitelist_networks: + - 192.168.0.0/16 + - 172.16.0.0/12 + - 10.0.0.0/8 + haproxy_acls: + read_queries: + rule: "path_sub -i query" + write_queries: + rule: "path_sub -i write" + backend_name: "influxdb_relay" + - service: + haproxy_service_name: influxdb_relay + haproxy_backend_nodes: "{{ groups['cluster-metrics'] | 
default([]) }}" + haproxy_ssl: "{{ haproxy_ssl }}" + haproxy_port: 8086 + haproxy_backend_port: 9096 + haproxy_balance_type: http haproxy_backend_options: - tcp-check haproxy_whitelist_networks: - 192.168.0.0/16 - 172.16.0.0/12 - 10.0.0.0/8 + haproxy_acls: + write_queries: + rule: "path_sub -i write" + read_queries: + rule: "path_sub -i query" + backend_name: "influxdb" - service: haproxy_service_name: grafana haproxy_backend_nodes: "{{ groups['cluster-metrics'] | default([]) }}" diff --git a/cluster_metrics/templates/influxdbrelay.conf.j2 b/cluster_metrics/templates/influxdbrelay.conf.j2 new file mode 100644 index 00000000..e0457a76 --- /dev/null +++ b/cluster_metrics/templates/influxdbrelay.conf.j2 @@ -0,0 +1,6 @@ +description "Influxdb Relay" + +start on runlevel [2345] +stop on runlevel [016] + +exec /opt/influxdb-relay/bin/influxdb-relay -config /opt/influxdb-relay/relay.toml diff --git a/cluster_metrics/templates/influxdbrelay.service.j2 b/cluster_metrics/templates/influxdbrelay.service.j2 new file mode 100644 index 00000000..5c9512bd --- /dev/null +++ b/cluster_metrics/templates/influxdbrelay.service.j2 @@ -0,0 +1,17 @@ +# Systemd unit for influxdb-relay; installed by playbook-influx-db.yml when pid 1 is systemd + +[Unit] +Description=Influx relay adds a basic high availability layer to InfluxDB. 
+After=network-online.target + +[Service] +User=influxdb +Group=influxdb +LimitNOFILE=65536 +ExecStart=/opt/influxdb-relay/bin/influxdb-relay -config /opt/influxdb-relay/relay.toml +KillMode=control-group +Restart=on-failure + +[Install] +WantedBy=multi-user.target +Alias=influxdb-relay.service diff --git a/cluster_metrics/templates/relay.toml.j2 b/cluster_metrics/templates/relay.toml.j2 new file mode 100644 index 00000000..611b68e1 --- /dev/null +++ b/cluster_metrics/templates/relay.toml.j2 @@ -0,0 +1,10 @@ +[[http]] +name = "example-http" +bind-addr = '0.0.0.0:{{ influxdb_relay_port }}' +output = [ +{# Outputs are numbered with loop.index: a "set" inside a Jinja for loop #} +{# is scoped to one iteration, so a hand-rolled counter never advances. #} +{%for host_name in groups['cluster-metrics'] %} + { name="local{{ loop.index }}", location = "http://{{ hostvars[host_name]['ansible_host'] }}:{{ influxdb_port }}/write"}, +{%endfor%} +] diff --git a/cluster_metrics/templates/telegraf.conf.j2 b/cluster_metrics/templates/telegraf.conf.j2 index ce06cf97..ec056c26 100644 --- a/cluster_metrics/templates/telegraf.conf.j2 +++ b/cluster_metrics/templates/telegraf.conf.j2 @@ -23,7 +23,7 @@ omit_hostname = false [[outputs.influxdb]] - urls = ["http://{{ hostvars[groups['cluster-metrics'][0]]['ansible_host'] }}:{{ influxdb_port }}"] + urls = ["http://{{ internal_lb_vip_address }}:{{ influxdb_port }}"] database = "{{ influxdb_db_name }}" precision = "s" write_consistency = "any" diff --git a/cluster_metrics/vars.yml b/cluster_metrics/vars.yml index a9a1604c..8ee99fb3 100644 --- a/cluster_metrics/vars.yml +++ b/cluster_metrics/vars.yml @@ -35,3 +35,6 @@ influxdb_db_metric_password: SuperDuperSecrete # Kapacitor Vars kapacitor_port: 9092 + +# Influxdb Relay vars +influxdb_relay_port: 9096