Influx HA as an external storage for Prometheus
Change-Id: I01bdfabb4189bcf35f0872350b78feba0f762eef
@ -12,3 +12,4 @@ Methodologies
|
||||
tools
|
||||
hyper-scale
|
||||
monitoring/index
|
||||
monitoring/influxha
|
||||
|
BIN
doc/source/methodologies/monitoring/images/db/1_heap_usage.png
Normal file
After Width: | Height: | Size: 42 KiB |
BIN
doc/source/methodologies/monitoring/images/db/1_http_errors.png
Normal file
After Width: | Height: | Size: 21 KiB |
BIN
doc/source/methodologies/monitoring/images/db/1_point_intake.png
Normal file
After Width: | Height: | Size: 32 KiB |
BIN
doc/source/methodologies/monitoring/images/db/2_heap_usage.png
Normal file
After Width: | Height: | Size: 67 KiB |
BIN
doc/source/methodologies/monitoring/images/db/2_http_errors.png
Normal file
After Width: | Height: | Size: 20 KiB |
BIN
doc/source/methodologies/monitoring/images/db/2_point_intake.png
Normal file
After Width: | Height: | Size: 32 KiB |
BIN
doc/source/methodologies/monitoring/images/influxdb-relay.png
Normal file
After Width: | Height: | Size: 326 KiB |
BIN
doc/source/methodologies/monitoring/images/sys/lb/cpu_idle.png
Normal file
After Width: | Height: | Size: 17 KiB |
BIN
doc/source/methodologies/monitoring/images/sys/lb/cpu_system.png
Normal file
After Width: | Height: | Size: 92 KiB |
BIN
doc/source/methodologies/monitoring/images/sys/lb/cpu_user.png
Normal file
After Width: | Height: | Size: 98 KiB |
BIN
doc/source/methodologies/monitoring/images/sys/lb/disk_rate.png
Normal file
After Width: | Height: | Size: 61 KiB |
BIN
doc/source/methodologies/monitoring/images/sys/lb/la.png
Normal file
After Width: | Height: | Size: 58 KiB |
BIN
doc/source/methodologies/monitoring/images/sys/lb/mem_free.png
Normal file
After Width: | Height: | Size: 18 KiB |
BIN
doc/source/methodologies/monitoring/images/sys/lb/mem_used.png
Normal file
After Width: | Height: | Size: 23 KiB |
After Width: | Height: | Size: 34 KiB |
After Width: | Height: | Size: 31 KiB |
After Width: | Height: | Size: 74 KiB |
After Width: | Height: | Size: 68 KiB |
After Width: | Height: | Size: 62 KiB |
BIN
doc/source/methodologies/monitoring/images/sys/node1/la.png
Normal file
After Width: | Height: | Size: 59 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 33 KiB |
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 40 KiB |
After Width: | Height: | Size: 144 KiB |
After Width: | Height: | Size: 78 KiB |
After Width: | Height: | Size: 67 KiB |
BIN
doc/source/methodologies/monitoring/images/sys/node2/la.png
Normal file
After Width: | Height: | Size: 65 KiB |
After Width: | Height: | Size: 21 KiB |
After Width: | Height: | Size: 35 KiB |
After Width: | Height: | Size: 90 KiB |
@ -0,0 +1,42 @@
|
||||
|
||||
# Upper limit for the size of a client request body.
client_max_body_size 20M;
|
||||
|
||||
# Read path: the HTTP port (8086) of both InfluxDB nodes.
# influx1_ip / influx2_ip are placeholders that the deployment script
# rewrites with sed after copying this file to the balancer.
upstream influxdb {
|
||||
server influx1_ip:8086;
|
||||
server influx2_ip:8086;
|
||||
}
|
||||
# Write path: port 9096 on both nodes, the address the relay configs bind
# their HTTP listener to.
upstream relay {
|
||||
server influx1_ip:9096;
|
||||
server influx2_ip:9096;
|
||||
}
|
||||
|
||||
# Single public entry point of the HA setup.
server {
|
||||
listen 7076;
|
||||
# /query: every method except GET is denied; requests go to the influxdb pool.
location /query {
|
||||
limit_except GET {
|
||||
deny all;
|
||||
}
|
||||
proxy_pass http://influxdb;
|
||||
}
|
||||
# /write: every method except POST is denied; requests go to the relay pool.
location /write {
|
||||
limit_except POST {
|
||||
deny all;
|
||||
}
|
||||
proxy_pass http://relay;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# stream {
|
||||
# upstream test {
|
||||
# server server1:8003;
|
||||
# server server2:8003;
|
||||
# }
|
||||
#
|
||||
# server {
|
||||
# listen 7003 udp;
|
||||
# proxy_pass test;
|
||||
# proxy_timeout 1s;
|
||||
# proxy_responses 1;
|
||||
# }
|
||||
# }
|
129
doc/source/methodologies/monitoring/influx_ha/conf/influxdb.conf
Normal file
@ -0,0 +1,129 @@
|
||||
reporting-disabled = false
|
||||
bind-address = ":8088"
|
||||
|
||||
[meta]
|
||||
dir = "/var/lib/influxdb/meta"
|
||||
retention-autocreate = true
|
||||
logging-enabled = true
|
||||
|
||||
[data]
|
||||
dir = "/var/lib/influxdb/data"
|
||||
wal-dir = "/var/lib/influxdb/wal"
|
||||
query-log-enabled = true
|
||||
cache-max-memory-size = 1073741824
|
||||
cache-snapshot-memory-size = 26214400
|
||||
cache-snapshot-write-cold-duration = "10m0s"
|
||||
compact-full-write-cold-duration = "4h0m0s"
|
||||
max-series-per-database = 0
|
||||
max-values-per-tag = 100000
|
||||
trace-logging-enabled = false
|
||||
|
||||
[coordinator]
|
||||
write-timeout = "10s"
|
||||
max-concurrent-queries = 0
|
||||
query-timeout = "0s"
|
||||
log-queries-after = "0s"
|
||||
max-select-point = 0
|
||||
max-select-series = 0
|
||||
max-select-buckets = 0
|
||||
|
||||
[retention]
|
||||
enabled = true
|
||||
check-interval = "30m0s"
|
||||
|
||||
[shard-precreation]
|
||||
enabled = true
|
||||
check-interval = "10m0s"
|
||||
advance-period = "30m0s"
|
||||
|
||||
[admin]
|
||||
enabled = false
|
||||
bind-address = ":8083"
|
||||
https-enabled = false
|
||||
https-certificate = "/etc/ssl/influxdb.pem"
|
||||
|
||||
[monitor]
|
||||
store-enabled = true
|
||||
store-database = "_internal"
|
||||
store-interval = "10s"
|
||||
|
||||
[subscriber]
|
||||
enabled = true
|
||||
http-timeout = "30s"
|
||||
insecure-skip-verify = false
|
||||
ca-certs = ""
|
||||
write-concurrency = 40
|
||||
write-buffer-size = 1000
|
||||
|
||||
[http]
|
||||
enabled = true
|
||||
bind-address = ":8086"
|
||||
auth-enabled = false
|
||||
log-enabled = true
|
||||
write-tracing = false
|
||||
pprof-enabled = true
|
||||
https-enabled = false
|
||||
https-certificate = "/etc/ssl/influxdb.pem"
|
||||
https-private-key = ""
|
||||
max-row-limit = 10000
|
||||
max-connection-limit = 0
|
||||
shared-secret = ""
|
||||
realm = "InfluxDB"
|
||||
unix-socket-enabled = false
|
||||
bind-socket = "/var/run/influxdb.sock"
|
||||
|
||||
[[graphite]]
|
||||
enabled = false
|
||||
bind-address = ":2003"
|
||||
database = "graphite"
|
||||
retention-policy = ""
|
||||
protocol = "tcp"
|
||||
batch-size = 5000
|
||||
batch-pending = 10
|
||||
batch-timeout = "1s"
|
||||
consistency-level = "one"
|
||||
separator = "."
|
||||
udp-read-buffer = 0
|
||||
|
||||
[[collectd]]
|
||||
enabled = false
|
||||
bind-address = ":25826"
|
||||
database = "collectd"
|
||||
retention-policy = ""
|
||||
batch-size = 5000
|
||||
batch-pending = 10
|
||||
batch-timeout = "10s"
|
||||
read-buffer = 0
|
||||
typesdb = "/usr/share/collectd/types.db"
|
||||
security-level = "none"
|
||||
auth-file = "/etc/collectd/auth_file"
|
||||
|
||||
[[opentsdb]]
|
||||
enabled = false
|
||||
bind-address = ":4242"
|
||||
database = "opentsdb"
|
||||
retention-policy = ""
|
||||
consistency-level = "one"
|
||||
tls-enabled = false
|
||||
certificate = "/etc/ssl/influxdb.pem"
|
||||
batch-size = 1000
|
||||
batch-pending = 5
|
||||
batch-timeout = "1s"
|
||||
log-point-errors = true
|
||||
|
||||
[[udp]]
|
||||
enabled = false
|
||||
bind-address = ":8089"
|
||||
database = "udp"
|
||||
retention-policy = ""
|
||||
batch-size = 5000
|
||||
batch-pending = 10
|
||||
read-buffer = 0
|
||||
batch-timeout = "1s"
|
||||
precision = ""
|
||||
|
||||
[continuous_queries]
|
||||
log-enabled = true
|
||||
enabled = true
|
||||
run-interval = "1s"
|
||||
|
@ -0,0 +1,38 @@
|
||||
# Name of the HTTP server, used for display purposes only
|
||||
[[http]]
|
||||
name = "influx-http"
|
||||
|
||||
# TCP address to bind to, for HTTP server
|
||||
bind-addr = "influx1_ip:9096"
|
||||
|
||||
# Array of InfluxDB instances to use as backends for Relay
|
||||
# name: name of the backend, used for display purposes only.
|
||||
# location: full URL of the /write endpoint of the backend
|
||||
# timeout: Go-parseable time duration. Fail writes if incomplete in this time.
|
||||
# skip-tls-verification: skip verification for HTTPS location. WARNING: it's insecure. Don't use in production.
|
||||
output = [
|
||||
{ name="local-influx1", location = "http://127.0.0.1:8086/write", timeout="10s" },
|
||||
{ name="remote-influx2", location = "http://influx2_ip:8086/write", timeout="10s" },
|
||||
]
|
||||
|
||||
[[udp]]
|
||||
# Name of the UDP server, used for display purposes only
|
||||
name = "influx-udp"
|
||||
|
||||
# UDP address to bind to
|
||||
bind-addr = "127.0.0.1:9096"
|
||||
|
||||
# Socket buffer size for incoming connections
|
||||
read-buffer = 0 # default
|
||||
|
||||
# Precision to use for timestamps
|
||||
precision = "n" # Can be n, u, ms, s, m, h
|
||||
|
||||
# Array of InfluxDB UDP instances to use as backends for Relay
|
||||
# name: name of the backend, used for display purposes only.
|
||||
# location: host and port of backend.
|
||||
# mtu: maximum output payload size
|
||||
output = [
|
||||
{ name="local-influx1-udp", location="127.0.0.1:8089", mtu=512 },
|
||||
{ name="remote-influx2-udp", location="influx2_ip:8089", mtu=512 },
|
||||
]
|
@ -0,0 +1,38 @@
|
||||
# Name of the HTTP server, used for display purposes only
|
||||
[[http]]
|
||||
name = "influx-http"
|
||||
|
||||
# TCP address to bind to, for HTTP server
|
||||
bind-addr = "influx2_ip:9096"
|
||||
|
||||
# Array of InfluxDB instances to use as backends for Relay
|
||||
# name: name of the backend, used for display purposes only.
|
||||
# location: full URL of the /write endpoint of the backend
|
||||
# timeout: Go-parseable time duration. Fail writes if incomplete in this time.
|
||||
# skip-tls-verification: skip verification for HTTPS location. WARNING: it's insecure. Don't use in production.
|
||||
output = [
|
||||
{ name="local-influx2", location = "http://127.0.0.1:8086/write", timeout="10s" },
|
||||
{ name="remote-influx1", location = "http://influx1_ip:8086/write", timeout="10s" },
|
||||
]
|
||||
|
||||
[[udp]]
|
||||
# Name of the UDP server, used for display purposes only
|
||||
name = "influx-udp"
|
||||
|
||||
# UDP address to bind to
|
||||
bind-addr = "127.0.0.1:9096"
|
||||
|
||||
# Socket buffer size for incoming connections
|
||||
read-buffer = 0 # default
|
||||
|
||||
# Precision to use for timestamps
|
||||
precision = "n" # Can be n, u, ms, s, m, h
|
||||
|
||||
# Array of InfluxDB UDP instances to use as backends for Relay
|
||||
# name: name of the backend, used for display purposes only.
|
||||
# location: host and port of backend.
|
||||
# mtu: maximum output payload size
|
||||
output = [
|
||||
{ name="local-influx2-udp", location="127.0.0.1:8089", mtu=512 },
|
||||
{ name="remote-influx1-udp", location="influx1_ip:8089", mtu=512 },
|
||||
]
|
@ -0,0 +1,56 @@
|
||||
#!/bin/bash -xe
|
||||
|
||||
# Addresses of the two InfluxDB nodes and of the load-balancer node.
# Each one can be overridden from the environment when the script is invoked.
INFLUX1=${INFLUX1:-172.20.9.29}
INFLUX2=${INFLUX2:-172.20.9.19}
BALANCER=${BALANCER:-172.20.9.27}

# Credentials that sshpass uses for every remote command.
# The password may be supplied via the environment, like the node addresses,
# so the script does not have to be edited; the default is the old value.
SSH_PASSWORD=${SSH_PASSWORD:-r00tme}
SSH_USER="root"

# Host-key checking is switched off and nothing is written to known_hosts.
SSH_OPTIONS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
|
||||
|
||||
# Stop immediately when sshpass is not installed.
# Braces (not parentheses) are used so that `exit` terminates the script
# itself; inside a ( ... ) subshell it would only end the subshell, and the
# script would continue whenever it is run without `-e`.
type sshpass || { echo "sshpass is not installed"; exit 1; }
|
||||
|
||||
# Run a command on a remote node over password-authenticated SSH.
#   $1   - address of the target node
#   $2.. - command (and arguments) handed to ssh unchanged
# Reads SSH_PASSWORD, SSH_USER and SSH_OPTIONS from the enclosing script.
ssh_exec() {
    local node=$1
    shift
    # SSH_OPTIONS stays unquoted on purpose: it holds several separate options.
    # Everything else is quoted so that a password or command containing
    # spaces or glob characters is passed as a single argument.
    sshpass -p "${SSH_PASSWORD}" ssh ${SSH_OPTIONS} "${SSH_USER}@${node}" "$@"
}
|
||||
|
||||
# Copy a local file to a remote node over password-authenticated SCP.
#   $1 - address of the target node
#   $2 - local source path
#   $3 - destination path on the node
# Reads SSH_PASSWORD, SSH_USER and SSH_OPTIONS from the enclosing script.
scp_exec() {
    local node=$1
    local src=$2
    local dst=$3
    # SSH_OPTIONS stays unquoted on purpose: it holds several separate options.
    sshpass -p "${SSH_PASSWORD}" scp ${SSH_OPTIONS} "${src}" "${SSH_USER}@${node}:${dst}"
}
|
||||
|
||||
# Install InfluxDB and Influx-Relay on one database node and start both.
#   $1 - address of the node
#   $2 - local relay configuration to upload (conf/relay_1.toml or conf/relay_2.toml)
# The same steps were previously written out twice, once per node.
prepare_influx() {
    local node=$1
    local relay_conf=$2

    # InfluxDB from the InfluxData apt repository, with the shared config.
    ssh_exec "$node" "echo 'deb https://repos.influxdata.com/ubuntu xenial stable' > /etc/apt/sources.list.d/influxdb.list"
    ssh_exec "$node" "apt-get update && apt-get install -y influxdb"
    scp_exec "$node" conf/influxdb.conf /etc/influxdb/influxdb.conf
    ssh_exec "$node" "service influxdb restart"

    # Go toolchain and Influx-Relay fetched with `go get`.
    ssh_exec "$node" "echo 'GOPATH=/root/gocode' >> /etc/environment"
    # mkdir -p: a plain mkdir fails on an existing directory and, under -e,
    # aborts any second run of the script.
    ssh_exec "$node" "apt-get install -y golang-go && mkdir -p /root/gocode"
    ssh_exec "$node" "source /etc/environment && go get -u github.com/influxdata/influxdb-relay"

    # Upload the relay config and replace the influx1_ip / influx2_ip
    # placeholders with the real node addresses.
    scp_exec "$node" "$relay_conf" /root/relay.toml
    ssh_exec "$node" "sed -i -e 's/influx1_ip/${INFLUX1}/g' -e 's/influx2_ip/${INFLUX2}/g' /root/relay.toml"
    ssh_exec "$node" "influxdb-relay -config relay.toml &"
}

# prepare influx1:
prepare_influx "$INFLUX1" conf/relay_1.toml

# prepare influx2:
prepare_influx "$INFLUX2" conf/relay_2.toml
|
||||
|
||||
# prepare balancer:
# Install nginx, upload the load-balancer site and substitute the
# influx1_ip / influx2_ip placeholders with the real node addresses.
lb_site=/etc/nginx/sites-enabled/influx-loadbalancer.conf
lb_sed="sed -i -e 's/influx1_ip/${INFLUX1}/g' -e 's/influx2_ip/${INFLUX2}/g' ${lb_site}"

ssh_exec $BALANCER "apt-get install -y nginx"
scp_exec $BALANCER conf/influx-loadbalancer.conf ${lb_site}
ssh_exec $BALANCER "${lb_sed}"
ssh_exec $BALANCER "service nginx reload"

# Tell the operator where the balanced endpoint listens.
echo "INFLUX HA SERVICE IS AVAILABLE AT http://${BALANCER}:7076"
|
||||
|
281
doc/source/methodologies/monitoring/influxha.rst
Normal file
@ -0,0 +1,281 @@
|
||||
|
||||
.. _HA_InfluxDB_as_an_external_strorage_for_Prometheus:
|
||||
|
||||
**************************************************
|
||||
HA InfluxDB as an external storage for Prometheus
|
||||
**************************************************
|
||||
|
||||
:Abstract:
|
||||
|
||||
This document describes a way to provide highly available InfluxDB storage
|
||||
based on Influx-relay and Nginx.
|
||||
|
||||
|
||||
Prometheus storage issue and solutions
|
||||
======================================
|
||||
|
||||
`Prometheus`_ native storage was designed only for short period data and needs
|
||||
to be shortened in order to stay responsive and operational. For us to store
|
||||
persistent data for longer periods the 'external storage' mechanism was
|
||||
used. In this mode Prometheus duplicates its own data to external storage,
|
||||
only external writes are available. Several options were possible but we
|
||||
chose a highly available InfluxDB solution. InfluxDB is a reliable and robust
|
||||
storage with many features. Also, it's perfect in supplying monitoring data to
|
||||
`Grafana`_ dashboard.
|
||||
|
||||
.. table:: Monitoring software version
|
||||
|
||||
+-------------+--------------------+
|
||||
|Software |Version |
|
||||
+-------------+--------------------+
|
||||
|Prometheus | 1.4.0 |
|
||||
+-------------+--------------------+
|
||||
|Grafana | 4.0.1 |
|
||||
+-------------+--------------------+
|
||||
|
||||
InfluxDB installation overview
|
||||
==============================
|
||||
|
||||
During our deployment we were following `Influx-Relay Offical Documentation`_.
|
||||
The installation comprises three nodes:
|
||||
- first and second are InfluxDB instances with running Influx-relay daemon
|
||||
- third is a load-balancer node with running Nginx
|
||||
|
||||
The Influx-Relay working scheme taken from the InfluxDB web site describes a 5-node
|
||||
installation (four InfluxDB instances + Loadbalancer node), but three nodes
|
||||
were sufficient for our working load.
|
||||
|
||||
.. image:: images/influxdb-relay.png
|
||||
:alt: HA InfluxDB scheme
|
||||
:scale: 80
|
||||
|
||||
|
||||
Ubuntu Xenial was used on each node. See the software version table below:
|
||||
|
||||
.. table::
|
||||
|
||||
+--------------------+-----------------------------------------+
|
||||
|Software |Version |
|
||||
+--------------------+-----------------------------------------+
|
||||
|Ubuntu |Ubuntu 16.04.1 LTS |
|
||||
+--------------------+-----------------------------------------+
|
||||
|Kernel |4.4.0-47-generic |
|
||||
+--------------------+-----------------------------------------+
|
||||
|`InfluxDB`_ |1.2.0-17 |
|
||||
+--------------------+-----------------------------------------+
|
||||
|`Influx-Relay`_ |adaa2ea7bf97af592884fcfa57df1a2a77adb571 |
|
||||
+--------------------+-----------------------------------------+
|
||||
|`Nginx`_ |nginx/1.10.0 (Ubuntu) |
|
||||
+--------------------+-----------------------------------------+
|
||||
|
||||
To deploy the InfluxDB HA installation, the `InfluxdbHA deployment script`_
|
||||
was used.
|
||||
|
||||
InfluxDB HA mechanism realization
|
||||
=================================
|
||||
|
||||
Native HA mechanisms were moved away from InfluxDB (since version 1.x.x) and
|
||||
are now provided only as an enterprise option. Open-source third-party software
|
||||
Influx-relay is considered as one of the available substitutions for previous
|
||||
native replication mechanisms.
|
||||
|
||||
Influx-Relay
|
||||
------------
|
||||
|
||||
Influx-relay is written in Golang and its operation boils down to
|
||||
proxying incoming write queries to multiple destinations (InfluxDB
|
||||
instances).
|
||||
Influx-Relay runs on every InfluxDB node, thus any write requests coming
|
||||
to any InfluxDB instance are mirrored across all other nodes.
|
||||
Influx-Relay is light and robust and it doesn't consume much of the system
|
||||
resources.
|
||||
See Influx-Relay configuration in the `Influx-Relay configuration`_ section.
|
||||
|
||||
Nginx
|
||||
-----
|
||||
|
||||
Nginx daemon runs on a separate node and acts as load-balancer (upstream proxy mode).
|
||||
It redirects '/query' queries directly to each InfluxDB instance and '/write' queries
|
||||
to each Influx-relay daemon. The round-robin algorithm is used for both queries and writes.
|
||||
This way, incoming reads and writes are balanced equally across the whole InfluxDB cluster.
|
||||
See Nginx configuration in the `Nginx configuration`_ section.
|
||||
|
||||
InfluxDB Monitoring
|
||||
===================
|
||||
|
||||
InfluxDB HA installation was tested with Prometheus that polls a 200-node environment
|
||||
and generates huge data flows towards its external storage. In order to test InfluxDB
|
||||
performance '_internal' database counters were used and visualized with the help of
|
||||
Grafana. We figured out that a 3-node InfluxDB HA installation can handle the 200-node
|
||||
Prometheus load and total performance doesn't degrade.
|
||||
Grafana dashboards for InfluxDB monitoring can be found in the `Grafana InfluxDB dashboard`_
|
||||
section.
|
||||
|
||||
InfluxDB HA performance data
|
||||
============================
|
||||
|
||||
InfluxDB database performance data
|
||||
----------------------------------
|
||||
|
||||
These graphs were built with Grafana based on the metrics that are natively stored
|
||||
inside the InfluxDB '_internal' database. To create the visualization we used
|
||||
`Grafana InfluxDB dashboard`_.
|
||||
|
||||
+---------------------------------------+----------------------------------------+
|
||||
|InfluxDB node1 database performance |InfluxDB node2 database performance |
|
||||
| | |
|
||||
+=======================================+========================================+
|
||||
|.. image:: images/db/1_heap_usage.png |.. image:: images/db/2_heap_usage.png |
|
||||
| :alt: heap_usage(gb) | :alt: heap_usage(gb) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+---------------------------------------+----------------------------------------+
|
||||
|.. image:: images/db/1_point_intake.png|.. image:: images/db/2_point_intake.png |
|
||||
| :alt: point_intake(ops/sec) | :alt: point_intake(ops/sec) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+---------------------------------------+----------------------------------------+
|
||||
|.. image:: images/db/1_http_errors.png |.. image:: images/db/2_http_errors.png |
|
||||
| :alt: http_errors(ops/sec) | :alt: http_errors(ops/sec) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+---------------------------------------+----------------------------------------+
|
||||
|
||||
OS performance data
|
||||
-------------------
|
||||
|
||||
Operating system performance metrics were gathered using Telegraf agent
|
||||
that was started on each cluster node with appropriate plugins. See the `Telegraf system`_
|
||||
configuration file from `Containerized Openstack Monitoring`_ documentation.
|
||||
|
||||
|
||||
InfluxDB node1 OS performance
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
+--------------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/node1/la.png |.. image:: images/sys/node1/mem_free.png |
|
||||
| :alt: load_average(%) | :alt: mem_free(GB) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+--------------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/node1/cpu_user.png |.. image:: images/sys/node1/mem_used.png |
|
||||
| :alt: cpu_user(%) | :alt: mem_used(GB) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+--------------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/node1/cpu_system.png |.. image:: images/sys/node1/disk_rate.png |
|
||||
| :alt: cpu_system(%) | :alt: disk_rate(MBps) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+--------------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/node1/cpu_idle.png |.. image:: images/sys/node1/network_load.png|
|
||||
| :alt: cpu_idle(%) | :alt: network_load(Mbps) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+--------------------------------------------+--------------------------------------------+
|
||||
|
||||
InfluxDB node2 OS performance
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
+--------------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/node2/la.png |.. image:: images/sys/node2/mem_free.png |
|
||||
| :alt: load_average(%) | :alt: mem_free(GB) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+--------------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/node2/cpu_user.png |.. image:: images/sys/node2/mem_used.png |
|
||||
| :alt: cpu_user(%) | :alt: mem_used(GB) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+--------------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/node2/cpu_system.png |.. image:: images/sys/node2/disk_rate.png |
|
||||
| :alt: cpu_system(%) | :alt: disk_rate(MBps) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+--------------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/node2/cpu_idle.png |.. image:: images/sys/node2/network_load.png|
|
||||
| :alt: cpu_idle(%) | :alt: network_load(Mbps) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+--------------------------------------------+--------------------------------------------+
|
||||
|
||||
Load-balancer node OS performance
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
+---------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/lb/la.png |.. image:: images/sys/lb/mem_free.png |
|
||||
| :alt: load_average(%) | :alt: mem_free(GB) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+---------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/lb/cpu_user.png |.. image:: images/sys/lb/mem_used.png |
|
||||
| :alt: cpu_user(%) | :alt: mem_used(GB) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+---------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/lb/cpu_system.png|.. image:: images/sys/lb/disk_rate.png |
|
||||
| :alt: cpu_system(%) | :alt: disk_rate(MBps) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+---------------------------------------+--------------------------------------------+
|
||||
|.. image:: images/sys/lb/cpu_idle.png |.. image:: images/sys/lb/network_load.png |
|
||||
| :alt: cpu_idle(%) | :alt: network_load(Mbps) |
|
||||
| :scale: 32 | :scale: 32 |
|
||||
+---------------------------------------+--------------------------------------------+
|
||||
|
||||
How to deploy
|
||||
=============
|
||||
|
||||
- Prepare three Ubuntu Xenial nodes with working network and Internet access
|
||||
- Temporarily allow ssh access for root user
|
||||
- Untar influx_ha_deployment.tar
|
||||
- Set appropriate SSH_PASSWORD variable in the influx_ha/deploy_influx_ha.sh
|
||||
- Start deployment script preceding it with node ip variables, e.g.
|
||||
|
||||
.. code:: bash
|
||||
|
||||
INFLUX1=172.20.9.29 INFLUX2=172.20.9.19 BALANCER=172.20.9.27 bash -xe influx_ha/deploy_influx_ha.sh
|
||||
|
||||
Applications
|
||||
============
|
||||
|
||||
InfluxdbHA deployment script
|
||||
----------------------------
|
||||
|
||||
.. literalinclude:: influx_ha/deploy_influx_ha.sh
|
||||
:language: bash
|
||||
|
||||
Configuration tarball (for deployment script)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
:download:`influx_ha_deployment.tar <influx_ha/influx_ha_deployment.tar>`
|
||||
|
||||
InfluxDB configuration
|
||||
----------------------
|
||||
|
||||
.. literalinclude:: influx_ha/conf/influxdb.conf
|
||||
:language: bash
|
||||
|
||||
Influx-Relay configuration
|
||||
--------------------------
|
||||
|
||||
first instance
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: influx_ha/conf/relay_1.toml
|
||||
:language: bash
|
||||
|
||||
second instance
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: influx_ha/conf/relay_2.toml
|
||||
:language: bash
|
||||
|
||||
Nginx configuration
|
||||
-------------------
|
||||
|
||||
.. literalinclude:: influx_ha/conf/influx-loadbalancer.conf
|
||||
:language: bash
|
||||
|
||||
Grafana InfluxDB dashboard
|
||||
--------------------------
|
||||
|
||||
:download:`InfluxDB_Dashboard.json <influx_ha/InfluxDB_Dashboard.json>`
|
||||
|
||||
.. references:
|
||||
|
||||
.. _Prometheus: https://prometheus.io/
|
||||
.. _Grafana: http://grafana.org/
|
||||
.. _InfluxDB: https://www.influxdata.com/open-source/#influxdb
|
||||
.. _Influx-Relay Offical Documentation: https://github.com/influxdata/influxdb-relay/blob/master/README.md
|
||||
.. _Influx-Relay: https://github.com/influxdata/influxdb-relay
|
||||
.. _Nginx: https://www.nginx.com/
|
||||
.. _Telegraf system: https://docs.openstack.org/developer/performance-docs/methodologies/monitoring/index.html#telegraf-sys-conf
|
||||
.. _Containerized Openstack Monitoring: https://docs.openstack.org/developer/performance-docs/methodologies/monitoring/index.html
|
||||
|