Containerized Openstack Monitoring Solution
Change-Id: I66ea0711dd0319c1153a13b159dc5be6f7a7016c
This commit is contained in:
parent
f812999925
commit
013d072f2b
@ -7,7 +7,8 @@ Methodologies
|
||||
=======================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:maxdepth: 4
|
||||
|
||||
tools
|
||||
hyper-scale
|
||||
monitoring/index
|
||||
|
15
doc/source/methodologies/monitoring/configs/ccp/ccp.yaml
Normal file
15
doc/source/methodologies/monitoring/configs/ccp/ccp.yaml
Normal file
@ -0,0 +1,15 @@
|
||||
builder:
|
||||
push: true
|
||||
no_cache: false
|
||||
registry:
|
||||
address: "172.20.8.35:5000/env-1"
|
||||
repositories:
|
||||
skip_empty: True
|
||||
kubernetes:
|
||||
server: http://172.20.9.234:8080
|
||||
---
|
||||
!include
|
||||
- versions.yaml
|
||||
- topology.yaml
|
||||
- configs.yaml
|
||||
- repos.yaml
|
38
doc/source/methodologies/monitoring/configs/ccp/configs.yaml
Normal file
38
doc/source/methodologies/monitoring/configs/ccp/configs.yaml
Normal file
@ -0,0 +1,38 @@
|
||||
configs:
|
||||
private_interface: p1p1.602
|
||||
public_interface: p1p1.602
|
||||
ingress:
|
||||
enabled: true
|
||||
glance:
|
||||
bootstrap:
|
||||
enable: true
|
||||
# nova:
|
||||
# allocation_ratio:
|
||||
# cpu: 16.0
|
||||
neutron:
|
||||
physnets:
|
||||
- name: "physnet1"
|
||||
bridge_name: "br-ex"
|
||||
interface: "p1p1.649"
|
||||
flat: true
|
||||
vlan_range: false
|
||||
bootstrap:
|
||||
internal:
|
||||
enable: true
|
||||
external:
|
||||
enable: true
|
||||
net_name: ext-net
|
||||
subnet_name: ext-subnet
|
||||
physnet: physnet1
|
||||
network: 10.144.0.0/12
|
||||
gateway: 10.144.0.1
|
||||
nameserver: 10.144.0.1
|
||||
pool:
|
||||
start: 10.144.1.0
|
||||
end: 10.159.255.250
|
||||
keystone:
|
||||
debug: true
|
||||
heat:
|
||||
debug: true
|
||||
memcached:
|
||||
ram: 30720
|
78
doc/source/methodologies/monitoring/configs/ccp/deploy-ccp.sh
Executable file
78
doc/source/methodologies/monitoring/configs/ccp/deploy-ccp.sh
Executable file
@ -0,0 +1,78 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
if [ -z "$1" ]; then
|
||||
echo "Please set number of env as argument"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
DEPLOY_TIMEOUT=1200
|
||||
export SSH_USER="root"
|
||||
export SSH_PASS="r00tme"
|
||||
cd $(dirname $(realpath $0))
|
||||
|
||||
NODE1="172.20.8.6${1}"
|
||||
|
||||
SSH_OPTS="-q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
|
||||
SSH_CMD="sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${NODE1}"
|
||||
SCP_CMD="sshpass -p ${SSH_PASS} scp ${SSH_OPTS}"
|
||||
|
||||
if [ ! -d ./env-${1} ]; then
|
||||
echo "Yaml files for env-${1} is not found"
|
||||
echo "Please, create and commit deployment/ccp/rackspace/env-${1}/configs with correct yaml files"
|
||||
echo "Main file should be deployment/ccp/rackspace/env-${1}/configs/ccp.yaml"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
$SCP_CMD ./env-${1}/configs/ccp.yaml ${SSH_USER}@${NODE1}:/root/.ccp.yaml
|
||||
for i in $(ls -1 ./env-${1}/configs/ | grep -v ccp.yaml ); do
|
||||
$SCP_CMD ./env-${1}/configs/${i} ${SSH_USER}@${NODE1}:/root/
|
||||
done
|
||||
|
||||
$SSH_CMD "rm -rf /root/fuel-ccp; cd /root; git clone https://git.openstack.org/openstack/fuel-ccp"
|
||||
$SSH_CMD "apt-get -y install python-pip"
|
||||
$SSH_CMD "/usr/bin/pip install --upgrade pip"
|
||||
$SSH_CMD "/usr/bin/pip install /root/fuel-ccp/"
|
||||
|
||||
CCP_STATUS=$($SSH_CMD "/usr/local/bin/ccp status")
|
||||
if [ -n "$CCP_STATUS" ]; then
|
||||
echo "Active deployment was found"
|
||||
echo "$CCP_STATUS"
|
||||
echo "Please execute 'ccp cleanup' and 'rm -rf /var/lib/mysql/*' on the ${NODE1} manually"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
$SSH_CMD "echo '172.20.8.6${1} cloudformation.ccp.external console.ccp.external identity.ccp.external object-store.ccp.external compute.ccp.external orchestration.ccp.external network.ccp.external image.ccp.external volume.ccp.external horizon.ccp.external' >> /etc/hosts"
|
||||
# $SSH_CMD kubectl delete configmaps traefik-conf -n kube-system
|
||||
# $SSH_CMD kubectl delete service traefik -n kube-system
|
||||
# $SSH_CMD kubectl delete secret traefik-cert -n kube-system
|
||||
# $SSH_CMD kubectl delete deployment traefik -n kube-system
|
||||
$SSH_CMD "/root/fuel-ccp/tools/ingress/deploy-ingress-controller.sh -i 172.20.8.6${1}" || echo "Already configured"
|
||||
$SSH_CMD "echo 172.20.8.6${1} \$(ccp domains list -f value) >> /etc/hosts"
|
||||
$SSH_CMD "openssl s_client -status -connect identity.ccp.external:8443 < /dev/null 2>&1 | awk 'BEGIN {pr=0;} /-----BEGIN CERTIFICATE-----/ {pr=1;} {if (pr) print;} /-----END CERTIFICATE-----/ {exit;}' >> /usr/local/lib/python2.7/dist-packages/requests/cacert.pem"
|
||||
$SSH_CMD "openssl s_client -status -connect identity.ccp.external:8443 < /dev/null 2>&1 | awk 'BEGIN {pr=0;} /-----BEGIN CERTIFICATE-----/ {pr=1;} {if (pr) print;} /-----END CERTIFICATE-----/ {exit;}' > /usr/share/ca-certificates/ingress.crt"
|
||||
$SSH_CMD "cp /usr/share/ca-certificates/ingress.crt /usr/local/share/ca-certificates/"
|
||||
$SSH_CMD "update-ca-certificates"
|
||||
if [ $($SSH_CMD "curl -s 'https://identity.ccp.external:8443/' > /dev/null; echo \$?") != 0 ]
|
||||
then
|
||||
echo "keystone is unreachable check https://identity.ccp.external:8443"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
#$SSH_CMD "/root/fuel-ccp/tools/registry/deploy-registry.sh" &&
|
||||
$SSH_CMD "/usr/local/bin/ccp fetch"
|
||||
$SSH_CMD "/usr/local/bin/ccp build"
|
||||
$SSH_CMD "/usr/local/bin/ccp deploy"
|
||||
|
||||
DEPLOY_TIME=0
|
||||
while [ "$($SSH_CMD '/usr/local/bin/ccp status -s -f value' 2>/dev/null)" != "ok" ]
|
||||
do
|
||||
sleep 5
|
||||
DEPLOY_TIME=$((${DEPLOY_TIME} + 5))
|
||||
if [ $DEPLOY_TIME -ge $DEPLOY_TIMEOUT ]; then
|
||||
echo "Deployment timeout"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
$SSH_CMD "/usr/local/bin/ccp status"
|
@ -0,0 +1,7 @@
|
||||
export OS_PROJECT_DOMAIN_NAME=default
|
||||
export OS_USER_DOMAIN_NAME=default
|
||||
export OS_PROJECT_NAME=admin
|
||||
export OS_USERNAME=admin
|
||||
export OS_PASSWORD=password
|
||||
export OS_IDENTITY_API_VERSION=3
|
||||
export OS_AUTH_URL=https://identity.ccp.external:8443/v3
|
44
doc/source/methodologies/monitoring/configs/ccp/repos.yaml
Normal file
44
doc/source/methodologies/monitoring/configs/ccp/repos.yaml
Normal file
@ -0,0 +1,44 @@
|
||||
repositories:
|
||||
repos:
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-ceph
|
||||
name: fuel-ccp-ceph
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-cinder
|
||||
name: fuel-ccp-cinder
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-debian-base
|
||||
name: fuel-ccp-debian-base
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-entrypoint
|
||||
name: fuel-ccp-entrypoint
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-etcd
|
||||
name: fuel-ccp-etcd
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-glance
|
||||
name: fuel-ccp-glance
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-heat
|
||||
name: fuel-ccp-heat
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-horizon
|
||||
name: fuel-ccp-horizon
|
||||
# - git_url: https://git.openstack.org/openstack/fuel-ccp-ironic
|
||||
# name: fuel-ccp-ironic
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-keystone
|
||||
name: fuel-ccp-keystone
|
||||
# - git_url: https://git.openstack.org/openstack/fuel-ccp-mariadb
|
||||
# name: fuel-ccp-mariadb
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-galera
|
||||
name: fuel-ccp-galera
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-memcached
|
||||
name: fuel-ccp-memcached
|
||||
# - git_url: https://git.openstack.org/openstack/fuel-ccp-murano
|
||||
# name: fuel-ccp-murano
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-neutron
|
||||
name: fuel-ccp-neutron
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-nova
|
||||
name: fuel-ccp-nova
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-openstack-base
|
||||
name: fuel-ccp-openstack-base
|
||||
- git_url: https://git.openstack.org/openstack/fuel-ccp-rabbitmq
|
||||
name: fuel-ccp-rabbitmq
|
||||
# - git_url: https://git.openstack.org/openstack/fuel-ccp-sahara
|
||||
# name: fuel-ccp-sahara
|
||||
# - git_url: https://git.openstack.org/openstack/fuel-ccp-searchlight
|
||||
# name: fuel-ccp-searchlight
|
||||
# - git_url: https://git.openstack.org/openstack/fuel-ccp-stacklight
|
||||
# name: fuel-ccp-stacklight
|
@ -0,0 +1,77 @@
|
||||
nodes:
|
||||
# node[1-3]: Kubernetes
|
||||
node([4-6])$: # 4-6
|
||||
roles:
|
||||
- controller
|
||||
- openvswitch
|
||||
node[7-9]$: # 7-9
|
||||
roles:
|
||||
- rabbitmq
|
||||
node10$: # 10
|
||||
roles:
|
||||
- galera
|
||||
node11$: # 11
|
||||
roles:
|
||||
- heat
|
||||
node(1[2-9])$: # 12-19
|
||||
roles:
|
||||
- compute
|
||||
- openvswitch
|
||||
node[2-9][0-9]$: # 20-99
|
||||
roles:
|
||||
- compute
|
||||
- openvswitch
|
||||
node(1[0-9][0-9])$: # 100-199
|
||||
roles:
|
||||
- compute
|
||||
- openvswitch
|
||||
node200$:
|
||||
roles:
|
||||
- backup
|
||||
replicas:
|
||||
glance-api: 1
|
||||
glance-registry: 1
|
||||
keystone: 3
|
||||
nova-api: 3
|
||||
nova-scheduler: 3
|
||||
nova-conductor: 3
|
||||
neutron-server: 3
|
||||
neutron-metadata-agent: 3
|
||||
horizon: 3
|
||||
heat-api: 1
|
||||
heat-api-cfn: 1
|
||||
heat-engine: 1
|
||||
roles:
|
||||
galera:
|
||||
- galera
|
||||
rabbitmq:
|
||||
- rabbitmq
|
||||
controller:
|
||||
- etcd
|
||||
- glance-api
|
||||
- glance-registry
|
||||
- horizon
|
||||
- keystone
|
||||
- memcached
|
||||
- neutron-dhcp-agent
|
||||
- neutron-l3-agent
|
||||
- neutron-metadata-agent
|
||||
- neutron-server
|
||||
- nova-api
|
||||
- nova-conductor
|
||||
- nova-consoleauth
|
||||
- nova-novncproxy
|
||||
- nova-scheduler
|
||||
compute:
|
||||
- nova-compute
|
||||
- nova-libvirt
|
||||
openvswitch:
|
||||
- neutron-openvswitch-agent
|
||||
- openvswitch-db
|
||||
- openvswitch-vswitchd
|
||||
backup:
|
||||
- backup
|
||||
heat:
|
||||
- heat-api
|
||||
- heat-api-cfn
|
||||
- heat-engine
|
@ -0,0 +1,71 @@
|
||||
images:
|
||||
tag: newton
|
||||
# image_specs:
|
||||
# keystone:
|
||||
# tag: newton
|
||||
|
||||
# horizon:
|
||||
# tag: newton
|
||||
|
||||
# nova-upgrade:
|
||||
# tag: newton
|
||||
# nova-api:
|
||||
# tag: newton
|
||||
# nova-conductor:
|
||||
# tag: newton
|
||||
# nova-consoleauth:
|
||||
# tag: newton
|
||||
# nova-novncproxy:
|
||||
# tag: newton
|
||||
# nova-scheduler:
|
||||
# tag: newton
|
||||
# nova-compute:
|
||||
# tag: newton
|
||||
# nova-libvirt:
|
||||
# tag: newton
|
||||
|
||||
# neutron-dhcp-agent:
|
||||
# tag: newton
|
||||
# neutron-l3-agent:
|
||||
# tag: newton
|
||||
# neutron-metadata-agent:
|
||||
# tag: newton
|
||||
# neutron-server:
|
||||
# tag: newton
|
||||
# neutron-openvswitch-agent:
|
||||
# tag: newton
|
||||
|
||||
# glance-api:
|
||||
# tag: newton
|
||||
# glance-registry:
|
||||
# tag: newton
|
||||
# glance-upgrade:
|
||||
# tag: newton
|
||||
sources:
|
||||
openstack/cinder:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/cinder.git
|
||||
openstack/glance:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/glance.git
|
||||
openstack/heat:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/heat.git
|
||||
openstack/horizon:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/horizon.git
|
||||
openstack/keystone:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/keystone.git
|
||||
openstack/neutron:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/neutron.git
|
||||
openstack/nova:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/nova.git
|
||||
openstack/requirements:
|
||||
git_ref: stable/newton
|
||||
git_url: https://git.openstack.org/openstack/requirements.git
|
||||
openstack/sahara-dashboard:
|
||||
git_ref: stable/newton
|
||||
git_url: https://git.openstack.org/openstack/sahara-dashboard.git
|
2086
doc/source/methodologies/monitoring/configs/dashboards/ETCD.json
Normal file
2086
doc/source/methodologies/monitoring/configs/dashboards/ETCD.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,103 @@
|
||||
[
|
||||
{
|
||||
"_id": "Response-Time-Dashboard",
|
||||
"_type": "dashboard",
|
||||
"_source": {
|
||||
"title": "Response Time Dashboard",
|
||||
"hits": 0,
|
||||
"description": "",
|
||||
"panelsJSON": "[{\"id\":\"Env-1-Response-Time\",\"type\":\"visualization\",\"panelIndex\":1,\"size_x\":3,\"size_y\":2,\"col\":1,\"row\":1},{\"id\":\"Env-2-Response-Time\",\"type\":\"visualization\",\"panelIndex\":2,\"size_x\":3,\"size_y\":2,\"col\":4,\"row\":1},{\"id\":\"Env-3-Response-Time\",\"type\":\"visualization\",\"panelIndex\":3,\"size_x\":3,\"size_y\":2,\"col\":7,\"row\":1},{\"id\":\"Env-4-Response-Time\",\"type\":\"visualization\",\"panelIndex\":4,\"size_x\":3,\"size_y\":2,\"col\":1,\"row\":3},{\"id\":\"Env-5-Response-Time\",\"type\":\"visualization\",\"panelIndex\":5,\"size_x\":3,\"size_y\":2,\"col\":4,\"row\":3},{\"id\":\"Env-6-Response-Time\",\"type\":\"visualization\",\"panelIndex\":6,\"size_x\":3,\"size_y\":2,\"col\":7,\"row\":3}]",
|
||||
"optionsJSON": "{\"darkTheme\":true}",
|
||||
"uiStateJSON": "{}",
|
||||
"version": 1,
|
||||
"timeRestore": false,
|
||||
"kibanaSavedObjectMeta": {
|
||||
"searchSourceJSON": "{\"filter\":[{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}}}]}"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"_id": "Env-1-Response-Time",
|
||||
"_type": "visualization",
|
||||
"_source": {
|
||||
"title": "Env-1 Response Time",
|
||||
"visState": "{\"title\":\"New Visualization\",\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":true,\"showCircles\":true,\"smoothLines\":false,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}",
|
||||
"uiStateJSON": "{}",
|
||||
"description": "",
|
||||
"version": 1,
|
||||
"kibanaSavedObjectMeta": {
|
||||
"searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-1\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"_id": "Env-4-Response-Time",
|
||||
"_type": "visualization",
|
||||
"_source": {
|
||||
"title": "Env-4 Response Time",
|
||||
"visState": "{\"title\":\"Env-3 Response Time\",\"type\":\"line\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}",
|
||||
"uiStateJSON": "{}",
|
||||
"description": "",
|
||||
"version": 1,
|
||||
"kibanaSavedObjectMeta": {
|
||||
"searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-4\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"_id": "Env-5-Response-Time",
|
||||
"_type": "visualization",
|
||||
"_source": {
|
||||
"title": "Env-5 Response Time",
|
||||
"visState": "{\"title\":\"Env-4 Response Time\",\"type\":\"line\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}",
|
||||
"uiStateJSON": "{}",
|
||||
"description": "",
|
||||
"version": 1,
|
||||
"kibanaSavedObjectMeta": {
|
||||
"searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-5\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"_id": "Env-6-Response-Time",
|
||||
"_type": "visualization",
|
||||
"_source": {
|
||||
"title": "Env-6 Response Time",
|
||||
"visState": "{\"title\":\"Env-5 Response Time\",\"type\":\"line\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}",
|
||||
"uiStateJSON": "{}",
|
||||
"description": "",
|
||||
"version": 1,
|
||||
"kibanaSavedObjectMeta": {
|
||||
"searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-6\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"_id": "Env-3-Response-Time",
|
||||
"_type": "visualization",
|
||||
"_source": {
|
||||
"title": "Env-3 Response Time",
|
||||
"visState": "{\"aggs\":[{\"id\":\"1\",\"params\":{\"customLabel\":\"Avg Response Time ms\",\"field\":\"ResponseTime\"},\"schema\":\"metric\",\"type\":\"avg\"},{\"id\":\"2\",\"params\":{\"customInterval\":\"2h\",\"extended_bounds\":{},\"field\":\"Timestamp\",\"interval\":\"auto\",\"min_doc_count\":1},\"schema\":\"segment\",\"type\":\"date_histogram\"}],\"listeners\":{},\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"title\":\"Env-2 Response Time\",\"type\":\"line\"}",
|
||||
"uiStateJSON": "{}",
|
||||
"description": "",
|
||||
"version": 1,
|
||||
"kibanaSavedObjectMeta": {
|
||||
"searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-3\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"_id": "Env-2-Response-Time",
|
||||
"_type": "visualization",
|
||||
"_source": {
|
||||
"title": "Env-2 Response Time",
|
||||
"visState": "{\"aggs\":[{\"id\":\"1\",\"params\":{\"customLabel\":\"Avg Response Time ms\",\"field\":\"ResponseTime\"},\"schema\":\"metric\",\"type\":\"avg\"},{\"id\":\"2\",\"params\":{\"customInterval\":\"2h\",\"extended_bounds\":{},\"field\":\"Timestamp\",\"interval\":\"auto\",\"min_doc_count\":1},\"schema\":\"segment\",\"type\":\"date_histogram\"}],\"listeners\":{},\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"title\":\"Env-1 Response Time\",\"type\":\"line\"}",
|
||||
"uiStateJSON": "{}",
|
||||
"description": "",
|
||||
"version": 1,
|
||||
"kibanaSavedObjectMeta": {
|
||||
"searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-2\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
: ${DB_CONNECTION_STRING:?"You need to specify DB_CONNECTION_STRING parameter"}
|
||||
: ${ENV_NAME:?"You need to specify ENV_NAME parameter"}
|
||||
|
||||
: ${MANAGEMENT_INTERFACE:="p1p1.602"}
|
||||
: ${COBBLER_ADDRESS:="172.20.8.34"}
|
||||
: ${CUSTOM_YAML}
|
||||
: ${KARGO_REPO}
|
||||
: ${KARGO_COMMIT}
|
||||
: ${FUEL_CCP_COMMIT}
|
||||
: ${ADMIN_USER}
|
||||
: ${ADMIN_PASSWORD}
|
||||
: ${ADMIN_NODE_CLEANUP}
|
||||
DEPLOY_METHOD="kargo"
|
||||
WORKSPACE="~/kargo_workspace_${ENV_NAME}"
|
||||
SSH_OPTIONS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
|
||||
|
||||
get_env_nodes ()
|
||||
{
|
||||
ENV_NODES_NAMES=$(echo $(psql ${DB_CONNECTION_STRING} -c "select name from servers where environment_id in (select id from environments where name='${ENV_NAME}')" -P format=unaligned -t))
|
||||
if [ -z "${ENV_NODES_NAMES}" ]
|
||||
then
|
||||
echo "No nodes in environment with name ${ENV_NAME}"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
get_env_nodes_ips ()
|
||||
{
|
||||
ENV_NODES_IPS=$(echo $(ssh ${SSH_OPTIONS} root@${COBBLER_ADDRESS} bash -ex << EOF
|
||||
for COBBLER_SYSTEM_NAME in ${ENV_NODES_NAMES}
|
||||
do
|
||||
NODE_IP=\$(cobbler system dumpvars --name=\${COBBLER_SYSTEM_NAME} | grep ^ip_address_${MANAGEMENT_INTERFACE} | awk '{print \$3}')
|
||||
NODE_IPS+=\${NODE_IP}" "
|
||||
done
|
||||
echo \${NODE_IPS}
|
||||
EOF
|
||||
))
|
||||
}
|
||||
|
||||
main ()
|
||||
{
|
||||
get_env_nodes
|
||||
get_env_nodes_ips
|
||||
export ADMIN_IP=$(echo ${ENV_NODES_IPS} | awk '{print $1}')
|
||||
export SLAVE_IPS=$(echo ${ENV_NODES_IPS})
|
||||
|
||||
# for SLAVE_IP in ${SLAVE_IPS}
|
||||
# do
|
||||
# ssh ${SSH_OPTIONS} root@${SLAVE_IP} bash -ex << EOF
|
||||
#echo "deb https://apt.dockerproject.org/repo ubuntu-\$(grep DISTRIB_CODENAME /etc/lsb-release | awk -F"=" '{print \$2}') main" >> /etc/apt/sources.list
|
||||
#apt-get update && apt-get install -y --allow-unauthenticated -o Dpkg::Options::="--force-confdef" docker-engine
|
||||
#EOF
|
||||
# done
|
||||
|
||||
if [ -d "$WORKSPACE" ] ; then
|
||||
rm -rf $WORKSPACE
|
||||
fi
|
||||
mkdir -p $WORKSPACE
|
||||
cd $WORKSPACE
|
||||
|
||||
if [ -d './fuel-ccp-installer' ] ; then
|
||||
rm -rf ./fuel-ccp-installer
|
||||
fi
|
||||
git clone https://review.openstack.org/openstack/fuel-ccp-installer
|
||||
cd ./fuel-ccp-installer
|
||||
|
||||
if [ "$FUEL_CCP_COMMIT" ]; then
|
||||
git fetch git://git.openstack.org/openstack/fuel-ccp-installer $FUEL_CCP_COMMIT && git checkout FETCH_HEAD
|
||||
fi
|
||||
|
||||
echo "Running on $NODE_NAME: $ENV_NAME"
|
||||
|
||||
bash -xe "./utils/jenkins/run_k8s_deploy_test.sh"
|
||||
}
|
||||
main
|
@ -0,0 +1,46 @@
|
||||
---
|
||||
- hosts: main-kuber
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: Fetch heka package
|
||||
get_url:
|
||||
url: "{{ heka_package_url }}"
|
||||
dest: /tmp/heka_amd64.deb
|
||||
mode: 0664
|
||||
force: yes
|
||||
- name: Download heka package locally
|
||||
fetch:
|
||||
src: /tmp/heka_amd64.deb
|
||||
dest: ./heka_amd64.deb
|
||||
fail_on_missing: yes
|
||||
flat: yes
|
||||
|
||||
- hosts: cluster-nodes
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: Propagate heka package across cluster nodes
|
||||
copy:
|
||||
src: ./heka_amd64.deb
|
||||
dest: /tmp/heka_amd64.deb
|
||||
|
||||
- hosts: all-cluster-nodes
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: Install heka package
|
||||
apt: deb=/tmp/heka_amd64.deb
|
||||
- name: Adding heka user to docker group
|
||||
user: name='heka' groups=docker append=yes
|
||||
- name: Copy heka conf
|
||||
template: src=heka/00-hekad.toml.j2 dest=/etc/heka/conf.d/00-hekad.toml
|
||||
notify: restart heka
|
||||
- name: Copy heka lua scripts
|
||||
template: src=heka/kubeapi_to_int.lua.j2 dest=/usr/share/heka/lua_filters/kubeapi_to_int.lua
|
||||
register: heka_lua
|
||||
notify: restart heka
|
||||
- name: ensure heka is running
|
||||
systemd: state=started name=heka enabled=yes
|
||||
|
||||
handlers:
|
||||
- name: restart heka
|
||||
systemd: state=restarted name=heka
|
||||
|
@ -0,0 +1,71 @@
|
||||
#!/bin/bash -xe
|
||||
|
||||
HOSTNAME=`hostname`
|
||||
ELASTICSEARCH_NODE=${ELASTICSEARCH_NODE:-172.20.9.3}
|
||||
|
||||
# install java
|
||||
sudo add-apt-repository -y ppa:webupd8team/java
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install oracle-java8-installer
|
||||
|
||||
# install elastic by adding extra repository
|
||||
wget -qO - https://packages.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add -
|
||||
echo "deb http://packages.elastic.co/elasticsearch/2.x/debian stable main" | sudo tee -a /etc/apt/sources.list.d/elasticsearch-2.x.list
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install elasticsearch
|
||||
|
||||
# edit configuration:
|
||||
sed -i -E -e 's/^.*cluster.name: .*$/ cluster.name: elasticsearch_k8s/g' /etc/elasticsearch/elasticsearch.yml
|
||||
sed -i -E -e "s/^.*node.name: .*$/ cluster.name: ${HOSTNAME}/g" /etc/elasticsearch/elasticsearch.yml
|
||||
sed -i -E -e "s/^.*network.host: .*$/ network.host: ${ELASTICSEARCH_NODE}/g" /etc/elasticsearch/elasticsearch.yml
|
||||
|
||||
# increase memory limits:
|
||||
sed -i -E -e "s/^.*ES_HEAP_SIZE=.*$/ES_HEAP_SIZE=10g/g" /etc/default/elasticsearch
|
||||
|
||||
# start service:
|
||||
sudo systemctl restart elasticsearch
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable elasticsearch
|
||||
|
||||
# install kibana from extra repository:
|
||||
echo "deb http://packages.elastic.co/kibana/4.5/debian stable main" | sudo tee -a /etc/apt/sources.list
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install kibana
|
||||
sed -i -E -e "s/^.*elasticsearch.url:.*$/ elasticsearch.url: \"http://${ELASTICSEARCH_NODE}:9200\"/g" /opt/kibana/config/kibana.yml
|
||||
|
||||
# enable kibana service:
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable kibana
|
||||
sudo systemctl start kibana
|
||||
|
||||
# install nginx:
|
||||
sudo apt-get -y install nginx
|
||||
|
||||
# set kibana admin:password (admin:admin)
|
||||
echo "admin:`openssl passwd admin`" | sudo tee -a /etc/nginx/htpasswd.users
|
||||
|
||||
# prepare nginx config:
|
||||
cat << EOF >> /etc/nginx/sites-available/default
|
||||
server {
|
||||
listen 80;
|
||||
|
||||
server_name ${HOSTNAME};
|
||||
|
||||
auth_basic "Restricted Access";
|
||||
auth_basic_user_file /etc/nginx/htpasswd.users;
|
||||
|
||||
location / {
|
||||
proxy_pass http://localhost:5601;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade \$http_upgrade;
|
||||
proxy_set_header Connection 'upgrade';
|
||||
proxy_set_header Host \$host;
|
||||
proxy_cache_bypass \$http_upgrade;
|
||||
}
|
||||
}
|
||||
EOF
|
||||
|
||||
# check and start nginx service:
|
||||
sudo nginx -t
|
||||
sudo systemctl restart nginx
|
||||
|
@ -0,0 +1,60 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
export ANSIBLE_HOST_KEY_CHECKING=False
|
||||
export SSH_USER="root"
|
||||
export SSH_PASS="r00tme"
|
||||
cd $(dirname $(realpath $0))
|
||||
|
||||
ENV=${1}
|
||||
if [ -z "${ENV}" ]; then
|
||||
echo "Please provide env number $(basename $0) [1|2|3|4|5|6]"
|
||||
exit 1
|
||||
fi
|
||||
# elastic for k8s at rackspace as default
|
||||
ELASTICSEARCH_NODE=${ELASTICSEARCH_NODE:-172.20.9.3}
|
||||
# heka 0.10.0 as default
|
||||
HEKA_PACKAGE_URL=${HEKA_PACKAGE_URL:-https://github.com/mozilla-services/heka/releases/download/v0.10.0/heka_0.10.0_amd64.deb}
|
||||
KUBE_MAIN_NODE="172.20.8.6${ENV}"
|
||||
SSH_OPTS="-q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
|
||||
|
||||
echo "Get clusters nodes ..."
|
||||
NODES_TMP=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} 'kubectl get nodes -o jsonpath='"'"'{.items[*].status.addresses[?(@.type=="InternalIP")].address}'"'"'')
|
||||
ALL_IP_ON_KUBER_NODE=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} ip addr | grep 172.20 | awk '{print $2}' | awk -F'/' '{print $1}')
|
||||
GREP_STRING_TMP=""
|
||||
for i in $ALL_IP_ON_KUBER_NODE; do
|
||||
GREP_STRING_TMP="${GREP_STRING_TMP}${i}|"
|
||||
done
|
||||
GREP_STRING=${GREP_STRING_TMP:0:-1}
|
||||
SSH_AUTH="ansible_ssh_user=${SSH_USER} ansible_ssh_pass=${SSH_PASS}"
|
||||
echo "[main-kuber]" > cluster-hosts
|
||||
echo "${KUBE_MAIN_NODE} ${SSH_AUTH}" >> cluster-hosts
|
||||
echo "[cluster-nodes]" >> cluster-hosts
|
||||
set +e
|
||||
# Remove IP of kuber node
|
||||
for i in ${NODES_TMP} ; do
|
||||
TMP_VAR=$(echo $i | grep -vE "(${GREP_STRING})")
|
||||
NODES="${NODES} ${TMP_VAR}"
|
||||
done
|
||||
set -e
|
||||
for i in ${NODES} ; do
|
||||
if [ "$i" != "${KUBE_MAIN_NODE}" ]; then
|
||||
echo "${i} ${SSH_AUTH}" >> cluster-hosts
|
||||
fi
|
||||
done
|
||||
echo "[all-cluster-nodes:children]" >> cluster-hosts
|
||||
echo "main-kuber" >> cluster-hosts
|
||||
echo "cluster-nodes" >> cluster-hosts
|
||||
|
||||
# Calculate parallel ansible execution
|
||||
NODES_IPS=( $NODES )
|
||||
if [[ "${#NODES_IPS[@]}" -lt 50 ]] && [[ "${#NODES_IPS[@]}" -gt 5 ]]; then
|
||||
ANSIBLE_FORKS="${#NODES_IPS[@]}"
|
||||
elif [[ "${#NODES_IPS[@]}" -ge 50 ]]; then
|
||||
ANSIBLE_FORKS=50
|
||||
else
|
||||
ANSIBLE_FORKS=10
|
||||
fi
|
||||
|
||||
echo "Starting ansible ..."
|
||||
ansible-playbook -v --ssh-extra-args "-o\ StrictHostKeyChecking=no" -f ${ANSIBLE_FORKS} -i ./cluster-hosts -e env_num=${ENV} -e elasticsearch_node="${ELASTICSEARCH_NODE}" -e heka_package_url=${HEKA_PACKAGE_URL} ./deploy-heka.yaml --diff
|
||||
|
@ -0,0 +1,69 @@
|
||||
# vim: set syntax=yaml
|
||||
|
||||
[hekad]
|
||||
maxprocs = 2
|
||||
|
||||
[DockerLogInput]
|
||||
endpoint = "unix:///var/run/docker.sock"
|
||||
#decoder = "KubeAPI_decoder"
|
||||
decoder = "MultiDecoder"
|
||||
|
||||
[MultiDecoder]
|
||||
type = "MultiDecoder"
|
||||
subs = ["KubeAPI_decoder", "EnvironmentScribbler"]
|
||||
cascade_strategy = "all"
|
||||
#log_sub_errors = true
|
||||
{% raw %}
|
||||
[KubeAPI_decoder]
|
||||
type = "PayloadRegexDecoder"
|
||||
match_regex = '\S+ \S+ .+ (?P<Code>\S+)\] (?P<Method>[A-Z]+) (?P<Url>\S+)\: \((?P<ResponseTime>\S+)ms\) (?P<StatusCode>\d+) \[\[(?P<Agent>.+)\] (?P<RemoteIP>\S+)\:(?P<RemotePort>\d+)\]'
|
||||
[KubeAPI_decoder.message_fields]
|
||||
Type = "KubeAPIlog"
|
||||
Logger = "Docker"
|
||||
Code = "%Code%"
|
||||
Method = "%Method%"
|
||||
Url|uri = "%Url%"
|
||||
ResponseTime = "%ResponseTime%"
|
||||
StatusCode = "%StatusCode%"
|
||||
Agent = "%Agent%"
|
||||
RemoteIP|ipv4 = "%RemoteIP%"
|
||||
RemotePort = "%RemotePort%"
|
||||
{% endraw %}
|
||||
[EnvironmentScribbler]
|
||||
type = "ScribbleDecoder"
|
||||
[EnvironmentScribbler.message_fields]
|
||||
Environment = "env-{{ env_num }}"
|
||||
|
||||
|
||||
[KubeAPI_to_int]
|
||||
type = "SandboxFilter"
|
||||
filename = "lua_filters/kubeapi_to_int.lua"
|
||||
message_matcher = "Type == 'KubeAPIlog'"
|
||||
|
||||
[ESJsonEncoder]
|
||||
index = "env-{{ env_num }}-{{ '%{Type}-%{%Y.%m.%d}' }}"
|
||||
#es_index_from_timestamp = true
|
||||
type_name = "%{Type}"
|
||||
|
||||
[ElasticSearchOutput]
|
||||
message_matcher = "Type == 'heka.sandbox.KubeAPIlog' || Type == 'DockerLog'"
|
||||
server = "http://{{ elasticsearch_node }}:9200"
|
||||
flush_interval = 5000
|
||||
flush_count = 10
|
||||
encoder = "ESJsonEncoder"
|
||||
|
||||
[PayloadEncoder]
|
||||
append_newlines = false
|
||||
#
|
||||
[LogOutput]
|
||||
<<<<<<< HEAD
|
||||
#message_matcher = "Type == 'KubeAPIlog'"
|
||||
message_matcher = "TRUE"
|
||||
#encoder = "ESJsonEncoder"
|
||||
encoder = "PayloadEncoder"
|
||||
=======
|
||||
message_matcher = "Type == 'heka.sandbox.KubeAPIlog' || Type == 'DockerLog'"
|
||||
#message_matcher = "TRUE"
|
||||
encoder = "ESJsonEncoder"
|
||||
#encoder = "PayloadEncoder"
|
||||
>>>>>>> b0caa3ceb82399dd16465645eebdebf90242662c
|
@ -0,0 +1,30 @@
|
||||
{% raw %}
|
||||
-- Invert Response time and some more fields to integer type
|
||||
|
||||
local fields = {["ResponseTime"] = 0, ["RemotePort"] = 0, ["StatusCode"] = 0}
|
||||
local msg = {
|
||||
Type = "KubeAPIlog",
|
||||
Severity = 6,
|
||||
Fields = fields
|
||||
}
|
||||
|
||||
function process_message ()
|
||||
fields["ResponseTime"] = tonumber(read_message("Fields[ResponseTime]"))
|
||||
fields["RemotePort"] = tonumber(read_message("Fields[RemotePort]"))
|
||||
fields["StatusCode"] = tonumber(read_message("Fields[StatusCode]"))
|
||||
msg.Payload = read_message("Payload")
|
||||
fields["Code"] = read_message("Fields[Code]")
|
||||
fields["ContainerID"] = read_message("Fields[ContainerID]")
|
||||
fields["ContainerName"] = read_message("Fields[ContainerName]")
|
||||
fields["Environment"] = read_message("Fields[Environment]")
|
||||
fields["Method"] = read_message("Fields[Method]")
|
||||
fields["RemoteIP"] = read_message("Fields[RemoteIP]")
|
||||
fields["Url"] = read_message("Fields[Url]")
|
||||
local ok, msg = pcall(inject_message, msg)
|
||||
if not ok then
|
||||
inject_payload("txt", "error", msg)
|
||||
end
|
||||
return 0
|
||||
end
|
||||
|
||||
{% endraw %}
|
BIN
doc/source/methodologies/monitoring/configs/node1.tar.gz
Normal file
BIN
doc/source/methodologies/monitoring/configs/node1.tar.gz
Normal file
Binary file not shown.
@ -0,0 +1,124 @@
|
||||
---
|
||||
- hosts: common
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: Install common packages
|
||||
apt: name={{ item }} state=installed
|
||||
with_items:
|
||||
- python-pip
|
||||
tags: [ 'always' ]
|
||||
- name: Install docker for Ubuntu 14.04
|
||||
apt: name=docker.io state=installed
|
||||
when: ansible_distribution == 'Ubuntu' and ansible_distribution_version == '14.04'
|
||||
tags: [ 'always' ]
|
||||
- name: Install docker for Ubuntu 16.01
|
||||
apt: name=docker state=installed
|
||||
when: ansible_distribution == 'Ubuntu' and ansible_distribution_version == '16.0.'
|
||||
tags: [ 'always' ]
|
||||
- name: Install python deps
|
||||
pip: name={{ item }}
|
||||
with_items:
|
||||
- docker-py
|
||||
- docker-compose
|
||||
tags: [ 'always' ]
|
||||
|
||||
- hosts: grafana
|
||||
remote_user: root
|
||||
vars:
|
||||
postgresql_root_user: root
|
||||
postgresql_root_password: aijoom1Shiex
|
||||
grafana_postgresql_user: grafana
|
||||
grafana_postgresql_password: sHskdhos6se
|
||||
grafana_postgresql_db: grafana
|
||||
grafana_user: admin
|
||||
grafana_password: admin
|
||||
tasks:
|
||||
- name: Install packages for grafana
|
||||
apt: name={{ item }} state=installed
|
||||
with_items:
|
||||
- postgresql-client-9.3
|
||||
- python-psycopg2
|
||||
- name: Create postgres data dir
|
||||
file: path=/var/lib/postgres/data/db state=directory
|
||||
tags: [ 'grafana' ]
|
||||
- name: Run postgres in docker
|
||||
docker_container:
|
||||
name: postgres
|
||||
image: 'postgres:latest'
|
||||
ports: 5432:5432
|
||||
volumes: '/var/lib/postgres/data:/var/lib/postgres/data'
|
||||
env:
|
||||
POSTGRES_USER: "{{ postgresql_root_user }}"
|
||||
POSTGRES_PASSWORD: "{{ postgresql_root_password }}"
|
||||
PGDATA: /var/lib/postgres/data/db
|
||||
tags: [ 'grafana' ]
|
||||
- name: Create DB for grafana
|
||||
postgresql_db:
|
||||
name: "{{ grafana_postgresql_db }}"
|
||||
login_user: "{{ postgresql_root_user }}"
|
||||
login_password: "{{ postgresql_root_password }}"
|
||||
login_host: localhost
|
||||
encoding: 'UTF-8'
|
||||
tags: [ 'grafana' ]
|
||||
- name: Create user for grafana in postgres
|
||||
postgresql_user:
|
||||
name: "{{ grafana_postgresql_user }}"
|
||||
login_user: "{{ postgresql_root_user }}"
|
||||
login_password: "{{ postgresql_root_password }}"
|
||||
login_host: localhost
|
||||
password: "{{ grafana_postgresql_password }}"
|
||||
db: grafana
|
||||
priv: ALL
|
||||
tags: [ 'grafana' ]
|
||||
- name: Create data dir for Grafana
|
||||
file: path=/var/lib/grafana state=directory
|
||||
tags: [ 'grafana' ]
|
||||
- name: Start Grafana container
|
||||
docker_container:
|
||||
name: grafana
|
||||
image: 'grafana/grafana:4.0.1'
|
||||
volumes: '/var/lib/grafana:/var/lib/grafana'
|
||||
ports: 3000:3000
|
||||
env:
|
||||
GF_SECURITY_ADMIN_PASSWORD: "{{ grafana_user }}"
|
||||
GF_SECURITY_ADMIN_USER: "{{ grafana_password }}"
|
||||
GF_DATABASE_TYPE: postgres
|
||||
GF_DATABASE_HOST: "{{ ansible_default_ipv4.address }}"
|
||||
GF_DATABASE_NAME: "{{ grafana_postgresql_db }}"
|
||||
GF_DATABASE_USER: "{{ grafana_postgresql_user }}"
|
||||
GF_DATABASE_PASSWORD: "{{ grafana_postgresql_password }}"
|
||||
GF_INSTALL_PLUGINS: grafana-piechart-panel
|
||||
tags: [ 'grafana' ]
|
||||
|
||||
- hosts: prometheuses
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: Data dir for prometheus
|
||||
file: path=/var/lib/prometheus state=directory
|
||||
tags: [ 'prometheus' ]
|
||||
- include: docker_prometheus.yaml
|
||||
|
||||
- hosts: prometheus-kuber
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: Copy prometheus config
|
||||
template: src=prometheus/prometheus-kuber.yml.j2 dest=/var/lib/prometheus/prometheus.yml
|
||||
register: prometheus_yml
|
||||
tags: [ 'prometheus', 'prometheus-conf' ]
|
||||
- include: docker_prometheus.yaml
|
||||
- name: Send kill -1 to prometheus if prometheus.yml changed
|
||||
command: pkill -1 prometheus
|
||||
when: prometheus_yml.changed
|
||||
tags: [ 'prometheus', 'prometheus-conf']
|
||||
- hosts: prometheus-system
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: Copy prometheus config
|
||||
template: src=prometheus/prometheus-system.yml.j2 dest=/var/lib/prometheus/prometheus.yml
|
||||
register: prometheus_yml
|
||||
tags: [ 'prometheus', 'prometheus-conf' ]
|
||||
- include: docker_prometheus.yaml
|
||||
- name: Send kill -1 to prometheus if prometheus.yml changed
|
||||
command: pkill -1 prometheus
|
||||
when: prometheus_yml.changed
|
||||
tags: [ 'prometheus', 'prometheus-conf']
|
@ -0,0 +1,118 @@
|
||||
---
|
||||
- hosts: all-cluster-nodes
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: Create user telegraf
|
||||
user: name=telegraf home=/opt/telegraf
|
||||
- name: Create /opt/telegraf
|
||||
file: path=/opt/telegraf state=directory owner=telegraf
|
||||
- name: Create bin dir for telegraf
|
||||
file: path=/opt/telegraf/bin state=directory owner=telegraf
|
||||
- name: Create etc dir for telegraf
|
||||
file: path=/opt/telegraf/etc state=directory owner=telegraf
|
||||
- name: Copy telegraf to server
|
||||
copy: src=../../telegraf/opt/bin/telegraf dest=/opt/telegraf/bin/telegraf mode=0755
|
||||
register: telegraf_bin
|
||||
- name: Copy telegraf.service
|
||||
copy: src=telegraf/telegraf.service dest=/etc/systemd/system/telegraf.service
|
||||
register: telegraf_service
|
||||
- name: Start and enable telegraf
|
||||
systemd: state=started enabled=yes daemon_reload=yes name=telegraf
|
||||
- name: Delete allmetrics.tmp.lock
|
||||
file: path=/opt/telegraf/bin/data/allmetrics.tmp.lock state=absent
|
||||
when: telegraf_service.changed or telegraf_bin.changed
|
||||
- name: Restart telegraf if telegraf binary has been changed
|
||||
systemd: state=restarted name=telegraf
|
||||
when: telegraf_bin.changed
|
||||
- name: Install software
|
||||
apt: name={{ item }} state=installed
|
||||
with_items:
|
||||
- sysstat
|
||||
- numactl
|
||||
- name: Copy system metric scripts
|
||||
copy: src=../../telegraf/opt/system_stats/{{ item }} dest=/opt/telegraf/bin/{{ item }} mode=0755
|
||||
with_items:
|
||||
- entropy.sh
|
||||
- iostat_per_device.sh
|
||||
- memory_bandwidth.sh
|
||||
- numa_stat_per_pid.sh
|
||||
- per_process_cpu_usage.sh
|
||||
- list_openstack_processes.sh
|
||||
- network_tcp_queue.sh
|
||||
- name: Copy pcm-memory-one-line.x
|
||||
copy: src=../../telegraf/opt/system_stats/intel_pcm_mem/pcm-memory-one-line.x dest=/opt/telegraf/bin/pcm-memory-one-line.x mode=0755
|
||||
- name: Add sysctl for pcm
|
||||
sysctl: name=kernel.nmi_watchdog value=0 state=present reload=yes
|
||||
- name: Load kernel module msr
|
||||
modprobe: name=msr state=present
|
||||
- name: Add module autoload
|
||||
lineinfile: dest=/etc/modules line='msr'
|
||||
- name: Add user telegraf to sudoers
|
||||
lineinfile:
|
||||
dest: /etc/sudoers
|
||||
state: present
|
||||
line: "telegraf ALL=(ALL) NOPASSWD: ALL"
|
||||
|
||||
|
||||
- hosts: cluster-nodes
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: Copy telegraf config
|
||||
copy: src=./telegraf/telegraf-sys.conf dest=/opt/telegraf/etc/telegraf.conf
|
||||
register: telegraf_conf
|
||||
- name: Restart telegraf if config has been changed
|
||||
systemd: state=restarted name=telegraf
|
||||
when: telegraf_conf.changed
|
||||
|
||||
- hosts: main-kuber
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: Copy openstack scripts
|
||||
copy: src=../../telegraf/opt/osapi/{{ item }} dest=/opt/telegraf/bin/{{ item }} mode=0755
|
||||
with_items:
|
||||
- glog.sh
|
||||
- osapitime.sh
|
||||
- vmtime.sh
|
||||
tags: [ 'openstack' ]
|
||||
- name: Copy etcd scripts
|
||||
copy: src=../../telegraf/opt/k8s_etcd/{{ item }} dest=/opt/telegraf/bin/{{ item }} mode=0755
|
||||
with_items:
|
||||
- etcd_get_metrics.sh
|
||||
- k8s_get_metrics.sh
|
||||
- name: Install software for scripts
|
||||
apt: name={{ item }} state=installed
|
||||
with_items:
|
||||
- mysql-client
|
||||
- bc
|
||||
- jq
|
||||
tags: [ 'openstack' ]
|
||||
- name: Create dirs for scripts
|
||||
file: path=/opt/telegraf/bin/{{ item }} state=directory owner=telegraf
|
||||
with_items:
|
||||
- log
|
||||
- data
|
||||
- name: Copy telegraf config
|
||||
template: src=telegraf/telegraf-openstack.conf.j2 dest=/opt/telegraf/etc/telegraf.conf
|
||||
register: telegraf_conf
|
||||
tags: [ 'openstack' ]
|
||||
- name: Delete allmetrics.tmp.lock
|
||||
file: path=/opt/telegraf/bin/data/allmetrics.tmp.lock state=absent
|
||||
when: telegraf_conf.changed
|
||||
- name: Restart telegraf if config has been changed
|
||||
systemd: state=restarted name=telegraf
|
||||
when: telegraf_conf.changed
|
||||
tags: [ 'openstack' ]
|
||||
|
||||
- hosts: all-cluster-nodes
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: Reload telegraf is service file has been changed
|
||||
systemd: daemon_reload=yes state=reloaded name=telegraf
|
||||
when: telegraf_service.changed
|
||||
|
||||
- hosts: main
|
||||
remote_user: root
|
||||
tasks:
|
||||
- name: update prometheus config
|
||||
template: src=./prometheus/targets.yml.j2 dest=/var/lib/prometheus/targets-{{ cluster_tag }}.yml
|
||||
tags: [ 'prometheus' ]
|
@ -0,0 +1,46 @@
|
||||
#!/bin/bash
|
||||
CLUSTER=${1}
|
||||
TMP_YAML=$(mktemp -u)
|
||||
|
||||
export ANSIBLE_HOST_KEY_CHECKING=False
|
||||
export SSH_USER="root"
|
||||
export SSH_PASS="r00tme"
|
||||
cd $(dirname $(realpath $0))
|
||||
|
||||
ENV=${1}
|
||||
if [ -z "${ENV}" ]; then
|
||||
echo "Please provide env number $(basename $0) [1|2|3|4|5|6]"
|
||||
exit 1
|
||||
fi
|
||||
PROMETHEUS_HOST="172.20.9.115"
|
||||
KUBE_MAIN_NODE="172.20.8.6${ENV}"
|
||||
CLUSTER_TAG="env-${ENV}"
|
||||
|
||||
ETCD=""
|
||||
|
||||
SSH_OPTS="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
|
||||
|
||||
|
||||
TARGETS=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} curl -ks https://127.0.0.1:2379/v2/members | python -m json.tool | grep 2379)
|
||||
|
||||
if [ -z "$TARGETS" ]; then
|
||||
echo "No etcd found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
for i in ${TARGETS}; do
|
||||
TEMP_TARGET=${i#\"https://}
|
||||
ETCD="$ETCD ${TEMP_TARGET%\"}"
|
||||
done
|
||||
|
||||
echo "- targets:" > ${TMP_YAML}
|
||||
for i in ${ETCD}; do
|
||||
echo " - $i" >> ${TMP_YAML}
|
||||
done
|
||||
echo " labels:" >> ${TMP_YAML}
|
||||
echo " env: ${CLUSTER_TAG}" >> ${TMP_YAML}
|
||||
|
||||
echo "Targets file is ready"
|
||||
cat ${TMP_YAML}
|
||||
sshpass -p ${SSH_PASS} scp ${SSH_OPTS} ${TMP_YAML} root@${PROMETHEUS_HOST}:/var/lib/prometheus/etcd-env-${1}.yml
|
||||
rm ${TMP_YAML}
|
@ -0,0 +1,2 @@
|
||||
#!/bin/bash
|
||||
ansible-playbook -i ./hosts ./deploy-graf-prom.yaml --tags "grafana"
|
@ -0,0 +1,2 @@
|
||||
#!/bin/bash
|
||||
ansible-playbook -i ./hosts ./deploy-graf-prom.yaml --tags "prometheus"
|
@ -0,0 +1,65 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
export ANSIBLE_HOST_KEY_CHECKING=False
|
||||
export SSH_USER="root"
|
||||
export SSH_PASS="r00tme"
|
||||
cd $(dirname $(realpath $0))
|
||||
|
||||
ENV=${1}
|
||||
if [ -z "${ENV}" ]; then
|
||||
echo "Please provide env number $(basename $0) [1|2|3|4|5|6]"
|
||||
exit 1
|
||||
fi
|
||||
PROMETHEUS_NODE="172.20.124.25"
|
||||
KUBE_MAIN_NODE="172.20.8.6${ENV}"
|
||||
CLUSTER_TAG="env-${ENV}"
|
||||
|
||||
# Secret option
|
||||
ANSIBLE_TAG=$2
|
||||
|
||||
SSH_OPTS="-q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
|
||||
|
||||
echo "Get clusters nodes"
|
||||
|
||||
NODES_TMP=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} 'kubectl get nodes -o jsonpath='"'"'{.items[*].status.addresses[?(@.type=="InternalIP")].address}'"'"'')
|
||||
ALL_IP_ON_KUBER_NODE=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} ip addr | grep 172.20 | awk '{print $2}' | awk -F'/' '{print $1}')
|
||||
GREP_STRING_TMP=""
|
||||
for i in $ALL_IP_ON_KUBER_NODE; do
|
||||
GREP_STRING_TMP="${GREP_STRING_TMP}${i}|"
|
||||
done
|
||||
GREP_STRING=${GREP_STRING_TMP:0:-1}
|
||||
SSH_AUTH="ansible_ssh_user=${SSH_USER} ansible_ssh_pass=${SSH_PASS}"
|
||||
echo "[main]" > cluster-hosts
|
||||
echo "${PROMETHEUS_NODE} ${SSH_AUTH}" >> cluster-hosts
|
||||
echo "[main-kuber]" >> cluster-hosts
|
||||
echo "${KUBE_MAIN_NODE} ${SSH_AUTH}" >> cluster-hosts
|
||||
echo "[cluster-nodes]" >> cluster-hosts
|
||||
set +e
|
||||
# Remove IP of kuber node
|
||||
for i in ${NODES_TMP} ; do
|
||||
TMP_VAR=$(echo $i | grep -vE "(${GREP_STRING})")
|
||||
NODES="${NODES} ${TMP_VAR}"
|
||||
done
|
||||
set -e
|
||||
for i in ${NODES} ; do
|
||||
if [ "$i" != "${KUBE_MAIN_NODE}" ]; then
|
||||
echo "${i} ${SSH_AUTH}" >> cluster-hosts
|
||||
fi
|
||||
done
|
||||
echo "[all-cluster-nodes:children]" >> cluster-hosts
|
||||
echo "main-kuber" >> cluster-hosts
|
||||
echo "cluster-nodes" >> cluster-hosts
|
||||
LINES=$(wc -l cluster-hosts | awk '{print $1}')
|
||||
NUM_NODES=$(($LINES - 7))
|
||||
if [ ${NUM_NODES} -le 0 ]; then
|
||||
echo "Something wrong, $NUM_NODES nodes found"
|
||||
exit 1
|
||||
else
|
||||
echo "${NUM_NODES} nodes found"
|
||||
fi
|
||||
|
||||
if [ -z "${ANSIBLE_TAG}" ]; then
|
||||
ansible-playbook -f 40 -i ./cluster-hosts -e cluster_tag=${CLUSTER_TAG} ./deploy-telegraf.yaml
|
||||
else
|
||||
ansible-playbook -f 40 -i ./cluster-hosts -e cluster_tag=${CLUSTER_TAG} -t ${ANSIBLE_TAG} ./deploy-telegraf.yaml
|
||||
fi
|
@ -0,0 +1,10 @@
|
||||
---
|
||||
- name: Deploy prometheus in docker
|
||||
docker_container:
|
||||
name: prometheus
|
||||
image: 'prom/prometheus:v1.4.0'
|
||||
ports: 9090:9090
|
||||
state: started
|
||||
volumes: ['/var/lib/prometheus:/prometheus']
|
||||
command: '-config.file=/prometheus/prometheus.yml -storage.local.retention 168h0m0s -storage.local.max-chunks-to-persist 3024288 -storage.local.memory-chunks=50502740 -storage.local.num-fingerprint-mutexes=300960'
|
||||
tags: [ 'prometheus' ]
|
@ -0,0 +1,58 @@
|
||||
global:
|
||||
scrape_interval: 15s # By default, scrape targets every 15 seconds.
|
||||
evaluation_interval: 15s # By default, scrape targets every 15 seconds.
|
||||
# Attach these labels to any time series or alerts when communicating with
|
||||
# external systems (federation, remote storage, Alertmanager).
|
||||
external_labels:
|
||||
monitor: 'codelab-monitor'
|
||||
|
||||
rule_files:
|
||||
# - "first.rules"
|
||||
# - "second.rules"
|
||||
|
||||
scrape_configs:
|
||||
|
||||
- job_name: 'prometheus'
|
||||
scrape_interval: 5s
|
||||
scrape_timeout: 5s
|
||||
# metrics_path defaults to '/metrics'
|
||||
# scheme defaults to 'http'.
|
||||
|
||||
static_configs:
|
||||
- targets: ['172.20.9.115:9090']
|
||||
|
||||
{% for env_num in range(1,7) %}
|
||||
- job_name: 'k8-env-{{env_num}}'
|
||||
scrape_interval: 30s
|
||||
scrape_timeout: 30s
|
||||
scheme: https
|
||||
tls_config:
|
||||
insecure_skip_verify: true
|
||||
kubernetes_sd_configs:
|
||||
- api_server: 'https://172.20.8.6{{env_num}}:443'
|
||||
role: node
|
||||
tls_config:
|
||||
insecure_skip_verify: true
|
||||
basic_auth:
|
||||
username: kube
|
||||
password: changeme
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- source_labels: [__address__]
|
||||
target_label: env
|
||||
regex: .*
|
||||
replacement: env-{{env_num}}
|
||||
|
||||
- job_name: 'etcd-env-{{env_num}}'
|
||||
scrape_interval: 5s
|
||||
scrape_timeout: 5s
|
||||
scheme: https
|
||||
tls_config:
|
||||
insecure_skip_verify: true
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- etcd-env-{{env_num}}.yml
|
||||
{% endfor %}
|
||||
|
||||
|
@ -0,0 +1,33 @@
|
||||
global:
|
||||
scrape_interval: 15s # By default, scrape targets every 15 seconds.
|
||||
evaluation_interval: 15s # By default, scrape targets every 15 seconds.
|
||||
# Attach these labels to any time series or alerts when communicating with
|
||||
# external systems (federation, remote storage, Alertmanager).
|
||||
external_labels:
|
||||
monitor: 'codelab-monitor'
|
||||
|
||||
rule_files:
|
||||
# - "first.rules"
|
||||
# - "second.rules"
|
||||
|
||||
scrape_configs:
|
||||
|
||||
- job_name: 'prometheus'
|
||||
scrape_interval: 5s
|
||||
scrape_timeout: 5s
|
||||
# metrics_path defaults to '/metrics'
|
||||
# scheme defaults to 'http'.
|
||||
|
||||
static_configs:
|
||||
- targets: ['172.20.124.25:9090']
|
||||
|
||||
{% for env_num in range(1,7) %}
|
||||
- job_name: 'telegraf-systems-env-{{env_num}}'
|
||||
scrape_interval: 30s
|
||||
scrape_timeout: 30s
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- targets-env-{{env_num}}.yml
|
||||
{% endfor %}
|
||||
|
||||
|
@ -0,0 +1,6 @@
|
||||
- targets:
|
||||
{% for host in groups['all-cluster-nodes']%}
|
||||
- {{hostvars[host]['inventory_hostname']}}:9126
|
||||
{% endfor %}
|
||||
labels:
|
||||
env: {{ cluster_tag }}
|
@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
export LANG=C
|
||||
set -o nounset # Treat unset variables as an error
|
||||
echo "system entropy=$(cat /proc/sys/kernel/random/entropy_avail)"
|
||||
|
@ -0,0 +1,33 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
ETCD=/usr/local/bin/etcdctl
|
||||
|
||||
type jq >/dev/null 2>&1 || ( echo "Jq is not installed" ; exit 1 )
|
||||
type curl >/dev/null 2>&1 || ( echo "Curl is not installed" ; exit 1 )
|
||||
|
||||
# get etcd members credentials
|
||||
MEMBERS="${ETCD} --endpoints https://127.0.0.1:2379 member list"
|
||||
LEADER_ID=$(eval "$MEMBERS" | awk -F ':' '/isLeader=true/ {print $1}')
|
||||
LEADER_ENDPOINT=$(eval "$MEMBERS" | awk '/isLeader=true/ {print $4}' | cut -d"=" -f2)
|
||||
SLAVE_ID=$(eval "$MEMBERS" | grep 'isLeader=false' | head -n 1 | awk -F ":" '{print $1}')
|
||||
SLAVE_ENDPOINT=$(eval "$MEMBERS" | grep 'isLeader=false' | head -n 1 | awk '{print $4}' | cut -d"=" -f2)
|
||||
|
||||
# member count:
|
||||
metric_members_count=`curl -s -k https://172.20.9.15:2379/v2/members | jq -c '.members | length'`
|
||||
metric_total_keys_count=`${ETCD} --endpoints https://127.0.0.1:2379 ls -r --sort | wc -l`
|
||||
metric_total_size_dataset=`pidof etcd | xargs ps -o rss | awk '{rss=+$1} END {print rss}'`
|
||||
metric_store_stats=`curl -s -k ${LEADER_ENDPOINT}/v2/stats/store| tr -d \"\{\} | sed -e 's/:/=/g'`
|
||||
metric_latency_from_leader_avg=`curl -s -k ${LEADER_ENDPOINT}/v2/stats/leader | \
|
||||
jq -c ".followers.\"${SLAVE_ID}\".latency.average"`
|
||||
metric_leader_stats=`curl -s -k ${LEADER_ENDPOINT}/v2/stats/self | \
|
||||
jq -c "{ sendBandwidthRate: .sendBandwidthRate, sendAppendRequestCnt: \
|
||||
.sendAppendRequestCnt, sendPkgRate: .sendPkgRate }"| tr -d \"\{\} | sed -e 's/:/=/g'`
|
||||
metric_slave_stats=`curl -s -k ${SLAVE_ENDPOINT}/v2/stats/self | \
|
||||
jq -c "{ recvBandwidthRate: .recvBandwidthRate, recvAppendRequestCnt: \
|
||||
.recvAppendRequestCnt, recvPkgRate: .recvPkgRate }"| tr -d \"\{\} | sed -e 's/:/=/g'`
|
||||
cat << EOF
|
||||
etcd_general_stats,group=etcd_cluster_metrics members_count=${metric_members_count},dataset_size=${metric_total_size_dataset},total_keys_count=${metric_total_keys_count}
|
||||
etcd_leader_stats,group=etcd_cluster_metrics $metric_leader_stats
|
||||
etcd_follower_stats,group=etcd_cluster_metrics ${metric_slave_stats},latency_from_leader_avg=${metric_latency_from_leader_avg}
|
||||
etcd_store_stats,group=etcd_cluster_metrics $metric_store_stats
|
||||
EOF
|
@ -0,0 +1,105 @@
|
||||
#!/bin/bash
|
||||
# Logs extractor / parser
|
||||
# checking that we are good
|
||||
if [[ -z "${TMP_DIR}" || -z "${POD}" || -z "${CONTAINER}" || -z "${K8S_NS}" || -z "${OS_LOG_FIELDS}" || -z ${CONTID} ]]; then
|
||||
echo "Required variables are not set, exiting!"
|
||||
exit 1
|
||||
fi
|
||||
# Variables declaration
|
||||
SSH_USER="${SSH_USER:-root}"
|
||||
SSH_PASS="${SSH_PASS:-r00tme}"
|
||||
LOG_ENTRIES_NUMBER=${LOG_ENTRIES_NUMBER:-1000}
|
||||
LAST_TIME_STAMP_FILE="${TMP_DIR}/timestamp.tmp"
|
||||
# get | set last timestamp for log entries
|
||||
function last_ts_data()
|
||||
{
|
||||
local action
|
||||
action=${1}
|
||||
shift
|
||||
if [ "${action}" == "get" ]; then
|
||||
if [ -e ${LAST_TIME_STAMP_FILE} ]; then
|
||||
cat ${LAST_TIME_STAMP_FILE}
|
||||
fi
|
||||
else
|
||||
echo "$*" > ${LAST_TIME_STAMP_FILE}
|
||||
fi
|
||||
}
|
||||
function print_out()
|
||||
{
|
||||
if [ -z "${TMP_METRICS}" ];then
|
||||
echo "$@"
|
||||
else
|
||||
echo "$@" >> ${TMP_METRICS}
|
||||
fi
|
||||
}
|
||||
function micro_to_seconds()
|
||||
{
|
||||
local input
|
||||
local output
|
||||
input="${1}"
|
||||
output=$(echo "scale=4;${input}/1000000" | bc)
|
||||
if echo ${output} | grep -q '^\..'; then
|
||||
output="0${output}"
|
||||
fi
|
||||
echo "${output}"
|
||||
}
|
||||
# extract container logs from k8s
|
||||
function get_logs()
|
||||
{
|
||||
local sdate
|
||||
local stime
|
||||
local scalltime
|
||||
local lasttimestamp
|
||||
local is_foundlast
|
||||
local tmpdata
|
||||
tmpdata="${TMP_DIR}/tmpdata.log"
|
||||
if [ -e "${tmpdata}" ]; then rm -f ${tmpdata}; fi
|
||||
if [ "${CONTAINER}" == "keystone" ];then
|
||||
sshpass -p ${SSH_PASS} ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ${SSH_USER}@${HOST} "tail -n${LOG_ENTRIES_NUMBER} /var/log/ccp/keystone/keystone-access.log | cut -d' ' -f${OS_LOG_FIELDS} | sed -e 's#\[##g' -e 's#\]##g'" 2>/dev/null > ${tmpdata}
|
||||
else
|
||||
sshpass -p ${SSH_PASS} ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ${SSH_USER}@${HOST} "docker logs --tail ${LOG_ENTRIES_NUMBER} ${CONTID} 2>&1 | grep 'INFO' | grep 'GET /' | cut -d' ' -f${OS_LOG_FIELDS}" 2>/dev/null > ${tmpdata}
|
||||
fi
|
||||
is_foundlast=false
|
||||
lasttimestamp=$(last_ts_data "get")
|
||||
if [ -z "${lasttimestamp}" ]; then
|
||||
while read log
|
||||
do
|
||||
sdate=$(echo ${log} | cut -d' ' -f1)
|
||||
stime=$(echo ${log} | cut -d' ' -f2)
|
||||
scalltime=$(echo ${log} | cut -d' ' -f3)
|
||||
if [ "${CONTAINER}" == "keystone" ];then scalltime=$(micro_to_seconds ${scalltime});fi
|
||||
if [ ! -z "${scalltime}" ]; then
|
||||
print_out "os_api_response_time,container=${CONTAINER},pod=${POD},instance=${HOST},requestdate=${sdate},requesttime=${stime} processingtime=${scalltime}"
|
||||
fi
|
||||
done < <(cat ${tmpdata})
|
||||
sdate=$(tail -n 1 ${tmpdata} | cut -d' ' -f1)
|
||||
stime=$(tail -n 1 ${tmpdata} | cut -d' ' -f2)
|
||||
last_ts_data "set" "${sdate}${stime}"
|
||||
else
|
||||
while read log
|
||||
do
|
||||
sdate=$(echo ${log} | cut -d' ' -f1)
|
||||
stime=$(echo ${log} | cut -d' ' -f2)
|
||||
scalltime=$(echo ${log} | cut -d' ' -f3)
|
||||
if [ "${CONTAINER}" == "keystone" ];then scalltime=$(micro_to_seconds ${scalltime});fi
|
||||
if [[ "${is_foundlast}" = "false" && "${lasttimestamp}" = "${sdate}${stime}" ]]; then
|
||||
#echo "FOUND: ${sdate}${stime} ${scalltime}"
|
||||
is_foundlast=true
|
||||
continue
|
||||
fi
|
||||
if [ "${is_foundlast}" == "true" ]; then
|
||||
if [ ! -z "${scalltime}" ]; then
|
||||
print_out "os_api_response_time,container=${CONTAINER},pod=${POD},instance=${HOST},requestdate=${sdate},requesttime=${stime} processingtime=${scalltime}"
|
||||
fi
|
||||
fi
|
||||
done < <(cat ${tmpdata})
|
||||
if [ "${is_foundlast}" == "true" ]; then
|
||||
sdate=$(tail -n 1 ${tmpdata} | cut -d' ' -f1)
|
||||
stime=$(tail -n 1 ${tmpdata} | cut -d' ' -f2)
|
||||
last_ts_data "set" "${sdate}${stime}"
|
||||
fi
|
||||
fi
|
||||
rm -f ${tmpdata}
|
||||
}
|
||||
# Main logic
|
||||
get_logs
|
@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
# output from iostat -Ndx is
|
||||
# Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util
|
||||
export LANG=C
|
||||
iostat -Ndx | tail -n +4 | head -n -1 | awk '{print "system_per_device_iostat,device="$1" read_merge="$2",write_merge="$3",await="$10",read_await="$11",write_await="$12",util="$14",average_queue="$9}'
|
||||
|
@ -0,0 +1,75 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
|
||||
K8S_MASTER=127.0.0.1
|
||||
|
||||
if [[ $1 ]] ; then
|
||||
K8S_MASTER=$1
|
||||
fi
|
||||
|
||||
type jq >/dev/null 2>&1 || ( echo "Jq is not installed" ; exit 1 )
|
||||
type curl >/dev/null 2>&1 || ( echo "Curl is not installed" ; exit 1 )
|
||||
|
||||
curl_get() {
|
||||
url="https://${K8S_MASTER}$@"
|
||||
curl -k -s -u kube:changeme $url || ( echo "Curl failed at: $url" 1>&2; exit 1 )
|
||||
}
|
||||
# gathering frequent API calls output to separate file(in order to avoid long timeouts):
|
||||
node_file=`mktemp /tmp/XXXXX`
|
||||
pods_file=`mktemp /tmp/XXXXX`
|
||||
endpoints_file=`mktemp /tmp/XXXXX`
|
||||
curl_get "/api/v1/nodes" > $node_file
|
||||
curl_get "/api/v1/pods" > $pods_file
|
||||
curl_get "/api/v1/endpoints" > $endpoints_file
|
||||
# metrics withdrawal:
|
||||
number_of_namespaces_total=`curl_get "/api/v1/namespaces" | jq '[ .items[] .metadata.name ] | length'`
|
||||
number_of_services_total=`curl_get "/api/v1/services" | jq -c '[ .items[] .metadata.name ] | length'`
|
||||
number_of_nodes_total=`jq -c '[ .items[] .metadata.name ] | length' $node_file`
|
||||
number_of_unsched=`jq -c '[ .items[] | select(.spec.unschedulable != null) .metadata.name ] | length' $node_file`
|
||||
number_in_each_status=`jq -c '[ .items[] | .status.conditions[] | select(.type == "Ready") .status \
|
||||
| gsub("(?<a>.+)"; "number_of_status_\(.a)" ) ] | group_by(.) | map({(.[0]): length}) | add ' $node_file \
|
||||
| tr -d \"\{\} | sed -e 's/:/=/g'`
|
||||
number_of_pods_total=`jq -c '[ .items[] .metadata.name ] | length' $pods_file`
|
||||
number_of_pods_state_Pending=`jq -c '[ .items[] .status.phase | select(. == "Pending")] | length' $pods_file`
|
||||
number_of_pods_state_Running=`jq -c '[ .items[] .status.phase | select(. == "Running")] | length' $pods_file`
|
||||
number_of_pods_state_Succeeded=`jq -c '[ .items[] .status.phase | select(. == "Succeeded")] | length' $pods_file`
|
||||
number_of_pods_state_Failed=`jq -c '[ .items[] .status.phase | select(. == "Failed")] | length' $pods_file`
|
||||
number_of_pods_state_Unknown=`jq -c '[ .items[] .status.phase | select(. == "Unknown")] | length' $pods_file`
|
||||
number_of_pods_per_node=`jq -c '[ .items[] | .spec.nodeName ] | group_by(.) | \
|
||||
map("k8s_pods_per_node,group=k8s_cluster_metrics,pod_node=\(.[0]) value=\(length)")' $pods_file \
|
||||
| sed -e 's/\["//g' -e 's/"\]//g' -e 's/","/\n/g'`
|
||||
number_of_pods_per_ns=`jq -c '[ .items[] | .metadata.namespace ] | group_by(.) | \
|
||||
map("k8s_pods_per_namespace,group=k8s_cluster_metrics,ns=\(.[0]) value=\(length)")' $pods_file \
|
||||
| sed -e 's/\["//g' -e 's/"\]//g' -e 's/","/\n/g'`
|
||||
number_of_endpoints_each_service=`jq -c '[ .items[] | { service: .metadata.name, endpoints: .subsets[] } | \
|
||||
. as { service: $svc, endpoints: $endp } | $endp.addresses | length | . as $addr | $endp.ports | length | \
|
||||
. as $prts | "k8s_services,group=k8s_cluster_metrics,service=\($svc) endpoints_number=\($addr * $prts)" ] ' $endpoints_file \
|
||||
| sed -e 's/\["//g' -e 's/"\]//g' -e 's/","/\n/g'`
|
||||
number_of_endpoints_total=`jq -c '[ .items[] | .subsets[] | { addrs: .addresses, ports: .ports } \
|
||||
| map (length ) | .[0] * .[1] ] | add' $endpoints_file`
|
||||
number_of_API_instances=`curl_get "/api/" | jq -c '.serverAddressByClientCIDRs | length'`
|
||||
number_of_controllers=`curl_get "/api/v1/replicationcontrollers" | jq '.items | length'`
|
||||
number_of_scheduler_instances=`curl_get /api/v1/namespaces/kube-system/pods?labelSelector='k8s-app=kube-scheduler' \
|
||||
| jq -c '.items | length' `
|
||||
cluster_resources_CPU=`jq -c '[ .items[] .status.capacity.cpu | tonumber ] | add' $node_file`
|
||||
cluster_resources_RAM=`jq -c '[ .items[] .status.capacity.memory| gsub("[a-z]+$"; "" ; "i") | tonumber] | add' $node_file`
|
||||
|
||||
# output:
|
||||
cat << EOF
|
||||
k8s_nodes,group=k8s_cluster_metrics number_of_nodes_total=${number_of_nodes_total},number_of_unsched=${number_of_unsched}
|
||||
k8s_nodes_states,group=k8s_cluster_metrics ${number_in_each_status}
|
||||
k8s_namespaces,group=k8s_cluster_metrics number_of_namespaces_total=${number_of_namespaces_total}
|
||||
k8s_pods,group=k8s_cluster_metrics number_of_pods_total=${number_of_pods_total}
|
||||
k8s_pods_states,group=k8s_cluster_metrics number_of_pods_state_Pending=${number_of_pods_state_Pending},number_of_pods_state_Running=${number_of_pods_state_Running},number_of_pods_state_Succeeded=${number_of_pods_state_Succeeded},number_of_pods_state_Failed=${number_of_pods_state_Failed},number_of_pods_state_Unknown=${number_of_pods_state_Unknown}
|
||||
${number_of_pods_per_node}
|
||||
${number_of_pods_per_ns}
|
||||
${number_of_endpoints_each_service}
|
||||
k8s_services,group=k8s_cluster_metrics number_of_services_total=${number_of_services_total},number_of_endpoints_total=${number_of_endpoints_total}
|
||||
k8s_number_of_API_instances,group=k8s_cluster_metrics value=${number_of_API_instances}
|
||||
k8s_number_of_controllers,group=k8s_cluster_metrics value=${number_of_controllers}
|
||||
k8s_number_of_scheduler_instances,group=k8s_cluster_metrics value=${number_of_scheduler_instances}
|
||||
k8s_cluster_resources,group=k8s_cluster_metrics cpu_total=${cluster_resources_CPU},ram_total=${cluster_resources_RAM}
|
||||
EOF
|
||||
|
||||
# cleanup
|
||||
rm -f $node_file $pods_file $endpoints_file
|
@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
export LANG=C
|
||||
PS_ALL=$(ps --no-headers -A -o command | grep -vE '(sh|bash)')
|
||||
M_NAME=system_openstack_list
|
||||
|
||||
MARIADB=$(echo "${PS_ALL}" | grep 'mariadb' | wc -l)
|
||||
RABBITMQ=$(echo "${PS_ALL}" | grep 'rabbitmq' | wc -l)
|
||||
KEYSTONE=$(echo "${PS_ALL}" | grep 'keystone' | wc -l)
|
||||
GLANCE=$(echo "${PS_ALL}" | grep -E '(glance-api|glance-registry)' | wc -l)
|
||||
CINDER=$(echo "${PS_ALL}" | grep 'cinder' | wc -l)
|
||||
NOVA=$(echo "${PS_ALL}" | grep -E '(nova-api|nova-conductor|nova-consoleauth|nova-scheduler)' | wc -l)
|
||||
NEUTRON=$(echo "${PS_ALL}" | grep -E '(neutron-server|neutron-metadata-agent|neutron-dhcp-agent|neutron-l3-agent|neutron-openvswitch-agent)' | wc -l)
|
||||
OPENVSWITCH=$(echo "${PS_ALL}" | grep -E '(ovsdb-server|ovs-vswitchd|ovsdb-client)' | wc -l)
|
||||
|
||||
echo "${M_NAME} mariadb=${MARIADB},rabbitmq=${RABBITMQ},keystone=${KEYSTONE},glance=${GLANCE},cinder=${CINDER},nova=${NOVA},neutron=${NEUTRON},openvswitch=${OPENVSWITCH}"
|
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
# Report system memory bandwidth as an InfluxDB line-protocol record.
# Output is in MB/s.
#
# The PCM tool may require the following host preparation:
#   echo 0 > /proc/sys/kernel/nmi_watchdog
#   modprobe msr
export LANG=C

# Take the last CSV line of a one-second PCM sample; column 28 carries the
# aggregate memory bandwidth value.
MEM_BW=$(sudo /opt/telegraf/bin/pcm-memory-one-line.x /csv 1 2>/dev/null | tail -n 1 | awk '{print $28}')

echo "system_memory bandwidth=${MEM_BW}"
|
@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Sum the socket Recv-Q and Send-Q sizes over all IPv4 connections and emit
# them as a single InfluxDB line-protocol record.
export LANG=C

# Only rows whose first column is a protocol name carry queue numbers.
# Filtering on it skips the two netstat header lines, which previously fed
# non-numeric text ("Recv-Q", "Send-Q") into the shell arithmetic.
read -r SUM_RESV_Q SUM_SEND_Q < <(
    netstat -4 -n | awk '$1 ~ /^(tcp|udp)/ {recv += $2; send += $3}
                         END {print recv + 0, send + 0}'
)

echo "system_tcp_queue sum_recv=${SUM_RESV_Q},sum_send=${SUM_SEND_Q}"
|
@ -0,0 +1,22 @@
|
||||
#!/bin/bash
# Report per-process NUMA memory usage (huge pages, heap, stack, private)
# as InfluxDB line-protocol records, one record per PID with NUMA maps.

set -o nounset # Treat unset variables as an error
#set -x

export LANG=C

# Hosts without a NUMA topology have nothing to report.
if [ ! -d '/sys/devices/system/node' ]; then
    exit 44
fi

# Snapshot of all PIDs with their command names, taken once so the PID/name
# lookup below is consistent with the PID list being iterated.
ALL_PROCESS="$(ps --no-headers -A -o pid,ucomm)"

for pid in $(echo "${ALL_PROCESS}" | awk '{print $1}'); do
    # The process may have exited since the snapshot; skip it in that case.
    [ -f "/proc/${pid}/numa_maps" ] || continue

    NUM_STAT=$(numastat -p ${pid})
    PROC_NAME=$(echo "${ALL_PROCESS}" | grep -E "( ${pid} |^${pid} )" | awk '{print $2}')

    # Build one line-protocol record: the Huge chunk opens the line with the
    # measurement and tags, the middle chunks append fields, and the Private
    # chunk terminates the line with a newline.
    echo "${NUM_STAT}" | grep Huge | awk -v p=${pid} -v n=$PROC_NAME \
        '{printf "system_numa_memory_per_pid,pid="p",name="n" memory_huge="$NF","}'
    echo "${NUM_STAT}" | grep Heap | awk '{printf "memory_heap="$NF","}'
    echo "${NUM_STAT}" | grep Stack | awk '{printf "memory_stack="$NF","}'
    echo "${NUM_STAT}" | grep Private | awk '{print "memory_private="$NF}'
done
|
||||
|
||||
|
@ -0,0 +1,215 @@
|
||||
#!/bin/bash
# Launch the OpenStack log parser (glog.sh) against every matching container
# in a Kubernetes namespace and aggregate the parsed metrics.

# Variables declaration
WORKDIR="$(cd "$(dirname ${0})" && pwd)"
# Per-container log parser invoked (possibly in background) for each pod.
OS_LOG_PARSER="${WORKDIR}/glog.sh"
# Scratch area for raw kubectl data and per-pod parser results.
TMPDATADIR="${WORKDIR}/data"
TMP_METRICS="${TMPDATADIR}/allmetrics.tmp"
# Parser launch mode: "bg" runs one parser per container in the background.
MODE="${MODE:-bg}"
SCRIPT_LOG_DIR="${WORKDIR}/logs"
SCRIPT_LOG_FILE="${SCRIPT_LOG_DIR}/run_results_$(date +%Y-%m-%d).log"
# Logging verbosity: 1 = stdout only, 2 = file only, 3 = both (see log()).
SCRIPT_LOG_LVL=2
# Kubernetes namespace to scan for OpenStack containers.
K8S_NS="${K8S_NS:-ccp}"
# "container-name:spec" entries; the colon suffix is passed to glog.sh via
# OS_LOG_FIELDS (presumably log field positions — confirm against glog.sh).
declare -a OSCONTROLLER=(
'cinder-api:1,2,21'
'glance-api:1,2,22'
'heat-api:1,2,22'
'neutron-metadata-agent:1,2,17'
'neutron-server:1,2,22'
'nova-api:1,2,21'
'keystone:4,5,11'
)
declare -a OSCOMPUTE=(
'nova-compute:'
)
|
||||
# Create a subfolder under the temporary data directory.
# $1 - subfolder name; with no argument the data directory itself is created.
function mk_dir()
{
    local newdir="${TMPDATADIR}/${1:-}"
    # mkdir -p is a no-op when the directory already exists, so no separate
    # existence check is needed; quoting guards against spaces in the path.
    mkdir -p "${newdir}"
}
|
||||
# Log a timestamped message according to SCRIPT_LOG_LVL:
#   3 - stdout and ${SCRIPT_LOG_FILE}, 2 - file only, 1 - stdout only,
#   anything else - silently discarded. Empty messages are ignored.
function log()
{
    local input
    local dtstamp
    input="$*"
    dtstamp="$(date +%Y-%m-%d_%H%M%S)"
    if [ ! -d "${SCRIPT_LOG_DIR}" ]; then
        mkdir -p "${SCRIPT_LOG_DIR}"
    fi
    # Empty messages are dropped regardless of level; checking once here
    # replaces the identical check duplicated in every case branch.
    if [ -z "${input}" ]; then
        return
    fi
    case "${SCRIPT_LOG_LVL}" in
        3)
            echo "${dtstamp}: ${input}" | tee -a "${SCRIPT_LOG_FILE}"
            ;;
        2)
            echo "${dtstamp}: ${input}" >> "${SCRIPT_LOG_FILE}"
            ;;
        1)
            echo "${dtstamp}: ${input}"
            ;;
        *)
            ;;
    esac
}
|
||||
# Resolve the log-field spec for a container name within a role.
# $1          - role: "controller" or "compute"
# $2..        - container name to look up
# Prints the colon-suffix of the matching OSCONTROLLER/OSCOMPUTE entry and
# returns 0; returns 1 when the role or the container name is unknown.
function get_role()
{
    local role
    local input
    local entry
    local arr_name
    local arr_name_fields
    local -a candidates
    role=${1}
    shift
    input=$*
    # Select the lookup table for the requested role; the per-role loops
    # were identical, so they are collapsed into one.
    case ${role} in
        "controller")
            candidates=("${OSCONTROLLER[@]}")
            ;;
        "compute")
            candidates=("${OSCOMPUTE[@]}")
            ;;
        *)
            return 1
            ;;
    esac
    # Entries have the form "<container-name>:<field-spec>"; parameter
    # expansion avoids two `cut` subshells per entry.
    for entry in "${candidates[@]}"
    do
        arr_name=${entry%%:*}
        arr_name_fields=${entry#*:}
        if [[ "${arr_name}" == "${input}" ]]; then
            echo "${arr_name_fields}"
            return 0
        fi
    done
    return 1
}
|
||||
# Print the age in seconds of the timestamp in $1 (any format understood by
# `date -d`), relative to the current time.
function tdiff()
{
    local then_epoch
    local now_epoch
    then_epoch="$(date -d "${1}" +%s)"
    now_epoch="$(date +%s)"
    echo $(( now_epoch - then_epoch ))
}
|
||||
# Manage the allmetrics lock file.
# $1 - "lock" to acquire (a stale lock older than ${old_in_sec} seconds is
#      broken and re-taken), anything else to release.
# Returns 1 when a fresh lock is already held, 0 otherwise.
function glock()
{
    local action
    local lockfile
    local accessdate
    local old_in_sec=120
    action="${1}"
    lockfile="${TMPDATADIR}/allmetrics.tmp.lock"
    if [[ "${action}" == "lock" && ! -e "${lockfile}" ]]; then
        touch "${lockfile}"
    elif [[ "${action}" == "lock" && -e "${lockfile}" ]]; then
        # Break the lock if its mtime exceeds the staleness threshold
        # (a previous run probably died without unlocking).
        accessdate="$(stat "${lockfile}" | grep Modify | cut -d' ' -f2,3)"
        if [ "$(tdiff "${accessdate}")" -ge "${old_in_sec}" ]; then
            rm -f "${lockfile}"
            touch "${lockfile}"
        else
            log "Lock file ${lockfile} exists!"
            return 1
        fi
    else
        # Unlock path: -f keeps this quiet (and successful) even when the
        # lock file is already gone; plain rm would print an error.
        rm -f "${lockfile}"
    fi
    return 0
}
|
||||
# Block until all log parsers launched in background mode have finished,
# polling once per second and logging the remaining child count.
function gatherchildren()
{
    local childrencount
    while true
    do
        # pgrep -f matches against the full command line and never counts
        # itself, replacing the fragile unquoted
        # `ps axf | grep ... | grep -v grep | wc -l` chain.
        childrencount=$(pgrep -cf "${OS_LOG_PARSER}")
        if [ "${childrencount}" -eq 0 ]; then
            return
        fi
        log "Children running ${childrencount}."
        sleep 1
    done
}
|
||||
# Discover running containers in the ${K8S_NS} namespace and launch the log
# parser for every container whose name matches a known controller role.
# Per-pod results accumulate under ${TMPDATADIR}/results and are flushed to
# stdout at the end. Guarded by the allmetrics lock so only one collector
# runs at a time.
function get_k8s_containers()
{
    local cont_host
    local cont_pod
    local cont_name
    local cont_id
    local os_log_fields
    local cont_tmp_dir
    local _raw_data
    # Abort if another collector instance already holds the lock.
    glock "lock"
    if [ "$?" -ne 0 ]; then exit 1;fi
    #echo '[' > ${TMP_METRICS}
    _raw_data="${TMPDATADIR}/._raw_data"
    rm -rf ${_raw_data}
    # One CSV line per container: node,pod,container-name,container-id;
    # the template skips pods in the "Succeeded" phase.
    kubectl get pods -n "${K8S_NS}" -o 'go-template={{range .items}}{{if or (ne .status.phase "Succeeded") (eq .status.phase "Running")}}{{.spec.nodeName}},{{.metadata.name}},{{range .status.containerStatuses}}{{.name}},{{.containerID}}{{end}}{{"\n"}}{{end}}{{end}}' > ${_raw_data}
    for data in $(cat ${_raw_data})
    do
        cont_host=$(echo ${data} | cut -d',' -f1)
        cont_pod=$(echo ${data} | cut -d',' -f2)
        cont_name=$(echo ${data} | cut -d',' -f3)
        # Strip the docker:// scheme prefix to get the bare container ID.
        cont_id=$(echo ${data} | cut -d',' -f4 | sed 's|^docker://||')
        cont_tmp_dir="${cont_host}_${cont_pod}_${cont_name}"
        os_log_fields=$(get_role "controller" "${cont_name}")
        if [ "$?" -eq 0 ]; then
            mk_dir "${cont_tmp_dir}"
            # The parser reads its whole configuration from the environment.
            export K8S_NS=${K8S_NS}
            export TMP_DIR=${TMPDATADIR}/${cont_tmp_dir}
            # export TMP_METRICS=${TMP_METRICS}
            export TMP_METRICS="${TMPDATADIR}/results/${cont_pod}.tmp"
            export CONTID=${cont_id}
            export CONTAINER=${cont_name}
            export HOST=${cont_host}
            export POD=${cont_pod}
            export OS_LOG_FIELDS=${os_log_fields}
            log "MODE=${MODE} CONTID=${cont_id} TMP_METRICS=${TMP_METRICS} ROLE=controller HOST=${cont_host} POD=${cont_pod} CONTAINER=${cont_name} OS_LOG_FIELDS=${os_log_fields} TMP_DIR=${TMPDATADIR}/${cont_tmp_dir} K8S_NS=${K8S_NS} ${OS_LOG_PARSER}"
            if [[ "${MODE}" == "bg" ]]; then
                log "${cont_pod} ${cont_name} ${cont_id}"
                ${OS_LOG_PARSER} &
            else
                ${OS_LOG_PARSER}
            fi
            # Clear per-container settings before the next iteration.
            unset TMP_METRICS
            unset CONTID
            unset CONTAINER
            unset POD
            unset OS_LOG_FIELDS
            unset HOST
        fi
        # os_log_fields=$(get_role "compute" "${cont_name}")
        # if [ "$?" -eq 0 ]; then
        # mk_dir "${cont_tmp_dir}"
        # log "ROLE=compute HOST=${cont_host} POD=${cont_pod} CONTAINER=${cont_name} OS_LOG_FIELDS=${os_log_fields} TMP_DIR=${TMPDATADIR}/${cont_tmp_dir} K8S_NS=${K8S_NS} ${OS_LOG_PARSER}"
        # fi
    done
    # Wait for background parsers, then flush accumulated per-pod results.
    gatherchildren
    if [ "$(ls ${TMPDATADIR}/results/ | wc -l)" -gt 0 ]; then
        cat ${TMPDATADIR}/results/*.tmp
        log "Resulting lines $(cat ${TMPDATADIR}/results/*.tmp | wc -l)"
        rm -rf ${TMPDATADIR}/results/*
    fi
    glock "unlock"
}
|
||||
# Main logic
mk_dir               # ensure the data directory itself exists
mk_dir "results"     # per-pod parser results are flushed from here
get_k8s_containers
|
@ -0,0 +1,6 @@
|
||||
#!/bin/bash
# Emit per-process CPU usage (user/system %) as InfluxDB line-protocol
# records, taking one pidstat sample per currently-running PID.
export LANG=C

for pid in $(ps --no-headers -A -o pid); do
    # The last pidstat line holds the sample; a process that exited between
    # the ps snapshot and here yields header-only output, which grep drops.
    pidstat -p $pid | tail -n 1 | grep -v PID | awk '{print "system_per_process_cpu_usage,process="$9" user="$4",system="$5}'
done
|
||||
|
@ -0,0 +1,12 @@
|
||||
#!/bin/bash
# Report the average VM spawn time (launched_at - created_at, in seconds)
# over recently launched, non-errored instances, read directly from the
# Nova database. Emits nothing when the window is empty.
# (Unused WORKDIR/SCRIPT variables from the original were removed.)

MYSQLUSER="nova"
MYSQPASSWD="password"
MYSQLHOST="mariadb.ccp"

# The outer SELECT filters the NULL row AVG() produces for an empty window,
# so ${avgdata} is empty rather than the literal string "NULL".
avgdata=$(mysql -u${MYSQLUSER} -p${MYSQPASSWD} -h ${MYSQLHOST} -D nova --skip-column-names --batch -e "select diff from (select avg(unix_timestamp(launched_at) - unix_timestamp(created_at)) as diff from instances where vm_state != 'error' and launched_at >= subtime(now(),'30')) t1 where diff IS NOT NULL;" 2>/dev/null | sed 's/\t/,/g';)

if [ ! -z "${avgdata}" ]; then
    echo "vm_spawn_avg_time timediffinsec=${avgdata}"
fi
|
||||
|
@ -0,0 +1,116 @@
|
||||
[global_tags]
|
||||
metrics_source="system_openstack"
|
||||
[agent]
|
||||
interval = "10s"
|
||||
round_interval = true
|
||||
metric_batch_size = 1000
|
||||
metric_buffer_limit = 10000
|
||||
collection_jitter = "0s"
|
||||
flush_interval = "15s"
|
||||
flush_jitter = "5s"
|
||||
precision = ""
|
||||
debug = false
|
||||
quiet = false
|
||||
hostname = ""
|
||||
omit_hostname = false
|
||||
[[outputs.prometheus_client]]
|
||||
listen = ":9126"
|
||||
[[inputs.cpu]]
|
||||
percpu = true
|
||||
totalcpu = true
|
||||
fielddrop = ["time_*"]
|
||||
[[inputs.disk]]
|
||||
ignore_fs = ["tmpfs", "devtmpfs"]
|
||||
[[inputs.diskio]]
|
||||
[[inputs.kernel]]
|
||||
[[inputs.mem]]
|
||||
[[inputs.processes]]
|
||||
[[inputs.swap]]
|
||||
[[inputs.system]]
|
||||
[[inputs.kernel_vmstat]]
|
||||
[[inputs.net]]
|
||||
[[inputs.netstat]]
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/vmtime.sh",
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "30s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/osapitime.sh",
|
||||
]
|
||||
timeout = "60s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/etcd_get_metrics.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/k8s_get_metrics.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.openstack]]
|
||||
interval = '40s'
|
||||
identity_endpoint = "http://keystone.ccp.svc.cluster.local:5000/v3"
|
||||
domain = "default"
|
||||
project = "admin"
|
||||
username = "admin"
|
||||
password = "password"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/iostat_per_device.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/per_process_cpu_usage.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/entropy.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "60s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/numa_stat_per_pid.sh"
|
||||
]
|
||||
timeout = "60s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/memory_bandwidth.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/list_openstack_processes.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/network_tcp_queue.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
@ -0,0 +1,81 @@
|
||||
[global_tags]
|
||||
metrics_source="system"
|
||||
[agent]
|
||||
interval = "10s"
|
||||
round_interval = true
|
||||
metric_batch_size = 1000
|
||||
metric_buffer_limit = 10000
|
||||
collection_jitter = "0s"
|
||||
flush_interval = "15s"
|
||||
flush_jitter = "5s"
|
||||
precision = ""
|
||||
debug = false
|
||||
quiet = false
|
||||
hostname = ""
|
||||
omit_hostname = false
|
||||
[[outputs.prometheus_client]]
|
||||
listen = ":9126"
|
||||
[[inputs.cpu]]
|
||||
percpu = true
|
||||
totalcpu = true
|
||||
fielddrop = ["time_*"]
|
||||
[[inputs.disk]]
|
||||
ignore_fs = ["tmpfs", "devtmpfs"]
|
||||
[[inputs.diskio]]
|
||||
[[inputs.kernel]]
|
||||
[[inputs.mem]]
|
||||
[[inputs.processes]]
|
||||
[[inputs.swap]]
|
||||
[[inputs.system]]
|
||||
[[inputs.kernel_vmstat]]
|
||||
[[inputs.net]]
|
||||
[[inputs.netstat]]
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/iostat_per_device.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/per_process_cpu_usage.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/entropy.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "60s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/numa_stat_per_pid.sh"
|
||||
]
|
||||
timeout = "60s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/memory_bandwidth.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/list_openstack_processes.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
[[inputs.exec]]
|
||||
interval = "15s"
|
||||
commands = [
|
||||
"/opt/telegraf/bin/network_tcp_queue.sh"
|
||||
]
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
948
doc/source/methodologies/monitoring/index.rst
Normal file
948
doc/source/methodologies/monitoring/index.rst
Normal file
@ -0,0 +1,948 @@
|
||||
|
||||
.. _Methodology_for_Containerized_Openstack_Monitoring:
|
||||
|
||||
**************************************************
|
||||
Methodology for Containerized Openstack Monitoring
|
||||
**************************************************
|
||||
|
||||
:Abstract:
|
||||
|
||||
This document describes one of the Containerized Openstack monitoring solutions
|
||||
to provide scalable and comprehensive architecture and obtain all crucial performance
|
||||
metrics on each structure layer.
|
||||
|
||||
|
||||
Containerized Openstack Monitoring Architecture
|
||||
===============================================
|
||||
|
||||
This part of documentation describes required performance metrics in each
|
||||
distinguished Containerized Openstack layer.
|
||||
|
||||
Containerized Openstack comprises three layers where Monitoring System should
|
||||
be able to query all necessary counters:
|
||||
- OS layer
|
||||
- Kubernetes layer
|
||||
- Openstack layer
|
||||
|
||||
Monitoring instruments must be logically divided in two groups:
|
||||
- Monitoring Server Side
|
||||
- Node Client Side
|
||||
|
||||
Operation System Layer
|
||||
----------------------
|
||||
|
||||
We were using Ubuntu Xenial on top of bare-metal servers for both server and node side.
|
||||
|
||||
Baremetal hardware description
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
We deployed everything at 200 servers environment with following hardware characteristics:
|
||||
|
||||
.. table::
|
||||
|
||||
+-------+----------------+------------------------+
|
||||
|server |vendor,model |HP,DL380 Gen9 |
|
||||
+-------+----------------+------------------------+
|
||||
|CPU |vendor,model |Intel,E5-2680 v3 |
|
||||
| +----------------+------------------------+
|
||||
| |processor_count |2 |
|
||||
| +----------------+------------------------+
|
||||
| |core_count |12 |
|
||||
| +----------------+------------------------+
|
||||
| |frequency_MHz |2500 |
|
||||
+-------+----------------+------------------------+
|
||||
|RAM |vendor,model |HP,752369-081 |
|
||||
| +----------------+------------------------+
|
||||
| |amount_MB |262144 |
|
||||
+-------+----------------+------------------------+
|
||||
|NETWORK|interface_name |p1p1 |
|
||||
| +----------------+------------------------+
|
||||
| |vendor,model |Intel,X710 Dual Port |
|
||||
| +----------------+------------------------+
|
||||
| |bandwidth |10G |
|
||||
+-------+----------------+------------------------+
|
||||
|STORAGE|dev_name |/dev/sda |
|
||||
| +----------------+------------------------+
|
||||
| |vendor,model | | raid10 - HP P840 |
|
||||
| | | | 12 disks EH0600JEDHE |
|
||||
| +----------------+------------------------+
|
||||
| |SSD/HDD |HDD |
|
||||
| +----------------+------------------------+
|
||||
| |size | 3,6TB |
|
||||
+-------+----------------+------------------------+
|
||||
|
||||
Operating system configuration
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Baremetal nodes were provisioned with Cobbler with our in-house preseed scripts.
|
||||
OS versions we used:
|
||||
|
||||
.. table:: Versions Operating Systems
|
||||
|
||||
+--------------------+-----------------------------------------+
|
||||
|Software |Version |
|
||||
+--------------------+-----------------------------------------+
|
||||
|Ubuntu |Ubuntu 16.04.1 LTS |
|
||||
+--------------------+-----------------------------------------+
|
||||
|Kernel |4.4.0-47-generic |
|
||||
+--------------------+-----------------------------------------+
|
||||
|
||||
You can find /etc folder contents from the one of the typical system we were using:
|
||||
|
||||
:download:`etc_tarball <configs/node1.tar.gz>`
|
||||
|
||||
Required system metrics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
At this layer we must get this list of processes:
|
||||
|
||||
.. table::
|
||||
|
||||
+------------------------+-----------------------------------------+
|
||||
|List of processes |Mariadb |
|
||||
| +-----------------------------------------+
|
||||
| |Rabbitmq |
|
||||
| +-----------------------------------------+
|
||||
| |Keystone |
|
||||
| +-----------------------------------------+
|
||||
| |Glance |
|
||||
| +-----------------------------------------+
|
||||
| |Cinder |
|
||||
| +-----------------------------------------+
|
||||
| |Nova |
|
||||
| +-----------------------------------------+
|
||||
| |Neutron |
|
||||
| +-----------------------------------------+
|
||||
| |Openvswitch |
|
||||
| +-----------------------------------------+
|
||||
| |Kubernetes |
|
||||
+------------------------+-----------------------------------------+
|
||||
|
||||
And following list of metrics:
|
||||
|
||||
.. table::
|
||||
|
||||
+------------------------+-----------------------------------------+
|
||||
|Node load average |1min |
|
||||
| +-----------------------------------------+
|
||||
| |5min |
|
||||
| +-----------------------------------------+
|
||||
| |15min |
|
||||
+------------------------+-----------------------------------------+
|
||||
|Global process stats |Running |
|
||||
| +-----------------------------------------+
|
||||
| |Stopped |
|
||||
| +-----------------------------------------+
|
||||
| |Waiting |
|
||||
+------------------------+-----------------------------------------+
|
||||
|Global CPU Usage | Steal |
|
||||
| +-----------------------------------------+
|
||||
| | Wait |
|
||||
| +-----------------------------------------+
|
||||
| | User |
|
||||
| +-----------------------------------------+
|
||||
| | System |
|
||||
| +-----------------------------------------+
|
||||
| | Interrupt |
|
||||
| +-----------------------------------------+
|
||||
| | Nice |
|
||||
| +-----------------------------------------+
|
||||
| | Idle |
|
||||
+------------------------+-----------------------------------------+
|
||||
|Per CPU Usage | User |
|
||||
| +-----------------------------------------+
|
||||
| | System |
|
||||
+------------------------+-----------------------------------------+
|
||||
|Global memory usage |bandwidth |
|
||||
| +-----------------------------------------+
|
||||
| |Cached |
|
||||
| +-----------------------------------------+
|
||||
| |Buffered |
|
||||
| +-----------------------------------------+
|
||||
| |Free |
|
||||
| +-----------------------------------------+
|
||||
| |Used |
|
||||
| +-----------------------------------------+
|
||||
| |Total |
|
||||
+------------------------+-----------------------------------------+
|
||||
|Numa monitoring |Numa_hit |
|
||||
|For each node +-----------------------------------------+
|
||||
| |Numa_miss |
|
||||
| +-----------------------------------------+
|
||||
| |Numa_foreign |
|
||||
| +-----------------------------------------+
|
||||
| |Local_node |
|
||||
| +-----------------------------------------+
|
||||
| |Other_node |
|
||||
+------------------------+-----------------------------------------+
|
||||
|Numa monitoring |Huge |
|
||||
|For each pid +-----------------------------------------+
|
||||
| |Heap |
|
||||
| +-----------------------------------------+
|
||||
| |Stack |
|
||||
| +-----------------------------------------+
|
||||
| |Private |
|
||||
+------------------------+-----------------------------------------+
|
||||
|Global IOSTAT \+ |Merge reads /s |
|
||||
|Per device IOSTAT +-----------------------------------------+
|
||||
| |Merge write /s |
|
||||
| +-----------------------------------------+
|
||||
| |read/s |
|
||||
| +-----------------------------------------+
|
||||
| |write/s |
|
||||
| +-----------------------------------------+
|
||||
| |Read transfer |
|
||||
| +-----------------------------------------+
|
||||
| |Write transfer |
|
||||
| +-----------------------------------------+
|
||||
| |Read latency |
|
||||
| +-----------------------------------------+
|
||||
| |Write latency |
|
||||
| +-----------------------------------------+
|
||||
| |Write transfer |
|
||||
| +-----------------------------------------+
|
||||
| |Queue size |
|
||||
| +-----------------------------------------+
|
||||
| |Await |
|
||||
+------------------------+-----------------------------------------+
|
||||
|Network per interface |Octets /s (in, out) |
|
||||
| +-----------------------------------------+
|
||||
| |Packet /s (in, out) |
|
||||
| +-----------------------------------------+
|
||||
| |Dropped /s |
|
||||
+------------------------+-----------------------------------------+
|
||||
|Other system metrics |Entropy |
|
||||
| +-----------------------------------------+
|
||||
| |DF per device |
|
||||
+------------------------+-----------------------------------------+
|
||||
|
||||
Kubernetes Layer
|
||||
----------------
|
||||
|
||||
`Kargo`_ from `Fuel-CCP-installer`_ was our main tool to deploy K8S
|
||||
on top of provisioned systems (monitored nodes).
|
||||
|
||||
Kargo sets up Kubernetes in the following way:
|
||||
|
||||
- masters: Calico, Kubernetes API services
|
||||
- nodes: Calico, Kubernetes minion services
|
||||
- etcd: etcd service
|
||||
|
||||
Kargo deployment parameters
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
You can find Kargo deployment script in `Kargo deployment script`_ section
|
||||
|
||||
.. code:: bash
|
||||
|
||||
docker_options: "--insecure-registry 172.20.8.35:5000 -D"
|
||||
upstream_dns_servers: [172.20.8.34, 8.8.4.4]
|
||||
nameservers: [172.20.8.34, 8.8.4.4]
|
||||
kube_service_addresses: 10.224.0.0/12
|
||||
kube_pods_subnet: 10.240.0.0/12
|
||||
kube_network_node_prefix: 22
|
||||
kube_apiserver_insecure_bind_address: "0.0.0.0"
|
||||
dns_replicas: 3
|
||||
dns_cpu_limit: "100m"
|
||||
dns_memory_limit: "512Mi"
|
||||
dns_cpu_requests: "70m"
|
||||
dns_memory_requests: "70Mi"
|
||||
deploy_netchecker: false
|
||||
|
||||
.. table::
|
||||
|
||||
+----------------------+-----------------------------------------+
|
||||
|Software |Version |
|
||||
+----------------------+-----------------------------------------+
|
||||
|`Fuel-CCP-Installer`_ |6fd81252cb2d2c804f388337aa67d4403700f094 |
|
||||
| | |
|
||||
+----------------------+-----------------------------------------+
|
||||
|`Kargo`_ |2c23027794d7851ee31363c5b6594180741ee923 |
|
||||
+----------------------+-----------------------------------------+
|
||||
|
||||
Required K8S metrics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Here we should get K8S health
|
||||
metrics and ETCD performance metrics:
|
||||
|
||||
.. table::
|
||||
|
||||
+------------------------+-----------------------------------------+
|
||||
|ETCD performance metrics|members count / states |
|
||||
| +-----------------------------------------+
|
||||
| |numbers of keys in a cluster |
|
||||
| +-----------------------------------------+
|
||||
| |Size of data set |
|
||||
| +-----------------------------------------+
|
||||
| |Avg. latency from leader to followers |
|
||||
| +-----------------------------------------+
|
||||
| |Bandwidth rate, send/receive |
|
||||
| +-----------------------------------------+
|
||||
| |Create store success/fail |
|
||||
| +-----------------------------------------+
|
||||
| |Get success/fail |
|
||||
| +-----------------------------------------+
|
||||
| |Set success/fail |
|
||||
| +-----------------------------------------+
|
||||
| |Package rate, send/receive |
|
||||
| +-----------------------------------------+
|
||||
| |Expire count |
|
||||
| +-----------------------------------------+
|
||||
| |Update success/fail |
|
||||
| +-----------------------------------------+
|
||||
| |Compare-and-swap success/fail |
|
||||
| +-----------------------------------------+
|
||||
| |Watchers |
|
||||
| +-----------------------------------------+
|
||||
| |Delete success/fail |
|
||||
| +-----------------------------------------+
|
||||
| |Compare-and-delete success/fail |
|
||||
| +-----------------------------------------+
|
||||
| |Append req, send/ receive |
|
||||
+------------------------+-----------------------------------------+
|
||||
|K8S health metrics |Number of node in each state |
|
||||
| +-----------------------------------------+
|
||||
| |Total number of namespaces |
|
||||
| +-----------------------------------------+
|
||||
| |Total number of PODs per cluster,node,ns |
|
||||
| +-----------------------------------------+
|
||||
| |Total of number of services |
|
||||
| +-----------------------------------------+
|
||||
| |Endpoints in each service |
|
||||
| +-----------------------------------------+
|
||||
| |Number of API service instances |
|
||||
| +-----------------------------------------+
|
||||
| |Number of controller instances |
|
||||
| +-----------------------------------------+
|
||||
| |Number of scheduler instances |
|
||||
| +-----------------------------------------+
|
||||
| |Cluster resources, scheduler view |
|
||||
+------------------------+-----------------------------------------+
|
||||
|K8S API log analysis |Number of responses (per each HTTP code) |
|
||||
| +-----------------------------------------+
|
||||
| |Response Time |
|
||||
+------------------------+-----------------------------------------+
|
||||
|
||||
For last two metrics we should utilize log collector to store and parse all
|
||||
log records within K8S environments.
|
||||
|
||||
Openstack Layer
|
||||
-----------------
|
||||
|
||||
CCP stands for "Containerized Control Plane". CCP aims to build, run and manage
|
||||
production-ready OpenStack containers on top of Kubernetes cluster.
|
||||
|
||||
.. table::
|
||||
|
||||
+--------------------+-----------------------------------------+
|
||||
|Software |Version |
|
||||
+--------------------+-----------------------------------------+
|
||||
|`Fuel-CCP`_ |8570d0e0e512bd16f8449f0a10b1e3900fd09b2d |
|
||||
+--------------------+-----------------------------------------+
|
||||
|
||||
|
||||
CCP configuration
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
CCP was deployed on top of 200 nodes K8S cluster in the following configuration:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
node[1-3]: Kubernetes
|
||||
node([4-6])$: # 4-6
|
||||
roles:
|
||||
- controller
|
||||
- openvswitch
|
||||
node[7-9]$: # 7-9
|
||||
roles:
|
||||
- rabbitmq
|
||||
node10$: # 10
|
||||
roles:
|
||||
- galera
|
||||
node11$: # 11
|
||||
roles:
|
||||
- heat
|
||||
node(1[2-9])$: # 12-19
|
||||
roles:
|
||||
- compute
|
||||
- openvswitch
|
||||
node[2-9][0-9]$: # 20-99
|
||||
roles:
|
||||
- compute
|
||||
- openvswitch
|
||||
node(1[0-9][0-9])$: # 100-199
|
||||
roles:
|
||||
- compute
|
||||
- openvswitch
|
||||
node200$:
|
||||
roles:
|
||||
- backup
|
||||
|
||||
|
||||
CCP Openstack services list ( `versions.yaml`_ ):
|
||||
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
openstack/cinder:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/cinder.git
|
||||
openstack/glance:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/glance.git
|
||||
openstack/heat:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/heat.git
|
||||
openstack/horizon:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/horizon.git
|
||||
openstack/keystone:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/keystone.git
|
||||
openstack/neutron:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/neutron.git
|
||||
openstack/nova:
|
||||
git_ref: stable/newton
|
||||
git_url: https://github.com/openstack/nova.git
|
||||
openstack/requirements:
|
||||
git_ref: stable/newton
|
||||
git_url: https://git.openstack.org/openstack/requirements.git
|
||||
openstack/sahara-dashboard:
|
||||
git_ref: stable/newton
|
||||
git_url: https://git.openstack.org/openstack/sahara-dashboard.git
|
||||
|
||||
|
||||
`K8S Ingress Resources`_ rules were enabled during CCP deployment to expose Openstack services
|
||||
endpoints to external routable network.
|
||||
|
||||
|
||||
See CCP deployment script and configuration files in the
|
||||
`CCP deployment and configuration files`_ section.
|
||||
|
||||
Required Openstack-related metrics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
At this layer we should get openstack environment metrics,
|
||||
API and resources utilization metrics.
|
||||
|
||||
.. table:: Required Openstack metrics
|
||||
|
||||
+------------------------+-----------------------------------------+
|
||||
|Openstack metrics |Total number of controller nodes |
|
||||
| +-----------------------------------------+
|
||||
| |Total number of services |
|
||||
| +-----------------------------------------+
|
||||
| |Total number of compute nodes |
|
||||
| +-----------------------------------------+
|
||||
| |Total number of nodes |
|
||||
| +-----------------------------------------+
|
||||
| |Total number of VMs |
|
||||
| +-----------------------------------------+
|
||||
| |Number of VMs per tenant, per node |
|
||||
| +-----------------------------------------+
|
||||
| |Resource utilization per project,service |
|
||||
| +-----------------------------------------+
|
||||
| |Total number of tenants |
|
||||
| +-----------------------------------------+
|
||||
| |API request time |
|
||||
| +-----------------------------------------+
|
||||
| |Mean time to spawn VM |
|
||||
+------------------------+-----------------------------------------+
|
||||
|
||||
Implementation
|
||||
==============
|
||||
|
||||
This part of documentation describes Monitoring System implementation.
|
||||
Here is software list that we chose to realize all required tasks:
|
||||
|
||||
.. table::
|
||||
|
||||
+-----------------------------------------+-----------------------------------------+
|
||||
|Monitoring Node Server Side |Monitored Node Client Side |
|
||||
+--------------------+--------------------+--------------------+--------------------+
|
||||
|Metrics server |Log storage |Metrics agent |Log collector |
|
||||
| | | | |
|
||||
+--------------------+--------------------+--------------------+--------------------+
|
||||
| `Prometheus`_ \+ | `ElasticSearch`_ |`Telegraf`_ | `Heka`_ |
|
||||
| `Grafana`_ | \+ `Kibana`_ | | |
|
||||
+--------------------+--------------------+--------------------+--------------------+
|
||||
|
||||
Server Side Software
|
||||
---------------------
|
||||
|
||||
Prometheus
|
||||
^^^^^^^^^^
|
||||
|
||||
.. table::
|
||||
|
||||
+--------------------+-----------------------------------------+
|
||||
|Software |Version |
|
||||
+--------------------+-----------------------------------------+
|
||||
|`Prometheus GitHub`_|7e369b9318a4d5d97a004586a99f10fa51a46b26 |
|
||||
+--------------------+-----------------------------------------+
|
||||
|
||||
Due to the high load rate, we faced an issue with Prometheus performance at a metrics count of up to 15 million.
|
||||
We split the Prometheus setup into 2 standalone nodes. The first node polls API metrics from K8S-related services
|
||||
that are natively available at the `/metrics` URI and exposed by the K8S API and ETCD API by default.
|
||||
The second node stores all other metrics that should be collected and calculated locally on environment
|
||||
servers via Telegraf.
|
||||
|
||||
Prometheus node deployment scripts and configuration files can be found in the `Prometheus deployment and configuration files`_ section.
|
||||
|
||||
Grafana
|
||||
^^^^^^^
|
||||
|
||||
.. table::
|
||||
|
||||
+--------------------+-----------------------------------------+
|
||||
|Software |Version |
|
||||
+--------------------+-----------------------------------------+
|
||||
|`Grafana`_ |v4.0.1 |
|
||||
+--------------------+-----------------------------------------+
|
||||
|
||||
Grafana was used as a metrics visualizer with several dashboards for each metrics group.
|
||||
Separate individual dashboards were built for each group of metrics:
|
||||
|
||||
- System nodes metrics
|
||||
- Kubernetes metrics
|
||||
- ETCD metrics
|
||||
- Openstack metrics
|
||||
|
||||
You can find their setting at `Grafana dashboards configuration`_
|
||||
|
||||
Grafana server deployment script:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
#!/bin/bash
|
||||
ansible-playbook -i ./hosts ./deploy-graf-prom.yaml --tags "grafana"
|
||||
|
||||
It uses the same yaml configuration file `deploy-graf-prom.yaml`_ from `Prometheus deployment and configuration files`_ section.
|
||||
|
||||
ElasticSearch
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
.. table::
|
||||
|
||||
+--------------------+-----------------------------------------+
|
||||
|Software |Version |
|
||||
+--------------------+-----------------------------------------+
|
||||
|`ElasticSearch`_ |2.4.2 |
|
||||
+--------------------+-----------------------------------------+
|
||||
|
||||
ElasticSearch is well-known proven log storage and we used it as a standalone
|
||||
node for collecting Kubernetes API logs and all other logs from containers across environment.
|
||||
For appropriate performance at the 200-node lab we increased `ES_HEAP_SIZE` from the default 1G to 10G
|
||||
in /etc/default/elasticsearch configuration file.
|
||||
|
||||
ElasticSearch and the Kibana dashboard were installed with the
|
||||
`deploy_elasticsearch_kibana.sh`_ deployment script.
|
||||
|
||||
Kibana
|
||||
^^^^^^
|
||||
|
||||
.. table::
|
||||
|
||||
+--------------------+-----------------------------------------+
|
||||
|Software |Version |
|
||||
+--------------------+-----------------------------------------+
|
||||
|`Kibana`_ |4.5.4 |
|
||||
+--------------------+-----------------------------------------+
|
||||
|
||||
We used Kibana as a main visualization tool for Elastic Search. We were able to create chart
|
||||
graphs based on K8S API logs analysis. Kibana was installed on a single separate node
|
||||
with a single dashboard representing K8S API Response time graph.
|
||||
|
||||
Dashboard settings:
|
||||
|
||||
:download:`Kibana_dashboard.json <configs/dashboards/Kibana_dashboard.json>`
|
||||
|
||||
Client side Software
|
||||
--------------------
|
||||
|
||||
Telegraf
|
||||
^^^^^^^^
|
||||
|
||||
.. table::
|
||||
|
||||
+--------------------+-----------------------------------------+
|
||||
|Software |Version |
|
||||
+--------------------+-----------------------------------------+
|
||||
|`Telegraf`_ |v1.0.0-beta2-235-gbc14ac5 |
|
||||
| |git: openstack_stats |
|
||||
| |bc14ac5b9475a59504b463ad8f82ed810feed3ec |
|
||||
+--------------------+-----------------------------------------+
|
||||
|
||||
Telegraf was chosen as the client-side metrics agent. It provides multiple ways to poll and calculate metrics from a variety of
|
||||
different sources. With regard to its plugin-driven nature, it takes data from different inputs and
|
||||
exposes calculated metrics in Prometheus format. We used forked version of Telegraf with custom patches to
|
||||
be able to utilize custom Openstack-input plugin:
|
||||
|
||||
- `GitHub Telegraf Fork`_
|
||||
- `Go SDK for OpenStack`_
|
||||
|
||||
Following automation scripts and configuration files were used to start Telegraf agent
|
||||
across environment nodes.
|
||||
|
||||
`Telegraf deployment and configuration files`_
|
||||
|
||||
Below you can see which plugins were used to obtain metrics.
|
||||
|
||||
Standard Plugins
|
||||
""""""""""""""""
|
||||
|
||||
.. code:: bash
|
||||
|
||||
inputs.cpu CPU
|
||||
inputs.disk
|
||||
inputs.diskio
|
||||
inputs.kernel
|
||||
inputs.mem
|
||||
inputs.processes
|
||||
inputs.swap
|
||||
inputs.system
|
||||
inputs.kernel_vmstat
|
||||
inputs.net
|
||||
inputs.netstat
|
||||
inputs.exec
|
||||
|
||||
Openstack input plugin
|
||||
""""""""""""""""""""""
|
||||
The `inputs.openstack` custom plugin was used to gather most of the required Openstack-related metrics.
|
||||
|
||||
settings:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
interval = '40s'
|
||||
identity_endpoint = "http://keystone.ccp.svc.cluster.local:5000/v3"
|
||||
domain = "default"
|
||||
project = "admin"
|
||||
username = "admin"
|
||||
password = "password"
|
||||
|
||||
|
||||
`System.exec` plugin
|
||||
""""""""""""""""""""
|
||||
`system.exec` plugin was used to trigger scripts to poll
|
||||
and calculate all non-standard metrics.
|
||||
|
||||
common settings:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
interval = "15s"
|
||||
timeout = "30s"
|
||||
data_format = "influx"
|
||||
|
||||
commands:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
"/opt/telegraf/bin/list_openstack_processes.sh"
|
||||
"/opt/telegraf/bin/per_process_cpu_usage.sh"
|
||||
"/opt/telegraf/bin/numa_stat_per_pid.sh"
|
||||
"/opt/telegraf/bin/iostat_per_device.sh"
|
||||
"/opt/telegraf/bin/memory_bandwidth.sh"
|
||||
"/opt/telegraf/bin/network_tcp_queue.sh"
|
||||
"/opt/telegraf/bin/etcd_get_metrics.sh"
|
||||
"/opt/telegraf/bin/k8s_get_metrics.sh"
|
||||
"/opt/telegraf/bin/vmtime.sh"
|
||||
"/opt/telegraf/bin/osapitime.sh"
|
||||
|
||||
You can see full Telegraf configuration file and its custom input scripts in the
|
||||
section `Telegraf deployment and configuration files`_.
|
||||
|
||||
Heka
|
||||
^^^^
|
||||
|
||||
.. table::
|
||||
|
||||
+--------------------+-----------------------------------------+
|
||||
|Software |Version |
|
||||
+--------------------+-----------------------------------------+
|
||||
|`Heka`_ |0.10.0 |
|
||||
+--------------------+-----------------------------------------+
|
||||
|
||||
We chose Heka as log collecting agent for its wide variety of inputs
|
||||
(the possibility to feed data from the Docker socket), filters (custom shorthand sandbox filters in the Lua language)
|
||||
and possibility to encode data for ElasticSearch.
|
||||
|
||||
With Heka agent started across environment servers we were able to send containers' logs to ElasticSearch
|
||||
server. With a custom Lua filter we extracted K8S API data and converted it into an appropriate format to
|
||||
visualize API timing counters (Average Response Time).
|
||||
|
||||
Heka deployment scripts and configuration file with LUA custom filter are in
|
||||
`Heka deployment and configuration`_ section.
|
||||
|
||||
Applications
|
||||
============
|
||||
|
||||
Kargo deployment script
|
||||
-----------------------
|
||||
|
||||
deploy_k8s_using_kargo.sh
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: configs/deploy_k8s_using_kargo.sh
|
||||
:language: bash
|
||||
|
||||
CCP deployment and configuration files
|
||||
---------------------------------------
|
||||
|
||||
deploy-ccp.sh
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: configs/ccp/deploy-ccp.sh
|
||||
:language: bash
|
||||
|
||||
ccp.yaml
|
||||
^^^^^^^^
|
||||
|
||||
.. literalinclude:: configs/ccp/ccp.yaml
|
||||
:language: yaml
|
||||
|
||||
configs.yaml
|
||||
^^^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: configs/ccp/configs.yaml
|
||||
:language: yaml
|
||||
|
||||
topology.yaml
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: configs/ccp/topology.yaml
|
||||
:language: yaml
|
||||
|
||||
repos.yaml
|
||||
^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: configs/ccp/repos.yaml
|
||||
:language: yaml
|
||||
|
||||
versions.yaml
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: configs/ccp/versions.yaml
|
||||
:language: yaml
|
||||
|
||||
Prometheus deployment and configuration files
|
||||
---------------------------------------------
|
||||
|
||||
Deployment scripts
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
deploy_prometheus.sh
|
||||
""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/deploy_prometheus.sh
|
||||
:language: bash
|
||||
|
||||
deploy-graf-prom.yaml
|
||||
"""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/deploy-graf-prom.yaml
|
||||
:language: yaml
|
||||
|
||||
docker_prometheus.yaml
|
||||
""""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/docker_prometheus.yaml
|
||||
:language: yaml
|
||||
|
||||
deploy_etcd_collect.sh
|
||||
""""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/deploy_etcd_collect.sh
|
||||
:language: bash
|
||||
|
||||
Configuration files
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
prometheus-kuber.yml.j2
|
||||
"""""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/prometheus/prometheus-kuber.yml.j2
|
||||
:language: bash
|
||||
|
||||
prometheus-system.yml.j2
|
||||
""""""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/prometheus/prometheus-system.yml.j2
|
||||
:language: bash
|
||||
|
||||
targets.yml.j2
|
||||
""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/prometheus/targets.yml.j2
|
||||
:language: bash
|
||||
|
||||
Grafana dashboards configuration
|
||||
--------------------------------
|
||||
|
||||
:download:`Systems_nodes_statistics.json <configs/dashboards/Systems_nodes_statistics.json>`
|
||||
|
||||
:download:`Kubernetes_statistics.json <configs/dashboards/Kubernetes_statistics.json>`
|
||||
|
||||
:download:`ETCD.json <configs/dashboards/ETCD.json>`
|
||||
|
||||
:download:`OpenStack.json <configs/dashboards/OpenStack.json>`
|
||||
|
||||
ElasticSearch deployment script
|
||||
-------------------------------
|
||||
|
||||
deploy_elasticsearch_kibana.sh
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: configs/elasticsearch-heka/deploy_elasticsearch_kibana.sh
|
||||
:language: bash
|
||||
|
||||
Telegraf deployment and configuration files
|
||||
-------------------------------------------
|
||||
|
||||
deploy_telegraf.sh
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/deploy_telegraf.sh
|
||||
:language: bash
|
||||
|
||||
deploy-telegraf.yaml
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/deploy-telegraf.yaml
|
||||
:language: yaml
|
||||
|
||||
Telegraf system
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
telegraf-sys.conf
|
||||
"""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/telegraf-sys.conf
|
||||
:language: bash
|
||||
|
||||
Telegraf openstack
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
telegraf-openstack.conf.j2
|
||||
""""""""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/telegraf-openstack.conf.j2
|
||||
:language: bash
|
||||
|
||||
Telegraf inputs scripts
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
list_openstack_processes.sh
|
||||
"""""""""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/list_openstack_processes.sh
|
||||
:language: bash
|
||||
|
||||
per_process_cpu_usage.sh
|
||||
""""""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/per_process_cpu_usage.sh
|
||||
:language: bash
|
||||
|
||||
numa_stat_per_pid.sh
|
||||
""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/numa_stat_per_pid.sh
|
||||
:language: bash
|
||||
|
||||
iostat_per_device.sh
|
||||
""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/iostat_per_device.sh
|
||||
:language: bash
|
||||
|
||||
memory_bandwidth.sh
|
||||
"""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/memory_bandwidth.sh
|
||||
:language: bash
|
||||
|
||||
network_tcp_queue.sh
|
||||
""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/network_tcp_queue.sh
|
||||
:language: bash
|
||||
|
||||
etcd_get_metrics.sh
|
||||
"""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/etcd_get_metrics.sh
|
||||
:language: bash
|
||||
|
||||
k8s_get_metrics.sh
|
||||
""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/k8s_get_metrics.sh
|
||||
:language: bash
|
||||
|
||||
vmtime.sh
|
||||
"""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/vmtime.sh
|
||||
:language: bash
|
||||
|
||||
osapitime.sh
|
||||
""""""""""""
|
||||
|
||||
.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/osapitime.sh
|
||||
:language: bash
|
||||
|
||||
Heka deployment and configuration
|
||||
---------------------------------
|
||||
|
||||
Deployment
|
||||
^^^^^^^^^^
|
||||
|
||||
deploy_heka.sh
|
||||
""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/elasticsearch-heka/deploy_heka.sh
|
||||
:language: bash
|
||||
|
||||
deploy-heka.yaml
|
||||
""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/elasticsearch-heka/deploy-heka.yaml
|
||||
:language: yaml
|
||||
|
||||
Configuration
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
00-hekad.toml.j2
|
||||
""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/elasticsearch-heka/heka/00-hekad.toml.j2
|
||||
:language: bash
|
||||
|
||||
kubeapi_to_int.lua.j2
|
||||
"""""""""""""""""""""
|
||||
|
||||
.. literalinclude:: configs/elasticsearch-heka/heka/kubeapi_to_int.lua.j2
|
||||
:language: bash
|
||||
|
||||
|
||||
.. references:
|
||||
|
||||
.. _Fuel-CCP-Installer: https://github.com/openstack/fuel-ccp-installer
|
||||
.. _Kargo: https://github.com/kubernetes-incubator/kargo.git
|
||||
.. _Fuel-CCP: https://github.com/openstack/fuel-ccp
|
||||
.. _Prometheus: https://prometheus.io/
|
||||
.. _Prometheus GitHub: https://github.com/prometheus/prometheus
|
||||
.. _Grafana: http://grafana.org/
|
||||
.. _ElasticSearch: https://www.elastic.co/products/elasticsearch
|
||||
.. _Kibana: https://www.elastic.co/products/kibana
|
||||
.. _Telegraf: https://www.influxdata.com/time-series-platform/telegraf/
|
||||
.. _GitHub Telegraf Fork: https://github.com/spjmurray/telegraf/tree/openstack_stats/plugins/inputs/openstack
|
||||
.. _Go SDK for OpenStack: https://github.com/rackspace/gophercloud/
|
||||
.. _Heka: https://hekad.readthedocs.io/en/v0.10.0/
|
||||
.. _K8S Ingress Resources: http://kubernetes.io/docs/user-guide/ingress/
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user