Expose the carbon port (2003) when installing Carbon/Graphite and docs

* Expose carbon_cache_port in the config file and open it in the firewall
* Small cleanup of the Graphite/Grafana playbooks for consistency
* Docs for building a monitoring host

Change-Id: I9e15eac3ce749c341ebf464f165f73c0a87212e5
akrzos 2017-06-01 16:25:48 -04:00
parent 9668743887
commit c30e8b950c
8 changed files with 294 additions and 83 deletions

View File

@@ -1,10 +1,10 @@
 ---
 #
-# Playbook to install grafana
+# Playbook to install Grafana
 #
 
 - hosts: grafana
   remote_user: root
   roles:
-    - { role: epel }
-    - { role: grafana }
+    - epel
+    - grafana

View File

@@ -1,10 +1,10 @@
 ---
 #
-# Playbook to install graphite-web
+# Playbook to install Carbon and Graphite
 #
 
 - hosts: graphite
   remote_user: root
   roles:
-    - { role: epel }
-    - { role: graphite }
+    - epel
+    - graphite

View File

@@ -220,12 +220,13 @@ graphite_web_docker_image: kambiz/graphite-web:0.9.15
 grafana_docker_image: grafana/grafana:2.6.0
 ########################################
-# Graphite Configuration
+# Carbon/Graphite Configuration
 ########################################
 # Graphite Server ip address (Collectd -> Graphite server)
 # you must fill out graphite_host prior to playbook execution
 graphite_host:
 graphite_port: 80
+carbon_cache_port: 2003
 # Graphite prefix / Cloud name used both with graphite and grafana dashboards
 graphite_prefix: openstack
 # Graphite username and password for login on the dashboard

View File

@@ -1,6 +1,18 @@
+---
+#
+# Carbon and Graphite Handlers
+#
+
 - name: restart apache
-  service: name=httpd state=restarted enabled=true
+  service:
+    name: httpd
+    state: restarted
+    enabled: true
   become: true
+
 - name: restart carbon-cache
-  service: name=carbon-cache state=restarted enabled=true
+  service:
+    name: carbon-cache
+    state: restarted
+    enabled: true
   become: true

View File

@@ -4,7 +4,9 @@
 #
 - name: Install graphite rpms
-  yum: name={{ item }} state=present
+  yum:
+    name: "{{ item }}"
+    state: present
   become: true
   with_items:
     - graphite-web
@@ -18,11 +20,11 @@
 - name: Copy setup-graphite-db.exp
   copy:
-    src=setup-graphite-db.exp
-    dest=/root/setup-graphite-db.exp
-    owner=root
-    group=root
-    mode=0755
+    src: setup-graphite-db.exp
+    dest: /root/setup-graphite-db.exp
+    owner: root
+    group: root
+    mode: 0755
   become: true
 
 - name: Create initial graphite db
@@ -30,18 +32,18 @@
   become: true
   when: graphite_db_installed.rc != 0
   notify:
     - restart apache
 
 - name: Setup httpd graphite-web config
   template:
-    src=graphite-web.conf.j2
-    dest=/etc/httpd/conf.d/graphite-web.conf
-    owner=root
-    group=root
-    mode=0644
+    src: graphite-web.conf.j2
+    dest: /etc/httpd/conf.d/graphite-web.conf
+    owner: root
+    group: root
+    mode: 0644
   become: true
   notify:
     - restart apache
 
 ### begin firewall ###
 # we need TCP/80 open
@@ -68,6 +70,12 @@
   register: firewalld_graphite_port_exists
   no_log: true
 
+- name: (carbon) Determine if TCP/{{carbon_cache_port}} is already active
+  shell: firewall-cmd --list-ports | egrep -q "^{{carbon_cache_port}}/tcp"
+  ignore_errors: true
+  register: firewalld_carbon_cache_port_exists
+  no_log: true
+
 # add firewall rule via firewall-cmd
 - name: (graphite-web) Add firewall rule for TCP/{{graphite_port}} (firewalld)
   command: "{{ item }}"
@@ -78,6 +86,16 @@
   become: true
   when: firewalld_in_use.rc == 0 and firewalld_is_active.rc == 0 and firewalld_graphite_port_exists.rc != 0
 
+# add firewall rule via firewall-cmd
+- name: (carbon) Add firewall rule for TCP/{{carbon_cache_port}} (firewalld)
+  command: "{{ item }}"
+  with_items:
+    - firewall-cmd --zone=public --add-port={{carbon_cache_port}}/tcp --permanent
+    - firewall-cmd --reload
+  ignore_errors: true
+  become: true
+  when: firewalld_in_use.rc == 0 and firewalld_is_active.rc == 0 and firewalld_carbon_cache_port_exists.rc != 0
+
 # iptables-services
 - name: (graphite-web) check firewall rules for TCP/{{graphite_port}} (iptables-services)
   shell: grep "dport {{graphite_port}} \-j ACCEPT" /etc/sysconfig/iptables | wc -l
@@ -86,6 +104,13 @@
   failed_when: iptables_graphite_port_exists == 127
   no_log: true
 
+- name: (carbon) check firewall rules for TCP/{{carbon_cache_port}} (iptables-services)
+  shell: grep "dport {{carbon_cache_port}} \-j ACCEPT" /etc/sysconfig/iptables | wc -l
+  ignore_errors: true
+  register: iptables_carbon_cache_port_exists
+  failed_when: iptables_carbon_cache_port_exists == 127
+  no_log: true
+
 - name: (graphite-web) Add firewall rule for TCP/{{graphite_port}} (iptables-services)
   lineinfile:
     dest: /etc/sysconfig/iptables
@@ -96,6 +121,16 @@
   when: firewalld_in_use.rc != 0 and firewalld_is_active.rc != 0 and iptables_graphite_port_exists.stdout|int == 0
   register: iptables_needs_restart
 
+- name: (carbon) Add firewall rule for TCP/{{carbon_cache_port}} (iptables-services)
+  lineinfile:
+    dest: /etc/sysconfig/iptables
+    line: '-A INPUT -p tcp -m tcp --dport {{carbon_cache_port}} -j ACCEPT'
+    regexp: '^INPUT -i lo -j ACCEPT'
+    insertbefore: '-A INPUT -i lo -j ACCEPT'
+    backup: yes
+  when: firewalld_in_use.rc != 0 and firewalld_is_active.rc != 0 and iptables_carbon_cache_port_exists.stdout|int == 0
+  register: iptables_needs_restart
+
 - name: (graphite-web) Restart iptables-services for TCP/{{graphite_port}} (iptables-services)
   shell: systemctl restart iptables.service
   ignore_errors: true
@@ -103,57 +138,51 @@
 ### end firewall ###
 
-# Start graphite-web service
 - name: Setup httpd service
-  service: name=httpd state=started enabled=true
+  service:
+    name: httpd
+    state: started
+    enabled: true
   become: true
 
-# remove silly welcome from apache (if it exists)
 - name: Remove httpd welcome config
   become: true
-  file: path=/etc/httpd/conf.d/welcome.conf state=absent
+  file:
+    path: /etc/httpd/conf.d/welcome.conf
+    state: absent
   notify:
     - restart apache
 
-#
-# setup the python-carbon service
-#
 - name: Setup carbon-cache service
-  service: name=carbon-cache state=started enabled=true
+  service:
+    name: carbon-cache
+    state: started
+    enabled: true
   become: true
 
-- name: copy carbon storage schema config
+- name: Copy Carbon storage scheme and aggregation config files
   copy:
-    src=storage-schemas.conf
-    dest=/etc/carbon/storage-schemas.conf
-    owner=root
-    group=root
-    mode=0644
+    src: "{{item.src}}"
+    dest: "{{item.dest}}"
+    owner: root
+    group: root
+    mode: 0644
+  become: true
+  with_items:
+    - src: storage-schemas.conf
+      dest: /etc/carbon/storage-schemas.conf
+    - src: storage-aggregation.conf
+      dest: /etc/carbon/storage-aggregation.conf
+  notify:
+    - restart carbon-cache
+
+- name: Configure carbon.conf
+  template:
+    src: carbon.conf.j2
+    dest: /etc/carbon/carbon.conf
+    owner: root
+    group: root
+    mode: 0644
   become: true
   notify:
     - restart carbon-cache
-
-- name: copy carbon storage aggregation config
-  copy:
-    src=storage-aggregation.conf
-    dest=/etc/carbon/storage-aggregation.conf
-    owner=root
-    group=root
-    mode=0644
-  become: true
-  notify:
-    - restart carbon-cache
-
-- name: copy carbon config
-  copy:
-    src=carbon.conf
-    dest=/etc/carbon/carbon.conf
-    owner=root
-    group=root
-    mode=0644
-  become: true
-  notify:
-    - restart carbon-cache

View File

@@ -8,7 +8,7 @@
 # Defaults to ../
 # GRAPHITE_CONF_DIR - Configuration directory (where this file lives).
 # Defaults to $GRAPHITE_ROOT/conf/
-# GRAPHITE_STORAGE_DIR - Storage directory for whipser/rrd/log/pid files.
+# GRAPHITE_STORAGE_DIR - Storage directory for whisper/rrd/log/pid files.
 # Defaults to $GRAPHITE_ROOT/storage/
 #
 # To change other directory paths, add settings to this file. The following
@@ -37,15 +37,14 @@ CONF_DIR = /etc/carbon/
 LOG_DIR = /var/log/carbon/
 PID_DIR = /var/run/
 
-# Enable daily log rotation. If disabled, a kill -HUP can be used after a manual rotate
+# Enable daily log rotation. If disabled, carbon will automatically re-open
+# the file if it's rotated out of place (e.g. by logrotate daemon)
 ENABLE_LOGROTATION = True
 
 # Specify the user to drop privileges to
 # If this is blank carbon runs as the user that invokes it
 # This user must have write access to the local data directory
 USER = carbon
 
 #
 # NOTE: The above settings must be set under [relay] and [aggregator]
 # to take effect for those daemons as well
@@ -69,16 +68,24 @@ MAX_UPDATES_PER_SECOND = 500
 # MAX_UPDATES_PER_SECOND_ON_SHUTDOWN = 1000
 
 # Softly limits the number of whisper files that get created each minute.
-# Setting this value low (like at 50) is a good way to ensure your graphite
+# Setting this value low (e.g. 50) is a good way to ensure that your carbon
 # system will not be adversely impacted when a bunch of new metrics are
-# sent to it. The trade off is that it will take much longer for those metrics'
-# database files to all get created and thus longer until the data becomes usable.
-# Setting this value high (like "inf" for infinity) will cause graphite to create
-# the files quickly but at the risk of slowing I/O down considerably for a while.
+# sent to it. The trade off is that any metrics received in excess of this
+# value will be silently dropped, and the whisper file will not be created
+# until such point as a subsequent metric is received and fits within the
+# defined rate limit. Setting this value high (like "inf" for infinity) will
+# cause carbon to create the files quickly but at the risk of increased I/O.
 MAX_CREATES_PER_MINUTE = 2000
 
+# Set the interface and port for the line (plain text) listener. Setting the
+# interface to 0.0.0.0 listens on all interfaces. Port can be set to 0 to
+# disable this listener if it is not required.
 LINE_RECEIVER_INTERFACE = 0.0.0.0
-LINE_RECEIVER_PORT = 2003
+LINE_RECEIVER_PORT = {{carbon_cache_port}}
+
+# Set the TCP backlog for the listen socket created by the line receiver. You
+# shouldn't change this unless you know what you're doing.
+# LINE_RECEIVER_BACKLOG = 1024
 
 # Set this to True to enable the UDP listener. By default this is off
 # because it is very common to run multiple carbon daemons and managing
@@ -87,9 +94,16 @@ ENABLE_UDP_LISTENER = False
 UDP_RECEIVER_INTERFACE = 0.0.0.0
 UDP_RECEIVER_PORT = 2003
 
+# Set the interface and port for the pickle listener. Setting the interface to
+# 0.0.0.0 listens on all interfaces. Port can be set to 0 to disable this
+# listener if it is not required.
 PICKLE_RECEIVER_INTERFACE = 0.0.0.0
 PICKLE_RECEIVER_PORT = 2004
 
+# Set the TCP backlog for the listen socket created by the pickle receiver. You
+# shouldn't change this unless you know what you're doing.
+# PICKLE_RECEIVER_BACKLOG = 1024
+
 # Set to false to disable logging of successful connections
 LOG_LISTENER_CONNECTIONS = True
@@ -101,6 +115,10 @@ USE_INSECURE_UNPICKLER = False
 CACHE_QUERY_INTERFACE = 0.0.0.0
 CACHE_QUERY_PORT = 7002
 
+# Set the TCP backlog for the listen socket created by the cache query
+# listener. You shouldn't change this unless you know what you're doing.
+# CACHE_QUERY_BACKLOG = 1024
+
 # Set this to False to drop datapoints received after the cache
 # reaches MAX_CACHE_SIZE. If this is True (the default) then sockets
 # over which metrics are received will temporarily stop accepting
@@ -224,7 +242,6 @@ WHISPER_FALLOCATE_CREATE = True
 [relay]
-USER = carbon
 LINE_RECEIVER_INTERFACE = 0.0.0.0
 LINE_RECEIVER_PORT = 2013
 PICKLE_RECEIVER_INTERFACE = 0.0.0.0
@@ -253,6 +270,14 @@ RELAY_METHOD = rules
 # datapoint to more than one machine.
 REPLICATION_FACTOR = 1
 
+# For REPLICATION_FACTOR >=2, set DIVERSE_REPLICAS to True to guarantee replicas
+# across distributed hosts. With this setting disabled, it's possible that replicas
+# may be sent to different caches on the same host. This has been the default
+# behavior since introduction of 'consistent-hashing' relay method.
+# Note that enabling this on an existing pre-0.9.14 cluster will require rebalancing
+# your metrics across the cluster nodes using a tool like Carbonate.
+#DIVERSE_REPLICAS = False
+
 # This is a list of carbon daemons we will send any relayed or
 # generated metrics to. The default provided would send to a single
 # carbon-cache instance on the default port. However if you
@@ -273,13 +298,22 @@ DESTINATIONS = 127.0.0.1:2004
 # This defines the maximum "message size" between carbon daemons.
 # You shouldn't need to tune this unless you really know what you're doing.
-MAX_DATAPOINTS_PER_MESSAGE = 2000
-MAX_QUEUE_SIZE = 400000
+MAX_DATAPOINTS_PER_MESSAGE = 500
+MAX_QUEUE_SIZE = 10000
+
+# This is the percentage that the queue must be empty before it will accept
+# more messages. For a larger site, if the queue is very large it makes sense
+# to tune this to allow for incoming stats. So if you have an average
+# flow of 100k stats/minute, and a MAX_QUEUE_SIZE of 3,000,000, it makes sense
+# to allow stats to start flowing when you've cleared the queue to 95% since
+# you should have space to accommodate the next minute's worth of stats
+# even before the relay incrementally clears more of the queue
+QUEUE_LOW_WATERMARK_PCT = 0.8
 
 # Set this to False to drop datapoints when any send queue (sending datapoints
 # to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the
 # default) then sockets over which metrics are received will temporarily stop accepting
-# data until the send queues fall below 80% MAX_QUEUE_SIZE.
+# data until the send queues fall below QUEUE_LOW_WATERMARK_PCT * MAX_QUEUE_SIZE.
 USE_FLOW_CONTROL = True
 
 # Set this to True to enable whitelisting and blacklisting of metrics in
@@ -295,7 +329,6 @@ USE_FLOW_CONTROL = True
 [aggregator]
-USER = carbon
 LINE_RECEIVER_INTERFACE = 0.0.0.0
 LINE_RECEIVER_PORT = 2023
@@ -307,8 +340,14 @@ LOG_LISTENER_CONNECTIONS = True
 # If set true, metric received will be forwarded to DESTINATIONS in addition to
 # the output of the aggregation rules. If set false the carbon-aggregator will
-# only ever send the output of aggregation.
-FORWARD_ALL = True
+# only ever send the output of aggregation. Default value is set to false and will not forward
+FORWARD_ALL = False
+
+# Filenames of the configuration files to use for this instance of aggregator.
+# Filenames are relative to CONF_DIR.
+#
+# AGGREGATION_RULES = aggregation-rules.conf
+# REWRITE_RULES = rewrite-rules.conf
 
 # This is a list of carbon daemons we will send any relayed or
 # generated metrics to. The default provided would send to a single

View File

@@ -373,3 +373,133 @@ will be available via http on different ports.
from multiple Browbeat users at the same time will introduce variation into
resulting performance data if the machine on which Browbeat is installed is
resource constrained.
==================================
Additional Components Installation
==================================
Install Monitoring Host (Carbon/Graphite/Grafana)
-------------------------------------------------
A monitoring host exposes System and Application performance metrics to the
Browbeat user via Grafana. When you encounter a performance issue, it helps
reveal what is causing the bottleneck.
Prerequisites
~~~~~~~~~~~~~
Hardware
* Baremetal or Virtual Machine
* SSD storage
Operating System
* RHEL 7
* CentOS 7
Repos
* Red Hat Enterprise Linux 7Server - x86_64 - Server
* Red Hat Enterprise Linux 7Server - x86_64 - Server Optional
RPM
* epel-release
* ansible
* git
Installation
~~~~~~~~~~~~
1. Deploy a machine (RHEL 7 is used in this example)
2. Install RPMS
::
[root@dhcp23-93 ~]# yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
...
[root@dhcp23-93 ~]# yum install -y ansible git
3. Clone Browbeat
::
[root@dhcp23-93 ~]# git clone https://github.com/openstack/browbeat.git
Cloning into 'browbeat'...
remote: Counting objects: 7533, done.
remote: Compressing objects: 100% (38/38), done.
remote: Total 7533 (delta 30), reused 36 (delta 23), pack-reused 7469
Receiving objects: 100% (7533/7533), 5.26 MiB | 5.79 MiB/s, done.
Resolving deltas: 100% (4330/4330), done.
4. Add a hosts file to the ansible directory
::
[root@dhcp23-93 ~]# cd browbeat/ansible/
[root@dhcp23-93 ansible]# vi hosts
The content of the hosts file should be the following
::
[graphite]
localhost
[grafana]
localhost
5. Set up the SSH config and SSH key, and copy the key to the host so Ansible can connect
::
[root@dhcp23-93 ansible]# touch ssh-config
[root@dhcp23-93 ansible]# ssh-keygen
Generating public/private rsa key pair.
...
[root@dhcp23-93 ansible]# ssh-copy-id root@localhost
...
6. Edit install variables
::
[root@dhcp23-93 ansible]# vi install/group_vars/all.yml
Depending on the environment, you may need to edit more than just the
graphite_host and grafana_host variables.
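For the single-host example above, the relevant settings in
install/group_vars/all.yml might end up looking like the sketch below; the
10.0.0.10 address is only a placeholder for this monitoring host, and the
ports shown are the Browbeat defaults

::

# Where collectd clients and Grafana should find Graphite (placeholder address)
graphite_host: 10.0.0.10
grafana_host: 10.0.0.10
# Defaults; carbon_cache_port is the plain-text Carbon listener the playbook also opens in the firewall
graphite_port: 80
carbon_cache_port: 2003
# Prefix / cloud name used for Graphite metrics and the Grafana dashboards
graphite_prefix: openstack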
7. Install Carbon and Graphite via Ansible playbook
::
[root@dhcp23-93 ansible]# ansible-playbook -i hosts install/graphite.yml
...
8. Install Grafana via Ansible playbook
::
[root@dhcp23-93 ansible]# ansible-playbook -i hosts install/grafana.yml
...
9. Install Grafana dashboards via Ansible playbook
::
[root@dhcp23-93 ansible]# ansible-playbook -i hosts install/grafana-dashboards.yml -e 'cloud_dashboards=false'
...
Now navigate to http://monitoring-host-address:3000 to verify that Grafana is
installed, the Graphite data source exists, and the custom dashboards are
uploaded.
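To spot-check that Carbon is accepting data, you can send a single hand-written
datapoint to the plain-text listener (TCP/2003, the default carbon_cache_port
opened in the firewall by the playbook) using Carbon's line format of
"metric.path value timestamp". A minimal sketch, assuming nc/ncat is installed
(some nc variants need a flag such as -w1 or -q0 to close the connection after
sending)

::

[root@dhcp23-93 ansible]# firewall-cmd --list-ports
[root@dhcp23-93 ansible]# echo "test.monitoring.sanity 1 $(date +%s)" | nc localhost 2003

When firewalld is in use, the first command should include 2003/tcp in its
output, and after a minute or so the test.monitoring.sanity metric should be
browsable in the Graphite web UI on port 80.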
You can now point other clouds at this host in order to view System and
Application performance metrics. Depending on the number of clouds and
machines sending metrics to your monitoring server, you may need to add more
disk I/O capacity, disk storage, or additional carbon-cache and carbon-relay
processes to keep up with the volume of metrics. A Graphite dashboard is
included, and it is recommended to install collectd on the monitoring host
itself so you can see whether the monitoring host is hitting resource limits.
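As an illustration of what "pointing a cloud at this host" means, any machine
running collectd can ship its metrics to the Carbon listener with collectd's
write_graphite plugin. A minimal sketch, assuming the monitoring host is
reachable as monitoring-host-address and the default openstack prefix is kept
(the Node name is arbitrary)

::

LoadPlugin write_graphite
<Plugin write_graphite>
  <Node "browbeat">
    Host "monitoring-host-address"
    Port "2003"
    Protocol "tcp"
    # Matches graphite_prefix so metrics line up with the Grafana dashboards
    Prefix "openstack."
  </Node>
</Plugin>

This snippet is only illustrative of what ultimately connects to
carbon_cache_port; adjust the address and prefix to match your group_vars.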

View File

@@ -1,6 +1,6 @@
-=====================
-Browbeat Introduction
-=====================
+============
+Introduction
+============
This started as a project to help determine the number of database This started as a project to help determine the number of database
connections a given OpenStack deployment uses via stress tests. It has connections a given OpenStack deployment uses via stress tests. It has