# Render every entry of ``command_plugins`` from templates/telegraf-plugins/
# into /opt/telegraf. An entry is only processed when its ``when_group`` flag
# is true and the current host matches the entry's ``group`` value.
- name: Drop telegraf plugin file(s)
  template:
    src: "templates/telegraf-plugins/{{ item.value.plugin_name }}"
    dest: "/opt/telegraf/{{ item.value.plugin_name }}"
    mode: '0755'
  with_dict: "{{ command_plugins }}"
  when:
    - item.value.when_group | bool
    # NOTE(review): ``group`` is declared in this play's vars as a single
    # hostname string, in which case the ``in`` test below is a substring
    # match rather than list membership - confirm that is intended.
    - item.value.group == inventory_hostname or
      inventory_hostname in item.value.group | default([])
# Accumulate the ``command`` list of every matching plugin into the
# ``commands`` fact (uses the same host-selection conditions as the task
# above). Tagged ``always`` so it runs regardless of the tags selected.
- name: Add to command plugins
  set_fact:
    commands: "{{ commands | union(item.value.command) }}"
  with_dict: "{{ command_plugins }}"
  when:
    - item.value.when_group | bool
    - item.value.group == inventory_hostname or
      inventory_hostname in item.value.group | default([])
  tags:
    - always
# Read /root/.my.cnf into ``_my_cnf``; never reported as a change.
- name: Store my_cnf
  slurp:
    src: "/root/.my.cnf"
  register: _my_cnf
  changed_when: false
# Write the decoded content back out owned by telegraf:telegraf, mode 0440.
# NOTE(review): ``dest`` is the same path that was slurped above
# (/root/.my.cnf), so this rewrites root's own file with telegraf ownership
# instead of placing a copy in the telegraf user's home as the task name
# says - confirm the intended destination.
- name: Copy my.cnf file into telegraf home
  copy:
    content: "{{ _my_cnf.content | b64decode }}"
    dest: "/root/.my.cnf"
    mode: "0440"
    owner: "telegraf"
    group: "telegraf"
-54,19 +54,25 @@ Install InfluxDB Install Influx Telegraf +If you wish to install telegraf and point it at a specific target, or list of targets, set the ``influx_telegraf_targets`` +variable in the ``user_variables.yml`` file as a list containing all targets that telegraf should ship metrics to. + .. code-block:: bash openstack-ansible playbook-influx-telegraf.yml --forks 100 Install grafana -If you're proxy'ing grafana you will need to provide the full ``root_path`` when you run the playbook add the following ``-e grafana_root_url='https://cloud.something:8443/grafana/'`` +If you're proxying grafana you will need to provide the full ``root_path``; when you run the playbook, add the following +``-e grafana_root_url='https://cloud.something:8443/grafana/'`` .. code-block:: bash openstack-ansible playbook-grafana.yml -e galera_root_user=root -e galera_address='127.0.0.1' -Once that last playbook is completed you will have a functioning InfluxDB, Telegraf, and Grafana metric collection system active and collecting metrics. Grafana will need some setup, however functional dash boards have been provided in the ``grafana-dashboards`` directory. +Once that last playbook is completed you will have a functioning InfluxDB, Telegraf, and Grafana metric collection system +active and collecting metrics. Grafana will need some setup; however, functional dashboards have been provided in the +``grafana-dashboards`` directory. Install Kapacitor diff --git a/cluster_metrics/templates/telegraf-plugins/ironic_nodes.py b/cluster_metrics/templates/telegraf-plugins/ironic_nodes.py new file mode 100644 index 00000000..a4aa8159 --- /dev/null +++ b/cluster_metrics/templates/telegraf-plugins/ironic_nodes.py @@ -0,0 +1,202 @@ +#!/bin/python +# +# Copyright 2016, Rackspace US, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
def line_return(collection, metric_name):
    """Render a mapping as a single ``<metric> k1=v1,k2=v2`` line.

    :param collection: mapping of field name to value (e.g. a Counter).
    :param metric_name: measurement name placed at the start of the line.
    :returns: the metric name, one space, then the comma-joined ``key=value``
              pairs. Spaces inside keys are replaced with underscores. An
              empty mapping yields the metric name followed by one space.
    """
    fields = ','.join(
        # '%s' % key tolerates non-string keys (e.g. a NULL database value)
        # instead of failing on the missing ``replace`` attribute.
        '%s=%s' % (('%s' % key).replace(' ', '_'), value)
        for key, value in collection.items()
    )
    return '%s %s' % (metric_name, fields)


def run_query(db_name, query):
    """Run ``query`` against ``db_name`` and return every result row.

    Connection settings are read from ``~/.my.cnf`` and LONG columns are
    converted to ``int``.

    :param db_name: name of the database to connect to.
    :param query: SQL statement to execute.
    :returns: the rows produced by ``fetch_row(maxrows=0)``.
    :raises SystemExit: when the query fails with ``OperationalError``.
    """
    db = mysql.connect(
        db=db_name,
        read_default_file=os.path.expanduser('~/.my.cnf'),
        conv={FIELD_TYPE.LONG: int}
    )
    try:
        db.query(query)
        output = db.store_result()
    except mysql.OperationalError:
        # The exception has to be raised; merely instantiating it lets the
        # function fall through and return None to callers that iterate.
        raise SystemExit('DB Query failed')
    else:
        return output.fetch_row(maxrows=0)
    finally:
        db.close()


def _connect():
    """Return the shared OpenStack connection, creating it on first use."""
    if not OS_CONNECTION['conn']:
        OS_CONNECTION['conn'] = os_conn.Connection(**OS_AUTH_ARGS)
    return OS_CONNECTION['conn']


def consumer_db(consumer_id):
    """Resolve a project id to its name, caching results on disk.

    Names are cached in ``cdb.dbm`` inside the system temp directory; on a
    cache miss the name is looked up through the identity API and stored.

    :param consumer_id: project id to resolve.
    :returns: the project name as text, or ``'UNKNOWN'`` when the identity
              API reports the project as not found.
    """
    cdb = dbm.open(os.path.join(tempfile.gettempdir(), 'cdb.dbm'), 'c')
    try:
        project_name = cdb.get(consumer_id)
        if not project_name:
            conn = _connect()
            project_info = conn.identity.get_project(consumer_id)
            project_name = cdb[consumer_id] = project_info['name']
    except os_exp.ResourceNotFound:
        return 'UNKNOWN'
    finally:
        cdb.close()

    # On Python 3 dbm hands back bytes for cached entries; normalise so a
    # cache hit and a cache miss return the same type.
    if bytes is not str and isinstance(project_name, bytes):
        project_name = project_name.decode('utf-8')
    return project_name


def consumer_limits(consumer_id):
    """Return the ``instances`` quota of a project from the compute API.

    :param consumer_id: project id whose quota set is requested.
    :returns: ``quota_set.instances`` from ``/os-quota-sets/<consumer_id>``
              on the internal compute endpoint.
    """
    conn = _connect()
    url = conn.compute.session.get_endpoint(
        interface='internal',
        service_type='compute'
    )
    quota_data = conn.compute.session.get(
        url + '/os-quota-sets/' + consumer_id
    )
    return quota_data.json()['quota_set']['instances']


def _system_type(properties):
    """Return the first ``system_type`` capability value, or None."""
    capabilities = properties.get('capabilities') or ''
    for capability in capabilities.split(','):
        if capability.startswith('system_type'):
            return capability.split(':')[-1]
    return None


def _format_metrics(metrics):
    """Render ``(metric_name, collection)`` pairs, skipping empty ones.

    A line without any field is not valid line protocol, so collections
    that hold no data are left out of the output.
    """
    return [
        line_return(collection=collection, metric_name=metric_name)
        for metric_name, collection in metrics
        if collection
    ]


def main():
    """Collect ironic node statistics and print them one metric per line.

    Reads every row of the ironic ``nodes`` table, tallies provision states,
    usage and ``system_type`` capabilities, maps consumed nodes to their
    owning project through the nova ``instances`` table, and finally prints
    the rendered metric lines.
    """
    system_types = collections.Counter()
    system_types_used = collections.Counter()
    system_states = collections.Counter()
    system_used = collections.Counter()
    system_consumers = collections.Counter()
    system_consumer_limits = dict()
    system_consumer_map = dict()

    nodes = run_query(
        db_name='{{ ironic_galera_database|default("ironic") }}',
        query="""select instance_uuid,properties,provision_state from nodes"""
    )

    for instance_uuid, properties, provision_state in nodes:
        system_states[provision_state] += 1

        system_used['total'] += 1
        if instance_uuid:
            system_used['in_use'] += 1
        else:
            system_used['available'] += 1

        # Nodes without properties or capabilities are still counted above;
        # they just do not contribute to the per-type tallies.
        system_type = _system_type(json.loads(properties or '{}'))
        if system_type is None:
            continue
        system_types[system_type] += 1

        if not instance_uuid:
            continue
        system_types_used[system_type] += 1

        # instance_uuid is the value read from the ironic nodes table above.
        _query = (
            """select project_id from instances where uuid='%s'"""
        ) % instance_uuid
        rows = run_query(
            db_name='{{ nova_galera_database|default("nova") }}',
            query=_query
        )
        if not rows:
            # No matching instance row: nothing to attribute the node to.
            continue
        project_id = rows[0][0]
        project_name = consumer_db(project_id)
        system_consumer_map[project_id] = project_name
        system_consumers[project_name] += 1

    if system_consumers:
        for project_id, project_name in system_consumer_map.items():
            system_consumer_limits[project_name] = consumer_limits(project_id)
        system_used['total_reserved'] = sum(system_consumer_limits.values())

    return_data = _format_metrics([
        ('ironic_node_flavors', system_types),
        ('ironic_node_flavors_used', system_types_used),
        ('ironic_node_states', system_states),
        ('ironic_nodes_used', system_used),
        ('ironic_consumers', system_consumers),
        ('ironic_consumer_limits', system_consumer_limits),
    ])

    for item in return_data:
        print(item)