From c2772c2984e4f29aa6032725e4f7d8680a54ed19 Mon Sep 17 00:00:00 2001
From: Dan Smith
Date: Fri, 8 Apr 2022 08:48:49 -0700
Subject: [PATCH] Gather performance data after tempest

This makes us gather a bunch of consistent statistics after we run
tempest that can be used to measure the impact of a given change.
These are stable metrics such as "number of DB queries made" and "how
much memory each service is using after a tempest run."

Note that this will always run after devstack to generate the JSON
file, but there are two things that control its completeness:

- MYSQL_GATHER_PERFORMANCE must be enabled to get per-db stats
- Unless tls-proxy is enabled, we will only get API stats for keystone

Change-Id: Ie3b1504256dc1c9c6b59634e86fa98494bcb07b1
---
 .zuul.yaml                                    |   1 +
 lib/databases/mysql                           |   9 +
 playbooks/post.yaml                           |   3 +
 roles/capture-performance-data/README.rst     |  25 +++
 .../defaults/main.yaml                        |   3 +
 .../capture-performance-data/tasks/main.yaml  |  15 ++
 stackrc                                       |   4 +
 tools/get-stats.py                            | 155 ++++++++++++++++++
 8 files changed, 215 insertions(+)
 create mode 100644 roles/capture-performance-data/README.rst
 create mode 100644 roles/capture-performance-data/defaults/main.yaml
 create mode 100644 roles/capture-performance-data/tasks/main.yaml
 create mode 100755 tools/get-stats.py
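
The MySQL collection is on by default in the gate
(MYSQL_GATHER_PERFORMANCE in stackrc defaults to True). On a local
deployment it can be disabled via local.conf; an illustrative snippet:

    [[local|localrc]]
    MYSQL_GATHER_PERFORMANCE=False
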
diff --git a/.zuul.yaml b/.zuul.yaml
index 0dda2624d2..329cb527f3 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -389,6 +389,7 @@
         '{{ devstack_log_dir }}/worlddump-latest.txt': logs
         '{{ devstack_full_log}}': logs
         '{{ stage_dir }}/verify_tempest_conf.log': logs
+        '{{ stage_dir }}/performance.json': logs
         '{{ stage_dir }}/apache': logs
         '{{ stage_dir }}/apache_config': logs
         '{{ stage_dir }}/etc': logs
diff --git a/lib/databases/mysql b/lib/databases/mysql
index 0f45273d4b..6b3ea0287c 100644
--- a/lib/databases/mysql
+++ b/lib/databases/mysql
@@ -150,6 +150,15 @@ function configure_database_mysql {
         iniset -sudo $my_conf mysqld log-queries-not-using-indexes 1
     fi
 
+    if [[ "$MYSQL_GATHER_PERFORMANCE" == "True" ]]; then
+        echo "enabling MySQL performance_schema items"
+        # Enable long statement history
+        iniset -sudo $my_conf mysqld \
+            performance-schema-consumer-events-statements-history-long TRUE
+        iniset -sudo $my_conf mysqld \
+            performance_schema_events_statements_history_long_size 1000000
+    fi
+
     restart_service $MYSQL_SERVICE_NAME
 }
diff --git a/playbooks/post.yaml b/playbooks/post.yaml
index 9e66f20e9e..d8d5f6833c 100644
--- a/playbooks/post.yaml
+++ b/playbooks/post.yaml
@@ -20,6 +20,9 @@
   roles:
     - export-devstack-journal
     - apache-logs-conf
+    # This should run as early as possible to make sure we don't skew
+    # the post-tempest results with other activities.
+    - capture-performance-data
     - devstack-project-conf
     # capture-system-logs should be the last role before stage-output
     - capture-system-logs
diff --git a/roles/capture-performance-data/README.rst b/roles/capture-performance-data/README.rst
new file mode 100644
index 0000000000..b7a37c223f
--- /dev/null
+++ b/roles/capture-performance-data/README.rst
@@ -0,0 +1,25 @@
+Generate performance logs for staging
+
+Captures usage information from mysql, systemd, apache logs, and other
+parts of the system and generates a performance.json file in the
+staging directory.
+
+**Role Variables**
+
+.. zuul:rolevar:: stage_dir
+   :default: {{ ansible_user_dir }}
+
+   The base stage directory
+
+.. zuul:rolevar:: devstack_conf_dir
+   :default: /opt/stack
+
+   The base devstack destination directory
+
+.. zuul:rolevar:: debian_suse_apache_deref_logs
+
+   The Apache logs found in the Debian/SUSE locations
+
+.. zuul:rolevar:: redhat_apache_deref_logs
+
+   The Apache logs found in the Red Hat locations
diff --git a/roles/capture-performance-data/defaults/main.yaml b/roles/capture-performance-data/defaults/main.yaml
new file mode 100644
index 0000000000..7bd79f4c4f
--- /dev/null
+++ b/roles/capture-performance-data/defaults/main.yaml
@@ -0,0 +1,3 @@
+devstack_base_dir: /opt/stack
+devstack_conf_dir: "{{ devstack_base_dir }}"
+stage_dir: "{{ ansible_user_dir }}"
diff --git a/roles/capture-performance-data/tasks/main.yaml b/roles/capture-performance-data/tasks/main.yaml
new file mode 100644
index 0000000000..2d2cfe4b8b
--- /dev/null
+++ b/roles/capture-performance-data/tasks/main.yaml
@@ -0,0 +1,15 @@
+- name: Generate statistics
+  shell:
+    executable: /bin/bash
+    cmd: |
+      source {{ devstack_conf_dir }}/stackrc
+      python3 {{ devstack_conf_dir }}/tools/get-stats.py \
+        --db-user="$DATABASE_USER" \
+        --db-pass="$DATABASE_PASSWORD" \
+        --db-host="$DATABASE_HOST" \
+        {{ apache_logs }} > {{ stage_dir }}/performance.json
+  vars:
+    apache_logs: >-
+      {% for i in debian_suse_apache_deref_logs.results | default([]) + redhat_apache_deref_logs.results | default([]) %}
+      --apache-log="{{ i.stat.path }}"
+      {% endfor %}
diff --git a/stackrc b/stackrc
index d22fa88373..c3254dcce4 100644
--- a/stackrc
+++ b/stackrc
@@ -193,6 +193,10 @@ ADDITIONAL_VENV_PACKAGES=${ADITIONAL_VENV_PACKAGES:-""}
 # (currently only implemented for MySQL backend)
 DATABASE_QUERY_LOGGING=$(trueorfalse False DATABASE_QUERY_LOGGING)
 
+# This can be used to turn on various non-default items in the
+# performance_schema that are of interest to us
+MYSQL_GATHER_PERFORMANCE=$(trueorfalse True MYSQL_GATHER_PERFORMANCE)
+
 # Set a timeout for git operations.  If git is still running when the
 # timeout expires, the command will be retried up to 3 times.  This is
 # in the format for timeout(1);
diff --git a/tools/get-stats.py b/tools/get-stats.py
new file mode 100755
index 0000000000..dc0bd0f9e5
--- /dev/null
+++ b/tools/get-stats.py
@@ -0,0 +1,155 @@
+#!/usr/bin/python3
+
+import argparse
+import datetime
+import glob
+import itertools
+import json
+import os
+import re
+import socket
+import subprocess
+import sys
+
+import psutil
+import pymysql
+
+# Results are kept flat (lists of single-level dicts) to avoid an
+# ElasticSearch "mapping explosion" when they are indexed:
+# https://www.elastic.co/blog/found-crash-elasticsearch#mapping-explosion
+
+
+def tryint(value):
+    # Return ints as ints, but pass through things like "[not set]"
+    try:
+        return int(value)
+    except (ValueError, TypeError):
+        return value
+
+
+def get_service_stats(service):
+    # The keys here double as the systemctl properties we ask for
+    stats = {'MemoryCurrent': 0}
+    output = subprocess.check_output(['/usr/bin/systemctl', 'show', service] +
+                                     ['-p%s' % stat for stat in stats])
+    for line in output.decode().split('\n'):
+        if not line:
+            continue
+        stat, val = line.split('=')
+        # Services without memory accounting report "[not set]"
+        stats[stat] = tryint(val)
+
+    return stats
+
+
+def get_services_stats():
+    services = [os.path.basename(s) for s in
+                glob.glob('/etc/systemd/system/devstack@*.service')]
+    return [dict(service=service, **get_service_stats(service))
+            for service in services]
+
+
+def get_process_stats(proc):
+    cmdline = proc.cmdline()
+    if 'python' in cmdline[0]:
+        # Report the script, not the interpreter, as the command
+        cmdline = cmdline[1:]
+    return {'cmd': cmdline[0],
+            'pid': proc.pid,
+            'args': ' '.join(cmdline[1:]),
+            'rss': proc.memory_info().rss}
+
+
+def get_processes_stats(matches):
+    me = os.getpid()
+    procs = psutil.process_iter()
+
+    def proc_matches(proc):
+        return me != proc.pid and any(
+            re.search(match, ' '.join(proc.cmdline()))
+            for match in matches)
+
+    return [
+        get_process_stats(proc)
+        for proc in procs
+        if proc_matches(proc)]
+
+
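+# The per-db stats depend on the performance_schema consumers enabled
+# by MYSQL_GATHER_PERFORMANCE (see lib/databases/mysql). For manual
+# debugging, roughly the same numbers can be pulled by hand with:
+#
+#   SELECT COUNT(*) AS queries, current_schema AS db
+#     FROM performance_schema.events_statements_history_long
+#    GROUP BY current_schema;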
+def get_db_stats(host, user, passwd):
+    dbs = []
+    db = pymysql.connect(host=host, user=user, password=passwd,
+                         database='performance_schema',
+                         cursorclass=pymysql.cursors.DictCursor)
+    with db:
+        with db.cursor() as cur:
+            cur.execute(
+                'SELECT COUNT(*) AS queries,current_schema AS db FROM '
+                'events_statements_history_long GROUP BY current_schema')
+            for row in cur:
+                dbs.append({k: tryint(v) for k, v in row.items()})
+    return dbs
+
+
+def get_http_stats_for_log(logfile):
+    stats = {}
+    for line in open(logfile).readlines():
+        # Example of an access-log line this matches (illustrative):
+        #   ... "GET /compute/v2.1/servers/detail HTTP/1.1" 200 1895
+        m = re.search('"([A-Z]+) /([^" ]+)( HTTP/1.1)?" '
+                      '([0-9]{3}) ([0-9]+)',
+                      line)
+        if m:
+            method = m.group(1)
+            path = m.group(2)
+            status = m.group(4)
+            size = int(m.group(5))
+
+            try:
+                service, rest = path.split('/', 1)
+            except ValueError:
+                # Root calls like "GET /identity"
+                service = path
+                rest = ''
+
+            stats.setdefault(service, {'largest': 0})
+            stats[service].setdefault(method, 0)
+            stats[service][method] += 1
+            stats[service]['largest'] = max(stats[service]['largest'], size)
+
+    # Flatten this for ES
+    return [{'service': service, 'log': os.path.basename(logfile),
+             **vals}
+            for service, vals in stats.items()]
+
+
+def get_http_stats(logfiles):
+    return list(itertools.chain.from_iterable(get_http_stats_for_log(log)
+                                              for log in logfiles))
+
+
+def get_report_info():
+    return {
+        'timestamp': datetime.datetime.now().isoformat(),
+        'hostname': socket.gethostname(),
+    }
+
+
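+# The emitted JSON has one top-level key per collector; its shape is
+# roughly the following (values illustrative):
+#
+#   {"services":  [{"service": "devstack@keystone.service",
+#                   "MemoryCurrent": 1234567}, ...],
+#    "db":        [{"queries": 1234, "db": "keystone"}, ...],
+#    "processes": [{"cmd": "...", "pid": 123, "args": "...",
+#                   "rss": 1234567}, ...],
+#    "api":       [{"service": "identity", "log": "...",
+#                   "largest": 1895, "GET": 10, ...}, ...],
+#    "report":    {"timestamp": "...", "hostname": "..."}}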
+if __name__ == '__main__':
+    process_defaults = ['privsep', 'mysqld', 'erlang', 'etcd']
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--db-user', default='root',
+                        help=('MySQL user for collecting stats '
+                              '(default: "root")'))
+    parser.add_argument('--db-pass', default=None,
+                        help='MySQL password for db-user')
+    parser.add_argument('--db-host', default='localhost',
+                        help='MySQL hostname')
+    parser.add_argument('--apache-log', action='append', default=[],
+                        help='Collect API call stats from this apache log')
+    parser.add_argument('--process', action='append',
+                        default=process_defaults,
+                        help=('Include process stats for this cmdline regex '
+                              '(default is %s)' % ','.join(process_defaults)))
+    args = parser.parse_args()
+
+    data = {
+        'services': get_services_stats(),
+        # Only hit the database if we were given credentials
+        'db': (get_db_stats(args.db_host, args.db_user, args.db_pass)
+               if args.db_pass else []),
+        'processes': get_processes_stats(args.process),
+        'api': get_http_stats(args.apache_log),
+        'report': get_report_info(),
+    }
+
+    print(json.dumps(data, indent=2))
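
Since the whole point of performance.json is comparability between
runs, a consumer can stay very small. A sketch (a hypothetical helper,
not part of this change) that diffs post-run service memory between
two captured files:

    import json
    import sys

    # Usage: python3 compare-perf.py before.json after.json
    before, after = (json.load(open(f)) for f in sys.argv[1:3])
    mem_before = {s['service']: s['MemoryCurrent']
                  for s in before['services']}
    mem_after = {s['service']: s['MemoryCurrent']
                 for s in after['services']}
    for svc in sorted(mem_before.keys() & mem_after.keys()):
        b, a = mem_before[svc], mem_after[svc]
        # MemoryCurrent may be "[not set]" if accounting is off
        if isinstance(b, int) and isinstance(a, int) and a != b:
            print('%s: %+d bytes' % (svc, a - b))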