From aa3d640122e633493f535e7099445be3ea1a98d1 Mon Sep 17 00:00:00 2001 From: Scott Little Date: Wed, 1 Aug 2018 12:29:58 -0400 Subject: [PATCH] Relocate parsers to stx-integ/tools/engtools/parsers Move content from stx-utils into stx-integ or stx-update Packages will be relocated to stx-update: enable-dev-patch extras stx-integ: config-files/ io-scheduler filesystem/ filesystem-scripts grub/ grubby logging/ logmgmt tools/ collector monitor-tools tools/engtools/ hostdata-collectors parsers utilities/ build-info branding (formerly wrs-branding) platform-util Change-Id: I5613b2a2240f723295fbbd2783786922ef5d0f8b Story: 2002801 Task: 22687 Signed-off-by: Scott Little --- tools/engtools/parsers/README | 1 + .../parsers/common/cleanup-uncompressed.sh | 33 + .../engtools/parsers/common/csv-to-influx.py | 474 ++++++ .../engtools/parsers/common/download-data.sh | 43 + tools/engtools/parsers/common/parse-all.sh | 376 +++++ .../parsers/common/parse-controllers.sh | 52 + tools/engtools/parsers/common/parse-daily.sh | 115 ++ .../engtools/parsers/common/parse-postgres.sh | 209 +++ .../parsers/common/parse-rabbitmq-queue.sh | 57 + .../engtools/parsers/common/parse-rabbitmq.sh | 161 ++ .../engtools/parsers/common/parse-schedtop.sh | 99 ++ tools/engtools/parsers/common/parse-util.sh | 26 + tools/engtools/parsers/core/parse_filestats | 896 ++++++++++++ tools/engtools/parsers/core/parse_iostat | 228 +++ tools/engtools/parsers/core/parse_memstats | 1009 +++++++++++++ tools/engtools/parsers/core/parse_netstats | 229 +++ tools/engtools/parsers/core/parse_postgres | 202 +++ tools/engtools/parsers/core/parse_schedtop | 1301 +++++++++++++++++ tools/engtools/parsers/host.conf | 88 ++ tools/engtools/parsers/lab.conf | 100 ++ .../parsers/large_office/download-computes.sh | 25 + .../parsers/large_office/parse-computes.sh | 71 + .../parsers/large_office/parse-everything.sh | 14 + .../parsers/large_office/parse-vswitch.sh | 68 + 24 files changed, 5877 insertions(+) create mode 100644 tools/engtools/parsers/README create mode 100755 tools/engtools/parsers/common/cleanup-uncompressed.sh create mode 100755 tools/engtools/parsers/common/csv-to-influx.py create mode 100755 tools/engtools/parsers/common/download-data.sh create mode 100755 tools/engtools/parsers/common/parse-all.sh create mode 100755 tools/engtools/parsers/common/parse-controllers.sh create mode 100755 tools/engtools/parsers/common/parse-daily.sh create mode 100755 tools/engtools/parsers/common/parse-postgres.sh create mode 100755 tools/engtools/parsers/common/parse-rabbitmq-queue.sh create mode 100755 tools/engtools/parsers/common/parse-rabbitmq.sh create mode 100755 tools/engtools/parsers/common/parse-schedtop.sh create mode 100755 tools/engtools/parsers/common/parse-util.sh create mode 100755 tools/engtools/parsers/core/parse_filestats create mode 100755 tools/engtools/parsers/core/parse_iostat create mode 100755 tools/engtools/parsers/core/parse_memstats create mode 100755 tools/engtools/parsers/core/parse_netstats create mode 100755 tools/engtools/parsers/core/parse_postgres create mode 100755 tools/engtools/parsers/core/parse_schedtop create mode 100644 tools/engtools/parsers/host.conf create mode 100644 tools/engtools/parsers/lab.conf create mode 100755 tools/engtools/parsers/large_office/download-computes.sh create mode 100755 tools/engtools/parsers/large_office/parse-computes.sh create mode 100755 tools/engtools/parsers/large_office/parse-everything.sh create mode 100755 tools/engtools/parsers/large_office/parse-vswitch.sh diff --git 
a/tools/engtools/parsers/README b/tools/engtools/parsers/README new file mode 100644 index 000000000..5400b89a8 --- /dev/null +++ b/tools/engtools/parsers/README @@ -0,0 +1 @@ +SE tools wiki: http://wiki.wrs.com/PBUeng/InformationAboutSEToolsAndDataAnalysis diff --git a/tools/engtools/parsers/common/cleanup-uncompressed.sh b/tools/engtools/parsers/common/cleanup-uncompressed.sh new file mode 100755 index 000000000..3bd1fa21f --- /dev/null +++ b/tools/engtools/parsers/common/cleanup-uncompressed.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +#Copyright (c) 2016 Wind River Systems, Inc. +# +#SPDX-License-Identifier: Apache-2.0 +# +# This script removes uncompressed file. It can save a huge amount of disk space +# on the analysis server. Run this script after the very last time the data is parsed +# and BEFORE running parse-daily.sh script. +# If it is run after each intermediary parse, the download-data.sh script will download the +# uncompressed files again. + +if [ ! -f lab.conf ]; then + echo "Lab configuration file is missing." + echo "See http://wiki.wrs.com/PBUeng/TitaniumServerSysengToolsAndDataAnalysis for more info." + exit 1 +fi + +source ./lab.conf +YEAR=`date +'%Y'` + +files="${FILE_LIST// /, }" +read -p "Are you sure you want to remove all uncompressed $files files? [Y/N]: " -n 1 -r +echo +if [[ $REPLY =~ ^[Y]$ ]] +then + for FILE in ${FILE_LIST}; do + rm -v */*_${YEAR}-*${FILE} + done +else + echo "Remove request cancelled." +fi + diff --git a/tools/engtools/parsers/common/csv-to-influx.py b/tools/engtools/parsers/common/csv-to-influx.py new file mode 100755 index 000000000..54eeaff13 --- /dev/null +++ b/tools/engtools/parsers/common/csv-to-influx.py @@ -0,0 +1,474 @@ +#!/usr/bin/env python + +""" +Copyright (c) 2017 Wind River Systems, Inc. + +SPDX-License-Identifier: Apache-2.0 + +This script is for parsing post-data analysis. It takes the csv files generated from the parser scripts and imports +the data to an influx database. All influx information should be specified in the lab.conf file. Please see the wiki +for more details. +""" + +import os +import sys +import time +import datetime +from optparse import OptionParser +from multiprocessing import Pool + + +# command line arguments +def init(): + parser = OptionParser() + parser.add_option("-a", "--all", dest="parse_all", action="store_true", default=False, help="use this option to parse all csv files for all nodes specified within lab.conf") + parser.add_option("-n", "--node", dest="node_list", action="append", type="string", help="the specific node(s) to be parsed, otherwise all nodes within lab.conf will be parsed") + parser.add_option("-f", "--file", dest="file_list", action="append", type="string", help="the specific csv file(s) to be parsed. Must use with the -n option. Ex: -n controller-0 -f postgres-conns.csv") + parser.add_option("-p", "--postgres_svc", dest="postgres_list", action="append", type="string", help="use this option to parse postgres CSV files given specific services. Ex: -p nova") + parser.add_option("-b", "--batch-size", dest="batch_size", action="store", type="int", default="100", help="Influx accepts data in batches. Use this option to change the batch size from the default value of 100. 
Note that Influx can timeout if the batch size is to large") + (options, args) = parser.parse_args() + if len(sys.argv[1:]) == 0: + parser.print_help() + sys.exit(0) + else: + return options + + +# converts given UTC time into epoch time +def convertTime(file, node, start, lc, utcTime): + try: + # diskstats csv requires special work as no timestamp is provided + if file.startswith("diskstats"): + t = " ".join(start) + pattern = '%Y-%m-%d %H%M' + epoch = int(time.mktime(time.strptime(t, pattern))) + # add 15 minutes to current timestamp + epoch += 900 * lc + else: + if utcTime.endswith("AM"): + pattern = '%m/%d/%Y %H:%M:%S' + epoch = int(time.mktime(time.strptime(utcTime[:19], pattern))) + elif utcTime.endswith("PM"): + tmp = int(utcTime[11:13]) + if tmp < 12: + tmp += 12 + str1 = utcTime[:11] + str2 = utcTime[13:19] + utcTime = str1 + str(tmp) + str2 + pattern = '%m/%d/%Y %H:%M:%S' + epoch = int(time.mktime(time.strptime(utcTime, pattern))) + elif file.startswith("memstats") or file.startswith("filestats"): + pattern = '%Y-%m-%d %H:%M:%S' + epoch = int(time.mktime(time.strptime(utcTime[:19], pattern))) + else: + pattern = '%Y-%m-%d %H:%M:%S.%f' + epoch = int(time.mktime(time.strptime(utcTime[:23], pattern))) + return str(epoch) + except Exception as e: + appendToFile("/tmp/csv-to-influx.log", "Error: Issue converting time for {} for {}. Please check the csv and re-parse as some data may be incorrect\n-{}".format(file, node, e.message)) + return None + + +# go through each node folder to parse csv files +def processFiles(path, node, options, influx_info): + prefixes = ["postgres-conns", "postgres", "memtop", "occtop", "iostat", "netstats", "rabbitmq", "schedtop", "vswitch", "filestats-summary", "memstats-summary", "diskstats"] + if options.file_list is None: + for file in os.listdir(path): + if file.endswith(".csv"): + if file.startswith(tuple(prefixes)): + if options.parse_all is True or options.node_list is not None: + parse(path, file, node, options, influx_info) + elif options.postgres_list is not None: + for svc in options.postgres_list: + if svc in list(file.split("_")): + parse(path, file, node, options, influx_info) + else: + continue + # if -f option is used + elif options.file_list is not None: + for file in options.file_list: + parse(path, file, node, options, influx_info) + + # let the log know when a thread has finished parsing a folder + appendToFile("/tmp/csv-to-influx.log", "-Process for {} finished parsing at {}".format(node, datetime.datetime.utcnow())) + + +# parse the csv files and add data to influx +# needs to be cleaned up +def parse(path, file, node, options, influx_info): + file_loc = os.path.join(path, file) + # until able to access the file + while True: + if os.access(file_loc, os.R_OK): + try: + with open(file_loc, "r") as f: + file_name = file.replace("-", "_").replace(".csv", "").replace("_{}".format(node.replace("-", "_")), + "").strip("\n") + appendToFile("/tmp/csv-to-influx.log", "Parsing {} for {}".format(file_name, node)) + header = f.readline().split(",") + # vswitch CSV files have no headers... 
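# -----------------------------------------------------------------------------
# Illustrative sketch only (not part of csv-to-influx.py): the write mechanism
# the rest of this script relies on, shown in isolation. Each CSV row becomes
# one InfluxDB line-protocol point,
#   <measurement>,<tag>=<value>[,...] <field>=<value>[,...] <epoch-seconds>
# and newline-separated batches of points are POSTed to
# /write?db=<db>&precision=s via curl, as the script itself does. The
# measurement, tag, field and host/port/db values below are hypothetical
# examples, not values taken from lab.conf.
import time

def example_point(measurement, tags, fields, utc_str, pattern='%Y-%m-%d %H:%M:%S'):
    # Convert the CSV timestamp to epoch seconds (same idea as convertTime()
    # above, which uses the local timezone via time.mktime).
    epoch = int(time.mktime(time.strptime(utc_str, pattern)))
    tag_str = ",".join("{}={}".format(k, v) for k, v in sorted(tags.items()))
    field_str = ",".join("{}={}".format(k, v) for k, v in sorted(fields.items()))
    return "{},{} {} {}".format(measurement, tag_str, field_str, epoch)

point = example_point("occtop", {"node": "controller-0"}, {"total": 94.9},
                      "2016-11-22 00:29:16")
cmd = ("curl -s -i -o /dev/null -XPOST "
       "'http://127.0.0.1:8086/write?db=example_db&precision=s' "
       "--data-binary '{}'".format(point))
# csv-to-influx.py hands strings like this to os.system(); printed here for illustration.
print(cmd)
# -----------------------------------------------------------------------------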
+ if file_name.startswith("vswitch"): + if file_name.replace("vswitch_", "").split("_")[0] == "engine": + header = "date/time,id,cpuid,rx-packets,tx-packets,tx-disabled,tx-overflow,rx-discard,tx-discard,usage".split( + ",") + elif file_name.replace("vswitch_", "").split("_")[0] == "interface": + header = "date/time,rx-packets,tx-packets,rx-bytes,tx-bytes,tx-errors,rx-errors,tx-discards,rx-discards,rx-floods,rx-no-vlan".split( + ",") + elif file_name.replace("vswitch_", "").split("_")[0] == "port": + header = "date/time,rx-packets,tx-packets,rx-bytes,tx-bytes,tx-errors,rx-errors,rx-nombuf".split( + ",") + elif file_name.startswith("memstats"): + if header[0] != "Date": + header = "date/time,rss,vrz" + influx_string = "" + measurement = "" + tag_names = ["node"] + init_tags = [node] + line_count = 0 + batch = 0 + start_time = "" # used for diskstats + bad_string = False + # set tag information needed for influx. Each file needs different things + if file_name.startswith("postgres_conns"): + measurement = "postgres_connections" + elif file_name.startswith("postgres"): + if file_name.endswith("_size"): + measurement = "postgres_db_size" + service = file_name.replace("postgres_", "").replace("_size", "") + if service == "size": + service = "postgres" + tag_names = ["node", "service"] + init_tags = [node, service] + else: + measurement = "postgres_svc_stats" + service = file_name.replace("postgres_", "").split("_")[0] + tag_names = ["node", "service", "schema", "table"] + init_tags = [node, service] + elif file_name.startswith("memtop"): + if file_name == "memtop_detailed": + measurement = "memtop_detailed" + else: + measurement = "memtop" + elif file_name.startswith("occtop"): + if file_name == "occtop_detailed": + measurement = "occtop_detailed" + else: + measurement = "occtop" + elif file_name.startswith("iostat"): + measurement = "iostat" + tag_names = ["node", "device"] + init_tags = [node, header[1]] + elif file_name.startswith("netstats"): + measurement = "netstats" + interface = file.replace("{}-".format(measurement), "").replace("{}-".format(node), "").replace( + ".csv", "") + tag_names = ["node", "interface"] + init_tags = [node, interface] + elif file_name.startswith("rabbitmq"): + if file_name.endswith("info"): + measurement = "rabbitmq_svc" + service = file_name.replace("rabbitmq_", "") + tag_names = ["node", "service"] + init_tags = [node, service] + else: + measurement = "rabbitmq" + elif file_name.startswith("schedtop"): + measurement = "schedtop" + service = file_name.replace("schedtop_", "").replace("_", "-") + tag_names = ["node", "service"] + init_tags = [node, service] + elif file_name.startswith("vswitch"): + measurement = "vswitch" + identifier = file_name.replace("vswitch_", "").split("_") + tag_names = ["node", identifier[0]] + if identifier[0] == "engine": + init_tags = [node, "engine_id_{}".format(identifier[1])] + elif identifier[0] == "interface": + init_tags = [node, identifier[1]] + elif identifier[0] == "port": + init_tags = [node, "port_{}".format(identifier[1])] + elif file_name.startswith("filestats"): + measurement = "filestats" + service = file_name.replace("filestats_summary_", "").replace(".csv", "").replace("_", "-") + tag_names = ["node", "service"] + init_tags = [node, service] + elif file_name.startswith("memstats"): + measurement = "memstats" + service = file_name.replace("memstats_summary_", "").replace(".csv", "").replace("_", "-") + tag_names = ["node", "service"] + init_tags = [node, service] + elif file_name.startswith("diskstats"): + measurement 
= "diskstats" + mount = file_name.replace("diskstats_", "") + tag_names = ["node", "mount", "file_system", "type"] + init_tags = [node, mount] + # find the bz2 file with the earliest date + start = float('inf') + for t in os.listdir(path): + if t.startswith(node) and t.endswith("bz2"): + next = int( + str(t.replace("{}_".format(node), "")[2:15]).replace("-", "").replace("_", "")) + if next < start: + start = next + start_time = t.split("_")[1:3] + + # go through header, determine the fields, skip the tags + field_names = [] + for i in header: + j = i.lower().replace(" ", "_").replace("-", "_").replace("used(%)", "usage").replace("(%)", "").replace("(s)", "").strip(" ").strip("\n") + if j in tag_names or i in init_tags or j == 'pid' or j == 'name': + continue + else: + # for occtop core info + if j.isdigit(): + j = "core_{}".format(j) + field_names.append(j) + + # go through each line + bad_count = 0 + for lines in f: + line = lines.strip("\n").split(",") + timestamp = convertTime(file, node, start_time, line_count, line[0].strip("\n")) + if timestamp is None: + bad_count += 1 + if bad_count == 3: + bad_string = True + break + else: + continue + tag_values = init_tags + field_values = [] + line_count += 1 + batch += 1 + + # go through data in each line and determine whether it belongs to a tag or a field + for word in line: + word = word.strip("\n") + # is non-number, interface, or device, add to tags, otherwise add to fields + if word.replace("_", "").replace("-", "").replace(" ", "").isalpha() or (word in init_tags) or word.endswith(".info") or word.startswith("ext"): + tag_values.append(word) + elif word.startswith("/dev"): + tag_values.append(word.split("/")[-1]) + elif word.startswith("= options.batch_size: + writing = True + influx_string = "curl -s -i -o /dev/null -XPOST 'http://'{}':'{}'/write?db='{}'&precision=s' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string.strip("\n")) + while writing: + begin = time.time() + os.system(influx_string + "\n") + end = time.time() + if end - begin >= 4.5: + appendToFile("/tmp/csv-to-influx.log", "Timeout warning: {} for {}. Retrying now".format(file_name, node)) + else: + batch = 0 + influx_string = "" + writing = False + # leave while loop due to incorrectly formatted csv data + if bad_string: + f.close() + break + else: + # get remainder of data from csv + if batch < options.batch_size: + writing = True + influx_string = "curl -s -i -o /dev/null -XPOST 'http://'{}':'{}'/write?db='{}'&precision=s' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string.strip("\n")) + while writing: + begin = time.time() + os.system(influx_string + "\n") + end = time.time() + if end - begin >= 4.5: + appendToFile("/tmp/csv-to-influx.log", "Timeout warning: {} for {}. 
Retrying now".format(file_name, node)) + else: + writing = False + f.close() + appendToFile("/tmp/csv-to-influx.log", + "{} lines parsed in {} for {}".format(line_count, file_name, node)) + break + except IOError as e: + appendToFile("/tmp/csv-to-influx.log", "Error: Issue opening {}\n-{}".format(file_loc, e.message)) + except (KeyboardInterrupt, SystemExit): + sys.exit(0) + else: + appendToFile("/tmp/csv-to-influx.log", "Error: Could not access {}".format(file_loc)) + + +# generate http api string to send data to influx +def generateString(file, node, meas, tag_n, tag_v, field_n, field_v, lc, date): + base = "{},".format(meas) + try: + if file.startswith("diskstats"): + for i in range(len(tag_n)): + if i == len(tag_n)-1: + base = base + "'{}'='{}' ".format(tag_n[i], str(tag_v[i])) + else: + base = base + "'{}'='{}',".format(tag_n[i], str(tag_v[i])) + for i in range(len(field_v)): + if str(field_v[i]).replace(".", "").isdigit(): + if i == len(field_v)-1: + base = base + "'{}'='{}' {}".format(field_n[i], str(field_v[i]), date) + else: + base = base + "'{}'='{}',".format(field_n[i], str(field_v[i])) + else: + appendToFile("/tmp/csv-to-influx.log", "Error: Issue with line {} with {} for {}. Please check the csv and re-parse as some data may be incorrect".format(lc, file, node)) + return None + else: + for i in range(len(tag_n)): + if i == len(tag_n)-1: + base = base + "'{}'='{}' ".format(tag_n[i], str(tag_v[i])) + else: + base = base + "'{}'='{}',".format(tag_n[i], str(tag_v[i])) + for i in range(1, len(field_v)): + if str(field_v[i]).replace(".", "").isdigit(): + if i == len(field_v)-1: + base = base + "'{}'='{}' {}".format(field_n[i], str(field_v[i]), date) + else: + base = base + "'{}'='{}',".format(field_n[i], str(field_v[i])) + else: + appendToFile("/tmp/csv-to-influx.log", "Error: Issue with line {} with {} for {}. 
Please check the csv and re-parse as some data may be incorrect".format(lc, file, node)) + return None + return base + '\n' + except Exception as e: + appendToFile("/tmp/csv-to-influx.log", "Error: Issue with http api string with {} for {}\n-{}".format(file, node, e.message)) + return None + + +# append to error log +def appendToFile(file, content): + with open(file, "a") as f: + f.write(content + '\n') + + +# main method +if __name__ == "__main__": + # get command-line args + options = init() + controller_list = [] + compute_list = [] + storage_list = [] + influx_host = influx_port = influx_db = "" + influx_info = [] + pool_size = 0 + + # create the files + file = open("/tmp/csv-to-influx.log", "w") + file.close() + file = open("output.txt", "w") + file.close() + appendToFile("/tmp/csv-to-influx.log", "Starting parsing at {}".format(datetime.datetime.utcnow())) + appendToFile("/tmp/csv-to-influx.log", "----------------------------------------------") + + # get node and influx info from lab.conf + with open("lab.conf", "r") as lc: + for lines in lc: + line = lines.strip("\n") + if line.startswith("CONTROLLER_LIST"): + controller_list = list(line.strip(" ").split("="))[1].strip("\"").split(" ") + elif line.startswith("COMPUTE_LIST"): + compute_list = list(line.strip(" ").split("="))[1].strip("\"").split(" ") + elif line.startswith("STORAGE_LIST"): + storage_list = list(line.strip(" ").split("="))[1].strip("\"").split(" ") + elif line.startswith("INFLUX_HOST"): + influx_host = list(line.strip(" ").split("="))[1].strip("\"").split(" ")[0] + elif line.startswith("INFLUX_PORT"): + influx_port = list(line.strip(" ").split("="))[1].strip("\"").split(" ")[0] + elif line.startswith("INFLUX_DB"): + influx_db = list(line.strip(" ").split("="))[1].strip("\"").split(" ")[0] + break + lc.close() + + influx_info.append(influx_host) + influx_info.append(influx_port) + influx_info.append(influx_db) + + # if -n option is used, remove unneeded nodes + if options.node_list is not None: + tmp_controller_list = [] + tmp_compute_list = [] + tmp_storage_list = [] + for n in controller_list: + if n in options.node_list: + tmp_controller_list.append(n) + for n in compute_list: + if n in options.node_list: + tmp_compute_list.append(n) + for n in storage_list: + if n in options.node_list: + tmp_storage_list.append(n) + controller_list = tmp_controller_list + compute_list = tmp_compute_list + storage_list = tmp_storage_list + + pool_size = len(controller_list) + len(compute_list) + len(storage_list) + + if options.file_list is not None and options.parse_all is True: + print "You cannot use the -a option with the -f option" + sys.exit(0) + if options.postgres_list is not None and options.file_list is not None: + print "You cannot use the -p option with the -f option" + sys.exit(0) + if options.parse_all is True and options.node_list is not None: + print "You cannot use the -a option with the -n option. Ex: -n controller-0" + sys.exit(0) + if options.file_list is not None and options.node_list is None: + print "You must specify a node and a file. Ex: -n controller-0 -f postgres-conns.csv" + sys.exit(0) + + working_dir = os.getcwd() + pool = Pool(processes=pool_size) + proc_list = [] + + print "Sending data to InfluxDB. 
Please tail /tmp/csv-to-influx.log" + + # create a process per node + if len(controller_list) > 0: + for i in range(len(controller_list)): + path = os.path.join(working_dir, controller_list[i]) + proc_list.append(pool.apply_async(processFiles, (path, controller_list[i], options, influx_info,))) + + if len(compute_list) > 0: + for i in range(len(compute_list)): + path = os.path.join(working_dir, compute_list[i]) + proc_list.append(pool.apply_async(processFiles, (path, compute_list[i], options, influx_info,))) + + if len(storage_list) > 0: + for i in range(len(storage_list)): + path = os.path.join(working_dir, storage_list[i]) + proc_list.append(pool.apply_async(processFiles, (path, storage_list[i], options, influx_info,))) + + pool.close() + pool.join() diff --git a/tools/engtools/parsers/common/download-data.sh b/tools/engtools/parsers/common/download-data.sh new file mode 100755 index 000000000..0c5726dd1 --- /dev/null +++ b/tools/engtools/parsers/common/download-data.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +#Copyright (c) 2016 Wind River Systems, Inc. +# +#SPDX-License-Identifier: Apache-2.0 +# +# This script is used to download syseng data from all hosts to the analysis server +# for post-processing. +# Syseng data are stored under /scratch/syseng_data on the controllers. Syseng data +# for storage and compute hosts, which are stored under /tmp/syseng_data, are pulled +# to the controllers via the script download-computes.sh and stored under +# /opt/backups/tmp/syseng-data. +# +# This script is to be run after running download-computes.sh on one of the controllers. + +if [ ! -f lab.conf ]; then + echo "Lab configuration file is missing." + echo "See http://wiki.wrs.com/PBUeng/TitaniumServerSysengToolsAndDataAnalysis for more info." + exit 1 +fi + +source ./lab.conf + +rsync -azvh wrsroot@${CONTROLLER0_IP}:/scratch/syseng_data/* . +rsync -azvh wrsroot@${CONTROLLER1_IP}:/scratch/syseng_data/* . + +rsync -azvh wrsroot@${CONTROLLER0_IP}:/opt/backups/tmp/syseng-data/* . +rsync -azvh wrsroot@${CONTROLLER1_IP}:/opt/backups/tmp/syseng-data/* . + +# Compress the newly downloaded data files if they have not been compressed +CURDIR=$(pwd) +ALL_HOSTS="${CONTROLLER_LIST} ${STORAGE_LIST} ${COMPUTE_LIST}" + +for HOST in ${ALL_HOSTS}; do + if [ -e ${HOST} ]; then + echo "Compressing ${HOST}" + cd ${CURDIR}/${HOST} + bzip2 ${HOST}* + cd ${CURDIR} + else + echo "${HOST} not found" + fi +done diff --git a/tools/engtools/parsers/common/parse-all.sh b/tools/engtools/parsers/common/parse-all.sh new file mode 100755 index 000000000..99d1e4eb5 --- /dev/null +++ b/tools/engtools/parsers/common/parse-all.sh @@ -0,0 +1,376 @@ +#!/bin/bash + +#Copyright (c) 2016-2017 Wind River Systems, Inc. +# +#SPDX-License-Identifier: Apache-2.0 +# + +# This script is used to parse all stats data. It is designed to be called by either +# parse-controllers.sh or parse-computes.sh and not used as a standalone script. +# If the input node is a controller, it will parse controller specific postgres +# and rabbitmq stats first. If the input node is a compute, it will parse the compute +# specific vswitch stats first.
+# +# The following parsing steps are common to all hosts and are executed in the specified order: +# - Parse occtop +# - Parse memtop +# - Parse memstats (summary) +# - Parse netstats +# - Parse schedtop (summary) +# - Parse iostats +# - Parse diskstats +# - Parse filestats (summary) +# - Parse process level schedtop (optional step, configured in lab.conf) +# - Generate tarball + +if [[ $# != 1 ]]; then + echo "ERROR: This script is meant to be called by either parse-controllers.sh or parse-computes.sh script." + echo "To run it separately, copy the script to the host directory that contains *.bz2 files." + echo "It takes a single argument - the name of the host directory (e.g. ./parse-all.sh controller-0)." + exit 1 +fi + +source ../lab.conf +source ./host.conf + +PARSERDIR=$(dirname $0) +. ${PARSERDIR}/parse-util.sh + +NODE=$1 + +CURDATE=$(date) +DATESTAMP=$(date +%b-%d) + +function sedit() +{ + local FILETOSED=$1 + sed -i -e "s/ */ /g" ${FILETOSED} + sed -i -e "s/ /,/g" ${FILETOSED} + # Remove any trailing comma + sed -i "s/,$//" ${FILETOSED} +} + +function get_filename_from_mountname() +{ + local name=$1 + local fname + if test "${name#*"scratch"}" != "${name}"; then + fname="scratch" + elif test "${name#*"log"}" != "${name}"; then + fname="log" + elif test "${name#*"backup"}" != "${name}"; then + fname="backup" + elif test "${name#*"ceph/mon"}" != "${name}"; then + fname="cephmon" + elif test "${name#*"conversion"}" != "${name}"; then + fname="img-conversion" + elif test "${name#*"platform"}" != "${name}"; then + fname="platform" + elif test "${name#*"postgres"}" != "${name}"; then + fname="postgres" + elif test "${name#*"cgcs"}" != "${name}"; then + fname="cgcs" + elif test "${name#*"rabbitmq"}" != "${name}"; then + fname="rabbitmq" + elif test "${name#*"instances"}" != "${name}"; then + fname="pv" + elif test "${name#*"ceph/osd"}" != "${name}"; then + # The ceph disk partition has the following mount name convention + # /var/lib/ceph/osd/ceph-0 + fname=`basename ${name}` + fi + echo $fname +} + +function parse_process_schedtop_data() +{ + # Logic has been moved to a separate script so that parsing process level schedtop + # can be run either as part of parse-all.sh script or independently. + LOG "Process level schedtop parsing is turned on in lab.conf. Parsing schedtop detail..." + cd .. + ./parse-schedtop.sh ${NODE} + cd ${NODE} +} + +function parse_controller_specific() +{ + # Parsing Postgres data, removing data from previous run if there are any. Generate summary + # data for each database and detail data for specified tables + LOG "Parsing postgres data for ${NODE}" + if [ -z "${DATABASE_LIST}" ]; then + WARNLOG "DATABASE_LIST is not set in the lab.conf file. 
Use default setting" + DATABASE_LIST="cinder glance keystone nova neutron ceilometer heat sysinv aodh postgres nova_api" + fi + + for DB in ${DATABASE_LIST}; do + rm /tmp/${DB}*.csv + done + ../parse_postgres *postgres.bz2 >postgres-summary-${NODE}-${DATESTAMP}.txt + for DB in ${DATABASE_LIST}; do + cp /tmp/${DB}_size.csv postgres_${DB}_size.csv + done + for TABLE in ${TABLE_LIST}; do + cp /tmp/${TABLE}.csv postgres_${TABLE}.csv + done + + # Parsing RabbitMQ data + LOG "Parsing rabbitmq data for ${NODE}" + ../parse-rabbitmq.sh rabbitmq-${NODE}.csv + + for QUEUE in ${RABBITMQ_QUEUE_LIST}; do + # If node is not a controller node then parse-rabbitmq-queue.sh should skip + ../parse-rabbitmq-queue.sh rabbitmq-${QUEUE}-${NODE}.csv ${QUEUE} + done +} + +function parse_compute_specific() +{ + LOG "Parsing vswitch data for ${NODE}" + ../parse-vswitch.sh ${NODE} +} + +function parse_occtop_data() +{ + LOG "Parsing occtop data for ${NODE}" + bzcat *occtop.bz2 >occtop-${NODE}-${DATESTAMP}.txt + cp occtop-${NODE}-${DATESTAMP}.txt tmp.txt + sedit tmp.txt + # Get the highest column count + column_count=$(awk -F "," '{print NF}' tmp.txt | sort -nu | tail -n 1) + grep '^[0-9]' tmp.txt |cut -d, -f1,2 | awk -F "," '{print $1" "$2}' > tmpdate.txt + grep '^[0-9]' tmp.txt |cut -d, -f3-$column_count > tmpcore.txt + paste -d, tmpdate.txt tmpcore.txt > tmp2.txt + # Generate header based on the number of columns. The Date/Time column consists of date and time fields + header="Date/Time,Total" + count=$(($column_count-3)) + for i in $(seq 0 $(($count-1))); do + header="$header,$i" + done + + # Generate detailed CSV with Date/Time, Total CPU occupancy and individual core occupancies e.g. + # Date/Time,Total,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35 + # 2016-11-22 00:29:16.523,759.5,21.4,18.9,43.8,24.5,23.1,25.3,28.1,25.5,20.5,27.8,26.8,32.7,27.3,25.1,21.1,23.2,21.7,36.4,23.3,16.6,15.3,13.9,14.4,15.0,14.7,14.4,16.4,13.8,17.0,17.8,19.0,15.1,14.0,13.2,14.5,17.8 + echo "${header}" > occtop-${NODE}-detailed.csv + cat tmp2.txt >> occtop-${NODE}-detailed.csv + + # Generate simple CSV file which is used to generate host CPU occupancy chart. Platform cores are + # defined in the host.conf. The simple CSV contains only the Date/Time and Total platform CPU occupancy e.g. + # Date/Time,Total + # 2016-11-22 00:29:16.523,94.9 + # 2016-11-22 00:30:16.526,71.3 + + if [ -z "${PLATFORM_CPU_LIST}" ]; then + # A controller node in standard system. In this case, all cores are dedicated to platform use. + # Simply extract the Date/Time and Total CPU occupancy + cut -d, -f1,2 occtop-${NODE}-detailed.csv > occtop-${NODE}.csv + else + # A CPE, compute or storage node. The cores dedicated to platform use are specified in the config. 
+ echo "Date/Time,Total" > occtop-${NODE}.csv + while read -r line || [[ -n "$line" ]]; do + IFS="," read -r -a arr <<< "${line}" + total=0 + for CORE in ${PLATFORM_CPU_LIST}; do + # Add 2 to the index as occupancy of each individual core starts after Date/Time and Total + idx=$(($CORE+2)) + total=`echo $total + ${arr[$idx]} | bc` + done + echo "${arr[0]},${total}" >> occtop-${NODE}.csv + done < tmp2.txt + fi + # Remove temporary files + rm tmp.txt tmp2.txt tmpdate.txt tmpcore.txt +} + +function parse_memtop_data() +{ + LOG "Parsing memtop data for ${NODE}" + bzcat *memtop.bz2 > memtop-${NODE}-${DATESTAMP}.txt + cp memtop-${NODE}-${DATESTAMP}.txt tmp.txt + sedit tmp.txt + + # After dumping all memtop bz2 output into one text file and in-place sed, grab only relevant data + # for CSV output. Generate both detailed and simple CSV files. Simple output will be used to generate + # chart. + grep '^[0-9]' tmp.txt | awk -F "," '{print $1" "$2","$3","$4","$5","$6","$7","$8","$9","$10","$11","$12","$13","$14","$15","$16","$17","$18}' > tmp2.txt + echo "Date/Time,Total,Used,Free,Cached,Buf,Slab,CAS,CLim,Dirty,WBack,Anon,Avail,0:Avail,0:HFree,1:Avail,1:HFree" > memtop-${NODE}-detailed.csv + cat tmp2.txt >> memtop-${NODE}-detailed.csv + echo "Date/Time,Total,Anon" > memtop-${NODE}.csv + cut -d, -f1-2,12 tmp2.txt >> memtop-${NODE}.csv + # Remove temporary files + rm tmp.txt tmp2.txt +} + +function parse_netstats_data() +{ + LOG "Parsing netstats data for ${NODE}" + # First generate the summary data then detail data for specified interfaces + ../parse_netstats *netstats.bz2 > netstats-summary-${NODE}-${DATESTAMP}.txt + if [ -z "${NETSTATS_INTERFACE_LIST}" ]; then + ERRLOG "NETSTATS_INTERFACE_LIST is not set in host.conf. Skipping detail netstats..." + else + for INTERFACE in ${NETSTATS_INTERFACE_LIST}; do + echo "Date/Time,Interface,Rx PPS,Rx Mbps,Rx Packet Size,Tx PPS,Tx Mbps,Tx Packet Size" > netstats-${NODE}-${INTERFACE}.csv + ../parse_netstats *netstats.bz2 | grep " ${INTERFACE} " > tmp.txt + sed -i -e "s/|/ /g" tmp.txt + sed -i -e "s/ */ /g;s/ */ /g" tmp.txt + sed -i -e "s/ /,/g" tmp.txt + # Remove the leading comma + sed -i 's/,//' tmp.txt + while read -r line || [[ -n "$line" ]]; do + IFS="," read -r -a arr <<< "${line}" + echo "${arr[8]} ${arr[9]},${arr[0]},${arr[2]},${arr[3]},${arr[4]},${arr[5]},${arr[6]},${arr[7]}" >> netstats-${NODE}-${INTERFACE}.csv + done < tmp.txt + done + rm tmp.txt + fi +} + +function parse_iostats_data() +{ + LOG "Parsing iostat data for ${NODE}" + if [ -z "${IOSTATS_DEVICE_LIST}" ]; then + ERRLOG "IOSTAT_DEVICE_LIST is not set in host.conf. Skipping iostats..." 
+ else + for DEVICE in ${IOSTATS_DEVICE_LIST}; do + # Add header to output csv file + echo "Date/Time,${DEVICE},rqm/s,wrqm/s,r/s,w/s,rkB/s,wkB/s,avgrq-sz,avgqu-sz,await,r_await,w_await,svctm,%util" > iostat-${NODE}-${DEVICE}.csv + # Dumping iostat content to tmp file + bzcat *iostat.bz2 | grep -E "/2015|/2016|/2017|${DEVICE}" | awk '{print $1","$2","$3","$4","$5","$6","$7","$8","$9","$10","$11","$12","$13","$14}' > tmp.txt + while IFS= read -r current + do + if test "${current#*Linux}" != "$current" + then + # Skip the line that contains the word "Linux" + continue + else + if test "${current#*$DEVICE}" == "$current" + then + # It's a date entry, look ahead + read -r next + if test "${next#*$DEVICE}" != "${next}" + then + # This next line contains the device stats + # Combine date and time fields + current="${current//2016,/2016 }" + current="${current//2017,/2017 }" + # Combine time and AM/PM fields + current="${current//,AM/ AM}" + current="${current//,PM/ PM}" + # Write both lines to intermediate file + echo "${current}" >> tmp2.txt + echo "${next}" >> tmp2.txt + fi + fi + fi + done < tmp.txt + mv tmp2.txt tmp.txt + # Combine the time and stats data into one line + # 11/22/2016 06:34:00 AM,,,,,,,,,,, + # dm-0,0.00,0.00,0.00,1.07,0.00,38.00,71.25,0.00,0.19,0.00,0.19,0.03,0.00 + paste -d "" - - < tmp.txt > tmp2.txt + # Remove empty fields, use "complement" option for contiguous fields/range + cut -d, -f2-11 --complement tmp2.txt > tmp.txt + # Write final content to output csv + cat tmp.txt >> iostat-${NODE}-${DEVICE}.csv + rm tmp.txt tmp2.txt + done + fi +} + +function parse_diskstats_data() +{ + LOG "Parsing diskstats data for ${NODE}" + + if [ -z "${DISKSTATS_FILESYSTEM_LIST}" ]; then + ERRLOG "DISKSTATS_FILESYSTEM_LIST is not set in host.conf. Skipping diskstats..." + else + for FS in ${DISKSTATS_FILESYSTEM_LIST}; do + fspair=(${FS//|/ }) + fsname=${fspair[0]} + mountname=${fspair[1]} + if [ ${mountname} == "/" ]; then + mountname=" /" + echo "File system,Type,Size,Used,Avail,Used(%)" > diskstats-${NODE}-root.csv + bzcat *diskstats.bz2 | grep $fsname | grep $mountname | grep G | awk '{print $1","$2","$3","$4","$5","$6}' >> diskstats-${NODE}-root.csv + else + fname=$(get_filename_from_mountname $mountname) + echo "File system,Type,Size,Used,Avail,Used(%)" > diskstats-${NODE}-$fname.csv + bzcat *diskstats.bz2 | grep $fsname | grep $mountname | grep G | awk '{print $1","$2","$3","$4","$5","$6}' >> diskstats-${NODE}-$fname.csv + fi + done + fi +} + +# Parsing starts here ... +LOG "Parsing ${NODE} files - ${CURDATE}" + +# Let's get the host specific parsing out of the way +if test "${NODE#*"controller"}" != "${NODE}"; then + parse_controller_specific +elif test "${NODE#*"compute"}" != "${NODE}"; then + parse_compute_specific +fi + +# Parsing CPU occtop data +parse_occtop_data + +# Parsing memtop data +parse_memtop_data + +# Parsing memstats data to generate the high level report. The most important piece of info is the list of +# hi-runners at the end of the file. If there is a leak, run parse-daily.sh script to generate the time +# series data for the offending processes only. Use process name, not PID as most Titanium Cloud processes have +# workers. +LOG "Parsing memstats summary for ${NODE}" +../parse_memstats --report *memstats.bz2 > memstats-summary-${NODE}-${DATESTAMP}.txt +#tar czf pidstats.tgz pid-*.csv +rm pid-*.csv + + +# Parsing netstats data +parse_netstats_data + +# Parsing schedtop data to generate the high level report. 
Leave the process level schedtop parsing till +# the end as it is a long running task. +LOG "Parsing schedtop summary for ${NODE}" +FILES=$(ls *schedtop.bz2) +../parse_schedtop ${FILES} > schedtop-summary-${NODE}-${DATESTAMP}.txt + +# Parsing iostats data +parse_iostats_data + +# Parsing diskstats data +parse_diskstats_data + +# Parsing filestats data to generate the high level report. If there is a file descriptor leak, run parse-daily.sh +# script to generate the time series data for the offending processes only. Use process name, not PID as most +# Titanium Cloud processes have workers. +LOG "Parsing filestats summary for ${NODE}" +../parse_filestats --all *filestats.bz2 > filestats-summary-${NODE}-${DATESTAMP}.txt + +# Parsing process level schedtop data. This is a long running task. To skip this step or generate data for +# only specific processes, update the lab.conf and host.conf files. +[[ ${GENERATE_PROCESS_SCHEDTOP} == Y ]] && parse_process_schedtop_data || WARNLOG "Parsing process level schedtop is skipped." + +# Done parsing for this host. If it's a controller host, check if the parsing of postgres connection stats which is run in +# parallel is done before creating a tar file. +if test "${NODE#*"controller"}" != "${NODE}"; then + # If postgres-conns.csv file has not been created which is highly unlikely, wait a couple of minutes + [ ! -e postgres-conns.csv ] && sleep 120 + + # Now check the stats of this file every 5 seconds to see if it's still being updated. Another option + # is to use inotify which requires another inotify-tools package. + oldsize=0 + newsize=0 + while true + do + newsize=$(stat -c %s postgres-conns.csv) + if [ "$oldsize" == "$newsize" ]; then + break + fi + oldsize=$newsize + sleep 5 + done +fi +tar czf syseng-data-${NODE}-${DATESTAMP}.tgz *.csv *.txt +LOG "Parsing stats data for ${NODE} completed!" diff --git a/tools/engtools/parsers/common/parse-controllers.sh b/tools/engtools/parsers/common/parse-controllers.sh new file mode 100755 index 000000000..61a77c585 --- /dev/null +++ b/tools/engtools/parsers/common/parse-controllers.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +#Copyright (c) 2016 Wind River Systems, Inc. +# +#SPDX-License-Identifier: Apache-2.0 +# + +# This script is used to parse stats data for controller/CPE hosts. For large office, +# it is called by parse-everything.sh. For CPE, it should be called on its own. +# File lab.conf must exist with CONTROLLER_LIST config parameter set for the script to run +# Usage: ./parse-controllers.sh + +PARSERDIR=$(dirname $0) +. ${PARSERDIR}/parse-util.sh + +if [ ! -f lab.conf ]; then + echo "Lab configuration file is missing." + echo "See http://wiki.wrs.com/PBUeng/TitaniumServerSysengToolsAndDataAnalysis for more info." + exit 1 +fi + +source ./lab.conf + +if [ -z "${CONTROLLER_LIST}" ]; then + echo "ERROR: Controller list is not set in lab.conf file. Exiting..." + exit 1 +fi + +for HOST in ${CONTROLLER_LIST}; do + LOG "Parsing stats data for controller host ${HOST}" + if [ -d ${HOST} ]; then + cd ${HOST} + bzip2 ${HOST}* > /dev/null 2>&1 + ../parse-all.sh ${HOST} > /dev/null 2>&1 & + # Delay the next controller because they both write to /tmp + sleep 120 + cd .. + else + ERRLOG "${HOST} does not exist. Parsing skipped." + fi +done + +# Parsing postgres connection stats is a time consuming step, run it in parallel with parse-all +# script. 
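# -----------------------------------------------------------------------------
# Illustrative aside (not part of parse-controllers.sh): one possible end-to-end
# sequence on the analysis server, assuming lab.conf and host.conf are already
# set up in the working directory. The order is taken from the comments in the
# scripts above; exact invocations may differ per lab.
#
#   ./download-data.sh          # pull syseng_data from both controllers
#                               # (after download-computes.sh was run on a controller)
#   ./parse-controllers.sh      # parse controller/CPE hosts (calls parse-all.sh)
#   ./cleanup-uncompressed.sh   # after the final parse, before parse-daily.sh
#   ./parse-daily.sh memstats <process-name>   # only if a leak is suspected
#   ./csv-to-influx.py --all    # import the generated CSVs into InfluxDB
# -----------------------------------------------------------------------------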
+for HOST in ${CONTROLLER_LIST}; do + if [ -d ${HOST} ]; then + LOG "Parsing postgres connection stats data for controller host ${HOST}" + cd ${HOST} + ../parse-postgres.sh *postgres.bz2 > /dev/null 2>&1 & + cd .. + fi +done diff --git a/tools/engtools/parsers/common/parse-daily.sh b/tools/engtools/parsers/common/parse-daily.sh new file mode 100755 index 000000000..7298441c3 --- /dev/null +++ b/tools/engtools/parsers/common/parse-daily.sh @@ -0,0 +1,115 @@ +#!/bin/bash + +#Copyright (c) 2016 Wind River Systems, Inc. +# +#SPDX-License-Identifier: Apache-2.0 +# +# The following script is used when either memstats or filestats summary reports +# a possible memory or file leak respectively. It can be run for a particular host or +# for all hosts as configured in the lab.conf. +# Make sure to run the cleanup-uncompressed.sh script before running this script to remove +# any uncompressed files, as the memstats/filestats parser can produce erroneous results if +# there are both uncompressed and compressed versions of the same file. +# +# Usage: +# ./parse-daily.sh <parser-name> <process-name> to generate daily stats for all hosts +# ./parse-daily.sh <host-name> <parser-name> <process-name> to generate daily stats for +# specified host. +# +# e.g. >./parse-daily.sh memstats sm-eru +# >./parse-daily.sh controller-0 filestats postgres + +function print_usage() +{ + echo "Usage: ./parse-daily.sh <parser-name> <process-name> will parse daily data for all hosts." + echo "Usage: ./parse-daily.sh <host-name> <parser-name> <process-name> will parse daily data for specified host." + echo "Valid parsers for daily stats are: memstats & filestats." + exit 1 +} + +function parse_daily_stats() +{ + local PARSER_NAME=$1 + local PROCESS_NAME=$2 + local TMPFILE="tmp.txt" + # Inserting the header in the summary csv file. The summary file is a concatenation + # of the daily files. If there is a large number of files, the parser may not have + # enough memory to process them all. The safest way is to parse one day at a time. + if [ ${PARSER_NAME} == "memstats" ]; then + local SUMMARYFILE=memstats-summary-${PROCESS_NAME}.csv + echo "Date,RSS,VSZ" > ${SUMMARYFILE} + else + local SUMMARYFILE=filestats-summary-${PROCESS_NAME}.csv + echo "Date,Read/Write,Write,Read" > ${SUMMARYFILE} + fi + # Get the list of dates for memstats/filestats bz2 files in this directory. + # The filename convention is : _YYYY-MM-DD_