71342f7520
It turned out that the first portion of results was collected on an environment with a sporadic connectivity issue between Keystone and Memcache. Valid results were collected and processed. Change-Id: I6c8f388ef380a19016c4bc9cf49582c8a8f433d1
362 lines
14 KiB
Python
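"""Process OSprofiler JSON output and report Keystone performance stats.

Walks an OSprofiler trace tree, aggregates Keystone DB queries (totals,
per-statement-type counts and durations, multi-JOIN "red flags", duration
outliers) and cached-operation hits, then prints the results as
prettytable tables.

Expects either a path to a single JSON file with OSprofiler output or a
directory of such files (one report is printed per file).
"""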
import argparse
import collections
import copy
import itertools
import json
import os

import numpy as np
import prettytable


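# Aggregate-statistics skeleton filled in while walking a trace: DB query
# counters with per-duration statement maps, multi-JOIN "red flag" buckets,
# and hit counters for the Memcache and local-context cache layers.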
NODES_TEMPLATE = {
    "DB queries": {
        "total": 0,
        "total_time_spent": 0,
        "select1": 0,
        "select1_time_spent": 0,
        "real": 0,
        "real_time_spent": 0,
        "SELECT": {
            "total": 0,
            "INNER JOIN": 0,
            "LEFT JOIN": 0,
            "RIGHT JOIN": 0,
            "FULL JOIN": 0,
            "time_spent": collections.OrderedDict(),
        },
        "INSERT": {
            "total": 0,
            "time_spent": collections.OrderedDict(),
        },
        "UPDATE": {
            "total": 0,
            "time_spent": collections.OrderedDict(),
        },
        "DELETE": {
            "total": 0,
            "time_spent": collections.OrderedDict(),
        },
        "red_flag": {
            "2joins": {
                "total": 0,
                "queries": [],
                "time_spent": collections.OrderedDict(),
            },
            "3+joins": {
                "total": 0,
                "queries": [],
                "time_spent": collections.OrderedDict(),
            }
        }
    },
    "Memcache cached operations": {},
    "Context cached operations": {},
    "Cached time spent": collections.OrderedDict(),
}

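# Module-level accumulators: NODES is (re)filled per processed trace and
# OUTLIER_QUERIES maps outlying statements to their duration in ms.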
NODES = copy.deepcopy(NODES_TEMPLATE)
OUTLIER_QUERIES = {}


def define_node(node):
    """Classify a single Keystone trace node as a DB or cache call.

    A node carries its metadata in node["info"] (project, name, started,
    finished timestamps) and nested spans in node["children"].
    """
    if node["info"]["project"] != "keystone":
        return

    if node["info"]["name"] not in ["db", "cache", "memoize"]:
        return

    time_spent = node["info"]["finished"] - node["info"]["started"]

    if node["info"]["name"] == "db":
        process_db_calls(node, time_spent)
    elif node["info"]["name"] == "cache":
        if not node["children"]:
            process_cache_calls(node, time_spent, "cache")
        else:
            for child in node["children"]:
                define_node(child)
    elif node["info"]["name"] == "memoize":
        if not node["children"] or len(node["children"]) == 1:
            process_cache_calls(node, time_spent)
        else:
            for child in node["children"]:
                define_node(child)


def process_cache_calls(node, time_spent, prefix="memoize"):
    """Record a cache hit and which cache layer (Memcache or context) served it."""
    cache_info = node["info"]["meta.raw_payload.%s-start" % prefix][
        "info"]["fn_info"]

    def add_info(pref):
        ops = NODES["%s cached operations" % pref]
        ops[cache_info] = ops.get(cache_info, 0) + 1
        NODES["Cached time spent"].setdefault(time_spent, []).append(
            cache_info)

    # Liberty env, all cache is Memcached Cache
    if not node["children"]:
        add_info("Memcache")
        return

    # Mitaka env with nested 2-layer cache
    ctxt_cache_info = node["children"][0]
    if ctxt_cache_info["children"]:
        memcache_cache_info = ctxt_cache_info["children"][0]
        if not memcache_cache_info["children"]:
            add_info("Memcache")
    else:
        add_info("Context")


def process_db_calls(node, time_spent):
    """Count a DB call and dispatch it by SQL statement type."""
    NODES["DB queries"]["total"] += 1
    NODES["DB queries"]["total_time_spent"] += time_spent
    statement = node[
        "info"]["meta.raw_payload.db-start"]["info"]["db"]["statement"]
    # "SELECT 1" connection pings are infrastructure noise; track them
    # separately from real queries.
    if statement.startswith("SELECT 1"):
        NODES["DB queries"]["select1"] += 1
        NODES["DB queries"]["select1_time_spent"] += time_spent
    else:
        NODES["DB queries"]["real"] += 1
        NODES["DB queries"]["real_time_spent"] += time_spent

    if statement.startswith("SELECT"):
        process_selects(statement, time_spent)
    elif statement.startswith("UPDATE"):
        process_base_db_calls("UPDATE", statement, time_spent)
    elif statement.startswith("INSERT"):
        process_base_db_calls("INSERT", statement, time_spent)
    elif statement.startswith("DELETE"):
        process_base_db_calls("DELETE", statement, time_spent)


def process_base_db_calls(command, statement, time_spent):
    """Bump the per-command counter and file the statement under its duration."""
    NODES["DB queries"][command]["total"] += 1
    NODES["DB queries"][command]["time_spent"].setdefault(
        time_spent, []).append(statement)


def process_selects(statement, time_spent):
    """Count JOIN usage in a SELECT and red-flag queries with many JOINs."""
    process_base_db_calls("SELECT", statement, time_spent)

    ij = statement.count("INNER JOIN")
    lj = statement.count("LEFT JOIN")
    rj = statement.count("RIGHT JOIN")
    fj = statement.count("FULL JOIN")
    NODES["DB queries"]["SELECT"]["INNER JOIN"] += ij
    NODES["DB queries"]["SELECT"]["LEFT JOIN"] += lj
    NODES["DB queries"]["SELECT"]["RIGHT JOIN"] += rj
    NODES["DB queries"]["SELECT"]["FULL JOIN"] += fj

    # raise a red flag if too many JOINs are met in one query
    total_joins = ij + lj + rj + fj
    if total_joins == 2:
        red_flag = NODES["DB queries"]["red_flag"]["2joins"]
    elif total_joins >= 3:
        red_flag = NODES["DB queries"]["red_flag"]["3+joins"]
    else:
        return
    red_flag["total"] += 1
    red_flag["queries"].append(statement)
    red_flag["time_spent"].setdefault(time_spent, []).append(statement)


def define_nodes(data):
    """Recursively walk a trace tree, classifying its leaf nodes."""
    for child in data["children"]:
        if not child["children"] or child["info"]["name"] == "memoize":
            define_node(child)
        else:
            define_nodes(child)


def sort_dicts(dictionary):
    """Collapse the per-duration statement maps into total time spent, ms."""
    new_nodes = copy.deepcopy(dictionary)
    for key in ["SELECT", "INSERT", "DELETE", "UPDATE"]:
        new_nodes["DB queries"][key]["time_spent"] = sum(
            k * len(v) for k, v
            in dictionary["DB queries"][key]["time_spent"].items())
    for key in ["2joins", "3+joins"]:
        new_nodes["DB queries"]["red_flag"][key]["time_spent"] = sum(
            k * len(v) for k, v
            in dictionary["DB queries"]["red_flag"][key][
                "time_spent"].items())
    new_nodes["Cached time spent"] = sum(
        k * len(v) for k, v in dictionary["Cached time spent"].items())
    return new_nodes


def detect_outliers(data, m=2.):
    """Record queries whose duration deviates from the median duration by
    more than ``m`` median absolute deviations (MAD)."""
    full_time_set = list(itertools.chain(*[[k] * len(v) for k, v
                                           in data.items()]))
    deviations = np.abs(full_time_set - np.median(full_time_set))
    mdev = np.median(deviations)
    if mdev:
        for idx, is_inlier in enumerate((deviations / mdev < m).tolist()):
            if not is_inlier:
                for query in data[full_time_set[idx]]:
                    OUTLIER_QUERIES[query] = full_time_set[idx]


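# A worked example of the MAD rule above, with hypothetical durations in ms:
# [2, 3, 3, 4, 50] has median 3, absolute deviations [1, 0, 0, 1, 47] and
# MAD 1, so with m=2 only the 50 ms query is flagged as an outlier.
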
def prepare_tables(nodes):
    """Build prettytable reports from the aggregated statistics."""
    # prepare table with common information
    common_info_table = prettytable.PrettyTable(["**Metric**", "**Value**"])
    common_info_table.align["**Metric**"] = "l"
    common_info_table.align["**Value**"] = "l"
    common_info_table.padding_width = 1
    common_info_table.max_width = 100
    common_info_table.header = True
    common_info_table.hrules = prettytable.ALL

    common_info_table.add_row(["Total (*) Keystone DB queries count",
                               nodes["DB queries"]["total"]])
    common_info_table.add_row(["Total (*) Keystone DB queries time spent, ms",
                               nodes["DB queries"]["total_time_spent"]])
    common_info_table.add_row([
        "Infrastructure (SELECT 1) Keystone DB queries count",
        nodes["DB queries"]["select1"]])
    common_info_table.add_row([
        "Infrastructure (SELECT 1) Keystone DB queries time spent, ms",
        nodes["DB queries"]["select1_time_spent"]])
    common_info_table.add_row(["Real Keystone DB queries count",
                               nodes["DB queries"]["real"]])
    common_info_table.add_row(["Real Keystone DB queries time spent, ms",
                               nodes["DB queries"]["real_time_spent"]])

    db_query_tmpl = "%s\n\n|"

    for key in ["SELECT", "INSERT", "DELETE", "UPDATE"]:
        if nodes["DB queries"][key]["total"]:
            common_info_table.add_row([
                "%s Keystone DB queries count" % key,
                nodes["DB queries"][key]["total"]])
            common_info_table.add_row([
                "%s Keystone DB queries time spent, ms" % key,
                nodes["DB queries"][key]["time_spent"]])
            detect_outliers(NODES["DB queries"][key]["time_spent"])

    # prepare table with outliers information
    outliers_table = prettytable.PrettyTable(["**DB query**",
                                              "**Time spent, ms**"])
    outliers_table.align["**DB query**"] = "l"
    outliers_table.align["**Time spent, ms**"] = "l"
    outliers_table.max_width = 100
    outliers_table.header = True
    outliers_table.hrules = prettytable.ALL

    for query in OUTLIER_QUERIES:
        outliers_table.add_row([db_query_tmpl % query, OUTLIER_QUERIES[query]])

    # prepare table with information about DB requests containing multiple
    # JOIN statements inside
    multi_join_queries = prettytable.PrettyTable(["**DB query**",
                                                  "**Time spent, ms**"])
    multi_join_queries.align["**DB query**"] = "l"
    multi_join_queries.align["**Time spent, ms**"] = "l"
    multi_join_queries.max_width = 100
    multi_join_queries.header = True
    multi_join_queries.hrules = prettytable.ALL

    for key in ["2joins", "3+joins"]:
        for ts in NODES["DB queries"]["red_flag"][key]["time_spent"]:
            for query in NODES["DB queries"]["red_flag"][key][
                    "time_spent"][ts]:
                multi_join_queries.add_row([db_query_tmpl % query, ts])

    # prepare table(s) with cache info
    cache_table = prettytable.PrettyTable(["**Cache**",
                                           "**Cached operations**",
                                           "**args**",
                                           "**kwargs**",
                                           "**Times used**"])
    cache_table.align["**Cache**"] = "l"
    cache_table.align["**Cached operations**"] = "l"
    cache_table.align["**args**"] = "l"
    cache_table.align["**kwargs**"] = "l"
    cache_table.align["**Times used**"] = "l"
    cache_table.max_width = 100
    cache_table.header = True
    cache_table.hrules = prettytable.ALL

    name_map = {
        "Memcache cached operations": "Memcache",
        "Context cached operations": "Local context",
    }
    for cache in ["Memcache cached operations", "Context cached operations"]:
        for operation, times in nodes[cache].items():
            # fn_info is a stringified "(name, args, kwargs)" tuple; strip
            # the outer parentheses and quotes to split it into columns.
            operation = operation[1:-1].split(", ")
            cache_table.add_row([name_map[cache],
                                 operation[0][1:-1],
                                 ", ".join(operation[1:-1])[1:-1],
                                 operation[-1][1:-1], times])

    return common_info_table, multi_join_queries, outliers_table, cache_table


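# The **bold** headers, "|" statement separators and "~" title underlines in
# the printed output suggest the report is meant to be pasted into
# reStructuredText documents.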
def main():
    parser = argparse.ArgumentParser(description='Process JSON file with '
                                                 'OSprofiler output.')
    parser.add_argument('path', type=str,
                        help='Path to the JSON file / directory with list of '
                             'JSON files with OSprofiler output')

    args = parser.parse_args()
    global NODES, OUTLIER_QUERIES
    if os.path.isfile(args.path):
        with open(args.path) as data_file:
            data = json.load(data_file)
        define_nodes(data)
        nodes = sort_dicts(NODES)
        common_info_table, multi_join_queries, outliers_table,\
            cache_table = prepare_tables(nodes)
        print(common_info_table)
        print(outliers_table)
        print(multi_join_queries)
        print(cache_table)
    elif os.path.isdir(args.path):
        for item in os.listdir(args.path):
            if item.endswith(".txt"):
                with open(os.path.join(args.path, item)) as data_file:
                    data = json.load(data_file)
                # reset the accumulators so every file gets its own report
                NODES = copy.deepcopy(NODES_TEMPLATE)
                OUTLIER_QUERIES = {}
                define_nodes(data)
                nodes = sort_dicts(NODES)
                common_info_table, multi_join_queries, outliers_table,\
                    cache_table = prepare_tables(nodes)
                item_name = \
                    item.split(".")[0].replace("_", " ").capitalize() + \
                    " request stats"
                print(item_name)
                print(len(item_name) * "~" + "\n")
                print("**%s**\n" % "Control plane request overlook")
                print(common_info_table)
                print("\n**%s**\n" % "Keystone DB queries outliers")
                print(outliers_table)
                print("\n**%s**\n" % "Keystone DB queries with multi "
                      "JOINs inside")
                print(multi_join_queries)
                print("\n")
                print("**Keystone cached methods stats**\n")
                print(cache_table)
                print("\n")


if __name__ == "__main__":
    main()