prettytable integrated and error breakdown added

This commit is contained in:
Sandy Walsh 2013-02-11 18:53:22 -06:00 committed by root
parent c1e8e305e1
commit eb834bcc9d
7 changed files with 261 additions and 25 deletions

2
migrations/002_delta.sql Normal file
View File

@ -0,0 +1,2 @@
-- Migration 002: add a `task` column (VARCHAR(30), no NOT NULL constraint)
-- to stacktach_rawdata and create an index on it.
ALTER TABLE stacktach_rawdata ADD task VARCHAR(30);
CREATE INDEX `stacktach_rawdata_1c149b74` ON `stacktach_rawdata` (`task`);

2
migrations/003_delta.sql Normal file
View File

@ -0,0 +1,2 @@
-- Migration 003: add an integer `image_type` column (no NOT NULL constraint)
-- to stacktach_rawdata and create an index on it.
ALTER TABLE stacktach_rawdata ADD image_type integer;
CREATE INDEX `stacktach_rawdata_cfde77eb` ON `stacktach_rawdata` (`image_type`);

View File

@ -0,0 +1,63 @@
BEGIN;
-- Deployment table: an auto-increment id and a required name.
-- Referenced by `stacktach_rawdata`.`deployment_id`.
CREATE TABLE `stacktach_deployment` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`name` varchar(50) NOT NULL
)
;
-- Raw data table: a required deployment reference, a required `json` text
-- column and a required `when` value, plus optional descriptive columns
-- (tenant, routing key, state/task values, publisher, event, service, host,
-- instance, request id).
-- NOTE(review): `json` presumably holds the original notification payload,
-- and `when` presumably is a decimal timestamp with 6 fractional digits -- confirm.
-- The `task` and `image_type` columns are the same ones added to existing
-- databases by migrations/002_delta.sql and migrations/003_delta.sql.
CREATE TABLE `stacktach_rawdata` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`deployment_id` integer NOT NULL,
`tenant` varchar(50),
`json` longtext NOT NULL,
`routing_key` varchar(50),
`state` varchar(20),
`old_state` varchar(20),
`old_task` varchar(30),
`task` varchar(30),
`image_type` integer,
`when` numeric(20, 6) NOT NULL,
`publisher` varchar(100),
`event` varchar(50),
`service` varchar(50),
`host` varchar(100),
`instance` varchar(50),
`request_id` varchar(50)
)
;
-- Foreign key: every rawdata row must point at an existing deployment.
ALTER TABLE `stacktach_rawdata` ADD CONSTRAINT `deployment_id_refs_id_362370d` FOREIGN KEY (`deployment_id`) REFERENCES `stacktach_deployment` (`id`);
-- Lifecycle table: an instance identifier, its last state and last task
-- state, and an optional reference to a `stacktach_rawdata` row.
-- All columns other than `id` are nullable.
CREATE TABLE `stacktach_lifecycle` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`instance` varchar(50),
`last_state` varchar(50),
`last_task_state` varchar(50),
`last_raw_id` integer
)
;
-- Foreign key: `last_raw_id`, when set, must reference an existing rawdata row.
ALTER TABLE `stacktach_lifecycle` ADD CONSTRAINT `last_raw_id_refs_id_d5fb17d3` FOREIGN KEY (`last_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
-- Timing table: a named timing row that belongs to a lifecycle, with optional
-- start/end rawdata references, optional start/end `when` values and a `diff`.
-- NOTE(review): `diff` presumably equals end_when - start_when -- confirm
-- against the code that writes these rows.
CREATE TABLE `stacktach_timing` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`name` varchar(50) NOT NULL,
`lifecycle_id` integer NOT NULL,
`start_raw_id` integer,
`end_raw_id` integer,
`start_when` numeric(20, 6),
`end_when` numeric(20, 6),
`diff` numeric(20, 6)
)
;
-- Foreign keys: the owning lifecycle, and the optional start/end rawdata rows.
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `lifecycle_id_refs_id_4255ead8` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `start_raw_id_refs_id_c32dfe04` FOREIGN KEY (`start_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `end_raw_id_refs_id_c32dfe04` FOREIGN KEY (`end_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
-- Request tracker table: a required request id, the owning lifecycle, an
-- optional reference to a timing row, and required start, duration and
-- completed values.
CREATE TABLE `stacktach_requesttracker` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`request_id` varchar(50) NOT NULL,
`lifecycle_id` integer NOT NULL,
`last_timing_id` integer,
`start` numeric(20, 6) NOT NULL,
`duration` numeric(20, 6) NOT NULL,
`completed` bool NOT NULL
)
;
-- Foreign keys: the owning lifecycle and the optional last timing row.
ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `lifecycle_id_refs_id_e457729` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `last_timing_id_refs_id_f0827cca` FOREIGN KEY (`last_timing_id`) REFERENCES `stacktach_timing` (`id`);
COMMIT;

View File

@ -0,0 +1,32 @@
-- Single-column secondary indexes for the stacktach tables, grouped by table.
-- NOTE(review): on MySQL, DDL statements such as CREATE INDEX commit
-- implicitly, so the BEGIN/COMMIT wrapper may not make this script atomic --
-- confirm for the target database.
BEGIN;
-- stacktach_rawdata
CREATE INDEX `stacktach_rawdata_4ac6801` ON `stacktach_rawdata` (`deployment_id`);
CREATE INDEX `stacktach_rawdata_2207f86d` ON `stacktach_rawdata` (`tenant`);
CREATE INDEX `stacktach_rawdata_2192f43a` ON `stacktach_rawdata` (`routing_key`);
CREATE INDEX `stacktach_rawdata_355bfc27` ON `stacktach_rawdata` (`state`);
CREATE INDEX `stacktach_rawdata_b716e0bb` ON `stacktach_rawdata` (`old_state`);
CREATE INDEX `stacktach_rawdata_8182be12` ON `stacktach_rawdata` (`old_task`);
CREATE INDEX `stacktach_rawdata_1c149b74` ON `stacktach_rawdata` (`task`);
CREATE INDEX `stacktach_rawdata_cfde77eb` ON `stacktach_rawdata` (`image_type`);
CREATE INDEX `stacktach_rawdata_feaed089` ON `stacktach_rawdata` (`when`);
CREATE INDEX `stacktach_rawdata_878a2906` ON `stacktach_rawdata` (`publisher`);
CREATE INDEX `stacktach_rawdata_a90f9116` ON `stacktach_rawdata` (`event`);
CREATE INDEX `stacktach_rawdata_52c5ef6b` ON `stacktach_rawdata` (`service`);
CREATE INDEX `stacktach_rawdata_38dbea87` ON `stacktach_rawdata` (`host`);
CREATE INDEX `stacktach_rawdata_888b756a` ON `stacktach_rawdata` (`instance`);
CREATE INDEX `stacktach_rawdata_792812e8` ON `stacktach_rawdata` (`request_id`);
-- stacktach_lifecycle
CREATE INDEX `stacktach_lifecycle_888b756a` ON `stacktach_lifecycle` (`instance`);
CREATE INDEX `stacktach_lifecycle_9b2555fd` ON `stacktach_lifecycle` (`last_state`);
CREATE INDEX `stacktach_lifecycle_67421a0e` ON `stacktach_lifecycle` (`last_task_state`);
CREATE INDEX `stacktach_lifecycle_dcf9e5f3` ON `stacktach_lifecycle` (`last_raw_id`);
-- stacktach_timing
CREATE INDEX `stacktach_timing_52094d6e` ON `stacktach_timing` (`name`);
CREATE INDEX `stacktach_timing_9f222e6b` ON `stacktach_timing` (`lifecycle_id`);
CREATE INDEX `stacktach_timing_efab905a` ON `stacktach_timing` (`start_raw_id`);
CREATE INDEX `stacktach_timing_c8bb8daf` ON `stacktach_timing` (`end_raw_id`);
CREATE INDEX `stacktach_timing_4401d15e` ON `stacktach_timing` (`diff`);
-- stacktach_requesttracker
CREATE INDEX `stacktach_requesttracker_792812e8` ON `stacktach_requesttracker` (`request_id`);
CREATE INDEX `stacktach_requesttracker_9f222e6b` ON `stacktach_requesttracker` (`lifecycle_id`);
CREATE INDEX `stacktach_requesttracker_ce616a96` ON `stacktach_requesttracker` (`last_timing_id`);
CREATE INDEX `stacktach_requesttracker_29f4f2ea` ON `stacktach_requesttracker` (`start`);
CREATE INDEX `stacktach_requesttracker_8eb45f9b` ON `stacktach_requesttracker` (`duration`);
CREATE INDEX `stacktach_requesttracker_e490d511` ON `stacktach_requesttracker` (`completed`);
COMMIT;

View File

@ -39,7 +39,7 @@ def fix_chunk(hours, length):
states[task] = states.get(task, 0) + 1
raw.task = task
image_type_num = image_type.get_numeric_code(payload)
raw.image_type = image_type.get_numeric_code(payload, raw.image_type)
updated += 1
raw.save()

View File

@ -2,6 +2,8 @@ import datetime
import json
import sys
import prettytable
sys.path.append("/stacktach")
from stacktach import datetime_to_decimal as dt
@ -34,6 +36,9 @@ expiry = 60 * 60 # 1 hour
cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
failures = {}
causes = {}
error_messages = {}
successes = {}
tenant_issues = {}
for uuid_dict in updates:
@ -55,10 +60,10 @@ for uuid_dict in updates:
start = None
err = None
operation = None
operation = "n/a"
platform = 0
tenant = 0
dump = False
cell = "n/a"
for raw in raws:
if not start:
@ -75,20 +80,12 @@ for uuid_dict in updates:
for cmd in cmds:
if cmd in raw.event:
operation = cmd
cell = raw.deployment.name
break
if raw.image_type > 0:
platform = raw.image_type
if dump:
print " %s %s T:%s %s %s %s %s %s"\
% (raw.id, raw.routing_key, raw.tenant,
raw.service, raw.host, raw.deployment.name,
raw.event, dt.dt_from_decimal(raw.when))
if raw.event == 'compute.instance.update':
print " State: %s->%s, Task %s->%s" % \
(raw.old_state, raw.state, raw.old_task, raw.task)
if not start:
continue
@ -98,13 +95,16 @@ for uuid_dict in updates:
if diff > 3600:
report = True
if report:
key = (operation, platform, cell)
if not report:
successes[key] = successes.get(key, 0) + 1
else:
print "------", uuid, "----------"
print " Req:", req
print " Duration: %.2f minutes" % (diff / 60)
print " Operation:", operation
print " Platform:", image_type.readable(platform)
key = (operation, platform)
cause = "> %d min" % (expiry / 60)
failures[key] = failures.get(key, 0) + 1
tenant_issues[tenant] = tenant_issues.get(tenant, 0) + 1
@ -117,24 +117,103 @@ for uuid_dict in updates:
err.event, dt.dt_from_decimal(err.when))
exc = payload.get('exception')
if exc:
print exc
# group the messages ...
exc_str = str(exc)
print exc_str
error_messages[exc_str] = error_messages.get(exc_str, 0) + 1
# extract the code, if any ...
code = exc.get('kwargs', {}).get('code')
if code:
codes[code] = codes.get(code, 0) + 1
cause = code
cause_key = (key, cause)
causes[cause_key] = causes.get(cause_key, 0) + 1
print "-- Failures by operation by platform --"
for failure, count in failures.iteritems():
operation, platform = failure
def dump_breakdown(totals, label):
p = prettytable.PrettyTable(["Category", "Count"])
for k, v in totals.iteritems():
p.add_row([k, v])
print label
p.sortby = 'Count'
print p
def dump_summary(info, label):
print "-- %s by operation by cell by platform --" % (label,)
p = prettytable.PrettyTable(["Operation", "Cell", "Platform", "Count"])
total = 0
op_totals = {}
cell_totals = {}
platform_totals = {}
for key, count in info.iteritems():
operation, platform, cell = key
readable = image_type.readable(platform)
text = "n/a"
if readable:
text = ", ".join(readable)
op_totals[operation] = op_totals.get(operation, 0) + count
cell_totals[cell] = cell_totals.get(cell, 0) + count
platform_totals[text] = platform_totals.get(text, 0) + count
p.add_row([operation, cell, text, count])
total += count
p.sortby = 'Count'
print p
dump_breakdown(op_totals, "Total %s by Operation" % label)
dump_breakdown(cell_totals, "Total %s by Cell" % label)
dump_breakdown(platform_totals, "Total %s by Platform" % label)
print
return total
print
print "SUMMARY"
print
good = dump_summary(successes, "Success")
bad = dump_summary(failures, "Failures")
print "====================================================="
print "Total Success: %d Total Failure: %d" % (good, bad)
print
print "-- Errors by Tenant --"
p = prettytable.PrettyTable(["Tenant", "Count"])
for tenant, count in tenant_issues.iteritems():
p.add_row([tenant, count])
p.sortby = 'Count'
print p
print
print "-- Return code counts --"
p = prettytable.PrettyTable(["Return Code", "Count"])
for k, v in codes.iteritems():
p.add_row([k, v])
p.sortby = 'Count'
print p
print
print "-- Cause breakdown --"
p = prettytable.PrettyTable(["Cause", "Operation", "Cell", "Platform", "Count"])
for cause_key, count in causes.iteritems():
key, cause = cause_key
operation, platform, cell = key
readable = image_type.readable(platform)
text = "n/a"
if readable:
text = ", ".join(readable)
print "%s on %s = %d" % (operation, text, count)
p.add_row([cause, operation, cell, text, count])
p.sortby = 'Count'
print p
print "-- Errors by Tenant --"
for tenant, count in tenant_issues.iteritems():
print "T %s = %d" % (tenant, count)
print
print "-- Error Message Counts --"
p = prettytable.PrettyTable(["Count", "Message"])
for k, v in error_messages.iteritems():
p.add_row([v, k[:80]])
p.sortby = 'Count'
print p
print "-- Return code counts --"
for k, v in codes.iteritems():
print k, v

58
stacktach/image_type.py Normal file
View File

@ -0,0 +1,58 @@
# Bit flags that are OR-ed together into a single numeric image-type code.

# Image kind bits.
BASE_IMAGE = 1 << 0
SNAPSHOT_IMAGE = 1 << 1

# OS type bit.
LINUX_IMAGE = 1 << 4

# OS distribution bits.
OS_UBUNTU = 1 << 8
OS_DEBIAN = 1 << 9
OS_CENTOS = 1 << 10
OS_RHEL = 1 << 11
def isset(num, flag):
    """Return True when the bitwise AND of ``num`` and ``flag`` is positive."""
    masked = num & flag
    return masked > 0
# Maps each human-readable flag name to its bit constant; readable() walks
# this dict to turn a numeric code back into a list of names.
flags = {'base' : BASE_IMAGE,
'snapshot' : SNAPSHOT_IMAGE,
'linux' : LINUX_IMAGE,
'ubuntu' : OS_UBUNTU,
'debian' : OS_DEBIAN,
'centos' : OS_CENTOS,
'rhel' : OS_RHEL}
def readable(num):
    """Return the names of every flag in ``flags`` that is set in ``num``.

    Names are returned in the iteration order of the ``flags`` dict.  An
    empty list means none of the known flags is set.
    """
    # items() instead of iteritems(): same result on Python 2, and it also
    # works on Python 3 where iteritems() no longer exists.
    return [name for name, bit in flags.items() if isset(num, bit)]
def get_numeric_code(payload, default=0):
    """Build the numeric image-type code described by ``payload``.

    Reads ``payload['image_meta']`` and ORs the matching flag bits into
    ``default``:

    * ``image_type``: 'base' or 'snapshot'
    * ``os_type``: 'linux'
    * ``os_distro``: 'ubuntu', 'debian', 'centos' or 'rhel'

    Unrecognised or missing values add no bits.

    ``payload`` is a dict-like object.  ``default`` is the starting code;
    ``None`` is treated as 0 so a previously unset value can be passed in.

    Returns the resulting integer code.
    """
    # Tolerate a payload whose 'image_meta' is missing or explicitly None.
    meta = payload.get('image_meta') or {}
    num = 0 if default is None else default

    image_type = meta.get('image_type', '')
    if image_type == 'base':
        num |= BASE_IMAGE
    elif image_type == 'snapshot':
        num |= SNAPSHOT_IMAGE

    os_type = meta.get('os_type', '')
    if os_type == 'linux':
        num |= LINUX_IMAGE

    os_distro = meta.get('os_distro', '')
    if os_distro == 'ubuntu':
        num |= OS_UBUNTU
    elif os_distro == 'debian':
        num |= OS_DEBIAN
    elif os_distro == 'centos':
        num |= OS_CENTOS
    elif os_distro == 'rhel':
        num |= OS_RHEL

    return num