clean up
This commit is contained in:
parent
8ecd7a4a81
commit
93a0723544
@ -17,6 +17,16 @@ if __name__ != '__main__':
|
|||||||
|
|
||||||
# To mask unique identifiers for categorizing notifications
|
# To mask unique identifiers for categorizing notifications
|
||||||
def mask_msg(text):
|
def mask_msg(text):
|
||||||
|
# Needs order because of how precedence affects masking.
|
||||||
|
#
|
||||||
|
# Example: REQ_ID has a UUID in it, but the meaning is different
|
||||||
|
# in this context, so best to grab those first.
|
||||||
|
#
|
||||||
|
# LG_NUM usually represents a memory size; with the number of flavors
|
||||||
|
# this can create a lot of noise.
|
||||||
|
#
|
||||||
|
# The intent is to remove noise from unimportant subtleties
|
||||||
|
|
||||||
masking_regex = (
|
masking_regex = (
|
||||||
(1, 'REQ_ID',
|
(1, 'REQ_ID',
|
||||||
r"req-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"
|
r"req-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"
|
||||||
@ -117,14 +127,21 @@ if __name__ == '__main__':
|
|||||||
day=yesterday.day)
|
day=yesterday.day)
|
||||||
end = start + datetime.timedelta(hours=length-1, minutes=59, seconds=59)
|
end = start + datetime.timedelta(hours=length-1, minutes=59, seconds=59)
|
||||||
|
|
||||||
|
deployments = {}
|
||||||
|
|
||||||
instance_map = {} # { uuid : [request_id, request_id, ...] }
|
instance_map = {} # { uuid : [request_id, request_id, ...] }
|
||||||
exception_counts = {} # { exception_message : count }
|
exception_counts = {} # { exception_message : count }
|
||||||
event_counts = {} # { event_name : count }
|
event_counts = {} # { event_name : count }
|
||||||
metadata = {'report_format': 'json',
|
tenant_issues = {}
|
||||||
'instances': instance_map,
|
codes = {}
|
||||||
'exception_counts': exception_counts,
|
metadata = {
|
||||||
'event_counts': event_counts
|
'report_format': 'json',
|
||||||
}
|
'instances': instance_map,
|
||||||
|
'exception_counts': exception_counts,
|
||||||
|
'event_counts': event_counts,
|
||||||
|
'tenant_issues': tenant_issues,
|
||||||
|
'codes': codes,
|
||||||
|
}
|
||||||
|
|
||||||
# Tell Stacky to format as JSON and set placeholders for various summaries
|
# Tell Stacky to format as JSON and set placeholders for various summaries
|
||||||
report = [metadata]
|
report = [metadata]
|
||||||
@ -132,8 +149,6 @@ if __name__ == '__main__':
|
|||||||
dstart = dt.dt_to_decimal(start)
|
dstart = dt.dt_to_decimal(start)
|
||||||
dend = dt.dt_to_decimal(end)
|
dend = dt.dt_to_decimal(end)
|
||||||
|
|
||||||
codes = {}
|
|
||||||
deployments = {}
|
|
||||||
for deploy in models.Deployment.objects.all():
|
for deploy in models.Deployment.objects.all():
|
||||||
deployments[deploy.id] = deploy.name
|
deployments[deploy.id] = deploy.name
|
||||||
|
|
||||||
@ -145,12 +160,6 @@ if __name__ == '__main__':
|
|||||||
expiry = 60 * 60 # 1 hour
|
expiry = 60 * 60 # 1 hour
|
||||||
cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
|
cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
|
||||||
|
|
||||||
failures = {}
|
|
||||||
causes = {}
|
|
||||||
durations = {}
|
|
||||||
successes = {}
|
|
||||||
tenant_issues = {}
|
|
||||||
|
|
||||||
for uuid_dict in updates:
|
for uuid_dict in updates:
|
||||||
uuid = uuid_dict['instance']
|
uuid = uuid_dict['instance']
|
||||||
|
|
||||||
@ -224,42 +233,24 @@ if __name__ == '__main__':
|
|||||||
if not _start:
|
if not _start:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
image = "?"
|
|
||||||
if image_type.isset(image_type_num, image_type.BASE_IMAGE):
|
|
||||||
image = "base"
|
|
||||||
if image_type.isset(image_type_num, image_type.SNAPSHOT_IMAGE):
|
|
||||||
image = "snap"
|
|
||||||
|
|
||||||
_end = _when
|
_end = _when
|
||||||
diff = _end - _start
|
diff = _end - _start
|
||||||
|
|
||||||
if diff > 3600 and failure_type is None:
|
if diff > 1800 and failure_type is None:
|
||||||
failure_type = ">60"
|
failure_type = ">30"
|
||||||
|
|
||||||
key = (operation, image_type_num, cell)
|
if failure_type:
|
||||||
|
key = (operation, image_type_num, cell)
|
||||||
# Track durations for all attempts, good and bad ...
|
|
||||||
duration_min, duration_max, duration_count, duration_total = \
|
|
||||||
durations.get(key, (9999999, 0, 0, 0))
|
|
||||||
duration_min = min(duration_min, diff)
|
|
||||||
duration_max = max(duration_max, diff)
|
|
||||||
duration_count += 1
|
|
||||||
duration_total += diff
|
|
||||||
durations[key] = (duration_min, duration_max, duration_count,
|
|
||||||
duration_total)
|
|
||||||
|
|
||||||
if not failure_type:
|
|
||||||
successes[key] = successes.get(key, 0) + 1
|
|
||||||
else:
|
|
||||||
failed_request = {}
|
failed_request = {}
|
||||||
message = [] # For exception message masking
|
message = [] # For exception message masking
|
||||||
req_list.append(req)
|
req_list.append(req)
|
||||||
instance_map[uuid] = req_list
|
instance_map[uuid] = req_list
|
||||||
failed_request['req'] = req
|
failed_request['req'] = req
|
||||||
|
failed_request['uuid'] = uuid
|
||||||
|
failed_request['tenant'] = tenant
|
||||||
failed_request['duration'] = "%.2f minutes" % (diff/60)
|
failed_request['duration'] = "%.2f minutes" % (diff/60)
|
||||||
failed_request['operation'] = operation
|
failed_request['operation'] = operation
|
||||||
failed_request['platform'] = image_type.readable(image_type_num)
|
failed_request['platform'] = image_type.readable(image_type_num)
|
||||||
failures[key] = failures.get(key, 0) + 1
|
|
||||||
tenant_issues[tenant] = tenant_issues.get(tenant, 0) + 1
|
tenant_issues[tenant] = tenant_issues.get(tenant, 0) + 1
|
||||||
|
|
||||||
if err_id:
|
if err_id:
|
||||||
@ -296,12 +287,12 @@ if __name__ == '__main__':
|
|||||||
codes[code] = codes.get(code, 0) + 1
|
codes[code] = codes.get(code, 0) + 1
|
||||||
failure_type = code
|
failure_type = code
|
||||||
failed_request['failure_type'] = failure_type
|
failed_request['failure_type'] = failure_type
|
||||||
|
|
||||||
raws = models.RawData.objects.filter(request_id=req)\
|
raws = models.RawData.objects.filter(request_id=req)\
|
||||||
.exclude(event='compute.instance.exists')\
|
.exclude(event='compute.instance.exists')\
|
||||||
.order_by('when')
|
.order_by('when')
|
||||||
|
|
||||||
failed_request['details'] = []
|
failed_request['details'] = []
|
||||||
|
|
||||||
for raw in raws:
|
for raw in raws:
|
||||||
failure_detail = {}
|
failure_detail = {}
|
||||||
failure_detail['host'] = raw.host
|
failure_detail['host'] = raw.host
|
||||||
@ -310,13 +301,11 @@ if __name__ == '__main__':
|
|||||||
failure_detail['state'] = raw.state
|
failure_detail['state'] = raw.state
|
||||||
failure_detail['old_task'] = raw.old_task
|
failure_detail['old_task'] = raw.old_task
|
||||||
failure_detail['task'] = raw.task
|
failure_detail['task'] = raw.task
|
||||||
|
|
||||||
failed_request['details'].append(failure_detail)
|
failed_request['details'].append(failure_detail)
|
||||||
|
|
||||||
report.append(failed_request)
|
report.append(failed_request)
|
||||||
|
|
||||||
cause_key = (key, failure_type)
|
|
||||||
causes[cause_key] = causes.get(cause_key, 0) + 1
|
|
||||||
|
|
||||||
# Assign values to store in DB
|
# Assign values to store in DB
|
||||||
values = {'json': json.dumps(report),
|
values = {'json': json.dumps(report),
|
||||||
'created': dt.dt_to_decimal(datetime.datetime.utcnow()),
|
'created': dt.dt_to_decimal(datetime.datetime.utcnow()),
|
||||||
|
@ -16,17 +16,16 @@ from stacktach import models
|
|||||||
def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
|
def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
|
||||||
store=False, region=None, too_long=1800):
|
store=False, region=None, too_long=1800):
|
||||||
if not yesterday:
|
if not yesterday:
|
||||||
yesterday = datetime.datetime.utcnow().date() - \
|
yesterday = datetime.datetime.utcnow().date() -\
|
||||||
datetime.timedelta(days=1)
|
datetime.timedelta(days=1)
|
||||||
|
|
||||||
rstart = datetime.datetime(year=yesterday.year, month=yesterday.month,
|
rstart = datetime.datetime(year=yesterday.year, month=yesterday.month,
|
||||||
day=yesterday.day, hour=start_hour)
|
day=yesterday.day, hour=start_hour)
|
||||||
rend = rstart + datetime.timedelta(hours=hours-1, minutes=59, seconds=59)
|
rend = rstart + datetime.timedelta(hours=hours-1, minutes=59, seconds=59)
|
||||||
|
|
||||||
dstart = dt.dt_to_decimal(rstart)
|
dstart = dt.dt_to_decimal(rstart)
|
||||||
dend = dt.dt_to_decimal(rend)
|
dend = dt.dt_to_decimal(rend)
|
||||||
|
|
||||||
codes = {}
|
|
||||||
too_long_col = '> %d' % (too_long / 60)
|
too_long_col = '> %d' % (too_long / 60)
|
||||||
|
|
||||||
cells = []
|
cells = []
|
||||||
@ -87,11 +86,12 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
|
|||||||
err = raw
|
err = raw
|
||||||
failure_type = 'http'
|
failure_type = 'http'
|
||||||
|
|
||||||
if raw.old_state != 'error' and raw.state == 'error':
|
if failure_type != 'state' and raw.old_state != 'error'\
|
||||||
|
and raw.state == 'error':
|
||||||
failure_type = 'state'
|
failure_type = 'state'
|
||||||
|
|
||||||
if raw.old_state == 'error' and \
|
if raw.old_state == 'error' and \
|
||||||
(not raw.state in ['deleted', 'error']):
|
(not raw.state in ['deleted', 'error']):
|
||||||
failure_type = None
|
failure_type = None
|
||||||
|
|
||||||
for cmd in cmds:
|
for cmd in cmds:
|
||||||
@ -110,7 +110,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
|
|||||||
image = "snap"
|
image = "snap"
|
||||||
|
|
||||||
#Get os_type from image_type bit field
|
#Get os_type from image_type bit field
|
||||||
os_type = "other"
|
os_type = "?"
|
||||||
if image_type.isset(image_type_num, image_type.LINUX_IMAGE):
|
if image_type.isset(image_type_num, image_type.LINUX_IMAGE):
|
||||||
os_type = "linux"
|
os_type = "linux"
|
||||||
if image_type.isset(image_type_num, image_type.WINDOWS_IMAGE):
|
if image_type.isset(image_type_num, image_type.WINDOWS_IMAGE):
|
||||||
@ -122,7 +122,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
|
|||||||
end = raw.when
|
end = raw.when
|
||||||
diff = end - start
|
diff = end - start
|
||||||
|
|
||||||
if diff > too_long and failure_type == None:
|
if diff > too_long and failure_type is None:
|
||||||
failure_type = too_long_col
|
failure_type = too_long_col
|
||||||
|
|
||||||
key = (operation, image, os_type)
|
key = (operation, image, os_type)
|
||||||
@ -158,7 +158,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
|
|||||||
report.append(details)
|
report.append(details)
|
||||||
|
|
||||||
failure_types = ["4xx", "5xx", too_long_col, "state"]
|
failure_types = ["4xx", "5xx", too_long_col, "state"]
|
||||||
cols = ["Operation", "Image", "OS", "Min", "Max", "Med", "%d%%" % percentile,
|
cols = ["Operation", "Image", "OS Type", "Min", "Max", "Med", "%d%%" % percentile,
|
||||||
"Requests"]
|
"Requests"]
|
||||||
for failure_type in failure_types:
|
for failure_type in failure_types:
|
||||||
cols.append("%s" % failure_type)
|
cols.append("%s" % failure_type)
|
||||||
@ -178,7 +178,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
|
|||||||
# Sum for grand totals.
|
# Sum for grand totals.
|
||||||
failure_count = breakdown.get(failure_type, 0)
|
failure_count = breakdown.get(failure_type, 0)
|
||||||
failure_totals[failure_type] = \
|
failure_totals[failure_type] = \
|
||||||
failure_totals.get(failure_type, 0) + failure_count
|
failure_totals.get(failure_type, 0) + failure_count
|
||||||
|
|
||||||
# Failure percentage for this attempt.
|
# Failure percentage for this attempt.
|
||||||
percentage = float(failure_count) / float(count)
|
percentage = float(failure_count) / float(count)
|
||||||
@ -231,9 +231,9 @@ def valid_date(date):
|
|||||||
try:
|
try:
|
||||||
t = time.strptime(date, "%Y-%m-%d")
|
t = time.strptime(date, "%Y-%m-%d")
|
||||||
return datetime.datetime(*t[:6])
|
return datetime.datetime(*t[:6])
|
||||||
except Exception, e:
|
except Exception:
|
||||||
raise argparse.ArgumentTypeError(
|
raise argparse.ArgumentTypeError(
|
||||||
"'%s' is not in YYYY-MM-DD format." % date)
|
"'%s' is not in YYYY-MM-DD format." % date)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
Reference in New Issue
Block a user