From c1e8e305e1ae67766b12e8e87b5ad26230f14ee7 Mon Sep 17 00:00:00 2001
From: root
Date: Mon, 11 Feb 2013 15:10:34 -0600
Subject: [PATCH] requests working

---
 migrations/001_base.sql                   |  61 ++++++++++
 migrations/002_task.sql                   |  62 ++++++++++
 migrations/002_task_indexes.sql           |  31 +++++
 migrations/003_populate_task_and_image.py |  63 ++++++++++
 reports/requests.py                       | 140 ++++++++++++++++++++++
 stacktach/datetime_to_decimal.py          |   2 +
 stacktach/models.py                       |   3 +
 stacktach/views.py                        |  15 ++-
 worker/worker.py                          |  30 +++--
 9 files changed, 391 insertions(+), 16 deletions(-)
 create mode 100644 migrations/001_base.sql
 create mode 100644 migrations/002_task.sql
 create mode 100644 migrations/002_task_indexes.sql
 create mode 100644 migrations/003_populate_task_and_image.py
 create mode 100644 reports/requests.py

diff --git a/migrations/001_base.sql b/migrations/001_base.sql
new file mode 100644
index 0000000..5f81c41
--- /dev/null
+++ b/migrations/001_base.sql
@@ -0,0 +1,61 @@
+BEGIN;
+CREATE TABLE `stacktach_deployment` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL
+)
+;
+CREATE TABLE `stacktach_rawdata` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `deployment_id` integer NOT NULL,
+    `tenant` varchar(50),
+    `json` longtext NOT NULL,
+    `routing_key` varchar(50),
+    `state` varchar(20),
+    `old_state` varchar(20),
+    `old_task` varchar(30),
+    `when` numeric(20, 6) NOT NULL,
+    `publisher` varchar(100),
+    `event` varchar(50),
+    `service` varchar(50),
+    `host` varchar(100),
+    `instance` varchar(50),
+    `request_id` varchar(50)
+)
+;
+ALTER TABLE `stacktach_rawdata` ADD CONSTRAINT `deployment_id_refs_id_362370d` FOREIGN KEY (`deployment_id`) REFERENCES `stacktach_deployment` (`id`);
+CREATE TABLE `stacktach_lifecycle` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `instance` varchar(50),
+    `last_state` varchar(50),
+    `last_task_state` varchar(50),
+    `last_raw_id` integer
+)
+;
+ALTER TABLE `stacktach_lifecycle` ADD CONSTRAINT `last_raw_id_refs_id_d5fb17d3` FOREIGN KEY (`last_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_timing` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `start_raw_id` integer,
+    `end_raw_id` integer,
+    `start_when` numeric(20, 6),
+    `end_when` numeric(20, 6),
+    `diff` numeric(20, 6)
+)
+;
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `lifecycle_id_refs_id_4255ead8` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `start_raw_id_refs_id_c32dfe04` FOREIGN KEY (`start_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `end_raw_id_refs_id_c32dfe04` FOREIGN KEY (`end_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_requesttracker` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `request_id` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `last_timing_id` integer,
+    `start` numeric(20, 6) NOT NULL,
+    `duration` numeric(20, 6) NOT NULL,
+    `completed` bool NOT NULL
+)
+;
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `lifecycle_id_refs_id_e457729` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `last_timing_id_refs_id_f0827cca` FOREIGN KEY (`last_timing_id`) REFERENCES `stacktach_timing` (`id`);
+COMMIT;
diff --git a/migrations/002_task.sql b/migrations/002_task.sql
new file mode 100644
index 0000000..be9a43b
--- /dev/null
+++ b/migrations/002_task.sql
@@ -0,0 +1,62 @@
+BEGIN;
+CREATE TABLE `stacktach_deployment` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL
+)
+;
+CREATE TABLE `stacktach_rawdata` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `deployment_id` integer NOT NULL,
+    `tenant` varchar(50),
+    `json` longtext NOT NULL,
+    `routing_key` varchar(50),
+    `state` varchar(20),
+    `old_state` varchar(20),
+    `old_task` varchar(30),
+    `task` varchar(30),
+    `when` numeric(20, 6) NOT NULL,
+    `publisher` varchar(100),
+    `event` varchar(50),
+    `service` varchar(50),
+    `host` varchar(100),
+    `instance` varchar(50),
+    `request_id` varchar(50)
+)
+;
+ALTER TABLE `stacktach_rawdata` ADD CONSTRAINT `deployment_id_refs_id_362370d` FOREIGN KEY (`deployment_id`) REFERENCES `stacktach_deployment` (`id`);
+CREATE TABLE `stacktach_lifecycle` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `instance` varchar(50),
+    `last_state` varchar(50),
+    `last_task_state` varchar(50),
+    `last_raw_id` integer
+)
+;
+ALTER TABLE `stacktach_lifecycle` ADD CONSTRAINT `last_raw_id_refs_id_d5fb17d3` FOREIGN KEY (`last_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_timing` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `start_raw_id` integer,
+    `end_raw_id` integer,
+    `start_when` numeric(20, 6),
+    `end_when` numeric(20, 6),
+    `diff` numeric(20, 6)
+)
+;
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `lifecycle_id_refs_id_4255ead8` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `start_raw_id_refs_id_c32dfe04` FOREIGN KEY (`start_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `end_raw_id_refs_id_c32dfe04` FOREIGN KEY (`end_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_requesttracker` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `request_id` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `last_timing_id` integer,
+    `start` numeric(20, 6) NOT NULL,
+    `duration` numeric(20, 6) NOT NULL,
+    `completed` bool NOT NULL
+)
+;
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `lifecycle_id_refs_id_e457729` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `last_timing_id_refs_id_f0827cca` FOREIGN KEY (`last_timing_id`) REFERENCES `stacktach_timing` (`id`);
+COMMIT;
diff --git a/migrations/002_task_indexes.sql b/migrations/002_task_indexes.sql
new file mode 100644
index 0000000..69a7c0f
--- /dev/null
+++ b/migrations/002_task_indexes.sql
@@ -0,0 +1,31 @@
+BEGIN;
+CREATE INDEX `stacktach_rawdata_4ac6801` ON `stacktach_rawdata` (`deployment_id`);
+CREATE INDEX `stacktach_rawdata_2207f86d` ON `stacktach_rawdata` (`tenant`);
+CREATE INDEX `stacktach_rawdata_2192f43a` ON `stacktach_rawdata` (`routing_key`);
+CREATE INDEX `stacktach_rawdata_355bfc27` ON `stacktach_rawdata` (`state`);
+CREATE INDEX `stacktach_rawdata_b716e0bb` ON `stacktach_rawdata` (`old_state`);
+CREATE INDEX `stacktach_rawdata_8182be12` ON `stacktach_rawdata` (`old_task`);
+CREATE INDEX `stacktach_rawdata_1c149b74` ON `stacktach_rawdata` (`task`);
+CREATE INDEX `stacktach_rawdata_feaed089` ON `stacktach_rawdata` (`when`);
+CREATE INDEX `stacktach_rawdata_878a2906` ON `stacktach_rawdata` (`publisher`);
+CREATE INDEX `stacktach_rawdata_a90f9116` ON `stacktach_rawdata` (`event`);
+CREATE INDEX `stacktach_rawdata_52c5ef6b` ON `stacktach_rawdata` (`service`);
+CREATE INDEX `stacktach_rawdata_38dbea87` ON `stacktach_rawdata` (`host`);
+CREATE INDEX `stacktach_rawdata_888b756a` ON `stacktach_rawdata` (`instance`);
+CREATE INDEX `stacktach_rawdata_792812e8` ON `stacktach_rawdata` (`request_id`);
+CREATE INDEX `stacktach_lifecycle_888b756a` ON `stacktach_lifecycle` (`instance`);
+CREATE INDEX `stacktach_lifecycle_9b2555fd` ON `stacktach_lifecycle` (`last_state`);
+CREATE INDEX `stacktach_lifecycle_67421a0e` ON `stacktach_lifecycle` (`last_task_state`);
+CREATE INDEX `stacktach_lifecycle_dcf9e5f3` ON `stacktach_lifecycle` (`last_raw_id`);
+CREATE INDEX `stacktach_timing_52094d6e` ON `stacktach_timing` (`name`);
+CREATE INDEX `stacktach_timing_9f222e6b` ON `stacktach_timing` (`lifecycle_id`);
+CREATE INDEX `stacktach_timing_efab905a` ON `stacktach_timing` (`start_raw_id`);
+CREATE INDEX `stacktach_timing_c8bb8daf` ON `stacktach_timing` (`end_raw_id`);
+CREATE INDEX `stacktach_timing_4401d15e` ON `stacktach_timing` (`diff`);
+CREATE INDEX `stacktach_requesttracker_792812e8` ON `stacktach_requesttracker` (`request_id`);
+CREATE INDEX `stacktach_requesttracker_9f222e6b` ON `stacktach_requesttracker` (`lifecycle_id`);
+CREATE INDEX `stacktach_requesttracker_ce616a96` ON `stacktach_requesttracker` (`last_timing_id`);
+CREATE INDEX `stacktach_requesttracker_29f4f2ea` ON `stacktach_requesttracker` (`start`);
+CREATE INDEX `stacktach_requesttracker_8eb45f9b` ON `stacktach_requesttracker` (`duration`);
+CREATE INDEX `stacktach_requesttracker_e490d511` ON `stacktach_requesttracker` (`completed`);
+COMMIT;
diff --git a/migrations/003_populate_task_and_image.py b/migrations/003_populate_task_and_image.py
new file mode 100644
index 0000000..01b6619
--- /dev/null
+++ b/migrations/003_populate_task_and_image.py
@@ -0,0 +1,63 @@
+import datetime
+import json
+import sys
+
+sys.path.append("/stacktach")
+
+from stacktach import datetime_to_decimal as dt
+from stacktach import image_type
+from stacktach import models
+
+
+if __name__ != '__main__':
+    sys.exit(1)
+
+states = {}
+
+
+def fix_chunk(hours, length):
+    now = datetime.datetime.utcnow()
+    start = now - datetime.timedelta(hours=hours+length)
+    end = now - datetime.timedelta(hours=hours)
+    dstart = dt.dt_to_decimal(start)
+    dend = dt.dt_to_decimal(end)
+
+    done = 0
+    updated = 0
+    block = 0
+    print "Hours ago (%d to %d) %d - %d" % (hours + length, hours, dstart, dend)
+    updates = models.RawData.objects.filter(event='compute.instance.update',
+                                            when__gt=dstart, when__lte=dend)\
+                                    .only('task', 'image_type', 'json')
+
+    for raw in updates:
+        queue, body = json.loads(raw.json)
+        payload = body.get('payload', {})
+        task = payload.get('new_task_state', None)
+
+        if task is not None and task != 'None':
+            states[task] = states.get(task, 0) + 1
+            raw.task = task
+
+        raw.image_type = image_type.get_numeric_code(payload)
+        updated += 1
+        raw.save()
+
+        done += 1
+        if done >= 10000:
+            block += 1
+            done = 0
+            print "# 10k blocks processed: %d (events %d)" % \
+                  (block, updated)
+            updated = 0
+
+    for kv in states.iteritems():
+        print "%s = %d" % kv
+
+for day in xrange(2, 90):
+    hours = day * 24
+    steps = 12
+    chunk = 24 / steps
+    for x in xrange(steps):
+        fix_chunk(hours, chunk)
+        hours += chunk
diff --git a/reports/requests.py b/reports/requests.py
new file mode 100644
index 0000000..b12ce9c
--- /dev/null
+++ b/reports/requests.py
@@ -0,0 +1,140 @@
+import datetime
+import json
+import sys
+
+sys.path.append("/stacktach")
+
+from stacktach import datetime_to_decimal as dt
+from stacktach import image_type
+from stacktach import models
+
+
+if __name__ != '__main__':
+    sys.exit(1)
+
+hours = 0
+length = 24
+
+now = datetime.datetime.utcnow()
+start = now - datetime.timedelta(hours=hours+length)
+end = now - datetime.timedelta(hours=hours)
+
+dnow = dt.dt_to_decimal(now)
+dstart = dt.dt_to_decimal(start)
+dend = dt.dt_to_decimal(end)
+
+codes = {}
+
+# Get all the instances that have changed in the last N hours ...
+updates = models.RawData.objects.filter(event='compute.instance.update',
+                                        when__gt=dstart, when__lte=dend)\
+                                .values('instance').distinct()
+
+expiry = 60 * 60  # 1 hour
+cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
+
+failures = {}
+tenant_issues = {}
+
+for uuid_dict in updates:
+    uuid = uuid_dict['instance']
+
+    # All the unique Request ID's for this instance during that timespan.
+    reqs = models.RawData.objects.filter(instance=uuid,
+                                         when__gt=dstart, when__lte=dend) \
+                                 .values('request_id').distinct()
+
+
+    for req_dict in reqs:
+        report = False
+        req = req_dict['request_id']
+        raws = models.RawData.objects.filter(request_id=req)\
+                                     .exclude(event='compute.instance.exists')\
+                                     .order_by('when')
+
+        start = None
+        err = None
+
+        operation = None
+        platform = 0
+        tenant = 0
+        dump = False
+
+        for raw in raws:
+            if not start:
+                start = raw.when
+            if 'error' in raw.routing_key:
+                err = raw
+                report = True
+
+            if raw.tenant:
+                if tenant > 0 and raw.tenant != tenant:
+                    print "Conflicting tenant ID", raw.tenant, tenant
+                tenant = raw.tenant
+
+            for cmd in cmds:
+                if cmd in raw.event:
+                    operation = cmd
+                    break
+
+            if raw.image_type > 0:
+                platform = raw.image_type
+
+            if dump:
+                print "    %s %s T:%s %s %s %s %s %s"\
+                    % (raw.id, raw.routing_key, raw.tenant,
+                       raw.service, raw.host, raw.deployment.name,
+                       raw.event, dt.dt_from_decimal(raw.when))
+                if raw.event == 'compute.instance.update':
+                    print "        State: %s->%s, Task %s->%s" % \
+                        (raw.old_state, raw.state, raw.old_task, raw.task)
+
+        if not start:
+            continue
+
+        end = raw.when
+        diff = end - start
+
+        if diff > 3600:
+            report = True
+
+        if report:
+            print "------", uuid, "----------"
+            print "    Req:", req
+            print "    Duration: %.2f minutes" % (diff / 60)
+            print "    Operation:", operation
+            print "    Platform:", image_type.readable(platform)
+            key = (operation, platform)
+            failures[key] = failures.get(key, 0) + 1
+            tenant_issues[tenant] = tenant_issues.get(tenant, 0) + 1
+
+            if err:
+                queue, body = json.loads(err.json)
+                payload = body['payload']
+                print "Error. EventID: %s, Tenant %s, Service %s, Host %s, "\
+                      "Deployment %s, Event %s, When %s"\
+                    % (err.id, err.tenant, err.service, err.host, err.deployment.name,
+                       err.event, dt.dt_from_decimal(err.when))
+                exc = payload.get('exception')
+                if exc:
+                    print exc
+                    code = exc.get('kwargs', {}).get('code')
+                    if code:
+                        codes[code] = codes.get(code, 0) + 1
+
+print "-- Failures by operation by platform --"
+for failure, count in failures.iteritems():
+    operation, platform = failure
+    readable = image_type.readable(platform)
+    text = "n/a"
+    if readable:
+        text = ", ".join(readable)
+    print "%s on %s = %d" % (operation, text, count)
+
+print "-- Errors by Tenant --"
+for tenant, count in tenant_issues.iteritems():
+    print "T %s = %d" % (tenant, count)
+
+print "-- Return code counts --"
+for k, v in codes.iteritems():
+    print k, v
diff --git a/stacktach/datetime_to_decimal.py b/stacktach/datetime_to_decimal.py
index 96c8ea3..1781cb0 100644
--- a/stacktach/datetime_to_decimal.py
+++ b/stacktach/datetime_to_decimal.py
@@ -12,6 +12,8 @@ def dt_to_decimal(utc):
 
 
 def dt_from_decimal(dec):
+    if dec is None:
+        return "n/a"
     integer = int(dec)
     micro = (dec - decimal.Decimal(integer)) * decimal.Decimal(1000000)
diff --git a/stacktach/models.py b/stacktach/models.py
index 9fde6b2..38acaec 100644
--- a/stacktach/models.py
+++ b/stacktach/models.py
@@ -38,6 +38,9 @@ class RawData(models.Model):
                                  blank=True, db_index=True)
     old_task = models.CharField(max_length=30, null=True,
                                 blank=True, db_index=True)
+    task = models.CharField(max_length=30, null=True,
+                            blank=True, db_index=True)
+    image_type = models.IntegerField(null=True, default=0, db_index=True)
     when = models.DecimalField(max_digits=20, decimal_places=6,
                                db_index=True)
     publisher = models.CharField(max_length=100, null=True,
diff --git a/stacktach/views.py b/stacktach/views.py
index c01a922..b2ac66b 100644
--- a/stacktach/views.py
+++ b/stacktach/views.py
@@ -1,23 +1,26 @@
 # Copyright 2012 - Dark Secret Software Inc.
+import datetime
+import json
+import pprint
+
 from django import db
 from django import http
 from django.shortcuts import render_to_response
 from django import template
 
-from stacktach import models
 from stacktach import datetime_to_decimal as dt
-
-import datetime
-import json
-import pprint
+from stacktach import image_type
+from stacktach import models
 
 
 def _extract_states(payload):
     return {
         'state' : payload.get('state', ""),
         'old_state' : payload.get('old_state', ""),
-        'old_task' : payload.get('old_task_state', "")
+        'old_task' : payload.get('old_task_state', ""),
+        'task' : payload.get('new_task_state', ""),
+        'image_type' : image_type.get_numeric_code(payload)
     }
diff --git a/worker/worker.py b/worker/worker.py
index 9a3165f..ae16b04 100644
--- a/worker/worker.py
+++ b/worker/worker.py
@@ -23,6 +23,7 @@ import kombu.connection
 import kombu.entity
 import kombu.mixins
 import logging
+import sys
 import time
 
 from pympler.process import ProcessMemoryInfo
@@ -35,6 +36,8 @@ LOG = logging.getLogger(__name__)
 LOG.setLevel(logging.DEBUG)
 handler = logging.handlers.TimedRotatingFileHandler('worker.log',
                                            when='h', interval=6, backupCount=4)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+handler.setFormatter(formatter)
 LOG.addHandler(handler)
@@ -138,13 +141,20 @@ def run(deployment_config):
                                       virtual_host=virtual_host)
 
     while True:
-        LOG.debug("Processing on '%s'" % name)
-        with kombu.connection.BrokerConnection(**params) as conn:
-            try:
-                consumer = NovaConsumer(name, conn, deployment, durable)
-                consumer.run()
-            except Exception as e:
-                LOG.exception("name=%s, exception=%s. Reconnecting in 5s" %
-                              (name, e))
-                time.sleep(5)
-        LOG.debug("Completed processing on '%s'" % name)
+        try:
+            LOG.debug("Processing on '%s'" % name)
+            with kombu.connection.BrokerConnection(**params) as conn:
+                try:
+                    consumer = NovaConsumer(name, conn, deployment, durable)
+                    consumer.run()
+                except Exception as e:
+                    LOG.exception("name=%s, exception=%s. Reconnecting in 5s" %
+                                  (name, e))
+                    time.sleep(5)
+            LOG.debug("Completed processing on '%s'" % name)
+        except:
+            e = sys.exc_info()[0]
+            msg = "Uncaught exception: deployment=%s, exception=%s. Retrying in 5s"
+            LOG.exception(msg % (name, e))
+            time.sleep(5)
+