From eaf0fe7d112a6ea4046af1b460d52c05b68a291a Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Wed, 7 Nov 2012 11:57:26 -0400 Subject: [PATCH 1/5] working on incremental kpi --- stacktach/models.py | 10 ++++++++++ stacktach/stacky_server.py | 2 +- stacktach/views.py | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/stacktach/models.py b/stacktach/models.py index 7a536c3..1922d93 100644 --- a/stacktach/models.py +++ b/stacktach/models.py @@ -76,3 +76,13 @@ class Timing(models.Model): end_when = models.DecimalField(null=True, max_digits=20, decimal_places=6) diff = models.DecimalField(null=True, max_digits=20, decimal_places=6) + + +class RequestTracker(models.Model): + request_id = models.CharField(max_length=50, db_index=True) + + +class RequestEvents(models.Model): + request_tracker = models.ForeignKey(RequestTracker) + raw = models.ForeignKey(RawData, related_name='+') + when = models.DecimalField(max_digits=20, decimal_places=6) diff --git a/stacktach/stacky_server.py b/stacktach/stacky_server.py index 037b75d..07edd2e 100644 --- a/stacktach/stacky_server.py +++ b/stacktach/stacky_server.py @@ -280,7 +280,7 @@ def do_watch(request, deployment_id): def do_kpi(request): - yesterday = datetime.datetime.now() - datetime.timedelta(days=1) + yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) events = models.RawData.objects.exclude(instance=None) \ .exclude(when__lt=yesterday) \ diff --git a/stacktach/views.py b/stacktach/views.py index e4684e7..5346f9f 100644 --- a/stacktach/views.py +++ b/stacktach/views.py @@ -74,6 +74,20 @@ HANDLERS = {'monitor.info':_monitor_message, '':_compute_update_message} +def kpi(raw): + if raw.event == "compute.instance.update": + if "api" in e.host: + activities = instance_map.get(e.instance, []) + activities.append((e.request_id, e, None)) + instance_map[e.instance] = activities + return + + if not e.event.endswith(".end"): + return + + + + def aggregate(raw): """Roll up the raw event into a Lifecycle object and a bunch of Timing objects. From 9dd54679edd1fbd73bc27540daf38c440bf42cfd Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Wed, 7 Nov 2012 12:19:01 -0400 Subject: [PATCH 2/5] brb --- stacktach/views.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/stacktach/views.py b/stacktach/views.py index 5346f9f..34ed085 100644 --- a/stacktach/views.py +++ b/stacktach/views.py @@ -76,11 +76,10 @@ HANDLERS = {'monitor.info':_monitor_message, def kpi(raw): if raw.event == "compute.instance.update": - if "api" in e.host: - activities = instance_map.get(e.instance, []) - activities.append((e.request_id, e, None)) - instance_map[e.instance] = activities - return + if "api" in raw.host: + tracker = models.RequestTracker(request_id=raw.request_id) + tracker.save() + return if not e.event.endswith(".end"): return From 65eae46ac3d247963966eec633afa66ddc797660 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Wed, 7 Nov 2012 16:36:12 -0400 Subject: [PATCH 3/5] working on incremental kpi update --- stacktach/models.py | 23 +++++++++++---- stacktach/stacky_server.py | 60 +++++++------------------------------- stacktach/views.py | 49 +++++++++++++++++++++++++------ 3 files changed, 68 insertions(+), 64 deletions(-) diff --git a/stacktach/models.py b/stacktach/models.py index 1922d93..f09e3ff 100644 --- a/stacktach/models.py +++ b/stacktach/models.py @@ -57,6 +57,12 @@ class RawData(models.Model): class Lifecycle(models.Model): + """The Lifecycle table is the Master for a group of + Timing detail records. There is one Lifecycle row for + each instance seen in the event stream. The Timings + relate to the execution time for each .start/.end event + pair for this instance. These pairs are over the entire + lifespan of the instance, even across multiple api requests.""" instance = models.CharField(max_length=50, null=True, blank=True, db_index=True) last_state = models.CharField(max_length=50, null=True, @@ -67,6 +73,9 @@ class Lifecycle(models.Model): class Timing(models.Model): + """Each Timing record corresponds to a .start/.end event pair + for an instance. It tracks how long it took this operation + to execute.""" name = models.CharField(max_length=50, db_index=True) lifecycle = models.ForeignKey(Lifecycle) start_raw = models.ForeignKey(RawData, related_name='+', null=True) @@ -79,10 +88,14 @@ class Timing(models.Model): class RequestTracker(models.Model): + """The RequestTracker table tracks the elapsed time of a user + request from the time it hits the API node to the time of the + final .end event (with the same Request ID).""" request_id = models.CharField(max_length=50, db_index=True) + lifecycle = models.ForeignKey(Lifecycle) + last_timing = models.ForeignKey(Timing, null=true) + start = models.DecimalField(max_digits=20, decimal_places=6) + duration = models.DecimalField(max_digits=20, decimal_places=6) - -class RequestEvents(models.Model): - request_tracker = models.ForeignKey(RequestTracker) - raw = models.ForeignKey(RawData, related_name='+') - when = models.DecimalField(max_digits=20, decimal_places=6) + # Not used ... but soon hopefully. + completed = models.BooleanField(default=False) diff --git a/stacktach/stacky_server.py b/stacktach/stacky_server.py index 07edd2e..127d400 100644 --- a/stacktach/stacky_server.py +++ b/stacktach/stacky_server.py @@ -281,57 +281,17 @@ def do_watch(request, deployment_id): def do_kpi(request): yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) - - events = models.RawData.objects.exclude(instance=None) \ - .exclude(when__lt=yesterday) \ - .filter(Q(event__endswith='.end') | - Q(event="compute.instance.update")) \ - .only('event', 'host', 'request_id', - 'instance', 'deployment') \ - .order_by('when') - - events = list(events) - instance_map = {} # { uuid: [(request_id, start_event, end_event), ...] } - - for e in events: - if e.event == "compute.instance.update": - if "api" in e.host: - activities = instance_map.get(e.instance, []) - activities.append((e.request_id, e, None)) - instance_map[e.instance] = activities - continue - - if not e.event.endswith(".end"): - continue - - activities = instance_map.get(e.instance) - if not activities: - # We missed the api start, skip it - continue - - found = False - for index, a in enumerate(activities): - request_id, start_event, end_event = a - #if end_event is not None: - # continue - - if request_id == e.request_id: - end_event = e - activities[index] = (request_id, start_event, e) - found = True - break + trackers = models.RequestTracker.objects.select_related() \ + .exclude(last_timing=None) \ + .exclude(start__lt=yesterday) \ + .order_by('duration') results = [] results.append(["Event", "Time", "UUID", "Deployment"]) - for uuid, activities in instance_map.iteritems(): - for request_id, start_event, end_event in activities: - if not end_event: - continue - event = end_event.event[:-len(".end")] - start = dt.dt_from_decimal(start_event.when) - end = dt.dt_from_decimal(end_event.when) - diff = end - start - results.append([event, sec_to_time(seconds_from_timedelta( - diff.days, diff.seconds, diff.microseconds)), uuid, - end_event.deployment.name]) + for track in trackers: + end_event = track.last_timing.end_raw + event = end_event.event[:-len(".end")] + uuid = track.lifecycle.instance + results.append([event, sec_to_time(track.duration), + uuid, end_event.deployment.name]) return rsp(results) diff --git a/stacktach/views.py b/stacktach/views.py index 34ed085..042ae0b 100644 --- a/stacktach/views.py +++ b/stacktach/views.py @@ -74,17 +74,42 @@ HANDLERS = {'monitor.info':_monitor_message, '':_compute_update_message} -def kpi(raw): - if raw.event == "compute.instance.update": - if "api" in raw.host: - tracker = models.RequestTracker(request_id=raw.request_id) - tracker.save() - return - - if not e.event.endswith(".end"): +def start_kpi_tracking(lifecycle, raw): + """Start the clock for kpi timings when we see an instance.update + coming in from an api node.""" + if raw.event != "compute.instance.update": return + if "api" not in raw.host: + return + tracker = models.RequestTracker(request_id=raw.request_id, + start=raw.when, + lifecycle=lifecycle, + last_timing=None, + duration=0.0) + tracker.save() + + +def update_kpi(lifecycle, timing, raw): + """Whenever we get a .end event, use the Timing object to + compute our current end-to-end duration. + + Note: it may not be completely accurate if the operation is + still in-process, but we have no way of knowing it's still + in-process without mapping the original command with the + expected .end event (that's a whole other thing) + + Until then, we'll take the lazy route and be aware of these + potential fence-post issues.""" + trackers = models.RequestTracker.objects.filter(request_id=raw.request.id) + if len(trackers) == 0: + return + + tracker = trackers[0] + tracker.last_timing = timing + tracker.duration = timing.end_when - tracker.start + tracker.save() def aggregate(raw): @@ -92,6 +117,9 @@ def aggregate(raw): and a bunch of Timing objects. We can use this for summarized timing reports. + + Additionally, we can use this processing to give + us end-to-end user request timings for kpi reports. """ if not raw.instance: @@ -117,6 +145,8 @@ def aggregate(raw): name = '.'.join(parts[:-1]) if not step in ['start', 'end']: + # Perhaps it's an operation initiated in the API? + start_kpi_tracking(lifecyle, raw) return # We are going to try to track every event pair that comes @@ -164,9 +194,10 @@ def aggregate(raw): # We could have missed start so watch out ... if timing.start_when: timing.diff = timing.end_when - timing.start_when + # Looks like a valid pair ... + update_kpi(lifecycle, timing, raw) timing.save() - def process_raw_data(deployment, args, json_args): """This is called directly by the worker to add the event to the db.""" db.reset_queries() From 03c5a51527f8d5ceef6287e5ca2cfd0f3c4f4092 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Mon, 19 Nov 2012 11:14:52 -0400 Subject: [PATCH 4/5] fixed up a bunch of little gotchas. kpi working --- settings.py | 2 +- stacktach/models.py | 5 +++-- stacktach/stacky_server.py | 1 + stacktach/views.py | 7 ++++--- worker/stacktach.sh | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/settings.py b/settings.py index 05068b6..a972fb5 100644 --- a/settings.py +++ b/settings.py @@ -16,7 +16,7 @@ except ImportError: db_password = os.environ['STACKTACH_DB_PASSWORD'] install_dir = os.environ['STACKTACH_INSTALL_DIR'] -DEBUG = False +DEBUG = True TEMPLATE_DEBUG = DEBUG ADMINS = ( diff --git a/stacktach/models.py b/stacktach/models.py index f09e3ff..2a5310b 100644 --- a/stacktach/models.py +++ b/stacktach/models.py @@ -81,7 +81,8 @@ class Timing(models.Model): start_raw = models.ForeignKey(RawData, related_name='+', null=True) end_raw = models.ForeignKey(RawData, related_name='+', null=True) - start_when = models.DecimalField(null=True, max_digits=20, decimal_places=6) + start_when = models.DecimalField(null=True, max_digits=20, + decimal_places=6) end_when = models.DecimalField(null=True, max_digits=20, decimal_places=6) diff = models.DecimalField(null=True, max_digits=20, decimal_places=6) @@ -93,7 +94,7 @@ class RequestTracker(models.Model): final .end event (with the same Request ID).""" request_id = models.CharField(max_length=50, db_index=True) lifecycle = models.ForeignKey(Lifecycle) - last_timing = models.ForeignKey(Timing, null=true) + last_timing = models.ForeignKey(Timing, null=True) start = models.DecimalField(max_digits=20, decimal_places=6) duration = models.DecimalField(max_digits=20, decimal_places=6) diff --git a/stacktach/stacky_server.py b/stacktach/stacky_server.py index 127d400..4daeaba 100644 --- a/stacktach/stacky_server.py +++ b/stacktach/stacky_server.py @@ -281,6 +281,7 @@ def do_watch(request, deployment_id): def do_kpi(request): yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) + yesterday = dt.dt_to_decimal(yesterday) trackers = models.RequestTracker.objects.select_related() \ .exclude(last_timing=None) \ .exclude(start__lt=yesterday) \ diff --git a/stacktach/views.py b/stacktach/views.py index 042ae0b..f71cecc 100644 --- a/stacktach/views.py +++ b/stacktach/views.py @@ -87,7 +87,7 @@ def start_kpi_tracking(lifecycle, raw): start=raw.when, lifecycle=lifecycle, last_timing=None, - duration=0.0) + duration=str(0.0)) tracker.save() @@ -102,7 +102,8 @@ def update_kpi(lifecycle, timing, raw): Until then, we'll take the lazy route and be aware of these potential fence-post issues.""" - trackers = models.RequestTracker.objects.filter(request_id=raw.request.id) + trackers = models.RequestTracker.objects.\ + filter(request_id=raw.request_id) if len(trackers) == 0: return @@ -146,7 +147,7 @@ def aggregate(raw): if not step in ['start', 'end']: # Perhaps it's an operation initiated in the API? - start_kpi_tracking(lifecyle, raw) + start_kpi_tracking(lifecycle, raw) return # We are going to try to track every event pair that comes diff --git a/worker/stacktach.sh b/worker/stacktach.sh index be51937..e048438 100755 --- a/worker/stacktach.sh +++ b/worker/stacktach.sh @@ -1,6 +1,6 @@ #!/bin/bash -WORKDIR=/srv/www/stacktach/django/stproject/ +WORKDIR=/srv/www/stacktach/app DAEMON=/usr/bin/python ARGS=$WORKDIR/worker/start_workers.py PIDFILE=/var/run/stacktach.pid From 4f36d540d011a3a90eaf6e2f95c3e4457df02f11 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Mon, 19 Nov 2012 11:53:51 -0400 Subject: [PATCH 5/5] whoops, turned debug back on --- settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings.py b/settings.py index a972fb5..05068b6 100644 --- a/settings.py +++ b/settings.py @@ -16,7 +16,7 @@ except ImportError: db_password = os.environ['STACKTACH_DB_PASSWORD'] install_dir = os.environ['STACKTACH_INSTALL_DIR'] -DEBUG = True +DEBUG = False TEMPLATE_DEBUG = DEBUG ADMINS = (