stacktach/migrations/006_populate_usage_from_rawdata.py
2014-03-28 11:47:16 -03:00

124 lines
4.2 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import datetime
import os
import sys
try:
import ujson as json
except ImportError:
try:
import simplejson as json
except ImportError:
import json
# Resolve the directory two levels above the invoked script path; if it
# contains a 'stacktach' entry, put it first on sys.path so the
# `stacktach` imports below resolve against that checkout.
POSSIBLE_TOPDIR = os.path.normpath(
    os.path.join(os.path.abspath(sys.argv[0]), os.pardir, os.pardir))
if os.path.exists(os.path.join(POSSIBLE_TOPDIR, 'stacktach')):
    sys.path.insert(0, POSSIBLE_TOPDIR)
from django.core.exceptions import MultipleObjectsReturned
from stacktach import datetime_to_decimal as dt
from stacktach import models
from stacktach import views
# This file is a one-shot script, not a library: everything below runs at
# module level, so bail out with a non-zero status if it is ever imported.
if __name__ != '__main__':
    sys.exit(1)
# Notification event names whose raw records are replayed, in the order
# they are processed by the main loop.
events = [
    'compute.instance.create.start',
    'compute.instance.create.end',
    'compute.instance.rebuild.start',
    'compute.instance.rebuild.end',
    'compute.instance.resize.prep.start',
    'compute.instance.resize.prep.end',
    'compute.instance.finish_resize.end',
    'compute.instance.resize.revert.start',
    'compute.instance.resize.revert.end',
    'compute.instance.delete.end',
]
def usage_already_exists(raw):
    """Return True if usage derived from ``raw`` is already recorded.

    Only 'compute.instance.delete.end' records are actually checked, by
    looking up an ``InstanceDeletes`` row that references ``raw``. Every
    other event yields False: that usage is assembled from several events,
    so it may exist yet still be incomplete.
    """
    if raw.event != 'compute.instance.delete.end':
        return False

    # Deletes come from a single event, so they either exist or they don't.
    try:
        models.InstanceDeletes.objects.get(raw=raw)
    except models.InstanceDeletes.DoesNotExist:
        return False
    except MultipleObjectsReturned:
        # More than one matching row still means the delete was recorded.
        pass
    return True
def populate_usage(raw, body):
    """Feed ``raw`` and its decoded ``body`` to ``views.aggregate_usage``.

    Does nothing when ``usage_already_exists(raw)`` reports that the usage
    is already recorded.
    """
    if usage_already_exists(raw):
        return
    views.aggregate_usage(raw, body)
def print_status(event, completed, errored, total):
    """Print a one-line progress report for ``event``.

    The remaining count is ``total`` minus the records already handled,
    whether they completed or errored.
    """
    remaining = total - (completed + errored)
    print("%s: %s completed, %s errored, %s remaining"
          % (event, completed, errored, remaining))
def find_start_decimal():
    """Return the cutoff timestamp used to select raws to replay.

    Takes the earlier of two ``when`` values -- the earliest raw sharing a
    request id with the lowest-id ``InstanceUsage`` row, and the raw behind
    the lowest-id ``InstanceDeletes`` row -- and adds one day.

    Indexing ``[0]`` on an empty queryset raises ``IndexError``, so this
    fails if either usage table is empty.
    """
    # NOTE(review): ``when`` is presumably a decimal count of seconds, since
    # a day's worth of seconds is added to it below -- confirm on the model.
    seconds_per_day = 60 * 60 * 24

    first_usage = models.InstanceUsage.objects.all().order_by('id')[0]
    request_raws = models.RawData.objects.filter(
        request_id=first_usage.request_id)
    first_usage_raw = request_raws.order_by('when')[0]
    first_delete = models.InstanceDeletes.objects.all().order_by('id')[0]

    # Start a day after receiving the first usage, just to be safe
    earliest = min(first_usage_raw.when, first_delete.raw.when)
    return earliest + seconds_per_day
# Replay every raw record at or before the cutoff, one event type at a time,
# reporting progress roughly every 30 seconds.
start_decimal = find_start_decimal()
print("Starting from date %s" % dt.dt_from_decimal(start_decimal))

for event in events:
    start = datetime.datetime.utcnow()
    # The queryset is consumed in slices below. Without an explicit
    # ordering the database may return rows in a different order for each
    # slice, which can skip or repeat records; ordering by primary key
    # makes the paging deterministic.
    raws = models.RawData.objects.filter(
        event=event, when__lte=start_decimal).order_by('id')
    total = raws.count()
    completed = 0
    errored = 0

    loc = 0
    print_status(event, completed, errored, total)
    update_interval = datetime.timedelta(seconds=30)
    next_update = start + update_interval
    while loc < total:
        # Walk the result set 500 rows at a time rather than loading it all.
        new_loc = loc + 500
        for raw in raws[loc:new_loc]:
            try:
                json_dict = json.loads(raw.json)
                # Element 1 of the stored JSON is handed on as the body.
                populate_usage(raw, json_dict[1])
                completed += 1
            except Exception:
                # Best effort: one bad record must not abort the whole run,
                # so count it, report its id and carry on.
                errored += 1
                print("Error with raw: %s" % raw.id)

            if datetime.datetime.utcnow() > next_update:
                print_status(event, completed, errored, total)
                next_update = datetime.datetime.utcnow() + update_interval
        loc = new_loc
    end = datetime.datetime.utcnow()
    print_status(event, completed, errored, total)
    print("%s took %s" % (event, end - start))