diff --git a/datasets/datasets_devstack_131007/131007_devstack_export/config.json b/datasets/datasets_devstack_131007/131007_devstack_export/config.json index de0e33f..82d9786 100644 --- a/datasets/datasets_devstack_131007/131007_devstack_export/config.json +++ b/datasets/datasets_devstack_131007/131007_devstack_export/config.json @@ -1,10 +1,16 @@ { + "Innodb_rows_read": { + "default": 1000 + }, + "XInnodb_rows_changed": { + "default": 1000 + }, "database": "nova_dataset_131007_devstack", "db_pass": "tester", "db_user": "nova", "logging_conf": "logging.conf", "maximum_migration_times": { - "default": 30 + "default": 60 }, "project": "openstack/nova", "seed_data": "nova.sql", diff --git a/datasets/datasets_devstack_131007/131007_devstack_export/input.json b/datasets/datasets_devstack_131007/131007_devstack_export/input.json index 6a874d1..fe7b59e 100644 --- a/datasets/datasets_devstack_131007/131007_devstack_export/input.json +++ b/datasets/datasets_devstack_131007/131007_devstack_export/input.json @@ -7,6 +7,12 @@ "seed_data": "nova.sql", "logging_conf": "logging.conf", "maximum_migration_times": { - "default": 30 + "default": 60 + }, + "XInnodb_rows_changed": { + "default": 1000 + }, + "Innodb_rows_read": { + "default": 1000 } } diff --git a/datasets/datasets_devstack_150/datasets_devstack_150/config.json b/datasets/datasets_devstack_150/datasets_devstack_150/config.json index 870d34b..d76c2c7 100644 --- a/datasets/datasets_devstack_150/datasets_devstack_150/config.json +++ b/datasets/datasets_devstack_150/datasets_devstack_150/config.json @@ -1,10 +1,17 @@ { + "Innodb_rows_read": { + "default": 1000 + }, + "XInnodb_rows_changed": { + "default": 1000 + }, "database": "datasets_devstack_150", "db_pass": "tester", "db_user": "nova", "logging_conf": "logging.conf", "maximum_migration_times": { - "default": 30 + "151->152": 67.0, + "default": 60 }, "project": "openstack/nova", "seed_data": "nova.sql", diff --git 
a/datasets/datasets_devstack_150/datasets_devstack_150/input.json b/datasets/datasets_devstack_150/datasets_devstack_150/input.json index c173626..d623614 100644 --- a/datasets/datasets_devstack_150/datasets_devstack_150/input.json +++ b/datasets/datasets_devstack_150/datasets_devstack_150/input.json @@ -7,7 +7,13 @@ "seed_data": "nova.sql", "logging_conf": "logging.conf", "maximum_migration_times": { - "default": 30 + "default": 60 + }, + "XInnodb_rows_changed": { + "default": 1000 + }, + "Innodb_rows_read": { + "default": 1000 } } diff --git a/datasets/datasets_trivial_500/nova_trivial_500/config.json b/datasets/datasets_trivial_500/nova_trivial_500/config.json index 7c0e14a..175f6f1 100644 --- a/datasets/datasets_trivial_500/nova_trivial_500/config.json +++ b/datasets/datasets_trivial_500/nova_trivial_500/config.json @@ -1,11 +1,18 @@ { + "Innodb_rows_read": { + "default": 1000 + }, + "XInnodb_rows_changed": { + "default": 1000 + }, "database": "nova_dataset_trivial_500", "db_pass": "tester", "db_user": "nova", "logging_conf": "logging.conf", "maximum_migration_times": { - "138": 42.0, - "default": 30 + "151->152": 84.0, + "152->151": 103.0, + "default": 60 }, "project": "openstack/nova", "seed_data": "nova_trivial_500.sql", diff --git a/datasets/datasets_trivial_500/nova_trivial_500/input.json b/datasets/datasets_trivial_500/nova_trivial_500/input.json index 305cbc4..d71a8e0 100644 --- a/datasets/datasets_trivial_500/nova_trivial_500/input.json +++ b/datasets/datasets_trivial_500/nova_trivial_500/input.json @@ -7,7 +7,13 @@ "seed_data": "nova_trivial_500.sql", "logging_conf": "logging.conf", "maximum_migration_times": { - "default": 30 + "default": 60 + }, + "XInnodb_rows_changed": { + "default": 1000 + }, + "Innodb_rows_read": { + "default": 1000 } } diff --git a/datasets/datasets_trivial_6000/nova_trivial_6000/config.json b/datasets/datasets_trivial_6000/nova_trivial_6000/config.json index 4d42715..6d1ccb6 100644 --- 
a/datasets/datasets_trivial_6000/nova_trivial_6000/config.json +++ b/datasets/datasets_trivial_6000/nova_trivial_6000/config.json @@ -1,11 +1,20 @@ { + "Innodb_rows_read": { + "default": 1000 + }, + "XInnodb_rows_changed": { + "default": 1000 + }, "database": "nova_dataset_trivial_6000", "db_pass": "tester", "db_user": "nova", "logging_conf": "logging.conf", "maximum_migration_times": { - "152": 74.0, - "default": 30 + "151->152": 159.0, + "152->151": 195.0, + "184->185": 66.0, + "186->185": 144.0, + "default": 60 }, "project": "openstack/nova", "seed_data": "nova_trivial_6000.sql", diff --git a/datasets/datasets_trivial_6000/nova_trivial_6000/input.json b/datasets/datasets_trivial_6000/nova_trivial_6000/input.json index fdea286..e4a1ebd 100644 --- a/datasets/datasets_trivial_6000/nova_trivial_6000/input.json +++ b/datasets/datasets_trivial_6000/nova_trivial_6000/input.json @@ -7,7 +7,13 @@ "seed_data": "nova_trivial_6000.sql", "logging_conf": "logging.conf", "maximum_migration_times": { - "default": 30 + "default": 60 + }, + "XInnodb_rows_changed": { + "default": 1000 + }, + "Innodb_rows_read": { + "default": 1000 } } diff --git a/datasets/datasets_user_001/user_001/config.json b/datasets/datasets_user_001/user_001/config.json index 4eb54b4..11a3505 100644 --- a/datasets/datasets_user_001/user_001/config.json +++ b/datasets/datasets_user_001/user_001/config.json @@ -1,18 +1,49 @@ { + "Innodb_rows_read": { + "148->149": 110000, + "151->152": 3470000, + "159->160": 200000, + "160->161": 390000, + "202->203": 260000, + "205->206": 140000, + "215->216": 930000, + "default": 100000 + }, + "XInnodb_rows_changed": { + "148->149": 110000, + "151->152": 3200000, + "184->185": 140000, + "193->194": 150000, + "202->203": 520000, + "203->204": 260000, + "205->206": 190000, + "215->216": 260000, + "229->230": 140000, + "default": 100000 + }, "database": "nova_datasets_user_001", "db_pass": "tester", "db_user": "nova", "logging_conf": "logging.conf", "maximum_migration_times": 
{ - "135": 62.0, - "138": 44.0, - "149": 87.0, - "152": 241.0, - "159": 86.0, - "205": 51.0, - "206": 63.0, - "216": 102.0, - "230": 53.0, + "134->135": 116.0, + "135->134": 97.0, + "137->138": 85.0, + "138->137": 100.0, + "148->149": 135.0, + "149->148": 158.0, + "151->152": 333.0, + "152->151": 330.0, + "158->159": 136.0, + "159->158": 168.0, + "186->185": 569.0, + "204->205": 97.0, + "205->204": 98.0, + "205->206": 116.0, + "206->205": 106.0, + "215->216": 137.0, + "229->230": 122.0, + "230->229": 84.0, "_138_bugs": [ 1263835 ], @@ -22,7 +53,7 @@ "_205_bugs": [ 1263868 ], - "default": 30 + "default": 60 }, "project": "openstack/nova", "seed_data": "nova_user_001.sql", diff --git a/datasets/datasets_user_001/user_001/input.json b/datasets/datasets_user_001/user_001/input.json index dc9a679..1e8366d 100644 --- a/datasets/datasets_user_001/user_001/input.json +++ b/datasets/datasets_user_001/user_001/input.json @@ -7,17 +7,16 @@ "seed_data": "nova_user_001.sql", "logging_conf": "logging.conf", "maximum_migration_times": { - "default": 30, - "135": 120, - "138": 180, + "default": 60, "_138_bugs": [1263835], - "149": 240, "_149_bugs": [1263836], - "152": 300, - "159": 120, - "205": 120, - "_205_bugs": [1263868], - "216": 180 + "_205_bugs": [1263868] + }, + "XInnodb_rows_changed": { + "default": 100000 + }, + "Innodb_rows_read": { + "default": 100000 } } diff --git a/datasets/datasets_user_002/user_002/config.json b/datasets/datasets_user_002/user_002/config.json index a90929b..e0d6e9a 100644 --- a/datasets/datasets_user_002/user_002/config.json +++ b/datasets/datasets_user_002/user_002/config.json @@ -1,10 +1,16 @@ { + "Innodb_rows_read": { + "default": 100000 + }, + "XInnodb_rows_changed": { + "default": 100000 + }, "database": "nova_dataset_user_002", "db_pass": "tester", "db_user": "nova", "logging_conf": "logging.conf", "maximum_migration_times": { - "default": 30 + "default": 60 }, "project": "openstack/nova", "seed_data": "nova_user_002.sql", diff --git 
a/datasets/datasets_user_002/user_002/input.json b/datasets/datasets_user_002/user_002/input.json index 106c588..8a1b8a0 100644 --- a/datasets/datasets_user_002/user_002/input.json +++ b/datasets/datasets_user_002/user_002/input.json @@ -7,7 +7,13 @@ "seed_data": "nova_user_002.sql", "logging_conf": "logging.conf", "maximum_migration_times": { - "default": 30 + "default": 60 + }, + "XInnodb_rows_changed": { + "default": 100000 + }, + "Innodb_rows_read": { + "default": 100000 } } diff --git a/turbo_hipster/cmd/analyse_historical.py b/turbo_hipster/cmd/analyse_historical.py index 8e36e52..95df6c4 100644 --- a/turbo_hipster/cmd/analyse_historical.py +++ b/turbo_hipster/cmd/analyse_historical.py @@ -59,6 +59,7 @@ def main(): # Open the results database db = MySQLdb.connect(host=config['results']['host'], + port=config['results'].get('port', 3306), user=config['results']['username'], passwd=config['results']['password'], db=config['results']['database']) @@ -83,16 +84,28 @@ def main(): if not 'duration' in migration: continue - cursor.execute('insert ignore into summary' - '(path, parsed_at, engine, dataset, ' - 'migration, duration, stats_json) ' - 'values("%s", now(), "%s", ' - '"%s", "%s", %d, "%s");' - % (item['name'], engine, dataset, - '%s->%s' % (migration['from'], - migration['to']), - migration['duration'], - migration['stats'])) + if migration['stats']: + cursor.execute('insert ignore into summary' + '(path, parsed_at, engine, dataset, ' + 'migration, duration, stats_json) ' + 'values(%s, now(), %s, ' + '%s, %s, %s, %s);', + (item['name'], engine, dataset, + '%s->%s' % (migration['from'], + migration['to']), + migration['duration'], + json.dumps(migration['stats']))) + else: + cursor.execute('insert ignore into summary' + '(path, parsed_at, engine, dataset, ' + 'migration, duration, stats_json) ' + 'values(%s, now(), %s, ' + '%s, %s, %s, NULL);', + (item['name'], engine, dataset, + '%s->%s' % (migration['from'], + migration['to']), + migration['duration'])) + 
cursor.execute('commit;') items = connection.get_container(swift_config['container'], diff --git a/turbo_hipster/cmd/report_historical.py b/turbo_hipster/cmd/report_historical.py index 7440808..215b968 100644 --- a/turbo_hipster/cmd/report_historical.py +++ b/turbo_hipster/cmd/report_historical.py @@ -17,7 +17,7 @@ import json import math -import numpy +import MySQLdb import os import sys @@ -29,35 +29,63 @@ def main(): def process_dataset(dataset): - with open('results.json') as f: - results = json.loads(f.read()) + with open('/etc/turbo-hipster/config.json', 'r') as config_stream: + config = json.load(config_stream) + db = MySQLdb.connect(host=config['results']['host'], + port=config['results'].get('port', 3306), + user=config['results']['username'], + passwd=config['results']['password'], + db=config['results']['database']) + cursor = db.cursor(MySQLdb.cursors.DictCursor) migrations = {} all_times = {} + stats_summary = {} for engine in ['mysql', 'percona']: - print - print 'Dataset: %s' % dataset - print 'Engine: %s' % engine - print + print '%s, %s' % (dataset, engine) + cursor.execute('select distinct(migration) from summary where ' + 'engine="%s" and dataset="%s" order by migration;' + % (engine, dataset)) + migrations_list = [] + for row in cursor: + migrations_list.append(row['migration']) - for migration in sorted(results[engine][dataset]): - times = [] + for migration in migrations_list: all_times.setdefault(migration, []) - for time in results[engine][dataset][migration]: - for i in range(results[engine][dataset][migration][time]): - times.append(int(time)) - all_times[migration].append(int(time)) - times = sorted(times) - emit_summary(engine, times, migrations, migration) + cursor.execute('select distinct(duration), count(*) from summary ' + 'where engine="%s" and dataset="%s" and ' + 'migration="%s" group by duration;' + % (engine, dataset, migration)) + for row in cursor: + for i in range(row['count(*)']): + 
all_times[migration].append(row['duration']) - print - print 'Dataset: %s' % dataset - print 'Engine: combined' - print - for migration in sorted(all_times.keys()): - emit_summary('combined', all_times[migration], migrations, migration) + cursor.execute('select stats_json from summary where engine="%s" ' + 'and dataset="%s" and migration="%s" and ' + 'not (stats_json = "{}");' + % (engine, dataset, migration)) + for row in cursor: + stats = json.loads(row['stats_json']) + for key in stats: + stats_summary.setdefault(migration, {}) + stats_summary[migration].setdefault(key, {}) + stats_summary[migration][key].setdefault(stats[key], 0) + stats_summary[migration][key][stats[key]] += 1 + + # Composed stats + rows_changed = 0 + for key in ['Innodb_rows_updated', + 'Innodb_rows_inserted', + 'Innodb_rows_deleted']: + rows_changed += stats.get(key, 0) + + stats_summary[migration].setdefault('XInnodb_rows_changed', {}) + stats_summary[migration]['XInnodb_rows_changed'].setdefault( + rows_changed, 0) + stats_summary[migration]['XInnodb_rows_changed'][rows_changed]\ + += 1 with open('results.txt', 'w') as f: f.write('Migration,mysql,percona\n') @@ -75,10 +103,33 @@ def process_dataset(dataset): config = json.loads(f.read()) for migration in sorted(all_times.keys()): - minimum, mean, maximum, stddev = analyse(all_times[migration]) - recommend = mean + 2 * stddev - if recommend > 30.0: - config['maximum_migration_times'][migration] = math.ceil(recommend) + # Timing + config_max = config['maximum_migration_times']['default'] + l = len(all_times[migration]) + if l > 10: + sorted_all_times = sorted(all_times[migration]) + one_percent = max(1, int(math.ceil(l / 100.0))) + recommend = sorted_all_times[-one_percent] + 30 + if recommend > config_max: + config['maximum_migration_times'][migration] = \ + math.ceil(recommend) + + # Innodb stats + if not migration in stats_summary: + continue + + for stats_key in ['XInnodb_rows_changed', 'Innodb_rows_read']: + config_max = 
config[stats_key]['default'] + + values = [] + results = stats_summary[migration].get(stats_key, {}) + for result in results: + values.append(result) + + max_value = max(values) + rounding = max_value % 10000 + if max_value > config_max: + config[stats_key][migration] = max_value + (10000 - rounding) with open(os.path.join(config_path, 'config.json'), 'w') as f: f.write(json.dumps(config, indent=4, sort_keys=True)) @@ -94,40 +145,6 @@ def omg_hard_to_predict_names(dataset): return dataset -def analyse(times): - np_times = numpy.array(times) - minimum = np_times.min() - mean = np_times.mean() - maximum = np_times.max() - stddev = np_times.std() - return minimum, mean, maximum, stddev - - -def emit_summary(engine, times, migrations, migration): - minimum, mean, maximum, stddev = analyse(times) - failed_threshold = int(max(30.0, mean + stddev * 2)) - - failed = 0 - for time in times: - if time > failed_threshold: - failed += 1 - - migrations.setdefault(migration, {}) - migrations[migration][engine] = ('%.02f;%0.2f;%.02f' - % (mean - 2 * stddev, - mean, - mean + 2 * stddev)) - - if failed_threshold != 30 or failed > 0: - print ('%s: Values range from %s to %s seconds. %d values. ' - 'Mean is %.02f, stddev is %.02f.\n ' - 'Recommend max of %d. With this value %.02f%% of tests ' - 'would have failed.' - % (migration, minimum, maximum, - len(times), mean, stddev, failed_threshold, - failed * 100.0 / len(times))) - - if __name__ == '__main__': sys.path.insert(0, os.path.abspath( os.path.join(os.path.dirname(__file__), '../')))