diff --git a/swift/common/db.py b/swift/common/db.py index 67913ca94e..219c097c5b 100644 --- a/swift/common/db.py +++ b/swift/common/db.py @@ -879,14 +879,16 @@ class ContainerBroker(DatabaseBroker): return (row['object_count'] in (None, '', 0, '0')) and \ (float(row['delete_timestamp']) > float(row['put_timestamp'])) - def get_info(self): + def get_info(self, include_metadata=False): """ Get global data for the container. - :returns: sqlite.row of (account, container, created_at, put_timestamp, + :returns: dict with keys: account, container, created_at, put_timestamp, delete_timestamp, object_count, bytes_used, reported_put_timestamp, reported_delete_timestamp, - reported_object_count, reported_bytes_used, hash, id) + reported_object_count, reported_bytes_used, hash, id + If include_metadata is set, metadata is included as a key + pointing to a dict of tuples of the metadata """ try: self._commit_puts() @@ -894,13 +896,24 @@ class ContainerBroker(DatabaseBroker): if not self.stale_reads_ok: raise with self.get() as conn: - return conn.execute(''' + metadata = '' + if include_metadata: + metadata = ', metadata' + data = conn.execute(''' SELECT account, container, created_at, put_timestamp, delete_timestamp, object_count, bytes_used, reported_put_timestamp, reported_delete_timestamp, reported_object_count, reported_bytes_used, hash, id + %s FROM container_stat - ''').fetchone() + ''' % metadata).fetchone() + data = dict(data) + if include_metadata: + try: + data['metadata'] = json.loads(data.get('metadata','')) + except ValueError: + data['metadata'] = {} + return data def reported(self, put_timestamp, delete_timestamp, object_count, bytes_used): @@ -1394,9 +1407,9 @@ class AccountBroker(DatabaseBroker): """ Get global data for the account. - :returns: sqlite.row of (account, created_at, put_timestamp, + :returns: dict with keys: account, created_at, put_timestamp, delete_timestamp, container_count, object_count, - bytes_used, hash, id) + bytes_used, hash, id """ try: self._commit_puts() @@ -1404,11 +1417,11 @@ class AccountBroker(DatabaseBroker): if not self.stale_reads_ok: raise with self.get() as conn: - return conn.execute(''' + return dict(conn.execute(''' SELECT account, created_at, put_timestamp, delete_timestamp, container_count, object_count, bytes_used, hash, id FROM account_stat - ''').fetchone() + ''').fetchone()) def list_containers_iter(self, limit, marker, end_marker, prefix, delimiter): diff --git a/swift/stats/db_stats_collector.py b/swift/stats/db_stats_collector.py index 04968f181f..c8faa66779 100644 --- a/swift/stats/db_stats_collector.py +++ b/swift/stats/db_stats_collector.py @@ -60,6 +60,9 @@ class DatabaseStatsCollector(Daemon): def get_data(self): raise Exception('Not Implemented') + def get_header(self): + raise Exception('Not Implemented') + def find_and_process(self): src_filename = time.strftime(self.filename_format) working_dir = os.path.join(self.target_dir, @@ -70,6 +73,7 @@ class DatabaseStatsCollector(Daemon): hasher = hashlib.md5() try: with open(tmp_filename, 'wb') as statfile: + statfile.write(self.get_header()) for device in os.listdir(self.devices): if self.mount_check and not check_mount(self.devices, device): @@ -122,6 +126,8 @@ class AccountStatsCollector(DatabaseStatsCollector): info['bytes_used']) return line_data + def get_header(self): + return '' class ContainerStatsCollector(DatabaseStatsCollector): """ @@ -133,20 +139,36 @@ class ContainerStatsCollector(DatabaseStatsCollector): super(ContainerStatsCollector, self).__init__(stats_conf, 'container', container_server_data_dir, 'container-stats-%Y%m%d%H_') + self.metadata_keys = [mkey.strip() for mkey in + stats_conf.get('metadata_keys', '').split(',') if mkey.strip()] + + def get_header(self): + header = 'Account Hash, Container Name, Object Count, Bytes Used' + if self.metadata_keys: + xtra_headers = ','.join(self.metadata_keys) + header += ',%s' % xtra_headers + header += '\n' + return header def get_data(self, db_path): """ Data for generated csv has the following columns: Account Hash, Container Name, Object Count, Bytes Used + This will just collect whether or not the metadata is set + using a 1 or ''. """ line_data = None broker = ContainerBroker(db_path) if not broker.is_deleted(): - info = broker.get_info() + info = broker.get_info(include_metadata=bool(self.metadata_keys)) encoded_container_name = urllib.quote(info['container']) - line_data = '"%s","%s",%d,%d\n' % ( - info['account'], - encoded_container_name, - info['object_count'], - info['bytes_used']) + line_data = '"%s","%s",%d,%d' % ( + info['account'], encoded_container_name, + info['object_count'], info['bytes_used']) + if self.metadata_keys: + metadata_results = ','.join( + [info['metadata'].get(mkey) and '1' or '' + for mkey in self.metadata_keys]) + line_data += ',%s' % metadata_results + line_data += '\n' return line_data diff --git a/test/unit/stats/test_db_stats_collector.py b/test/unit/stats/test_db_stats_collector.py index 2721614e9f..7836351a79 100644 --- a/test/unit/stats/test_db_stats_collector.py +++ b/test/unit/stats/test_db_stats_collector.py @@ -66,6 +66,17 @@ class TestDbStats(unittest.TestCase): info = stat.get_data("%s/con.db" % self.containers) self.assertEquals('''"test_acc","test_con",1,10\n''', info) + def test_container_stat_get_metadata(self): + stat = db_stats_collector.ContainerStatsCollector(self.conf) + container_db = ContainerBroker("%s/con.db" % self.containers, + account='test_acc', container='test_con') + container_db.initialize() + container_db.put_object('test_obj', time.time(), 10, 'text', 'faketag') + info = stat.get_data("%s/con.db" % self.containers) + self.assertEquals('''"test_acc","test_con",1,10\n''', info) + container_db.update_metadata({'test1': ('val',1000)}) + + def _gen_account_stat(self): stat = db_stats_collector.AccountStatsCollector(self.conf) output_data = set() @@ -83,20 +94,30 @@ class TestDbStats(unittest.TestCase): self.assertEqual(len(output_data), 10) return stat, output_data - def _gen_container_stat(self): + def _gen_container_stat(self, set_metadata=False): + if set_metadata: + self.conf['metadata_keys'] = 'test1,test2' stat = db_stats_collector.ContainerStatsCollector(self.conf) output_data = set() for i in range(10): - account_db = ContainerBroker( + cont_db = ContainerBroker( "%s/container-stats-201001010%s-%s.db" % (self.containers, i, uuid.uuid4().hex), account='test_acc_%s' % i, container='test_con') - account_db.initialize() - account_db.put_object('test_obj', time.time(), 10, 'text', - 'faketag') + cont_db.initialize() + cont_db.put_object('test_obj', time.time(), 10, 'text', 'faketag') + metadata_output = '' + if set_metadata: + if i%2: + cont_db.update_metadata({'test1': (55,100)}) + metadata_output = ',1,' + else: + cont_db.update_metadata({'test2': (55,100)}) + metadata_output = ',,1' # this will "commit" the data - account_db.get_info() - output_data.add('''"test_acc_%s","test_con",1,10''' % i), + cont_db.get_info() + output_data.add('''"test_acc_%s","test_con",1,10%s''' % + (i, metadata_output)) self.assertEqual(len(output_data), 10) return stat, output_data @@ -112,6 +133,21 @@ class TestDbStats(unittest.TestCase): self.assertEqual(len(output_data), 0) + def test_account_stat_run_once_container_metadata(self): + + stat, output_data = self._gen_container_stat(set_metadata=True) + stat.run_once() + stat_file = os.listdir(self.log_dir)[0] + with open(os.path.join(self.log_dir, stat_file)) as stat_handle: + headers = stat_handle.readline() + self.assert_(headers.startswith('Account Hash, Container Name,')) + for i in range(10): + data = stat_handle.readline() + output_data.discard(data.strip()) + + self.assertEqual(len(output_data), 0) + + def test_account_stat_run_once_both(self): acc_stat, acc_output_data = self._gen_account_stat() con_stat, con_output_data = self._gen_container_stat() @@ -128,6 +164,8 @@ class TestDbStats(unittest.TestCase): con_stat.run_once() stat_file = [f for f in os.listdir(self.log_dir) if f != stat_file][0] with open(os.path.join(self.log_dir, stat_file)) as stat_handle: + headers = stat_handle.readline() + self.assert_(headers.startswith('Account Hash, Container Name,')) for i in range(10): data = stat_handle.readline() con_output_data.discard(data.strip()) @@ -144,6 +182,7 @@ class TestDbStats(unittest.TestCase): db_stat = db_stats_collector.DatabaseStatsCollector(self.conf, 'account', 'test_dir', 'stats-%Y%m%d%H_') self.assertRaises(Exception, db_stat.get_data) + self.assertRaises(Exception, db_stat.get_header) def test_not_not_mounted(self): self.conf['mount_check'] = 'true'