aodh/ceilometer/storage/impl_db2.py
Eoghan Glynn cdc49d6c75 Selectable aggregation functions for statistics
Addresses: BP selectable-aggregates

Previously, the statistics API always computed and returned a
standard set of aggregate functions.

Now, individual aggregation functions can be selected and even
parameterized via syntax of the form:

   /v2/meters/<meter_name>/statistics?aggregate.func=<name>&aggregate.param=<value>
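
   A parameterized function might, for instance, be invoked as
   aggregate.func=cardinality&aggregate.param=resource_id (illustrative
   only; the functions actually available depend on the driver).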

For example, to calculate only the average CPU utilization:

  GET /v2/meters/cpu_util/statistics?aggregate.func=avg

  HTTP/1.0 200 OK
  [{"aggregate": {"avg": 0.6858829535841072},
    "avg": 0.6858829535841072,
    "duration_start": "2014-01-30T11:13:23",
    "duration_end": "2014-01-31T16:07:13",
    "duration": 104030.0,
    "period": 0,
    "period_start": "2014-01-30T11:13:23",
    "period_end": "2014-01-31T16:07:13",
    "groupby": null,
    "unit": "%"}]

In the current patch, selectable aggregates are provided by the
sqlalchemy driver only, with support in the mongodb driver to
follow in a subsequent patch.

Change-Id: I6cc095ba5ae16dea3f6b404e72a070ab9ac49c9a
2014-03-03 21:23:22 +00:00


# -*- encoding: utf-8 -*-
# Copyright © 2012 New Dream Network, LLC (DreamHost)
# Copyright © 2013 eNovance
# Copyright © 2013 IBM Corp
#
# Author: Doug Hellmann <doug.hellmann@dreamhost.com>
#         Julien Danjou <julien@danjou.info>
#         Tong Li <litong01@us.ibm.com>
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""DB2 storage backend
"""
from __future__ import division
import copy
import datetime
import itertools
import sys
import bson.code
import bson.objectid
import pymongo
from ceilometer.openstack.common.gettextutils import _ # noqa
from ceilometer.openstack.common import log
from ceilometer.openstack.common import timeutils
from ceilometer import storage
from ceilometer.storage import base
from ceilometer.storage import models
from ceilometer.storage import pymongo_base
from ceilometer import utils
LOG = log.getLogger(__name__)
class DB2Storage(base.StorageEngine):
    """The db2 storage for Ceilometer

    Collections::

        - user
          - { _id: user id
              source: [ array of source ids reporting for the user ]
              }
        - project
          - { _id: project id
              source: [ array of source ids reporting for the project ]
              }
        - meter
          - the raw incoming data
        - resource
          - the metadata for resources
          - { _id: uuid of resource,
              metadata: metadata dictionaries
              user_id: uuid
              project_id: uuid
              meter: [ array of {counter_name: string, counter_type: string,
                                 counter_unit: string} ]
              }
    """

    def get_connection(self, conf):
        """Return a Connection instance based on the configuration settings.
        """
        return Connection(conf)


class Connection(pymongo_base.Connection):
    """DB2 connection.
    """

    CONNECTION_POOL = pymongo_base.ConnectionPool()
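
    # MongoDB aggregation pipeline fragments for the standard statistics:
    # GROUP is a $group specification keyed on counter_name that computes
    # unit, min, max, sum, count and the duration bounds, and PROJECT is
    # the companion $project specification deriving avg as sum / count.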
    GROUP = {'_id': '$counter_name',
             'unit': {'$min': '$counter_unit'},
             'min': {'$min': '$counter_volume'},
             'max': {'$max': '$counter_volume'},
             'sum': {'$sum': '$counter_volume'},
             'count': {'$sum': 1},
             'duration_start': {'$min': '$timestamp'},
             'duration_end': {'$max': '$timestamp'},
             }

    PROJECT = {'_id': 0, 'unit': 1,
               'min': 1, 'max': 1, 'sum': 1, 'count': 1,
               'avg': {'$divide': ['$sum', '$count']},
               'duration_start': 1,
               'duration_end': 1,
               }

    SORT_OPERATION_MAP = {'desc': pymongo.DESCENDING, 'asc': pymongo.ASCENDING}

    SECONDS_IN_A_DAY = 86400

    def __init__(self, conf):
        url = conf.database.connection

        # Even though we are connecting to DB2 we use pymongo, so the
        # 'db2:' scheme (which distinguishes the db2 driver from the
        # mongodb driver) must be replaced, or pymongo will raise an
        # exception on the unknown scheme.
        url = url.replace('db2:', 'mongodb:', 1)
        self.conn = self.CONNECTION_POOL.connect(url)

        # Require MongoDB 2.2 to use aggregate(). Since the tests run
        # against mongodb as the backend, the check below makes sure they
        # won't attempt aggregate() on an older mongodb. For db2,
        # versionArray is not part of server_info, so no exception is
        # raised when a real db2 backend is used.
        server_info = self.conn.server_info()
        if server_info.get('sysInfo'):
            self._using_mongodb = True
        else:
            self._using_mongodb = False

        if self._using_mongodb and server_info.get('versionArray') < [2, 2]:
            raise storage.StorageBadVersion("Need at least MongoDB 2.2")

        connection_options = pymongo.uri_parser.parse_uri(url)
        self.db = getattr(self.conn, connection_options['database'])
        if connection_options.get('username'):
            self.db.authenticate(connection_options['username'],
                                 connection_options['password'])

        self.upgrade()

    @classmethod
    def _build_sort_instructions(cls, sort_keys=[], sort_dir='desc'):
        """Return a list of sort instructions.

        Sort instructions are used in the query to determine which
        attributes to sort on and in which direction.

        :param sort_keys: array of attributes by which results should be
                          sorted.
        :param sort_dir: direction in which results should be sorted
                         (asc, desc).
        :return: sort parameters
        """
        sort_instructions = []
        _sort_dir = cls.SORT_OPERATION_MAP.get(
            sort_dir, cls.SORT_OPERATION_MAP['desc'])

        for _sort_key in sort_keys:
            _instruction = (_sort_key, _sort_dir)
            sort_instructions.append(_instruction)

        return sort_instructions

    def upgrade(self, version=None):
        # Establish indexes
        #
        # We need variations for user_id vs. project_id because of the
        # way the indexes are stored in b-trees. The user_id and
        # project_id values are usually mutually exclusive in the
        # queries, so the database won't take advantage of an index
        # including both.
        if self.db.resource.index_information() == {}:
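            # Insert a placeholder document into each collection (removed
            # again below) so that the collection exists before the
            # indexes are created.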
            resource_id = str(bson.objectid.ObjectId())
            self.db.resource.insert({'_id': resource_id,
                                     'no_key': resource_id})
            meter_id = str(bson.objectid.ObjectId())
            self.db.meter.insert({'_id': meter_id,
                                  'no_key': meter_id})

            self.db.resource.ensure_index([
                ('user_id', pymongo.ASCENDING),
                ('project_id', pymongo.ASCENDING),
                ('source', pymongo.ASCENDING)], name='resource_idx')

            self.db.meter.ensure_index([
                ('resource_id', pymongo.ASCENDING),
                ('user_id', pymongo.ASCENDING),
                ('project_id', pymongo.ASCENDING),
                ('counter_name', pymongo.ASCENDING),
                ('timestamp', pymongo.ASCENDING),
                ('source', pymongo.ASCENDING)], name='meter_idx')

            self.db.meter.ensure_index([('timestamp',
                                         pymongo.DESCENDING)],
                                       name='timestamp_idx')

            self.db.resource.remove({'_id': resource_id})
            self.db.meter.remove({'_id': meter_id})

            # Insert and remove an ObjectId-style key in the user and
            # project collections as well, to ensure the db2 index on the
            # key is created correctly.
            user_id = str(bson.objectid.ObjectId())
            self.db.user.insert({'_id': user_id})
            self.db.user.remove({'_id': user_id})

            project_id = str(bson.objectid.ObjectId())
            self.db.project.insert({'_id': project_id})
            self.db.project.remove({'_id': project_id})

    def clear(self):
        # db2 does not support drop_database; remove all collections
        # instead.
        for col in ['user', 'project', 'resource', 'meter']:
            self.db[col].drop()

        # The drop_database command does nothing on a db2 database since
        # it has not been implemented there. Calling it is still important
        # to remove all the empty databases created during test runs,
        # since the tests run against mongodb on Jenkins.
        self.conn.drop_database(self.db)
        self.conn.close()

    def record_metering_data(self, data):
        """Write the data to the backend storage system.

        :param data: a dictionary such as returned by
                     ceilometer.meter.meter_message_from_counter
        """
        # Make sure we know about the user and project
        self.db.user.update(
            {'_id': data['user_id'] or 'null'},
            {'$addToSet': {'source': data['source'],
                           },
             },
            upsert=True,
        )
        self.db.project.update(
            {'_id': data['project_id']},
            {'$addToSet': {'source': data['source'],
                           },
             },
            upsert=True,
        )

        # Record the updated resource metadata
        self.db.resource.update(
            {'_id': data['resource_id']},
            {'$set': {'project_id': data['project_id'],
                      'user_id': data['user_id'] or 'null',
                      'metadata': data['resource_metadata'],
                      'source': data['source'],
                      },
             '$addToSet': {'meter': {'counter_name': data['counter_name'],
                                     'counter_type': data['counter_type'],
                                     'counter_unit': data['counter_unit'],
                                     },
                           },
             },
            upsert=True,
        )

        # Record the raw data for the meter. Use a copy so we do not
        # modify a data structure owned by our caller (the driver adds
        # a new key '_id').
        record = copy.copy(data)
        record['recorded_at'] = timeutils.utcnow()

        # Make sure the record has an _id field, which db2 will not add
        # automatically.
        if record.get('_id') is None:
            record['_id'] = str(bson.objectid.ObjectId())

        self.db.meter.insert(record)

    def get_resources(self, user=None, project=None, source=None,
                      start_timestamp=None, start_timestamp_op=None,
                      end_timestamp=None, end_timestamp_op=None,
                      metaquery={}, resource=None, pagination=None):
        """Return an iterable of models.Resource instances

        :param user: Optional ID for user that owns the resource.
        :param project: Optional ID for project that owns the resource.
        :param source: Optional source filter.
        :param start_timestamp: Optional modified timestamp start range.
        :param start_timestamp_op: Optional start time operator, like gt, ge.
        :param end_timestamp: Optional modified timestamp end range.
        :param end_timestamp_op: Optional end time operator, like lt, le.
        :param metaquery: Optional dict with metadata to match on.
        :param resource: Optional resource filter.
        :param pagination: Optional pagination query.
        """
        if pagination:
            raise NotImplementedError(_('Pagination not implemented'))

        q = {}
        if user is not None:
            q['user_id'] = user
        if project is not None:
            q['project_id'] = project
        if source is not None:
            q['source'] = source
        if resource is not None:
            q['resource_id'] = resource
        # Add resource_ prefix so it matches the field in the db
        q.update(dict(('resource_' + k, v)
                      for (k, v) in metaquery.iteritems()))

        if start_timestamp or end_timestamp:
            # Look for resources matching the above criteria and with
            # samples in the time range we care about, then change the
            # resource query to return just those resources by id.
            ts_range = pymongo_base.make_timestamp_range(start_timestamp,
                                                         end_timestamp,
                                                         start_timestamp_op,
                                                         end_timestamp_op)
            if ts_range:
                q['timestamp'] = ts_range

        sort_keys = base._handle_sort_key('resource', 'timestamp')
        sort_keys.insert(0, 'resource_id')
        sort_instructions = self._build_sort_instructions(sort_keys=sort_keys,
                                                          sort_dir='desc')

        resource_key = lambda x: x['resource_id']
        meters = self.db.meter.find(q, sort=sort_instructions)
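
        # The samples are sorted by resource_id and then by descending
        # timestamp, so the first sample in each group below is that
        # resource's most recent one.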
        for resource_id, r_meters in itertools.groupby(meters,
                                                       key=resource_key):
            # Because we have to know first/last timestamp, and we need a
            # full list of references to the resource's meters, we need a
            # tuple here.
            r_meters = tuple(r_meters)
            latest_meter = r_meters[0]
            last_ts = latest_meter['timestamp']
            first_ts = r_meters[-1]['timestamp']

            yield models.Resource(resource_id=latest_meter['resource_id'],
                                  project_id=latest_meter['project_id'],
                                  first_sample_timestamp=first_ts,
                                  last_sample_timestamp=last_ts,
                                  source=latest_meter['source'],
                                  user_id=latest_meter['user_id'],
                                  metadata=latest_meter['resource_metadata'])

    def get_meter_statistics(self, sample_filter, period=None, groupby=None,
                             aggregate=None):
        """Return an iterable of models.Statistics instances containing
        meter statistics described by the query parameters.

        The filter must have a meter value set.
        """
        if (groupby and
                set(groupby) - set(['user_id', 'project_id',
                                    'resource_id', 'source'])):
            raise NotImplementedError("Unable to group by these fields")

        if aggregate:
            msg = _('Selectable aggregates not implemented')
            raise NotImplementedError(msg)

        q = pymongo_base.make_query_from_filter(sample_filter)
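
        # Anchor the period windows at the filter's start time or, when no
        # start is given, at the timestamp of the oldest recorded sample.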
        if period:
            if sample_filter.start:
                period_start = sample_filter.start
            else:
                period_start = self.db.meter.find(
                    limit=1, sort=[('timestamp',
                                    pymongo.ASCENDING)])[0]['timestamp']

        if groupby:
            sort_keys = ['counter_name'] + groupby + ['timestamp']
        else:
            sort_keys = ['counter_name', 'timestamp']

        sort_instructions = self._build_sort_instructions(sort_keys=sort_keys,
                                                          sort_dir='asc')
        meters = self.db.meter.find(q, sort=sort_instructions)

        def _group_key(meter):
            # Build the grouping key from the sort keys; the raw timestamp
            # is replaced by its period ordinal when a period is requested,
            # and omitted otherwise.
            key = {}
            for y in sort_keys:
                if y == 'timestamp' and period:
                    key[y] = (timeutils.delta_seconds(period_start,
                                                      meter[y]) // period)
                elif y != 'timestamp':
                    key[y] = meter[y]
            return key
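
        # _to_offset converts a period ordinal into the corresponding
        # timedelta offset from period_start, split into days and seconds.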
        def _to_offset(periods):
            return {'days': (periods * period) // self.SECONDS_IN_A_DAY,
                    'seconds': (periods * period) % self.SECONDS_IN_A_DAY}

        for key, grouped_meters in itertools.groupby(meters, key=_group_key):
            stat = models.Statistics(unit=None,
                                     min=sys.maxint, max=-sys.maxint,
                                     avg=0, sum=0, count=0,
                                     period=0, period_start=0, period_end=0,
                                     duration=0, duration_start=0,
                                     duration_end=0, groupby=None)

            for meter in grouped_meters:
                stat.unit = meter.get('counter_unit', '')
                m_volume = meter.get('counter_volume')
                if stat.min > m_volume:
                    stat.min = m_volume
                if stat.max < m_volume:
                    stat.max = m_volume
                stat.sum += m_volume
                stat.count += 1
                if stat.duration_start == 0:
                    stat.duration_start = meter['timestamp']
                stat.duration_end = meter['timestamp']
                if groupby and not stat.groupby:
                    stat.groupby = {}
                    for group_key in groupby:
                        stat.groupby[group_key] = meter[group_key]

            stat.duration = timeutils.delta_seconds(stat.duration_start,
                                                    stat.duration_end)
            stat.avg = stat.sum / stat.count

            if period:
                stat.period = period
                periods = key.get('timestamp')
                stat.period_start = period_start + \
                    datetime.timedelta(**(_to_offset(periods)))
                stat.period_end = period_start + \
                    datetime.timedelta(**(_to_offset(periods + 1)))
            else:
                stat.period_start = stat.duration_start
                stat.period_end = stat.duration_end

            yield stat

    def get_capabilities(self):
        """Return a dictionary representing the capabilities of this driver.
        """
        available = {
            'meters': {'query': {'simple': True,
                                 'metadata': True}},
            'resources': {'query': {'simple': True,
                                    'metadata': True}},
            'samples': {'query': {'simple': True,
                                  'metadata': True,
                                  'complex': True}},
            'statistics': {'groupby': True,
                           'query': {'simple': True,
                                     'metadata': True},
                           'aggregation': {'standard': True}},
            'alarms': {'query': {'simple': True,
                                 'complex': True},
                       'history': {'query': {'simple': True}}},
        }
        return utils.update_nested(self.DEFAULT_CAPABILITIES, available)