VMware vSphere: Improve the accuracy of queried samples

Currently, ceilometer queries only the latest real-time sample from
vSphere and builds its own sample from that single value. vSphere
records real-time samples every 20 s, whereas the pollsters run every
600 s (10 min) by default, so one vSphere sample covers only a small
part of each polling interval. This change improves the accuracy of
the reported sample by averaging the vSphere samples over the pollster
interval.

Change-Id: Id75fe87840b82d7c7d3e4fae9b2fc982e22ea1d7
Implements: blueprint vmware-vcenter-server
Closes-Bug: 1295500
This commit is contained in:
Akhil Hingane 2014-03-15 13:32:06 +05:30
parent 55cbe746da
commit 03c803b5ce
12 changed files with 80 additions and 51 deletions

View File

@ -21,6 +21,7 @@
import abc
import six
from ceilometer.openstack.common import timeutils
from ceilometer import plugin
@ -37,3 +38,16 @@ class ComputePollster(plugin.PollsterBase):
:param cache: A dictionary for passing data between plugins
:param resources: The resources to examine (expected to be instances)
"""
def _record_poll_time(self):
    """Store the current time as this pollster's most recent poll time.

    :return: the value of timeutils.delta_seconds() between the previously
             stored poll time and now, or None when no poll time had been
             stored on this object yet
    """
    now = timeutils.utcnow()
    if hasattr(self, '_last_poll_time'):
        # A previous poll was recorded: measure the gap before overwriting.
        elapsed = timeutils.delta_seconds(self._last_poll_time, now)
    else:
        # First poll on this object: there is no earlier time to compare to.
        elapsed = None
    self._last_poll_time = now
    return elapsed

View File

@ -64,10 +64,12 @@ class CPUPollster(plugin.ComputePollster):
class CPUUtilPollster(plugin.ComputePollster):
def get_samples(self, manager, cache, resources):
self._inspection_duration = self._record_poll_time()
for instance in resources:
LOG.debug(_('Checking CPU util for instance %s'), instance.id)
try:
cpu_info = manager.inspector.inspect_cpu_util(instance)
cpu_info = manager.inspector.inspect_cpu_util(
instance, self._inspection_duration)
LOG.debug(_("CPU UTIL: %(instance)s %(util)d"),
({'instance': instance.__dict__,
'util': cpu_info.util}))

View File

@ -171,7 +171,9 @@ class _DiskRatesPollsterBase(plugin.ComputePollster):
r_requests_rate = 0
w_bytes_rate = 0
w_requests_rate = 0
for disk, info in inspector.inspect_disk_rates(instance):
disk_rates = inspector.inspect_disk_rates(
instance, self._inspection_duration)
for disk, info in disk_rates:
r_bytes_rate += info.read_bytes_rate
r_requests_rate += info.read_requests_rate
w_bytes_rate += info.write_bytes_rate
@ -189,6 +191,7 @@ class _DiskRatesPollsterBase(plugin.ComputePollster):
"""Return one Sample."""
def get_samples(self, manager, cache, resources):
self._inspection_duration = self._record_poll_time()
for instance in resources:
try:
disk_rates_info = self._populate_cache(

View File

@ -26,10 +26,12 @@ LOG = log.getLogger(__name__)
class MemoryUsagePollster(plugin.ComputePollster):
def get_samples(self, manager, cache, resources):
self._inspection_duration = self._record_poll_time()
for instance in resources:
LOG.debug(_('Checking memory usage for instance %s'), instance.id)
try:
memory_info = manager.inspector.inspect_memory_usage(instance)
memory_info = manager.inspector.inspect_memory_usage(
instance, self._inspection_duration)
LOG.debug(_("MEMORY USAGE: %(instance)s %(usage)f"),
({'instance': instance.__dict__,
'usage': memory_info.usage}))

View File

@ -84,6 +84,7 @@ class _Base(plugin.ComputePollster):
return i_cache[instance_name]
def get_samples(self, manager, cache, resources):
self._inspection_duration = self._record_poll_time()
for instance in resources:
instance_name = util.instance_name(instance)
LOG.debug(_('checking net info for instance %s'), instance.id)
@ -122,7 +123,8 @@ class _RateBase(_Base):
CACHE_KEY_VNIC = 'vnic-rates'
def _get_vnic_info(self, inspector, instance):
return inspector.inspect_vnic_rates(instance)
return inspector.inspect_vnic_rates(instance,
self._inspection_duration)
def _get_rx_info(self, info):
return info.rx_bytes_rate

View File

@ -159,10 +159,12 @@ class Inspector(object):
"""
raise NotImplementedError()
def inspect_cpu_util(self, instance):
def inspect_cpu_util(self, instance, duration=None):
"""Inspect the CPU Utilization (%) for an instance.
:param instance: the target instance
:param duration: the last 'n' seconds, over which the value should be
inspected
:return: the percentage of CPU utilization
"""
raise NotImplementedError()
@ -176,10 +178,12 @@ class Inspector(object):
"""
raise NotImplementedError()
def inspect_vnic_rates(self, instance):
def inspect_vnic_rates(self, instance, duration=None):
"""Inspect the vNIC rate statistics for an instance.
:param instance: the target instance
:param duration: the last 'n' seconds, over which the value should be
inspected
:return: for each vNIC, the rate of bytes & packets
received and transmitted
"""
@ -194,18 +198,22 @@ class Inspector(object):
"""
raise NotImplementedError()
def inspect_memory_usage(self, instance):
def inspect_memory_usage(self, instance, duration=None):
"""Inspect the memory usage statistics for an instance.
:param instance: the target instance
:param duration: the last 'n' seconds, over which the value should be
inspected
:return: the amount of memory used
"""
raise NotImplementedError()
def inspect_disk_rates(self, instance):
def inspect_disk_rates(self, instance, duration=None):
"""Inspect the disk statistics as rates for an instance.
:param instance: the target instance
:param duration: the last 'n' seconds, over which the value should be
inspected
:return: for each disk, the number of bytes & operations
read and written per second, with the error count
"""

View File

@ -43,7 +43,7 @@ OPTS = [
cfg.FloatOpt('task_poll_interval',
default=0.5,
help='Sleep time in seconds for polling an ongoing async '
'task'),
'task')
]
cfg.CONF.register_group(opt_group)
@ -79,21 +79,15 @@ class VsphereInspector(virt_inspector.Inspector):
self._ops = vsphere_operations.VsphereOperations(
get_api_session(), 1000)
def inspect_instances(self):
raise NotImplementedError()
def inspect_cpus(self, instance_name):
raise NotImplementedError()
def inspect_cpu_util(self, instance):
def inspect_cpu_util(self, instance, duration=None):
vm_moid = self._ops.get_vm_moid(instance.id)
if vm_moid is None:
raise virt_inspector.InstanceNotFoundException(
_('VM %s not found in VMware Vsphere') % instance.id)
cpu_util_counter_id = self._ops.get_perf_counter_id(
VC_AVERAGE_CPU_CONSUMED_CNTR)
cpu_util = self._ops.query_vm_aggregate_stats(vm_moid,
cpu_util_counter_id)
cpu_util = self._ops.query_vm_aggregate_stats(
vm_moid, cpu_util_counter_id, duration)
# For this counter vSphere returns values scaled-up by 100, since the
# corresponding API can't return decimals, but only longs.
@ -102,10 +96,7 @@ class VsphereInspector(virt_inspector.Inspector):
cpu_util = cpu_util / 100
return virt_inspector.CPUUtilStats(util=cpu_util)
def inspect_vnics(self, instance_name):
raise NotImplementedError()
def inspect_vnic_rates(self, instance):
def inspect_vnic_rates(self, instance, duration=None):
vm_moid = self._ops.get_vm_moid(instance.id)
if not vm_moid:
raise virt_inspector.InstanceNotFoundException(
@ -116,8 +107,8 @@ class VsphereInspector(virt_inspector.Inspector):
for net_counter in (VC_NETWORK_RX_COUNTER, VC_NETWORK_TX_COUNTER):
net_counter_id = self._ops.get_perf_counter_id(net_counter)
vnic_id_to_stats_map = \
self._ops.query_vm_device_stats(vm_moid, net_counter_id)
vnic_id_to_stats_map = self._ops.query_vm_device_stats(
vm_moid, net_counter_id, duration)
vnic_stats[net_counter] = vnic_id_to_stats_map
vnic_ids.update(vnic_id_to_stats_map.iterkeys())
@ -137,22 +128,20 @@ class VsphereInspector(virt_inspector.Inspector):
parameters=None)
yield (interface, stats)
def inspect_disks(self, instance_name):
raise NotImplementedError()
def inspect_memory_usage(self, instance):
def inspect_memory_usage(self, instance, duration=None):
vm_moid = self._ops.get_vm_moid(instance.id)
if vm_moid is None:
raise virt_inspector.InstanceNotFoundException(
_('VM %s not found in VMware Vsphere') % instance.id)
mem_counter_id = self._ops.get_perf_counter_id(
VC_AVERAGE_MEMORY_CONSUMED_CNTR)
memory = self._ops.query_vm_aggregate_stats(vm_moid, mem_counter_id)
memory = self._ops.query_vm_aggregate_stats(
vm_moid, mem_counter_id, duration)
# Stat provided from vSphere is in KB, converting it to MB.
memory = memory / units.Ki
return virt_inspector.MemoryUsageStats(usage=memory)
def inspect_disk_rates(self, instance):
def inspect_disk_rates(self, instance, duration=None):
vm_moid = self._ops.get_vm_moid(instance.id)
if not vm_moid:
raise virt_inspector.InstanceNotFoundException(
@ -170,7 +159,7 @@ class VsphereInspector(virt_inspector.Inspector):
for disk_counter in disk_counters:
disk_counter_id = self._ops.get_perf_counter_id(disk_counter)
disk_id_to_stat_map = self._ops.query_vm_device_stats(
vm_moid, disk_counter_id)
vm_moid, disk_counter_id, duration)
disk_stats[disk_counter] = disk_id_to_stat_map
disk_ids.update(disk_id_to_stat_map.iterkeys())

View File

@ -138,34 +138,38 @@ class VsphereOperations(object):
return session.invoke_api(vim_util, "get_object_property",
session.vim, vm_mobj, property_name)
def query_vm_aggregate_stats(self, vm_moid, counter_id):
def query_vm_aggregate_stats(self, vm_moid, counter_id, duration):
"""Method queries the aggregated real-time stat value for a VM.
This method should be used for aggregate counters.
:param vm_moid: moid of the VM
:param counter_id: id of the perf counter in VC
:param duration: in seconds from current time,
over which the stat value was applicable
:return: the aggregated stats value for the counter
"""
# For aggregate counters, device_name should be ""
stats = self._query_vm_perf_stats(vm_moid, counter_id, "")
stats = self._query_vm_perf_stats(vm_moid, counter_id, "", duration)
# Performance manager provides the aggregated stats value
# with device name -> None
return stats.get(None, 0)
def query_vm_device_stats(self, vm_moid, counter_id):
def query_vm_device_stats(self, vm_moid, counter_id, duration):
"""Method queries the real-time stat values for a VM, for all devices.
This method should be used for device(non-aggregate) counters.
:param vm_moid: moid of the VM
:param counter_id: id of the perf counter in VC
:param duration: in seconds from current time,
over which the stat value was applicable
:return: a map containing the stat values keyed by the device ID/name
"""
# For device counters, device_name should be "*" to get stat values
# for all devices.
stats = self._query_vm_perf_stats(vm_moid, counter_id, "*")
stats = self._query_vm_perf_stats(vm_moid, counter_id, "*", duration)
# For some device counters, in addition to the per device value
# the Performance manager also returns the aggregated value.
@ -173,7 +177,7 @@ class VsphereOperations(object):
stats.pop(None, None)
return stats
def _query_vm_perf_stats(self, vm_moid, counter_id, device_name):
def _query_vm_perf_stats(self, vm_moid, counter_id, device_name, duration):
"""Method queries the real-time stat values for a VM.
:param vm_moid: moid of the VM for which stats are needed
@ -182,6 +186,8 @@ class VsphereOperations(object):
queried. For aggregate counters pass empty string ("").
For device counters pass "*", if stats are required over all
devices.
:param duration: in seconds from current time,
over which the stat value was applicable
:return: a map containing the stat values keyed by the device ID/name
"""
@ -197,8 +203,10 @@ class VsphereOperations(object):
query_spec.entity = vim_util.get_moref(vm_moid, "VirtualMachine")
query_spec.metricId = [metric_id]
query_spec.intervalId = VC_REAL_TIME_SAMPLING_INTERVAL
# The following setting ensures that we need only one latest sample
query_spec.maxSample = 1
# We query all samples which are applicable over the specified duration
samples_cnt = (duration / VC_REAL_TIME_SAMPLING_INTERVAL if duration
else 1)
query_spec.maxSample = samples_cnt
perf_manager = session.vim.service_content.perfManager
perf_stats = session.invoke_api(session.vim, 'QueryPerf', perf_manager,
@ -208,11 +216,12 @@ class VsphereOperations(object):
if perf_stats:
entity_metric = perf_stats[0]
sample_infos = entity_metric.sampleInfo
samples_count = len(sample_infos)
if samples_count > 0:
if len(sample_infos) > 0:
for metric_series in entity_metric.value:
stat_value = float(metric_series.value[samples_count - 1])
# Take the average of all samples to improve the accuracy
# of the stat value
stat_value = float(sum(metric_series.value)) / samples_cnt
device_id = metric_series.id.instance
stat_values[device_id] = stat_value

View File

@ -92,7 +92,7 @@ class TestCPUUtilPollster(base.TestPollsterBase):
virt_inspector.CPUUtilStats(util=60),
))
def inspect_cpu_util(name):
def inspect_cpu_util(name, duration):
return six.next(next_value)
self.inspector.inspect_cpu_util = \

View File

@ -34,7 +34,7 @@ class TestMemoryPollster(base.TestPollsterBase):
virt_inspector.MemoryUsageStats(usage=2.0),
))
def inspect_memory_usage(name):
def inspect_memory_usage(instance, duration):
return six.next(next_value)
self.inspector.inspect_memory_usage = \

View File

@ -99,7 +99,7 @@ class TestVsphereInspection(test.BaseTestCase):
def get_counter_id_side_effect(counter_full_name):
return counter_name_to_id_map[counter_full_name]
def query_stat_side_effect(vm_moid, counter_id):
def query_stat_side_effect(vm_moid, counter_id, duration):
# assert inputs
self.assertEqual(test_vm_moid, vm_moid)
self.assertTrue(counter_id in counter_id_to_stats_map)
@ -144,7 +144,7 @@ class TestVsphereInspection(test.BaseTestCase):
def get_counter_id_side_effect(counter_full_name):
return counter_name_to_id_map[counter_full_name]
def query_stat_side_effect(vm_moid, counter_id):
def query_stat_side_effect(vm_moid, counter_id, duration):
# assert inputs
self.assertEqual(test_vm_moid, vm_moid)
self.assertTrue(counter_id in counter_id_to_stats_map)

View File

@ -161,14 +161,14 @@ class VsphereOperationsTest(test.BaseTestCase):
ops = self._vsphere_ops
# test aggregate stat
stat_val = ops.query_vm_aggregate_stats(vm_moid, counter_id)
self.assertEqual(333, stat_val)
stat_val = ops.query_vm_aggregate_stats(vm_moid, counter_id, 60)
self.assertEqual(222, stat_val)
# test per-device(non-aggregate) stats
expected_device_stats = {
device1: 300,
device2: 30,
device3: 3
device1: 200,
device2: 20,
device3: 2
}
stats = ops.query_vm_device_stats(vm_moid, counter_id)
stats = ops.query_vm_device_stats(vm_moid, counter_id, 60)
self.assertEqual(expected_device_stats, stats)