From 03c803b5ce9f0a0f0bd1b6343c3fc156be36d8f9 Mon Sep 17 00:00:00 2001 From: Akhil Hingane Date: Sat, 15 Mar 2014 13:32:06 +0530 Subject: [PATCH] VMware vSphere: Improve the accuracy of queried samples Currently we are querying the latest real time sample from vSphere and creating a sample out of it, in ceilometer. The sampling interval of real time samples in vSphere is 20 s. However, the interval at which the various pollsters are run is by default 600 s (10 mins). This CL attempts to improve the accuracy of the sample by averaging the samples over the pollster interval. Change-Id: Id75fe87840b82d7c7d3e4fae9b2fc982e22ea1d7 Implements: blueprint vmware-vcenter-server Closes-Bug: 1295500 --- ceilometer/compute/plugin.py | 14 ++++++++ ceilometer/compute/pollsters/cpu.py | 4 ++- ceilometer/compute/pollsters/disk.py | 5 ++- ceilometer/compute/pollsters/memory.py | 4 ++- ceilometer/compute/pollsters/net.py | 4 ++- ceilometer/compute/virt/inspector.py | 16 ++++++--- ceilometer/compute/virt/vmware/inspector.py | 35 +++++++------------ .../compute/virt/vmware/vsphere_operations.py | 29 +++++++++------ .../tests/compute/pollsters/test_cpu.py | 2 +- .../tests/compute/pollsters/test_memory.py | 2 +- .../compute/virt/vmware/test_inspector.py | 4 +-- .../virt/vmware/test_vsphere_operations.py | 12 +++---- 12 files changed, 80 insertions(+), 51 deletions(-) diff --git a/ceilometer/compute/plugin.py b/ceilometer/compute/plugin.py index e1ee81ce0..2c4244298 100644 --- a/ceilometer/compute/plugin.py +++ b/ceilometer/compute/plugin.py @@ -21,6 +21,7 @@ import abc import six +from ceilometer.openstack.common import timeutils from ceilometer import plugin @@ -37,3 +38,16 @@ class ComputePollster(plugin.PollsterBase): :param cache: A dictionary for passing data between plugins :param resources: The resources to examine (expected to be instances) """ + + def _record_poll_time(self): + """Method records current time as the poll time. + + :return: time in seconds since the last poll time was recorded + """ + current_time = timeutils.utcnow() + duration = None + if hasattr(self, '_last_poll_time'): + duration = timeutils.delta_seconds(self._last_poll_time, + current_time) + self._last_poll_time = current_time + return duration diff --git a/ceilometer/compute/pollsters/cpu.py b/ceilometer/compute/pollsters/cpu.py index 5e55314fa..f0ee34f71 100644 --- a/ceilometer/compute/pollsters/cpu.py +++ b/ceilometer/compute/pollsters/cpu.py @@ -64,10 +64,12 @@ class CPUPollster(plugin.ComputePollster): class CPUUtilPollster(plugin.ComputePollster): def get_samples(self, manager, cache, resources): + self._inspection_duration = self._record_poll_time() for instance in resources: LOG.debug(_('Checking CPU util for instance %s'), instance.id) try: - cpu_info = manager.inspector.inspect_cpu_util(instance) + cpu_info = manager.inspector.inspect_cpu_util( + instance, self._inspection_duration) LOG.debug(_("CPU UTIL: %(instance)s %(util)d"), ({'instance': instance.__dict__, 'util': cpu_info.util})) diff --git a/ceilometer/compute/pollsters/disk.py b/ceilometer/compute/pollsters/disk.py index 6263b5d31..c6f9f776b 100644 --- a/ceilometer/compute/pollsters/disk.py +++ b/ceilometer/compute/pollsters/disk.py @@ -171,7 +171,9 @@ class _DiskRatesPollsterBase(plugin.ComputePollster): r_requests_rate = 0 w_bytes_rate = 0 w_requests_rate = 0 - for disk, info in inspector.inspect_disk_rates(instance): + disk_rates = inspector.inspect_disk_rates( + instance, self._inspection_duration) + for disk, info in disk_rates: r_bytes_rate += info.read_bytes_rate r_requests_rate += info.read_requests_rate w_bytes_rate += info.write_bytes_rate @@ -189,6 +191,7 @@ class _DiskRatesPollsterBase(plugin.ComputePollster): """Return one Sample.""" def get_samples(self, manager, cache, resources): + self._inspection_duration = self._record_poll_time() for instance in resources: try: disk_rates_info = self._populate_cache( diff --git a/ceilometer/compute/pollsters/memory.py b/ceilometer/compute/pollsters/memory.py index 296aa06e2..43850a72e 100644 --- a/ceilometer/compute/pollsters/memory.py +++ b/ceilometer/compute/pollsters/memory.py @@ -26,10 +26,12 @@ LOG = log.getLogger(__name__) class MemoryUsagePollster(plugin.ComputePollster): def get_samples(self, manager, cache, resources): + self._inspection_duration = self._record_poll_time() for instance in resources: LOG.debug(_('Checking memory usage for instance %s'), instance.id) try: - memory_info = manager.inspector.inspect_memory_usage(instance) + memory_info = manager.inspector.inspect_memory_usage( + instance, self._inspection_duration) LOG.debug(_("MEMORY USAGE: %(instance)s %(usage)f"), ({'instance': instance.__dict__, 'usage': memory_info.usage})) diff --git a/ceilometer/compute/pollsters/net.py b/ceilometer/compute/pollsters/net.py index c5d61c046..5eae80dee 100644 --- a/ceilometer/compute/pollsters/net.py +++ b/ceilometer/compute/pollsters/net.py @@ -84,6 +84,7 @@ class _Base(plugin.ComputePollster): return i_cache[instance_name] def get_samples(self, manager, cache, resources): + self._inspection_duration = self._record_poll_time() for instance in resources: instance_name = util.instance_name(instance) LOG.debug(_('checking net info for instance %s'), instance.id) @@ -122,7 +123,8 @@ class _RateBase(_Base): CACHE_KEY_VNIC = 'vnic-rates' def _get_vnic_info(self, inspector, instance): - return inspector.inspect_vnic_rates(instance) + return inspector.inspect_vnic_rates(instance, + self._inspection_duration) def _get_rx_info(self, info): return info.rx_bytes_rate diff --git a/ceilometer/compute/virt/inspector.py b/ceilometer/compute/virt/inspector.py index b7b1c6075..cc7cf7a34 100644 --- a/ceilometer/compute/virt/inspector.py +++ b/ceilometer/compute/virt/inspector.py @@ -159,10 +159,12 @@ class Inspector(object): """ raise NotImplementedError() - def inspect_cpu_util(self, instance): + def inspect_cpu_util(self, instance, duration=None): """Inspect the CPU Utilization (%) for an instance. :param instance: the target instance + :param duration: the last 'n' seconds, over which the value should be + inspected :return: the percentage of CPU utilization """ raise NotImplementedError() @@ -176,10 +178,12 @@ class Inspector(object): """ raise NotImplementedError() - def inspect_vnic_rates(self, instance): + def inspect_vnic_rates(self, instance, duration=None): """Inspect the vNIC rate statistics for an instance. :param instance: the target instance + :param duration: the last 'n' seconds, over which the value should be + inspected :return: for each vNIC, the rate of bytes & packets received and transmitted """ @@ -194,18 +198,22 @@ class Inspector(object): """ raise NotImplementedError() - def inspect_memory_usage(self, instance): + def inspect_memory_usage(self, instance, duration=None): """Inspect the memory usage statistics for an instance. :param instance: the target instance + :param duration: the last 'n' seconds, over which the value should be + inspected :return: the amount of memory used """ raise NotImplementedError() - def inspect_disk_rates(self, instance): + def inspect_disk_rates(self, instance, duration=None): """Inspect the disk statistics as rates for an instance. :param instance: the target instance + :param duration: the last 'n' seconds, over which the value should be + inspected :return: for each disk, the number of bytes & operations read and written per second, with the error count """ diff --git a/ceilometer/compute/virt/vmware/inspector.py b/ceilometer/compute/virt/vmware/inspector.py index f4fd1e6a2..46f83d4e9 100644 --- a/ceilometer/compute/virt/vmware/inspector.py +++ b/ceilometer/compute/virt/vmware/inspector.py @@ -43,7 +43,7 @@ OPTS = [ cfg.FloatOpt('task_poll_interval', default=0.5, help='Sleep time in seconds for polling an ongoing async ' - 'task'), + 'task') ] cfg.CONF.register_group(opt_group) @@ -79,21 +79,15 @@ class VsphereInspector(virt_inspector.Inspector): self._ops = vsphere_operations.VsphereOperations( get_api_session(), 1000) - def inspect_instances(self): - raise NotImplementedError() - - def inspect_cpus(self, instance_name): - raise NotImplementedError() - - def inspect_cpu_util(self, instance): + def inspect_cpu_util(self, instance, duration=None): vm_moid = self._ops.get_vm_moid(instance.id) if vm_moid is None: raise virt_inspector.InstanceNotFoundException( _('VM %s not found in VMware Vsphere') % instance.id) cpu_util_counter_id = self._ops.get_perf_counter_id( VC_AVERAGE_CPU_CONSUMED_CNTR) - cpu_util = self._ops.query_vm_aggregate_stats(vm_moid, - cpu_util_counter_id) + cpu_util = self._ops.query_vm_aggregate_stats( + vm_moid, cpu_util_counter_id, duration) # For this counter vSphere returns values scaled-up by 100, since the # corresponding API can't return decimals, but only longs. @@ -102,10 +96,7 @@ class VsphereInspector(virt_inspector.Inspector): cpu_util = cpu_util / 100 return virt_inspector.CPUUtilStats(util=cpu_util) - def inspect_vnics(self, instance_name): - raise NotImplementedError() - - def inspect_vnic_rates(self, instance): + def inspect_vnic_rates(self, instance, duration=None): vm_moid = self._ops.get_vm_moid(instance.id) if not vm_moid: raise virt_inspector.InstanceNotFoundException( @@ -116,8 +107,8 @@ class VsphereInspector(virt_inspector.Inspector): for net_counter in (VC_NETWORK_RX_COUNTER, VC_NETWORK_TX_COUNTER): net_counter_id = self._ops.get_perf_counter_id(net_counter) - vnic_id_to_stats_map = \ - self._ops.query_vm_device_stats(vm_moid, net_counter_id) + vnic_id_to_stats_map = self._ops.query_vm_device_stats( + vm_moid, net_counter_id, duration) vnic_stats[net_counter] = vnic_id_to_stats_map vnic_ids.update(vnic_id_to_stats_map.iterkeys()) @@ -137,22 +128,20 @@ class VsphereInspector(virt_inspector.Inspector): parameters=None) yield (interface, stats) - def inspect_disks(self, instance_name): - raise NotImplementedError() - - def inspect_memory_usage(self, instance): + def inspect_memory_usage(self, instance, duration=None): vm_moid = self._ops.get_vm_moid(instance.id) if vm_moid is None: raise virt_inspector.InstanceNotFoundException( _('VM %s not found in VMware Vsphere') % instance.id) mem_counter_id = self._ops.get_perf_counter_id( VC_AVERAGE_MEMORY_CONSUMED_CNTR) - memory = self._ops.query_vm_aggregate_stats(vm_moid, mem_counter_id) + memory = self._ops.query_vm_aggregate_stats( + vm_moid, mem_counter_id, duration) # Stat provided from vSphere is in KB, converting it to MB. memory = memory / units.Ki return virt_inspector.MemoryUsageStats(usage=memory) - def inspect_disk_rates(self, instance): + def inspect_disk_rates(self, instance, duration=None): vm_moid = self._ops.get_vm_moid(instance.id) if not vm_moid: raise virt_inspector.InstanceNotFoundException( @@ -170,7 +159,7 @@ class VsphereInspector(virt_inspector.Inspector): for disk_counter in disk_counters: disk_counter_id = self._ops.get_perf_counter_id(disk_counter) disk_id_to_stat_map = self._ops.query_vm_device_stats( - vm_moid, disk_counter_id) + vm_moid, disk_counter_id, duration) disk_stats[disk_counter] = disk_id_to_stat_map disk_ids.update(disk_id_to_stat_map.iterkeys()) diff --git a/ceilometer/compute/virt/vmware/vsphere_operations.py b/ceilometer/compute/virt/vmware/vsphere_operations.py index d4f5a3fed..ef9d78605 100644 --- a/ceilometer/compute/virt/vmware/vsphere_operations.py +++ b/ceilometer/compute/virt/vmware/vsphere_operations.py @@ -138,34 +138,38 @@ class VsphereOperations(object): return session.invoke_api(vim_util, "get_object_property", session.vim, vm_mobj, property_name) - def query_vm_aggregate_stats(self, vm_moid, counter_id): + def query_vm_aggregate_stats(self, vm_moid, counter_id, duration): """Method queries the aggregated real-time stat value for a VM. This method should be used for aggregate counters. :param vm_moid: moid of the VM :param counter_id: id of the perf counter in VC + :param duration: in seconds from current time, + over which the stat value was applicable :return: the aggregated stats value for the counter """ # For aggregate counters, device_name should be "" - stats = self._query_vm_perf_stats(vm_moid, counter_id, "") + stats = self._query_vm_perf_stats(vm_moid, counter_id, "", duration) # Performance manager provides the aggregated stats value # with device name -> None return stats.get(None, 0) - def query_vm_device_stats(self, vm_moid, counter_id): + def query_vm_device_stats(self, vm_moid, counter_id, duration): """Method queries the real-time stat values for a VM, for all devices. This method should be used for device(non-aggregate) counters. :param vm_moid: moid of the VM :param counter_id: id of the perf counter in VC + :param duration: in seconds from current time, + over which the stat value was applicable :return: a map containing the stat values keyed by the device ID/name """ # For device counters, device_name should be "*" to get stat values # for all devices. - stats = self._query_vm_perf_stats(vm_moid, counter_id, "*") + stats = self._query_vm_perf_stats(vm_moid, counter_id, "*", duration) # For some device counters, in addition to the per device value # the Performance manager also returns the aggregated value. @@ -173,7 +177,7 @@ class VsphereOperations(object): stats.pop(None, None) return stats - def _query_vm_perf_stats(self, vm_moid, counter_id, device_name): + def _query_vm_perf_stats(self, vm_moid, counter_id, device_name, duration): """Method queries the real-time stat values for a VM. :param vm_moid: moid of the VM for which stats are needed @@ -182,6 +186,8 @@ class VsphereOperations(object): queried. For aggregate counters pass empty string (""). For device counters pass "*", if stats are required over all devices. + :param duration: in seconds from current time, + over which the stat value was applicable :return: a map containing the stat values keyed by the device ID/name """ @@ -197,8 +203,10 @@ class VsphereOperations(object): query_spec.entity = vim_util.get_moref(vm_moid, "VirtualMachine") query_spec.metricId = [metric_id] query_spec.intervalId = VC_REAL_TIME_SAMPLING_INTERVAL - # The following setting ensures that we need only one latest sample - query_spec.maxSample = 1 + # We query all samples which are applicable over the specified duration + samples_cnt = (duration / VC_REAL_TIME_SAMPLING_INTERVAL if duration + else 1) + query_spec.maxSample = samples_cnt perf_manager = session.vim.service_content.perfManager perf_stats = session.invoke_api(session.vim, 'QueryPerf', perf_manager, @@ -208,11 +216,12 @@ class VsphereOperations(object): if perf_stats: entity_metric = perf_stats[0] sample_infos = entity_metric.sampleInfo - samples_count = len(sample_infos) - if samples_count > 0: + if len(sample_infos) > 0: for metric_series in entity_metric.value: - stat_value = float(metric_series.value[samples_count - 1]) + # Take the average of all samples to improve the accuracy + # of the stat value + stat_value = float(sum(metric_series.value)) / samples_cnt device_id = metric_series.id.instance stat_values[device_id] = stat_value diff --git a/ceilometer/tests/compute/pollsters/test_cpu.py b/ceilometer/tests/compute/pollsters/test_cpu.py index e24ec3b04..de4b39dea 100644 --- a/ceilometer/tests/compute/pollsters/test_cpu.py +++ b/ceilometer/tests/compute/pollsters/test_cpu.py @@ -92,7 +92,7 @@ class TestCPUUtilPollster(base.TestPollsterBase): virt_inspector.CPUUtilStats(util=60), )) - def inspect_cpu_util(name): + def inspect_cpu_util(name, duration): return six.next(next_value) self.inspector.inspect_cpu_util = \ diff --git a/ceilometer/tests/compute/pollsters/test_memory.py b/ceilometer/tests/compute/pollsters/test_memory.py index c630a8ab9..d3cc302a2 100644 --- a/ceilometer/tests/compute/pollsters/test_memory.py +++ b/ceilometer/tests/compute/pollsters/test_memory.py @@ -34,7 +34,7 @@ class TestMemoryPollster(base.TestPollsterBase): virt_inspector.MemoryUsageStats(usage=2.0), )) - def inspect_memory_usage(name): + def inspect_memory_usage(instance, duration): return six.next(next_value) self.inspector.inspect_memory_usage = \ diff --git a/ceilometer/tests/compute/virt/vmware/test_inspector.py b/ceilometer/tests/compute/virt/vmware/test_inspector.py index 2c4f55ccb..56b1f3518 100644 --- a/ceilometer/tests/compute/virt/vmware/test_inspector.py +++ b/ceilometer/tests/compute/virt/vmware/test_inspector.py @@ -99,7 +99,7 @@ class TestVsphereInspection(test.BaseTestCase): def get_counter_id_side_effect(counter_full_name): return counter_name_to_id_map[counter_full_name] - def query_stat_side_effect(vm_moid, counter_id): + def query_stat_side_effect(vm_moid, counter_id, duration): # assert inputs self.assertEqual(test_vm_moid, vm_moid) self.assertTrue(counter_id in counter_id_to_stats_map) @@ -144,7 +144,7 @@ class TestVsphereInspection(test.BaseTestCase): def get_counter_id_side_effect(counter_full_name): return counter_name_to_id_map[counter_full_name] - def query_stat_side_effect(vm_moid, counter_id): + def query_stat_side_effect(vm_moid, counter_id, duration): # assert inputs self.assertEqual(test_vm_moid, vm_moid) self.assertTrue(counter_id in counter_id_to_stats_map) diff --git a/ceilometer/tests/compute/virt/vmware/test_vsphere_operations.py b/ceilometer/tests/compute/virt/vmware/test_vsphere_operations.py index a06db97b3..072820975 100644 --- a/ceilometer/tests/compute/virt/vmware/test_vsphere_operations.py +++ b/ceilometer/tests/compute/virt/vmware/test_vsphere_operations.py @@ -161,14 +161,14 @@ class VsphereOperationsTest(test.BaseTestCase): ops = self._vsphere_ops # test aggregate stat - stat_val = ops.query_vm_aggregate_stats(vm_moid, counter_id) - self.assertEqual(333, stat_val) + stat_val = ops.query_vm_aggregate_stats(vm_moid, counter_id, 60) + self.assertEqual(222, stat_val) # test per-device(non-aggregate) stats expected_device_stats = { - device1: 300, - device2: 30, - device3: 3 + device1: 200, + device2: 20, + device3: 2 } - stats = ops.query_vm_device_stats(vm_moid, counter_id) + stats = ops.query_vm_device_stats(vm_moid, counter_id, 60) self.assertEqual(expected_device_stats, stats)