VMware vSphere: Improve the accuracy of queried samples
Currently, Ceilometer queries only the latest real-time sample from vSphere and builds its own sample from that single value. vSphere records real-time samples every 20 s, whereas the pollsters run every 600 s (10 minutes) by default, so most of the real-time samples recorded between two polls are never taken into account. This change improves the accuracy of the reported sample by averaging the real-time samples over the pollster interval. Change-Id: Id75fe87840b82d7c7d3e4fae9b2fc982e22ea1d7 Implements: blueprint vmware-vcenter-server Closes-Bug: 1295500
This commit is contained in:
parent
55cbe746da
commit
03c803b5ce
@ -21,6 +21,7 @@
|
||||
import abc
|
||||
import six
|
||||
|
||||
from ceilometer.openstack.common import timeutils
|
||||
from ceilometer import plugin
|
||||
|
||||
|
||||
@ -37,3 +38,16 @@ class ComputePollster(plugin.PollsterBase):
|
||||
:param cache: A dictionary for passing data between plugins
|
||||
:param resources: The resources to examine (expected to be instances)
|
||||
"""
|
||||
|
||||
def _record_poll_time(self):
    """Stamp the pollster with the current UTC time.

    :return: seconds elapsed since the previously recorded poll time,
             or None if no poll time had been recorded before this call
    """
    now = timeutils.utcnow()
    try:
        previous = self._last_poll_time
    except AttributeError:
        # First poll on this pollster: there is nothing to measure against.
        elapsed = None
    else:
        elapsed = timeutils.delta_seconds(previous, now)
    # Remember this poll so the next call can compute its own interval.
    self._last_poll_time = now
    return elapsed
|
||||
|
@ -64,10 +64,12 @@ class CPUPollster(plugin.ComputePollster):
|
||||
class CPUUtilPollster(plugin.ComputePollster):
|
||||
|
||||
def get_samples(self, manager, cache, resources):
|
||||
self._inspection_duration = self._record_poll_time()
|
||||
for instance in resources:
|
||||
LOG.debug(_('Checking CPU util for instance %s'), instance.id)
|
||||
try:
|
||||
cpu_info = manager.inspector.inspect_cpu_util(instance)
|
||||
cpu_info = manager.inspector.inspect_cpu_util(
|
||||
instance, self._inspection_duration)
|
||||
LOG.debug(_("CPU UTIL: %(instance)s %(util)d"),
|
||||
({'instance': instance.__dict__,
|
||||
'util': cpu_info.util}))
|
||||
|
@ -171,7 +171,9 @@ class _DiskRatesPollsterBase(plugin.ComputePollster):
|
||||
r_requests_rate = 0
|
||||
w_bytes_rate = 0
|
||||
w_requests_rate = 0
|
||||
for disk, info in inspector.inspect_disk_rates(instance):
|
||||
disk_rates = inspector.inspect_disk_rates(
|
||||
instance, self._inspection_duration)
|
||||
for disk, info in disk_rates:
|
||||
r_bytes_rate += info.read_bytes_rate
|
||||
r_requests_rate += info.read_requests_rate
|
||||
w_bytes_rate += info.write_bytes_rate
|
||||
@ -189,6 +191,7 @@ class _DiskRatesPollsterBase(plugin.ComputePollster):
|
||||
"""Return one Sample."""
|
||||
|
||||
def get_samples(self, manager, cache, resources):
|
||||
self._inspection_duration = self._record_poll_time()
|
||||
for instance in resources:
|
||||
try:
|
||||
disk_rates_info = self._populate_cache(
|
||||
|
@ -26,10 +26,12 @@ LOG = log.getLogger(__name__)
|
||||
class MemoryUsagePollster(plugin.ComputePollster):
|
||||
|
||||
def get_samples(self, manager, cache, resources):
|
||||
self._inspection_duration = self._record_poll_time()
|
||||
for instance in resources:
|
||||
LOG.debug(_('Checking memory usage for instance %s'), instance.id)
|
||||
try:
|
||||
memory_info = manager.inspector.inspect_memory_usage(instance)
|
||||
memory_info = manager.inspector.inspect_memory_usage(
|
||||
instance, self._inspection_duration)
|
||||
LOG.debug(_("MEMORY USAGE: %(instance)s %(usage)f"),
|
||||
({'instance': instance.__dict__,
|
||||
'usage': memory_info.usage}))
|
||||
|
@ -84,6 +84,7 @@ class _Base(plugin.ComputePollster):
|
||||
return i_cache[instance_name]
|
||||
|
||||
def get_samples(self, manager, cache, resources):
|
||||
self._inspection_duration = self._record_poll_time()
|
||||
for instance in resources:
|
||||
instance_name = util.instance_name(instance)
|
||||
LOG.debug(_('checking net info for instance %s'), instance.id)
|
||||
@ -122,7 +123,8 @@ class _RateBase(_Base):
|
||||
CACHE_KEY_VNIC = 'vnic-rates'
|
||||
|
||||
def _get_vnic_info(self, inspector, instance):
|
||||
return inspector.inspect_vnic_rates(instance)
|
||||
return inspector.inspect_vnic_rates(instance,
|
||||
self._inspection_duration)
|
||||
|
||||
def _get_rx_info(self, info):
|
||||
return info.rx_bytes_rate
|
||||
|
@ -159,10 +159,12 @@ class Inspector(object):
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def inspect_cpu_util(self, instance):
|
||||
def inspect_cpu_util(self, instance, duration=None):
|
||||
"""Inspect the CPU Utilization (%) for an instance.
|
||||
|
||||
:param instance: the target instance
|
||||
:param duration: the last 'n' seconds, over which the value should be
|
||||
inspected
|
||||
:return: the percentage of CPU utilization
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
@ -176,10 +178,12 @@ class Inspector(object):
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def inspect_vnic_rates(self, instance):
|
||||
def inspect_vnic_rates(self, instance, duration=None):
|
||||
"""Inspect the vNIC rate statistics for an instance.
|
||||
|
||||
:param instance: the target instance
|
||||
:param duration: the last 'n' seconds, over which the value should be
|
||||
inspected
|
||||
:return: for each vNIC, the rate of bytes & packets
|
||||
received and transmitted
|
||||
"""
|
||||
@ -194,18 +198,22 @@ class Inspector(object):
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def inspect_memory_usage(self, instance):
|
||||
def inspect_memory_usage(self, instance, duration=None):
|
||||
"""Inspect the memory usage statistics for an instance.
|
||||
|
||||
:param instance: the target instance
|
||||
:param duration: the last 'n' seconds, over which the value should be
|
||||
inspected
|
||||
:return: the amount of memory used
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def inspect_disk_rates(self, instance):
|
||||
def inspect_disk_rates(self, instance, duration=None):
|
||||
"""Inspect the disk statistics as rates for an instance.
|
||||
|
||||
:param instance: the target instance
|
||||
:param duration: the last 'n' seconds, over which the value should be
|
||||
inspected
|
||||
:return: for each disk, the number of bytes & operations
|
||||
read and written per second, with the error count
|
||||
"""
|
||||
|
@ -43,7 +43,7 @@ OPTS = [
|
||||
cfg.FloatOpt('task_poll_interval',
|
||||
default=0.5,
|
||||
help='Sleep time in seconds for polling an ongoing async '
|
||||
'task'),
|
||||
'task')
|
||||
]
|
||||
|
||||
cfg.CONF.register_group(opt_group)
|
||||
@ -79,21 +79,15 @@ class VsphereInspector(virt_inspector.Inspector):
|
||||
self._ops = vsphere_operations.VsphereOperations(
|
||||
get_api_session(), 1000)
|
||||
|
||||
def inspect_instances(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def inspect_cpus(self, instance_name):
|
||||
raise NotImplementedError()
|
||||
|
||||
def inspect_cpu_util(self, instance):
|
||||
def inspect_cpu_util(self, instance, duration=None):
|
||||
vm_moid = self._ops.get_vm_moid(instance.id)
|
||||
if vm_moid is None:
|
||||
raise virt_inspector.InstanceNotFoundException(
|
||||
_('VM %s not found in VMware Vsphere') % instance.id)
|
||||
cpu_util_counter_id = self._ops.get_perf_counter_id(
|
||||
VC_AVERAGE_CPU_CONSUMED_CNTR)
|
||||
cpu_util = self._ops.query_vm_aggregate_stats(vm_moid,
|
||||
cpu_util_counter_id)
|
||||
cpu_util = self._ops.query_vm_aggregate_stats(
|
||||
vm_moid, cpu_util_counter_id, duration)
|
||||
|
||||
# For this counter vSphere returns values scaled-up by 100, since the
|
||||
# corresponding API can't return decimals, but only longs.
|
||||
@ -102,10 +96,7 @@ class VsphereInspector(virt_inspector.Inspector):
|
||||
cpu_util = cpu_util / 100
|
||||
return virt_inspector.CPUUtilStats(util=cpu_util)
|
||||
|
||||
def inspect_vnics(self, instance_name):
|
||||
raise NotImplementedError()
|
||||
|
||||
def inspect_vnic_rates(self, instance):
|
||||
def inspect_vnic_rates(self, instance, duration=None):
|
||||
vm_moid = self._ops.get_vm_moid(instance.id)
|
||||
if not vm_moid:
|
||||
raise virt_inspector.InstanceNotFoundException(
|
||||
@ -116,8 +107,8 @@ class VsphereInspector(virt_inspector.Inspector):
|
||||
|
||||
for net_counter in (VC_NETWORK_RX_COUNTER, VC_NETWORK_TX_COUNTER):
|
||||
net_counter_id = self._ops.get_perf_counter_id(net_counter)
|
||||
vnic_id_to_stats_map = \
|
||||
self._ops.query_vm_device_stats(vm_moid, net_counter_id)
|
||||
vnic_id_to_stats_map = self._ops.query_vm_device_stats(
|
||||
vm_moid, net_counter_id, duration)
|
||||
vnic_stats[net_counter] = vnic_id_to_stats_map
|
||||
vnic_ids.update(vnic_id_to_stats_map.iterkeys())
|
||||
|
||||
@ -137,22 +128,20 @@ class VsphereInspector(virt_inspector.Inspector):
|
||||
parameters=None)
|
||||
yield (interface, stats)
|
||||
|
||||
def inspect_disks(self, instance_name):
|
||||
raise NotImplementedError()
|
||||
|
||||
def inspect_memory_usage(self, instance):
|
||||
def inspect_memory_usage(self, instance, duration=None):
|
||||
vm_moid = self._ops.get_vm_moid(instance.id)
|
||||
if vm_moid is None:
|
||||
raise virt_inspector.InstanceNotFoundException(
|
||||
_('VM %s not found in VMware Vsphere') % instance.id)
|
||||
mem_counter_id = self._ops.get_perf_counter_id(
|
||||
VC_AVERAGE_MEMORY_CONSUMED_CNTR)
|
||||
memory = self._ops.query_vm_aggregate_stats(vm_moid, mem_counter_id)
|
||||
memory = self._ops.query_vm_aggregate_stats(
|
||||
vm_moid, mem_counter_id, duration)
|
||||
# Stat provided from vSphere is in KB, converting it to MB.
|
||||
memory = memory / units.Ki
|
||||
return virt_inspector.MemoryUsageStats(usage=memory)
|
||||
|
||||
def inspect_disk_rates(self, instance):
|
||||
def inspect_disk_rates(self, instance, duration=None):
|
||||
vm_moid = self._ops.get_vm_moid(instance.id)
|
||||
if not vm_moid:
|
||||
raise virt_inspector.InstanceNotFoundException(
|
||||
@ -170,7 +159,7 @@ class VsphereInspector(virt_inspector.Inspector):
|
||||
for disk_counter in disk_counters:
|
||||
disk_counter_id = self._ops.get_perf_counter_id(disk_counter)
|
||||
disk_id_to_stat_map = self._ops.query_vm_device_stats(
|
||||
vm_moid, disk_counter_id)
|
||||
vm_moid, disk_counter_id, duration)
|
||||
disk_stats[disk_counter] = disk_id_to_stat_map
|
||||
disk_ids.update(disk_id_to_stat_map.iterkeys())
|
||||
|
||||
|
@ -138,34 +138,38 @@ class VsphereOperations(object):
|
||||
return session.invoke_api(vim_util, "get_object_property",
|
||||
session.vim, vm_mobj, property_name)
|
||||
|
||||
def query_vm_aggregate_stats(self, vm_moid, counter_id):
|
||||
def query_vm_aggregate_stats(self, vm_moid, counter_id, duration):
|
||||
"""Method queries the aggregated real-time stat value for a VM.
|
||||
|
||||
This method should be used for aggregate counters.
|
||||
|
||||
:param vm_moid: moid of the VM
|
||||
:param counter_id: id of the perf counter in VC
|
||||
:param duration: in seconds from current time,
|
||||
over which the stat value was applicable
|
||||
:return: the aggregated stats value for the counter
|
||||
"""
|
||||
# For aggregate counters, device_name should be ""
|
||||
stats = self._query_vm_perf_stats(vm_moid, counter_id, "")
|
||||
stats = self._query_vm_perf_stats(vm_moid, counter_id, "", duration)
|
||||
|
||||
# Performance manager provides the aggregated stats value
|
||||
# with device name -> None
|
||||
return stats.get(None, 0)
|
||||
|
||||
def query_vm_device_stats(self, vm_moid, counter_id):
|
||||
def query_vm_device_stats(self, vm_moid, counter_id, duration):
|
||||
"""Method queries the real-time stat values for a VM, for all devices.
|
||||
|
||||
This method should be used for device(non-aggregate) counters.
|
||||
|
||||
:param vm_moid: moid of the VM
|
||||
:param counter_id: id of the perf counter in VC
|
||||
:param duration: in seconds from current time,
|
||||
over which the stat value was applicable
|
||||
:return: a map containing the stat values keyed by the device ID/name
|
||||
"""
|
||||
# For device counters, device_name should be "*" to get stat values
|
||||
# for all devices.
|
||||
stats = self._query_vm_perf_stats(vm_moid, counter_id, "*")
|
||||
stats = self._query_vm_perf_stats(vm_moid, counter_id, "*", duration)
|
||||
|
||||
# For some device counters, in addition to the per device value
|
||||
# the Performance manager also returns the aggregated value.
|
||||
@ -173,7 +177,7 @@ class VsphereOperations(object):
|
||||
stats.pop(None, None)
|
||||
return stats
|
||||
|
||||
def _query_vm_perf_stats(self, vm_moid, counter_id, device_name):
|
||||
def _query_vm_perf_stats(self, vm_moid, counter_id, device_name, duration):
|
||||
"""Method queries the real-time stat values for a VM.
|
||||
|
||||
:param vm_moid: moid of the VM for which stats are needed
|
||||
@ -182,6 +186,8 @@ class VsphereOperations(object):
|
||||
queried. For aggregate counters pass empty string ("").
|
||||
For device counters pass "*", if stats are required over all
|
||||
devices.
|
||||
:param duration: in seconds from current time,
|
||||
over which the stat value was applicable
|
||||
:return: a map containing the stat values keyed by the device ID/name
|
||||
"""
|
||||
|
||||
@ -197,8 +203,10 @@ class VsphereOperations(object):
|
||||
query_spec.entity = vim_util.get_moref(vm_moid, "VirtualMachine")
|
||||
query_spec.metricId = [metric_id]
|
||||
query_spec.intervalId = VC_REAL_TIME_SAMPLING_INTERVAL
|
||||
# The following setting ensures that we need only one latest sample
|
||||
query_spec.maxSample = 1
|
||||
# We query all samples which are applicable over the specified duration
|
||||
samples_cnt = (duration / VC_REAL_TIME_SAMPLING_INTERVAL if duration
|
||||
else 1)
|
||||
query_spec.maxSample = samples_cnt
|
||||
|
||||
perf_manager = session.vim.service_content.perfManager
|
||||
perf_stats = session.invoke_api(session.vim, 'QueryPerf', perf_manager,
|
||||
@ -208,11 +216,12 @@ class VsphereOperations(object):
|
||||
if perf_stats:
|
||||
entity_metric = perf_stats[0]
|
||||
sample_infos = entity_metric.sampleInfo
|
||||
samples_count = len(sample_infos)
|
||||
|
||||
if samples_count > 0:
|
||||
if len(sample_infos) > 0:
|
||||
for metric_series in entity_metric.value:
|
||||
stat_value = float(metric_series.value[samples_count - 1])
|
||||
# Take the average of all samples to improve the accuracy
|
||||
# of the stat value
|
||||
stat_value = float(sum(metric_series.value)) / samples_cnt
|
||||
device_id = metric_series.id.instance
|
||||
stat_values[device_id] = stat_value
|
||||
|
||||
|
@ -92,7 +92,7 @@ class TestCPUUtilPollster(base.TestPollsterBase):
|
||||
virt_inspector.CPUUtilStats(util=60),
|
||||
))
|
||||
|
||||
def inspect_cpu_util(name):
|
||||
def inspect_cpu_util(name, duration):
|
||||
return six.next(next_value)
|
||||
|
||||
self.inspector.inspect_cpu_util = \
|
||||
|
@ -34,7 +34,7 @@ class TestMemoryPollster(base.TestPollsterBase):
|
||||
virt_inspector.MemoryUsageStats(usage=2.0),
|
||||
))
|
||||
|
||||
def inspect_memory_usage(name):
|
||||
def inspect_memory_usage(instance, duration):
|
||||
return six.next(next_value)
|
||||
|
||||
self.inspector.inspect_memory_usage = \
|
||||
|
@ -99,7 +99,7 @@ class TestVsphereInspection(test.BaseTestCase):
|
||||
def get_counter_id_side_effect(counter_full_name):
|
||||
return counter_name_to_id_map[counter_full_name]
|
||||
|
||||
def query_stat_side_effect(vm_moid, counter_id):
|
||||
def query_stat_side_effect(vm_moid, counter_id, duration):
|
||||
# assert inputs
|
||||
self.assertEqual(test_vm_moid, vm_moid)
|
||||
self.assertTrue(counter_id in counter_id_to_stats_map)
|
||||
@ -144,7 +144,7 @@ class TestVsphereInspection(test.BaseTestCase):
|
||||
def get_counter_id_side_effect(counter_full_name):
|
||||
return counter_name_to_id_map[counter_full_name]
|
||||
|
||||
def query_stat_side_effect(vm_moid, counter_id):
|
||||
def query_stat_side_effect(vm_moid, counter_id, duration):
|
||||
# assert inputs
|
||||
self.assertEqual(test_vm_moid, vm_moid)
|
||||
self.assertTrue(counter_id in counter_id_to_stats_map)
|
||||
|
@ -161,14 +161,14 @@ class VsphereOperationsTest(test.BaseTestCase):
|
||||
ops = self._vsphere_ops
|
||||
|
||||
# test aggregate stat
|
||||
stat_val = ops.query_vm_aggregate_stats(vm_moid, counter_id)
|
||||
self.assertEqual(333, stat_val)
|
||||
stat_val = ops.query_vm_aggregate_stats(vm_moid, counter_id, 60)
|
||||
self.assertEqual(222, stat_val)
|
||||
|
||||
# test per-device(non-aggregate) stats
|
||||
expected_device_stats = {
|
||||
device1: 300,
|
||||
device2: 30,
|
||||
device3: 3
|
||||
device1: 200,
|
||||
device2: 20,
|
||||
device3: 2
|
||||
}
|
||||
stats = ops.query_vm_device_stats(vm_moid, counter_id)
|
||||
stats = ops.query_vm_device_stats(vm_moid, counter_id, 60)
|
||||
self.assertEqual(expected_device_stats, stats)
|
||||
|
Loading…
Reference in New Issue
Block a user