Add a new metric for addressable requests per provider pool

Added a new metric that exports a gauge counting how many open requests each provider pool can handle.
Added a test case for this as well.

Change-Id: I712204afbd2ec63d929df493336702786957f05c
This commit is contained in:
Samuel Surovka 2024-02-09 14:20:57 +01:00
parent 909cf73ca0
commit 18c8e43c2a
6 changed files with 160 additions and 0 deletions

View File

@ -632,6 +632,11 @@ Provider Metrics
Number of leaked volumes removed automatically by Nodepool.
.. zuul:stat:: nodepool.provider.<provider>.pool.<pool>.addressable_requests
:type: gauge
Number of open node requests a provider pool can address.
Launch metrics
^^^^^^^^^^^^^^

View File

@ -1020,6 +1020,7 @@ class StatsWorker(BaseCleanupWorker, stats.StatsReporter):
self.stats_event.clear()
try:
self.updateNodeStats(zk)
self.updateNodeRequestStats(zk)
except Exception:
self.log.exception("Exception while reporting stats:")
time.sleep(1)

View File

@ -180,3 +180,41 @@ class StatsReporter(object):
key = key_template % (tenant, k)
pipeline.gauge(key, lim)
pipeline.send()
def updateNodeRequestStats(self, zk_conn):
    """Emit one ``addressable_requests`` gauge per registered pool.

    For every registered pool that has a name, count the node requests
    whose requested node types are all among that pool's supported
    labels, then report the count as
    ``nodepool.provider.<provider>.pool.<pool>.addressable_requests``.
    Nothing happens when no statsd client is configured.

    Note that a request listing no node types counts towards every
    pool, since there is no label a pool could fail to support.

    :param zk_conn: Object providing ``getRegisteredPools()`` and
        ``nodeRequestIterator()``.
    """
    statsd = self._statsd
    if not statsd:
        return

    pipeline = statsd.pipeline()

    # (provider name, pool name) -> labels that pool supports.
    labels_by_pool = {}
    for registered in zk_conn.getRegisteredPools():
        # Pools registered without a name are skipped for backward
        # compatibility.
        if getattr(registered, "name", None) is None:
            continue
        pool_key = (registered.provider_name, registered.name)
        labels_by_pool[pool_key] = registered.supported_labels

    # Every named pool reports a gauge, even when it can serve nothing.
    addressable = dict.fromkeys(labels_by_pool, 0)

    for request in zk_conn.nodeRequestIterator(cached_ids=True):
        for pool_key, labels in labels_by_pool.items():
            if any(wanted not in labels for wanted in request.node_types):
                # At least one requested label is unsupported here.
                continue
            addressable[pool_key] += 1

    for (provider_name, pool_name), total in addressable.items():
        # nodepool.provider.PROVIDER.pool.POOL.addressable_requests
        stat_key = (f"nodepool.provider.{provider_name}"
                    f".pool.{pool_name}.addressable_requests")
        pipeline.gauge(stat_key, total)
    pipeline.send()

View File

@ -0,0 +1,64 @@
# Test fixture: two fake providers whose pools cover different label sets.
# NOTE(review): the curly-brace values below look like template placeholders
# substituted by the test harness -- confirm before adding literal braces.
elements-dir: .
images-dir: '{images_dir}'
build-log-dir: '{build_log_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
zookeeper-tls:
ca: {zookeeper_ca}
cert: {zookeeper_cert}
key: {zookeeper_key}
# Two labels are declared; only fake-label sets min-ready.
labels:
- name: fake-label
min-ready: 1
- name: fake-label2
providers:
# First provider: its "main" pool lists fake-label only.
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
# Second provider: its "main" pool lists both fake-label and fake-label2.
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
- name: fake-label2
diskimage: fake-image
min-ram: 8192
# Single disk image definition referenced by both providers above.
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
dib-cmd: nodepool/tests/fake-image-create
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -2803,3 +2803,48 @@ class TestLauncher(tests.DBTestCase):
self.assertEqual(len(req3.nodes), 1)
node3 = self.zk.getNode(req3.nodes[0])
self.assertEqual(node3.provider, 'low-provider')
def test_requests_by_provider_stats(self):
    """Check the per-pool ``addressable_requests`` gauges.

    Two requests are submitted one after the other, the first for
    ``fake-label`` and the second for ``fake-label2``, and each must
    be fulfilled.  The gauge for ``fake-provider`` is then expected
    to read 1 and the one for ``fake-provider2`` to read 2.
    """
    config_path = self.setup_config('node_two_providers_two_labels.yaml')
    self.useBuilder(config_path)
    for provider in ('fake-provider', 'fake-provider2'):
        self.waitForImage(provider, 'fake-image')

    nodepool.launcher.LOCK_CLEANUP = 1
    launcher = self.useNodepool(config_path, watermark_sleep=1)
    self.startPool(launcher)

    # Submit the requests sequentially; each one has to be fulfilled
    # before the next is stored.
    for label in ('fake-label', 'fake-label2'):
        request = zk.NodeRequest()
        request.state = zk.REQUESTED
        request.node_types.append(label)
        request.requestor = 'unit-test'
        self.zk.storeNodeRequest(request)
        request = self.waitForNodeRequest(request)
        self.assertEqual(request.state, zk.FULFILLED)

    expected_gauges = (
        ('fake-provider', '1'),
        ('fake-provider2', '2'),
    )
    for provider, expected in expected_gauges:
        self.assertReportedStat(
            f'nodepool.provider.{provider}.pool.main.addressable_requests',
            value=expected, kind='g')

View File

@ -0,0 +1,7 @@
---
features:
- |
Nodepool now exports a new metric called
:zuul:stat:`nodepool.provider.<provider>.pool.<pool>.addressable_requests`.
This gauge reports, for each provider pool, the number of open node
requests that the pool can address.