From 18c8e43c2a770a51afcbf69ad6da7f04834fd55e Mon Sep 17 00:00:00 2001 From: Samuel Surovka Date: Fri, 9 Feb 2024 14:20:57 +0100 Subject: [PATCH] Add a new metric, for handleable requests per provider Added a new metric, that exports a gauge which counts how many open requests can each provider handle. Added a test case for this as well. Change-Id: I712204afbd2ec63d929df493336702786957f05c --- doc/source/operation.rst | 5 ++ nodepool/launcher.py | 1 + nodepool/stats.py | 38 +++++++++++ .../node_two_providers_two_labels.yaml | 64 +++++++++++++++++++ nodepool/tests/unit/test_launcher.py | 45 +++++++++++++ ...ider-requests-metric-002ae3813555ede1.yaml | 7 ++ 6 files changed, 160 insertions(+) create mode 100644 nodepool/tests/fixtures/node_two_providers_two_labels.yaml create mode 100644 releasenotes/notes/per-provider-requests-metric-002ae3813555ede1.yaml diff --git a/doc/source/operation.rst b/doc/source/operation.rst index b8192f6b0..10596c2b3 100644 --- a/doc/source/operation.rst +++ b/doc/source/operation.rst @@ -632,6 +632,11 @@ Provider Metrics Number of leaked volumes removed automatically by Nodepool. +.. zuul:stat:: nodepool.provider.<provider>.pool.<pool>.addressable_requests + :type: gauge + + Number of open node requests a provider pool can address. 
+ Launch metrics ^^^^^^^^^^^^^^ diff --git a/nodepool/launcher.py b/nodepool/launcher.py index 555e747af..85536dd60 100644 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -1020,6 +1020,7 @@ class StatsWorker(BaseCleanupWorker, stats.StatsReporter): self.stats_event.clear() try: self.updateNodeStats(zk) + self.updateNodeRequestStats(zk) except Exception: self.log.exception("Exception while reporting stats:") time.sleep(1) diff --git a/nodepool/stats.py b/nodepool/stats.py index 6c580518d..14929e539 100644 --- a/nodepool/stats.py +++ b/nodepool/stats.py @@ -180,3 +180,41 @@ class StatsReporter(object): key = key_template % (tenant, k) pipeline.gauge(key, lim) pipeline.send() + + def updateNodeRequestStats(self, zk_conn): + if not self._statsd: + return + + pipeline = self._statsd.pipeline() + provider_requests = {} + + provider_supported_labels = {} + for pool in zk_conn.getRegisteredPools(): + if not hasattr(pool, "name") or pool.name is None: + # skip pools without name attribute for backward compatibility + continue + provider_supported_labels[ + (pool.provider_name, pool.name)] = pool.supported_labels + provider_requests[(pool.provider_name, pool.name)] = 0 + + for node_request in zk_conn.nodeRequestIterator(cached_ids=True): + for (provider, pool), supported_labels in ( + provider_supported_labels.items()): + if all( + label in supported_labels + for label in node_request.node_types + ): + provider_requests[(provider, pool)] += 1 + + for (provider_name, + pool_name), requests_count in provider_requests.items(): + # nodepool.provider.PROVIDER.pool.POOL.addressable_requests + metric = ("nodepool." + "provider." + f"{provider_name}." + "pool." + f"{pool_name}." 
+ "addressable_requests") + pipeline.gauge(metric, requests_count) + + pipeline.send() diff --git a/nodepool/tests/fixtures/node_two_providers_two_labels.yaml b/nodepool/tests/fixtures/node_two_providers_two_labels.yaml new file mode 100644 index 000000000..a155a42b9 --- /dev/null +++ b/nodepool/tests/fixtures/node_two_providers_two_labels.yaml @@ -0,0 +1,64 @@ +elements-dir: . +images-dir: '{images_dir}' +build-log-dir: '{build_log_dir}' + +zookeeper-servers: + - host: {zookeeper_host} + port: {zookeeper_port} + chroot: {zookeeper_chroot} + +zookeeper-tls: + ca: {zookeeper_ca} + cert: {zookeeper_cert} + key: {zookeeper_key} + +labels: + - name: fake-label + min-ready: 1 + - name: fake-label2 + +providers: + - name: fake-provider + cloud: fake + driver: fake + region-name: fake-region + rate: 0.0001 + diskimages: + - name: fake-image + pools: + - name: main + max-servers: 96 + labels: + - name: fake-label + diskimage: fake-image + min-ram: 8192 + - name: fake-provider2 + cloud: fake + driver: fake + region-name: fake-region + rate: 0.0001 + diskimages: + - name: fake-image + pools: + - name: main + max-servers: 96 + labels: + - name: fake-label + diskimage: fake-image + min-ram: 8192 + - name: fake-label2 + diskimage: fake-image + min-ram: 8192 + +diskimages: + - name: fake-image + elements: + - fedora + - vm + release: 21 + dib-cmd: nodepool/tests/fake-image-create + env-vars: + TMPDIR: /opt/dib_tmp + DIB_IMAGE_CACHE: /opt/dib_cache + DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/ + BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2 diff --git a/nodepool/tests/unit/test_launcher.py b/nodepool/tests/unit/test_launcher.py index fc11d247f..99a8a0fa4 100644 --- a/nodepool/tests/unit/test_launcher.py +++ b/nodepool/tests/unit/test_launcher.py @@ -2803,3 +2803,48 @@ class TestLauncher(tests.DBTestCase): self.assertEqual(len(req3.nodes), 1) node3 = self.zk.getNode(req3.nodes[0]) 
self.assertEqual(node3.provider, 'low-provider') + + def test_requests_by_provider_stats(self): + configfile = self.setup_config('node_two_providers_two_labels.yaml') + self.useBuilder(configfile) + self.waitForImage('fake-provider', 'fake-image') + self.waitForImage('fake-provider2', 'fake-image') + + nodepool.launcher.LOCK_CLEANUP = 1 + pool = self.useNodepool(configfile, watermark_sleep=1) + self.startPool(pool) + + req = zk.NodeRequest() + req.state = zk.REQUESTED + req.node_types.append('fake-label') + req.requestor = 'unit-test' + self.zk.storeNodeRequest(req) + + req = self.waitForNodeRequest(req) + self.assertEqual(req.state, zk.FULFILLED) + + req2 = zk.NodeRequest() + req2.state = zk.REQUESTED + req2.node_types.append('fake-label2') + req2.requestor = 'unit-test' + self.zk.storeNodeRequest(req2) + + req2 = self.waitForNodeRequest(req2) + self.assertEqual(req2.state, zk.FULFILLED) + + self.assertReportedStat( + 'nodepool.' + 'provider.' + 'fake-provider.' + 'pool.' + 'main.' + 'addressable_requests', + value='1', kind='g') + self.assertReportedStat( + 'nodepool.' + 'provider.' + 'fake-provider2.' + 'pool.' + 'main.' + 'addressable_requests', + value='2', kind='g') diff --git a/releasenotes/notes/per-provider-requests-metric-002ae3813555ede1.yaml b/releasenotes/notes/per-provider-requests-metric-002ae3813555ede1.yaml new file mode 100644 index 000000000..54c574036 --- /dev/null +++ b/releasenotes/notes/per-provider-requests-metric-002ae3813555ede1.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + Nodepool now exports a new metric called + :zuul:stat:`nodepool.provider.<provider>.pool.<pool>.addressable_requests`. + This gauge shows provider pools, and the number of open node requests + which they can address.