diff --git a/doc/source/operation.rst b/doc/source/operation.rst index 313ef8fde..41950d44e 100644 --- a/doc/source/operation.rst +++ b/doc/source/operation.rst @@ -294,6 +294,16 @@ Nodepool builder Number of image uploads to a specific provider in the cloud plus the time in seconds spent to upload the image. +.. zuul:stat:: nodepool.dib_image_build.<diskimage_name>.<ext>.rc + :type: gauge + + Return code of the last DIB run. + +.. zuul:stat:: nodepool.dib_image_build.<diskimage_name>.<ext>.duration + :type: timer + + Time the DIB run took, in milliseconds. + Nodepool launcher ~~~~~~~~~~~~~~~~~ diff --git a/nodepool/builder.py b/nodepool/builder.py index bd29c034b..bb00e6771 100755 --- a/nodepool/builder.py +++ b/nodepool/builder.py @@ -749,6 +749,7 @@ class BuildWorker(BaseWorker): self.log.info('Running %s' % (cmd,)) self.log.info('Logging to %s' % (log_fn,)) + start_time = time.monotonic() try: p = subprocess.Popen( shlex.split(cmd), @@ -784,6 +785,8 @@ class BuildWorker(BaseWorker): if did_suspend: self.log.info("ZooKeeper available. Resuming") + build_time = time.monotonic() - start_time + build_data = zk.ImageBuild() build_data.builder_id = self._builder_id build_data.builder = self._hostname @@ -817,6 +820,15 @@ class BuildWorker(BaseWorker): (diskimage.name, filename, ext, size)) self._statsd.gauge(key, size) + if self._statsd: + # report result to statsd + for ext in img_types.split(','): + key_base = 'nodepool.dib_image_build.%s.%s' % ( + diskimage.name, ext) + self._statsd.gauge(key_base + '.rc', rc) + self._statsd.timing(key_base + '.duration', + int(build_time * 1000)) + return build_data def run(self): diff --git a/nodepool/tests/test_builder.py b/nodepool/tests/test_builder.py index 1a1220d80..c992bf7aa 100644 --- a/nodepool/tests/test_builder.py +++ b/nodepool/tests/test_builder.py @@ -307,6 +307,10 @@ class TestNodePoolBuilder(tests.DBTestCase): self.waitForImage('fake-provider', 'fake-image') # Make sure our cleanup worker properly removes the first build.
self.waitForBuildDeletion('fake-image', '0000000001') + self.assertReportedStat('nodepool.dib_image_build.fake-image.qcow2.rc', + '127', 'g') + self.assertReportedStat('nodepool.dib_image_build.' + 'fake-image.qcow2.duration', None, 'ms') def test_diskimage_build_only(self): configfile = self.setup_config('node_diskimage_only.yaml') @@ -317,6 +321,10 @@ class TestNodePoolBuilder(tests.DBTestCase): self.assertEqual(build_tar._formats, ['tar']) self.assertEqual(build_default._formats, ['qcow2']) + self.assertReportedStat('nodepool.dib_image_build.fake-image.tar.rc', + '0', 'g') + self.assertReportedStat('nodepool.dib_image_build.' + 'fake-image.tar.duration', None, 'ms') def test_diskimage_build_formats(self): configfile = self.setup_config('node_diskimage_formats.yaml') diff --git a/releasenotes/notes/build-result-metrics-deb2aaa329830f8a.yaml b/releasenotes/notes/build-result-metrics-deb2aaa329830f8a.yaml new file mode 100644 index 000000000..26c88959e --- /dev/null +++ b/releasenotes/notes/build-result-metrics-deb2aaa329830f8a.yaml @@ -0,0 +1,9 @@ +--- +features: + - | + Two new metrics are now reported after each run of the diskimage builder: + nodepool.dib_image_build.<diskimage_name>.<ext>.rc will be set to + the last result code of the diskimage builder. This metric can be used to + set up alerting for failed disk image builds. + nodepool.dib_image_build.<diskimage_name>.<ext>.duration will + receive the time it took to build the disk image.