diff --git a/doc/source/operation.rst b/doc/source/operation.rst index fec7c9aae..497034c67 100644 --- a/doc/source/operation.rst +++ b/doc/source/operation.rst @@ -394,6 +394,12 @@ launchers, all will provide the same information. :resheader Content-Type: ``application/json`` or ``text/plain`` depending on the :http:header:`Accept` header +.. http:get:: /ready + + Responds with status code 200 as soon as all configured providers are fully + started. During startup it returns 503 (Service Unavailable). This can be used as a + readiness probe in a Kubernetes-based deployment. + Monitoring ---------- diff --git a/nodepool/launcher.py b/nodepool/launcher.py index a8d84e3ce..6ba7f211d 100644 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -881,6 +881,7 @@ class NodePool(threading.Thread): self._delete_thread = None self._stats_thread = None self._submittedRequests = {} + self.ready = False def stop(self): self._stopped = True @@ -1155,4 +1156,8 @@ class NodePool(threading.Thread): except Exception: self.log.exception("Exception in main loop:") + # At this point all providers are registered and fully functional + # so we can mark nodepool as ready. 
+ self.ready = True + self._stop_event.wait(self.watermark_sleep) diff --git a/nodepool/tests/unit/test_webapp.py b/nodepool/tests/unit/test_webapp.py index 7fd7249c8..797c6bb31 100644 --- a/nodepool/tests/unit/test_webapp.py +++ b/nodepool/tests/unit/test_webapp.py @@ -17,9 +17,11 @@ import json import logging import yaml from urllib import request +from urllib.error import HTTPError from nodepool import tests from nodepool import zk +from nodepool.nodeutils import iterate_timeout class TestWebApp(tests.DBTestCase): @@ -248,3 +250,30 @@ class TestWebApp(tests.DBTestCase): config = yaml.safe_load(open(configfile)) self.assertEqual(config['webapp']['port'], 8080) self.assertEqual(config['webapp']['listen_address'], '127.0.0.1') + + def test_webapp_ready(self): + configfile = self.setup_config('node.yaml') + pool = self.useNodepool(configfile, watermark_sleep=1) + + webapp = self.useWebApp(pool, port=0) + webapp.start() + port = webapp.server.socket.getsockname()[1] + + # Query ready endpoint before the pool has been started. We expect + # an error in this case. + req = request.Request("http://localhost:%s/ready" % port) + with self.assertRaises(HTTPError, request.urlopen, req): + pass + + pool.start() + + # Now wait until we get a valid response. + for _ in iterate_timeout(30, Exception, 'ready succeeds'): + try: + f = request.urlopen(req) + break + except HTTPError: + pass + + data = f.read() + self.assertEqual(data, b"OK") diff --git a/nodepool/webapp.py b/nodepool/webapp.py index 28947115e..49c7a88e7 100644 --- a/nodepool/webapp.py +++ b/nodepool/webapp.py @@ -75,6 +75,13 @@ class WebApp(threading.Thread): self.server.server_close() def get_cache(self, path, params, request_type): + # Handle the ready request first, as it doesn't need caching. 
+ if path == '/ready': + if not self.nodepool.ready: + raise webob.exc.HTTPServiceUnavailable() + else: + return time.time(), 'OK' + # TODO quick and dirty way to take query parameters # into account when caching data if params: diff --git a/releasenotes/notes/ready-endpoint-064e8516190567d6.yaml b/releasenotes/notes/ready-endpoint-064e8516190567d6.yaml new file mode 100644 index 000000000..08a8d6c92 --- /dev/null +++ b/releasenotes/notes/ready-endpoint-064e8516190567d6.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + There is a new :http:get:`/ready` endpoint that can be used as a readiness + probe.