Add ready endpoint to webapp

When running nodepool launchers in kubernetes a common method to
update nodepool or its config is doing rolling restarts. The process
for this is to start a new nodepool, wait for it to be ready and then
tear down the old instance. Currently this is not possible without
risking node_failures when there is only one instance serving a
label. The reason for this is that there is no reliable way to
determine when the new instance is fully started which could lead to a
too early tear down of the old instance. This would result in
node_failures for all in-flight node requests that are only valid for
this provider.

Adding a /ready endpoint to the webapp can make this deterministic
using readiness checks of kubernetes.

Change-Id: I53e77f3d8aaa4742ce2a89c1179e8563f850270e
This commit is contained in:
Tobias Henkel 2019-11-19 14:23:43 +01:00
parent 2a3d4f842b
commit f7f0821e98
5 changed files with 52 additions and 0 deletions

View File

@ -394,6 +394,12 @@ launchers, all will provide the same information.
:resheader Content-Type: ``application/json`` or ``text/plain`` :resheader Content-Type: ``application/json`` or ``text/plain``
depending on the :http:header:`Accept` header depending on the :http:header:`Accept` header
.. http:get:: /ready
Responds with status code 200 as soon as all configured providers are fully
started. During startup it returns 503 (Service Unavailable). This can be used as a
readiness probe in a kubernetes based deployment.
Monitoring Monitoring
---------- ----------

View File

@ -881,6 +881,7 @@ class NodePool(threading.Thread):
self._delete_thread = None self._delete_thread = None
self._stats_thread = None self._stats_thread = None
self._submittedRequests = {} self._submittedRequests = {}
self.ready = False
def stop(self): def stop(self):
self._stopped = True self._stopped = True
@ -1155,4 +1156,8 @@ class NodePool(threading.Thread):
except Exception: except Exception:
self.log.exception("Exception in main loop:") self.log.exception("Exception in main loop:")
# At this point all providers are registered and fully functional
# so we can mark nodepool as ready.
self.ready = True
self._stop_event.wait(self.watermark_sleep) self._stop_event.wait(self.watermark_sleep)

View File

@ -17,9 +17,11 @@ import json
import logging import logging
import yaml import yaml
from urllib import request from urllib import request
from urllib.error import HTTPError
from nodepool import tests from nodepool import tests
from nodepool import zk from nodepool import zk
from nodepool.nodeutils import iterate_timeout
class TestWebApp(tests.DBTestCase): class TestWebApp(tests.DBTestCase):
@ -248,3 +250,30 @@ class TestWebApp(tests.DBTestCase):
config = yaml.safe_load(open(configfile)) config = yaml.safe_load(open(configfile))
self.assertEqual(config['webapp']['port'], 8080) self.assertEqual(config['webapp']['port'], 8080)
self.assertEqual(config['webapp']['listen_address'], '127.0.0.1') self.assertEqual(config['webapp']['listen_address'], '127.0.0.1')
def test_webapp_ready(self):
    """Check that /ready errors before the pool starts and returns OK after.

    The endpoint is queried once before ``pool.start()`` (an HTTP error is
    expected) and then polled until a request succeeds, at which point the
    response body must be ``b"OK"``.
    """
    configfile = self.setup_config('node.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    webapp = self.useWebApp(pool, port=0)
    webapp.start()
    # Read the port the server socket is actually bound to.
    port = webapp.server.socket.getsockname()[1]

    # Query ready endpoint before the pool has been started. We expect
    # an error in this case.
    req = request.Request("http://localhost:%s/ready" % port)
    # NOTE(review): the callable is handed to assertRaises *and* the result
    # is used as a context manager. Whether that is valid depends on what
    # the test base class's assertRaises returns - confirm before changing.
    with self.assertRaises(HTTPError, request.urlopen, req):
        pass

    pool.start()

    # Now wait until we get a valid response.
    # presumably iterate_timeout raises once the 30 second budget is
    # exhausted, so `data` is always bound after the loop - TODO confirm.
    for _ in iterate_timeout(30, Exception, 'ready succeeds'):
        try:
            resp = request.urlopen(req)
        except HTTPError:
            # Endpoint still reports an error; poll again.
            continue
        # Close the response deterministically instead of leaking the
        # underlying socket until garbage collection.
        with resp:
            data = resp.read()
        break

    self.assertEqual(data, b"OK")

View File

@ -75,6 +75,13 @@ class WebApp(threading.Thread):
self.server.server_close() self.server.server_close()
def get_cache(self, path, params, request_type): def get_cache(self, path, params, request_type):
# At first process ready request as this doesn't need caching.
if path == '/ready':
if not self.nodepool.ready:
raise webob.exc.HTTPServiceUnavailable()
else:
return time.time(), 'OK'
# TODO quick and dirty way to take query parameters # TODO quick and dirty way to take query parameters
# into account when caching data # into account when caching data
if params: if params:

View File

@ -0,0 +1,5 @@
---
features:
- |
There is a new :http:get:`/ready` endpoint that can be used as a readiness
probe.