Add ready endpoint to webapp
When running nodepool launchers in Kubernetes, a common way to update nodepool or its configuration is a rolling restart: start a new launcher, wait until it is ready, and then tear down the old instance. Today this cannot be done safely when only one instance serves a label, because there is no reliable way to tell when the new instance is fully started; tearing down the old instance too early results in node_failures for all in-flight node requests that can only be served by that provider. Adding a /ready endpoint to the webapp makes the hand-over deterministic by letting Kubernetes readiness checks drive it.

Change-Id: I53e77f3d8aaa4742ce2a89c1179e8563f850270e
parent: 2a3d4f842b
commit: f7f0821e98
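For illustration only, the rolling-restart gate described in the commit message can be expressed as a small polling script. This is a minimal sketch, not part of the change: the URL, timeout, and what happens after the check are placeholders, and in a real Kubernetes deployment the same check would normally be performed by a readiness probe rather than by hand.

# Minimal sketch of the rolling-restart gate described in the commit
# message.  The URL and timeout are placeholders; the only
# nodepool-specific piece is the GET against /ready.
import time
from urllib import request
from urllib.error import HTTPError, URLError


def wait_until_ready(url, timeout=300, interval=5):
    """Poll /ready until it answers 200 or the timeout expires."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with request.urlopen(url, timeout=interval) as resp:
                if resp.status == 200:
                    return True
        except (HTTPError, URLError):
            pass  # still starting up (503) or not reachable yet
        time.sleep(interval)
    return False


if wait_until_ready("http://new-launcher.example.org:8005/ready"):
    print("new launcher is ready; the old instance can be torn down")
else:
    raise SystemExit("new launcher did not become ready; keep the old one")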
@@ -394,6 +394,12 @@ launchers, all will provide the same information.

   :resheader Content-Type: ``application/json`` or ``text/plain``
     depending on the :http:header:`Accept` header

.. http:get:: /ready

   Responds with status code 200 as soon as all configured providers are
   fully started. During startup it returns 503. This can be used as a
   readiness probe in a Kubernetes-based deployment.

Monitoring
----------

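In a Kubernetes deployment this endpoint is typically wired up as an HTTP readiness probe against the webapp port. A one-shot check with the same semantics (exit 0 when /ready answers 200, non-zero while nodepool is still starting or unreachable) could look like the sketch below; the address and port are assumptions and depend on how the webapp is configured.

# One-shot readiness check mirroring what an HTTP readiness probe does:
# exit 0 when /ready returns 200, exit 1 on 503 (still starting) or when
# the webapp is unreachable.  Address and port are assumptions.
import sys
from urllib import request
from urllib.error import HTTPError, URLError

try:
    with request.urlopen("http://127.0.0.1:8005/ready", timeout=5) as resp:
        sys.exit(0 if resp.status == 200 else 1)
except (HTTPError, URLError):
    sys.exit(1)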
@@ -881,6 +881,7 @@ class NodePool(threading.Thread):
        self._delete_thread = None
        self._stats_thread = None
        self._submittedRequests = {}
        self.ready = False

    def stop(self):
        self._stopped = True
@@ -1155,4 +1156,8 @@ class NodePool(threading.Thread):
            except Exception:
                self.log.exception("Exception in main loop:")

            # At this point all providers are registered and fully functional
            # so we can mark nodepool as ready.
            self.ready = True

            self._stop_event.wait(self.watermark_sleep)
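Stripped of the nodepool specifics, the pattern added here is a worker thread that flips a boolean once its startup work has completed, which readers then poll. The following self-contained sketch of that pattern uses made-up class and method names purely for illustration.

# Self-contained sketch of the readiness-flag pattern: the thread marks
# itself ready once startup work has finished, and the flag only ever
# goes from False to True, so a plain bool is enough.
import threading
import time


class Launcher(threading.Thread):
    def __init__(self):
        super().__init__(daemon=True)
        self.ready = False
        self._stopped = False

    def run(self):
        self._configure_providers()  # stand-in for the real startup work
        self.ready = True            # from now on /ready may answer 200
        while not self._stopped:
            time.sleep(0.1)          # stand-in for the real main loop

    def stop(self):
        self._stopped = True

    def _configure_providers(self):
        time.sleep(0.2)


launcher = Launcher()
launcher.start()
while not launcher.ready:            # what the webapp handler checks
    time.sleep(0.05)
print("launcher is ready")
launcher.stop()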
@@ -17,9 +17,11 @@ import json
import logging
import yaml
from urllib import request
from urllib.error import HTTPError

from nodepool import tests
from nodepool import zk
from nodepool.nodeutils import iterate_timeout


class TestWebApp(tests.DBTestCase):
@@ -248,3 +250,30 @@ class TestWebApp(tests.DBTestCase):
        config = yaml.safe_load(open(configfile))
        self.assertEqual(config['webapp']['port'], 8080)
        self.assertEqual(config['webapp']['listen_address'], '127.0.0.1')

    def test_webapp_ready(self):
        configfile = self.setup_config('node.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)

        webapp = self.useWebApp(pool, port=0)
        webapp.start()
        port = webapp.server.socket.getsockname()[1]

        # Query the ready endpoint before the pool has been started. We
        # expect an error in this case.
        req = request.Request("http://localhost:%s/ready" % port)
        with self.assertRaises(HTTPError):
            request.urlopen(req)

        pool.start()

        # Now wait until we get a valid response.
        for _ in iterate_timeout(30, Exception, 'ready succeeds'):
            try:
                f = request.urlopen(req)
                break
            except HTTPError:
                pass

        data = f.read()
        self.assertEqual(data, b"OK")
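`iterate_timeout` comes from `nodepool.nodeutils` and is not part of this diff. Assuming it behaves like the usual helper of that name (yield attempts, sleep between them, and raise the given exception once the time budget is exhausted), a rough equivalent looks like this:

# Rough equivalent of the iterate_timeout helper used in the test above;
# the real helper's sleep interval and error message may differ.
import time


def iterate_timeout(max_seconds, exc, purpose, interval=2):
    start = time.monotonic()
    count = 0
    while time.monotonic() < start + max_seconds:
        count += 1
        yield count
        time.sleep(interval)
    raise exc("Timeout waiting for %s" % purpose)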
@@ -75,6 +75,13 @@ class WebApp(threading.Thread):
        self.server.server_close()

    def get_cache(self, path, params, request_type):
        # Handle the ready request first, as it does not need caching.
        if path == '/ready':
            if not self.nodepool.ready:
                raise webob.exc.HTTPServiceUnavailable()
            else:
                return time.time(), 'OK'

        # TODO: quick and dirty way to take query parameters
        # into account when caching data
        if params:
releasenotes/notes/ready-endpoint-064e8516190567d6.yaml (new file)
@@ -0,0 +1,5 @@
---
features:
  - |
    There is a new :http:get:`/ready` endpoint that can be used as a readiness
    probe.
Loading…
Reference in New Issue
Block a user