Add ready endpoint to webapp
When running nodepool launchers in kubernetes a common method to update nodepool or its config is doing rolling restarts. The process for this is to start a new nodepool instance, wait for it to be ready and then tear down the old instance. Currently this is not possible without risking node_failures when there is only one instance serving a label. The reason for this is that there is no reliable way to determine when the new instance is fully started, which could lead to a premature teardown of the old instance. This would result in node_failures for all in-flight node requests that are only valid for this provider. Adding a /ready endpoint to the webapp can make this deterministic using readiness checks of kubernetes. Change-Id: I53e77f3d8aaa4742ce2a89c1179e8563f850270e
This commit is contained in:
parent
2a3d4f842b
commit
f7f0821e98
@ -394,6 +394,12 @@ launchers, all will provide the same information.
|
|||||||
:resheader Content-Type: ``application/json`` or ``text/plain``
|
:resheader Content-Type: ``application/json`` or ``text/plain``
|
||||||
depending on the :http:header:`Accept` header
|
depending on the :http:header:`Accept` header
|
||||||
|
|
||||||
|
.. http:get:: /ready
|
||||||
|
|
||||||
|
Responds with status code 200 as soon as all configured providers are fully
|
||||||
|
started. During startup it returns 503. This can be used as a
|
||||||
|
readiness probe in a kubernetes based deployment.
|
||||||
|
|
||||||
Monitoring
|
Monitoring
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
@ -881,6 +881,7 @@ class NodePool(threading.Thread):
|
|||||||
self._delete_thread = None
|
self._delete_thread = None
|
||||||
self._stats_thread = None
|
self._stats_thread = None
|
||||||
self._submittedRequests = {}
|
self._submittedRequests = {}
|
||||||
|
self.ready = False
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
self._stopped = True
|
self._stopped = True
|
||||||
@ -1155,4 +1156,8 @@ class NodePool(threading.Thread):
|
|||||||
except Exception:
|
except Exception:
|
||||||
self.log.exception("Exception in main loop:")
|
self.log.exception("Exception in main loop:")
|
||||||
|
|
||||||
|
# At this point all providers are registered and fully functional
|
||||||
|
# so we can mark nodepool as ready.
|
||||||
|
self.ready = True
|
||||||
|
|
||||||
self._stop_event.wait(self.watermark_sleep)
|
self._stop_event.wait(self.watermark_sleep)
|
||||||
|
@ -17,9 +17,11 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import yaml
|
import yaml
|
||||||
from urllib import request
|
from urllib import request
|
||||||
|
from urllib.error import HTTPError
|
||||||
|
|
||||||
from nodepool import tests
|
from nodepool import tests
|
||||||
from nodepool import zk
|
from nodepool import zk
|
||||||
|
from nodepool.nodeutils import iterate_timeout
|
||||||
|
|
||||||
|
|
||||||
class TestWebApp(tests.DBTestCase):
|
class TestWebApp(tests.DBTestCase):
|
||||||
@ -248,3 +250,30 @@ class TestWebApp(tests.DBTestCase):
|
|||||||
config = yaml.safe_load(open(configfile))
|
config = yaml.safe_load(open(configfile))
|
||||||
self.assertEqual(config['webapp']['port'], 8080)
|
self.assertEqual(config['webapp']['port'], 8080)
|
||||||
self.assertEqual(config['webapp']['listen_address'], '127.0.0.1')
|
self.assertEqual(config['webapp']['listen_address'], '127.0.0.1')
|
||||||
|
|
||||||
|
def test_webapp_ready(self):
|
||||||
|
configfile = self.setup_config('node.yaml')
|
||||||
|
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||||
|
|
||||||
|
webapp = self.useWebApp(pool, port=0)
|
||||||
|
webapp.start()
|
||||||
|
port = webapp.server.socket.getsockname()[1]
|
||||||
|
|
||||||
|
# Query ready endpoint before the pool has been started. We expect
|
||||||
|
# an error in this case.
|
||||||
|
req = request.Request("http://localhost:%s/ready" % port)
|
||||||
|
with self.assertRaises(HTTPError, request.urlopen, req):
|
||||||
|
pass
|
||||||
|
|
||||||
|
pool.start()
|
||||||
|
|
||||||
|
# Now wait until we get a valid response.
|
||||||
|
for _ in iterate_timeout(30, Exception, 'ready succeeds'):
|
||||||
|
try:
|
||||||
|
f = request.urlopen(req)
|
||||||
|
break
|
||||||
|
except HTTPError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
data = f.read()
|
||||||
|
self.assertEqual(data, b"OK")
|
||||||
|
@ -75,6 +75,13 @@ class WebApp(threading.Thread):
|
|||||||
self.server.server_close()
|
self.server.server_close()
|
||||||
|
|
||||||
def get_cache(self, path, params, request_type):
|
def get_cache(self, path, params, request_type):
|
||||||
|
# At first process ready request as this doesn't need caching.
|
||||||
|
if path == '/ready':
|
||||||
|
if not self.nodepool.ready:
|
||||||
|
raise webob.exc.HTTPServiceUnavailable()
|
||||||
|
else:
|
||||||
|
return time.time(), 'OK'
|
||||||
|
|
||||||
# TODO quick and dirty way to take query parameters
|
# TODO quick and dirty way to take query parameters
|
||||||
# into account when caching data
|
# into account when caching data
|
||||||
if params:
|
if params:
|
||||||
|
5
releasenotes/notes/ready-endpoint-064e8516190567d6.yaml
Normal file
5
releasenotes/notes/ready-endpoint-064e8516190567d6.yaml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
There is a new :http:get:`/ready` endpoint that can be used as a readiness
|
||||||
|
probe.
|
Loading…
Reference in New Issue
Block a user