Merge "Handle invalid request lock in request cleanup" into feature/zuulv3

This commit is contained in:
Zuul 2017-12-14 15:50:52 +00:00 committed by Gerrit Code Review
commit a20d81c32c
2 changed files with 24 additions and 6 deletions

View File

@ -20,6 +20,7 @@ import abc
import six
from nodepool import zk
from nodepool import exceptions
@six.add_metaclass(abc.ABCMeta)
@ -179,7 +180,15 @@ class NodeRequestHandler(object):
node.allocated_to = None
self.zk.storeNode(node)
self.unlockNodeSet()
self.zk.unlockNodeRequest(self.request)
try:
self.zk.unlockNodeRequest(self.request)
except exceptions.ZKLockException:
# If the lock object is invalid, that is "ok" since we no
# longer have a request either. Just do our best, log it, and
# move on.
self.log.debug("Request lock invalid for node request %s "
"when attempting to clean up the lock",
self.request.id)
return True
if self.launch_manager.failed_nodes:

View File

@ -217,10 +217,19 @@ class PoolWorker(threading.Thread):
'''
active_handlers = []
for r in self.request_handlers:
if not r.poll():
try:
if not r.poll():
active_handlers.append(r)
else:
self.log.debug("Removing handler for request %s",
r.request.id)
except Exception:
# If we fail to poll a request handler, log it but move on
# and process the other handlers. We keep this handler around
# and will try again later.
self.log.exception("Error polling request handler for "
"request %s", r.request.id)
active_handlers.append(r)
else:
self.log.debug("Removing handler for request %s", r.request.id)
self.request_handlers = active_handlers
active_reqs = [r.request.id for r in self.request_handlers]
self.log.debug("Active requests: %s", active_reqs)
@ -424,8 +433,8 @@ class CleanupWorker(BaseCleanupWorker):
Because the node request locks are not direct children of the request
znode, we need to remove the locks separately after the request has
been processed. Only remove them after LOCK_CLEANUP seconds have
passed. This helps prevent the scenario where a request could go
away _while_ a lock is currently held for processing and the cleanup
passed. This helps reduce the chances of a scenario where a request could
go away _while_ a lock is currently held for processing and the cleanup
thread attempts to delete it. The delay should reduce the chance that
we delete a currently held lock.
'''