diff --git a/nodepool/driver/__init__.py b/nodepool/driver/__init__.py index 93b994afa..694267328 100644 --- a/nodepool/driver/__init__.py +++ b/nodepool/driver/__init__.py @@ -20,6 +20,7 @@ import abc import six from nodepool import zk +from nodepool import exceptions @six.add_metaclass(abc.ABCMeta) @@ -179,7 +180,15 @@ class NodeRequestHandler(object): node.allocated_to = None self.zk.storeNode(node) self.unlockNodeSet() - self.zk.unlockNodeRequest(self.request) + try: + self.zk.unlockNodeRequest(self.request) + except exceptions.ZKLockException: + # If the lock object is invalid that is "ok" since we no + # longer have a request either. Just do our best, log and + # move on. + self.log.debug("Request lock invalid for node request %s " + "when attempting to clean up the lock", + self.request.id) return True if self.launch_manager.failed_nodes: diff --git a/nodepool/launcher.py b/nodepool/launcher.py index ba392dca5..bfdb799fa 100755 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -217,10 +217,19 @@ class PoolWorker(threading.Thread): ''' active_handlers = [] for r in self.request_handlers: - if not r.poll(): + try: + if not r.poll(): + active_handlers.append(r) + else: + self.log.debug("Removing handler for request %s", + r.request.id) + except Exception: + # If we fail to poll a request handler log it but move on + # and process the other handlers. We keep this handler around + # and will try again later. + self.log.exception("Error polling request handler for " + "request %s", r.request.id) active_handlers.append(r) - else: - self.log.debug("Removing handler for request %s", r.request.id) self.request_handlers = active_handlers active_reqs = [r.request.id for r in self.request_handlers] self.log.debug("Active requests: %s", active_reqs) @@ -424,8 +433,8 @@ class CleanupWorker(BaseCleanupWorker): Because the node request locks are not direct children of the request znode, we need to remove the locks separately after the request has been processed. 
Only remove them after LOCK_CLEANUP seconds have - passed. This helps prevent the scenario where a request could go - away _while_ a lock is currently held for processing and the cleanup + passed. This helps reduce the chances of the scenario where a request could + go away _while_ a lock is currently held for processing and the cleanup thread attempts to delete it. The delay should reduce the chance that we delete a currently held lock. '''