diff --git a/nodepool/launcher.py b/nodepool/launcher.py index bd1fce8e0..bb5234435 100755 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -395,7 +395,12 @@ class CleanupWorker(BaseCleanupWorker): except exceptions.ZKLockException: continue - self._resetLostRequest(zk_conn, req) + try: + self._resetLostRequest(zk_conn, req) + except Exception: + self.log.exception("Error resetting lost request %s:", + req.id) + zk_conn.unlockNodeRequest(req) def _cleanupNodeRequestLocks(self): diff --git a/nodepool/zk.py b/nodepool/zk.py index 78829582e..61d906d70 100755 --- a/nodepool/zk.py +++ b/nodepool/zk.py @@ -625,6 +625,9 @@ class ZooKeeper(object): except kze.LockTimeout: raise npe.TimeoutException( "Timeout trying to acquire lock %s" % lock_path) + except kze.NoNodeError: + have_lock = False + self.log.error("Image build not found for locking: %s", image) # If we aren't blocking, it's possible we didn't get the lock # because someone else has it. @@ -642,6 +645,10 @@ class ZooKeeper(object): except kze.LockTimeout: raise npe.TimeoutException( "Timeout trying to acquire lock %s" % lock_path) + except kze.NoNodeError: + have_lock = False + self.log.error("Image build number not found for locking: %s, %s", + build_number, image) # If we aren't blocking, it's possible we didn't get the lock # because someone else has it. @@ -659,6 +666,10 @@ class ZooKeeper(object): except kze.LockTimeout: raise npe.TimeoutException( "Timeout trying to acquire lock %s" % lock_path) + except kze.NoNodeError: + have_lock = False + self.log.error("Image upload not found for locking: %s, %s, %s", + build_number, provider, image) # If we aren't blocking, it's possible we didn't get the lock # because someone else has it. @@ -1436,6 +1447,9 @@ class ZooKeeper(object): except kze.LockTimeout: raise npe.TimeoutException( "Timeout trying to acquire lock %s" % path) + except kze.NoNodeError: + have_lock = False + self.log.error("Request not found for locking: %s", request) # If we aren't blocking, it's possible we didn't get the lock # because someone else has it. @@ -1483,6 +1497,9 @@ class ZooKeeper(object): except kze.LockTimeout: raise npe.TimeoutException( "Timeout trying to acquire lock %s" % path) + except kze.NoNodeError: + have_lock = False + self.log.error("Node not found for locking: %s", node) # If we aren't blocking, it's possible we didn't get the lock # because someone else has it.