Improve exception handling around lost requests
An unexpected exception while resetting a lost request left the request in a locked state, so it would never be processed again. Catch and log any exception raised during the reset so that the request is still unlocked afterwards. Also, raise a ZKLockException if the thing we've been asked to lock has disappeared for some reason (this was the unhandled exception that caused the lost request handling to fail). Change-Id: Ie3e91714edc482b7b4fb99d7992cae999b1b7026
This commit is contained in:
parent
c739eec853
commit
8dc91bb752
@ -395,7 +395,12 @@ class CleanupWorker(BaseCleanupWorker):
|
||||
except exceptions.ZKLockException:
|
||||
continue
|
||||
|
||||
self._resetLostRequest(zk_conn, req)
|
||||
try:
|
||||
self._resetLostRequest(zk_conn, req)
|
||||
except Exception:
|
||||
self.log.exception("Error resetting lost request %s:",
|
||||
req.id)
|
||||
|
||||
zk_conn.unlockNodeRequest(req)
|
||||
|
||||
def _cleanupNodeRequestLocks(self):
|
||||
|
@ -625,6 +625,9 @@ class ZooKeeper(object):
|
||||
except kze.LockTimeout:
|
||||
raise npe.TimeoutException(
|
||||
"Timeout trying to acquire lock %s" % lock_path)
|
||||
except kze.NoNodeError:
|
||||
have_lock = False
|
||||
self.log.error("Image build not found for locking: %s", image)
|
||||
|
||||
# If we aren't blocking, it's possible we didn't get the lock
|
||||
# because someone else has it.
|
||||
@ -642,6 +645,10 @@ class ZooKeeper(object):
|
||||
except kze.LockTimeout:
|
||||
raise npe.TimeoutException(
|
||||
"Timeout trying to acquire lock %s" % lock_path)
|
||||
except kze.NoNodeError:
|
||||
have_lock = False
|
||||
self.log.error("Image build number not found for locking: %s, %s",
|
||||
build_number, image)
|
||||
|
||||
# If we aren't blocking, it's possible we didn't get the lock
|
||||
# because someone else has it.
|
||||
@ -659,6 +666,10 @@ class ZooKeeper(object):
|
||||
except kze.LockTimeout:
|
||||
raise npe.TimeoutException(
|
||||
"Timeout trying to acquire lock %s" % lock_path)
|
||||
except kze.NoNodeError:
|
||||
have_lock = False
|
||||
self.log.error("Image upload not found for locking: %s, %s, %s",
|
||||
build_number, provider, image)
|
||||
|
||||
# If we aren't blocking, it's possible we didn't get the lock
|
||||
# because someone else has it.
|
||||
@ -1436,6 +1447,9 @@ class ZooKeeper(object):
|
||||
except kze.LockTimeout:
|
||||
raise npe.TimeoutException(
|
||||
"Timeout trying to acquire lock %s" % path)
|
||||
except kze.NoNodeError:
|
||||
have_lock = False
|
||||
self.log.error("Request not found for locking: %s", request)
|
||||
|
||||
# If we aren't blocking, it's possible we didn't get the lock
|
||||
# because someone else has it.
|
||||
@ -1483,6 +1497,9 @@ class ZooKeeper(object):
|
||||
except kze.LockTimeout:
|
||||
raise npe.TimeoutException(
|
||||
"Timeout trying to acquire lock %s" % path)
|
||||
except kze.NoNodeError:
|
||||
have_lock = False
|
||||
self.log.error("Node not found for locking: %s", node)
|
||||
|
||||
# If we aren't blocking, it's possible we didn't get the lock
|
||||
# because someone else has it.
|
||||
|
Loading…
Reference in New Issue
Block a user