Improve exception handling around lost requests
An unexpected exception while resetting a lost request left the request in a locked state, so it would never get processed again. Catch and log any such exception so that the request is always unlocked afterwards. Also, raise a ZKLockException if the thing we've been asked to lock has disappeared for some reason (this was the unhandled exception that caused the lost request handling to fail).
Change-Id: Ie3e91714edc482b7b4fb99d7992cae999b1b7026
This commit is contained in:
parent
c739eec853
commit
8dc91bb752
@ -395,7 +395,12 @@ class CleanupWorker(BaseCleanupWorker):
|
|||||||
except exceptions.ZKLockException:
|
except exceptions.ZKLockException:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self._resetLostRequest(zk_conn, req)
|
try:
|
||||||
|
self._resetLostRequest(zk_conn, req)
|
||||||
|
except Exception:
|
||||||
|
self.log.exception("Error resetting lost request %s:",
|
||||||
|
req.id)
|
||||||
|
|
||||||
zk_conn.unlockNodeRequest(req)
|
zk_conn.unlockNodeRequest(req)
|
||||||
|
|
||||||
def _cleanupNodeRequestLocks(self):
|
def _cleanupNodeRequestLocks(self):
|
||||||
|
@ -625,6 +625,9 @@ class ZooKeeper(object):
|
|||||||
except kze.LockTimeout:
|
except kze.LockTimeout:
|
||||||
raise npe.TimeoutException(
|
raise npe.TimeoutException(
|
||||||
"Timeout trying to acquire lock %s" % lock_path)
|
"Timeout trying to acquire lock %s" % lock_path)
|
||||||
|
except kze.NoNodeError:
|
||||||
|
have_lock = False
|
||||||
|
self.log.error("Image build not found for locking: %s", image)
|
||||||
|
|
||||||
# If we aren't blocking, it's possible we didn't get the lock
|
# If we aren't blocking, it's possible we didn't get the lock
|
||||||
# because someone else has it.
|
# because someone else has it.
|
||||||
@ -642,6 +645,10 @@ class ZooKeeper(object):
|
|||||||
except kze.LockTimeout:
|
except kze.LockTimeout:
|
||||||
raise npe.TimeoutException(
|
raise npe.TimeoutException(
|
||||||
"Timeout trying to acquire lock %s" % lock_path)
|
"Timeout trying to acquire lock %s" % lock_path)
|
||||||
|
except kze.NoNodeError:
|
||||||
|
have_lock = False
|
||||||
|
self.log.error("Image build number not found for locking: %s, %s",
|
||||||
|
build_number, image)
|
||||||
|
|
||||||
# If we aren't blocking, it's possible we didn't get the lock
|
# If we aren't blocking, it's possible we didn't get the lock
|
||||||
# because someone else has it.
|
# because someone else has it.
|
||||||
@ -659,6 +666,10 @@ class ZooKeeper(object):
|
|||||||
except kze.LockTimeout:
|
except kze.LockTimeout:
|
||||||
raise npe.TimeoutException(
|
raise npe.TimeoutException(
|
||||||
"Timeout trying to acquire lock %s" % lock_path)
|
"Timeout trying to acquire lock %s" % lock_path)
|
||||||
|
except kze.NoNodeError:
|
||||||
|
have_lock = False
|
||||||
|
self.log.error("Image upload not found for locking: %s, %s, %s",
|
||||||
|
build_number, provider, image)
|
||||||
|
|
||||||
# If we aren't blocking, it's possible we didn't get the lock
|
# If we aren't blocking, it's possible we didn't get the lock
|
||||||
# because someone else has it.
|
# because someone else has it.
|
||||||
@ -1436,6 +1447,9 @@ class ZooKeeper(object):
|
|||||||
except kze.LockTimeout:
|
except kze.LockTimeout:
|
||||||
raise npe.TimeoutException(
|
raise npe.TimeoutException(
|
||||||
"Timeout trying to acquire lock %s" % path)
|
"Timeout trying to acquire lock %s" % path)
|
||||||
|
except kze.NoNodeError:
|
||||||
|
have_lock = False
|
||||||
|
self.log.error("Request not found for locking: %s", request)
|
||||||
|
|
||||||
# If we aren't blocking, it's possible we didn't get the lock
|
# If we aren't blocking, it's possible we didn't get the lock
|
||||||
# because someone else has it.
|
# because someone else has it.
|
||||||
@ -1483,6 +1497,9 @@ class ZooKeeper(object):
|
|||||||
except kze.LockTimeout:
|
except kze.LockTimeout:
|
||||||
raise npe.TimeoutException(
|
raise npe.TimeoutException(
|
||||||
"Timeout trying to acquire lock %s" % path)
|
"Timeout trying to acquire lock %s" % path)
|
||||||
|
except kze.NoNodeError:
|
||||||
|
have_lock = False
|
||||||
|
self.log.error("Node not found for locking: %s", node)
|
||||||
|
|
||||||
# If we aren't blocking, it's possible we didn't get the lock
|
# If we aren't blocking, it's possible we didn't get the lock
|
||||||
# because someone else has it.
|
# because someone else has it.
|
||||||
|
Loading…
Reference in New Issue
Block a user