Reset the client object after proxy timeouts
In situations where the connection between nodepool and the cloud it manages may be through an evil proxy, the fact that the requests library in python-novaclient keeps port connections open means that the evil proxy might timeout the connection during the period of time that the image is being built. If the Task object encounters a ProxyError it needs to be propagated up in the calling thread to the ProviderManager rather than caught and handled in TaskManager in the main thread. Once caught in the ProviderManager, create a new client object and retry once. If a second ProxyError is raised, then it will be passed to the main thread the same as any other Exception. Change-Id: I3d69de48be1993c04356d209f622b0a17e6e8d03
This commit is contained in:
parent
ddd600368a
commit
8b87890b97
@ -16,6 +16,7 @@
|
||||
|
||||
import StringIO
|
||||
import novaclient
|
||||
import requests.exceptions
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
@ -118,6 +119,12 @@ class FakeHTTPClient(object):
|
||||
return None, dict(extensions=dict())
|
||||
|
||||
|
||||
class BadHTTPClient(object):
|
||||
'''Always raises a ProxyError'''
|
||||
def get(self, path):
|
||||
raise requests.exceptions.ProxyError
|
||||
|
||||
|
||||
class FakeClient(object):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.flavors = FakeList([
|
||||
@ -130,6 +137,12 @@ class FakeClient(object):
|
||||
self.servers.api = self
|
||||
|
||||
|
||||
class BadClient(FakeClient):
|
||||
def __init__(self):
|
||||
super(BadClient, self).__init__()
|
||||
self.client = BadHTTPClient()
|
||||
|
||||
|
||||
class FakeGlanceClient(object):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.id = 'fake-glance-id'
|
||||
@ -226,3 +239,4 @@ class FakeJenkins(object):
|
||||
|
||||
|
||||
FAKE_CLIENT = FakeClient()
|
||||
BAD_CLIENT = BadClient()
|
||||
|
@ -27,6 +27,8 @@ import glanceclient
|
||||
import glanceclient.client
|
||||
import keystoneclient.v2_0.client as ksclient
|
||||
import time
|
||||
import requests.exceptions
|
||||
import sys
|
||||
|
||||
from nodeutils import iterate_timeout
|
||||
from task_manager import Task, TaskManager, ManagerStoppedException
|
||||
@ -251,7 +253,7 @@ class ProviderManager(TaskManager):
|
||||
super(ProviderManager, self).__init__(None, provider.name,
|
||||
provider.rate)
|
||||
self.provider = provider
|
||||
self._client = self._getClient()
|
||||
self.resetClient()
|
||||
self._images = {}
|
||||
self._networks = {}
|
||||
self._cloud_metadata_read = False
|
||||
@ -297,6 +299,24 @@ class ProviderManager(TaskManager):
|
||||
kwargs['timeout'] = self.provider.api_timeout
|
||||
return novaclient.client.Client(*args, **kwargs)
|
||||
|
||||
def runTask(self, task):
|
||||
try:
|
||||
task.run(self._client)
|
||||
except requests.exceptions.ProxyError:
|
||||
# Try to get a new client object if we get a ProxyError
|
||||
self.log.exception('Resetting client due to ProxyError')
|
||||
self.resetClient()
|
||||
try:
|
||||
task.run(self._client)
|
||||
except requests.exceptions.ProxyError as e:
|
||||
# If we get a second ProxyError, then make sure it gets raised
|
||||
# the same way all other Exceptions from the Task object do.
|
||||
# This will move the Exception to the main thread.
|
||||
task.exception(e, sys.exc_info()[2])
|
||||
|
||||
def resetClient(self):
|
||||
self._client = self._getClient()
|
||||
|
||||
def _getFlavors(self):
|
||||
flavors = self.listFlavors()
|
||||
flavors.sort(lambda a, b: cmp(a['ram'], b['ram']))
|
||||
|
@ -22,6 +22,7 @@ from six.moves import queue as Queue
|
||||
import logging
|
||||
import time
|
||||
from statsd import statsd
|
||||
import requests.exceptions
|
||||
|
||||
|
||||
class ManagerStoppedException(Exception):
|
||||
@ -54,6 +55,8 @@ class Task(object):
|
||||
def run(self, client):
|
||||
try:
|
||||
self.done(self.main(client))
|
||||
except requests.exceptions.ProxyError as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
self.exception(e, sys.exc_info()[2])
|
||||
|
||||
@ -90,7 +93,7 @@ class TaskManager(threading.Thread):
|
||||
self.log.debug("Manager %s running task %s (queue: %s)" %
|
||||
(self.name, task, self.queue.qsize()))
|
||||
start = time.time()
|
||||
task.run(self._client)
|
||||
self.runTask(task)
|
||||
last_ts = time.time()
|
||||
dt = last_ts - start
|
||||
self.log.debug("Manager %s ran task %s in %ss" %
|
||||
@ -110,3 +113,6 @@ class TaskManager(threading.Thread):
|
||||
"Manager %s is no longer running" % self.name)
|
||||
self.queue.put(task)
|
||||
return task.wait()
|
||||
|
||||
def runTask(self, task):
|
||||
task.run(self._client)
|
||||
|
@ -17,8 +17,12 @@ import fixtures
|
||||
|
||||
from nodepool import tests
|
||||
from nodepool import nodedb
|
||||
import nodepool.fakeprovider
|
||||
import nodepool.nodepool
|
||||
|
||||
import requests.exceptions
|
||||
from testtools import ExpectedException
|
||||
|
||||
|
||||
class TestNodepool(tests.DBTestCase):
|
||||
def test_db(self):
|
||||
@ -243,3 +247,32 @@ class TestNodepool(tests.DBTestCase):
|
||||
# Make sure our old node is in delete state
|
||||
self.assertEqual(len(deleted_nodes), 1)
|
||||
self.assertEqual(node_id, deleted_nodes[0].id)
|
||||
|
||||
def test_proxy_timeout(self):
|
||||
"""Test that we re-run a task after a ProxyError"""
|
||||
configfile = self.setup_config('node.yaml')
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
pool.start()
|
||||
self.waitForNodes(pool)
|
||||
|
||||
provider = pool.config.providers['fake-provider']
|
||||
manager = pool.getProviderManager(provider)
|
||||
|
||||
# In order to test recovering from a ProxyError from the client
|
||||
# we are going manually set the client object to be a bad client that
|
||||
# always raises a ProxyError. If our client reset works correctly
|
||||
# then we will create a new client object, which in this case would
|
||||
# be a new fake client in place of the bad client.
|
||||
manager._client = nodepool.fakeprovider.BAD_CLIENT
|
||||
|
||||
# The only implemented function for the fake and bad clients
|
||||
# If we don't raise an uncaught exception, we pass
|
||||
manager.listExtensions()
|
||||
|
||||
# Now let's do it again, but let's prevent the client object from being
|
||||
# replaced and then assert that we raised the exception that we expect.
|
||||
manager._client = nodepool.fakeprovider.BAD_CLIENT
|
||||
manager._getClient = lambda: nodepool.fakeprovider.BAD_CLIENT
|
||||
|
||||
with ExpectedException(requests.exceptions.ProxyError):
|
||||
manager.listExtensions()
|
||||
|
Loading…
x
Reference in New Issue
Block a user