From 8b87890b978a54be10eff272f7bb6d9955e2d999 Mon Sep 17 00:00:00 2001 From: Monty Taylor Date: Wed, 19 Feb 2014 22:41:33 -0800 Subject: [PATCH] Reset the client object after proxy timeouts In situations where the connection between nodepool and the cloud it manages may be through an evil proxy, the fact that the requests library in python-novaclient keeps port connections open means that the evil proxy might timeout the connection during the period of time that the image is being built. If the Task object encounters a ProxyError it needs to be propagated up in the calling thread to the ProviderManager rather than caught and handled in TaskManager in the main thread. Once caught in the ProviderManager, create a new client object and retry once. If a second ProxyError is raised, then it will be passed to the main thread the same as any other Exception. Change-Id: I3d69de48be1993c04356d209f622b0a17e6e8d03 --- nodepool/fakeprovider.py | 14 ++++++++++++++ nodepool/provider_manager.py | 22 +++++++++++++++++++++- nodepool/task_manager.py | 8 +++++++- nodepool/tests/test_nodepool.py | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 2 deletions(-) diff --git a/nodepool/fakeprovider.py b/nodepool/fakeprovider.py index 9a59c49cc..bd83cc1df 100644 --- a/nodepool/fakeprovider.py +++ b/nodepool/fakeprovider.py @@ -16,6 +16,7 @@ import StringIO import novaclient +import requests.exceptions import threading import time import uuid @@ -118,6 +119,12 @@ class FakeHTTPClient(object): return None, dict(extensions=dict()) +class BadHTTPClient(object): + '''Always raises a ProxyError''' + def get(self, path): + raise requests.exceptions.ProxyError + + class FakeClient(object): def __init__(self, *args, **kwargs): self.flavors = FakeList([ @@ -130,6 +137,12 @@ class FakeClient(object): self.servers.api = self +class BadClient(FakeClient): + def __init__(self): + super(BadClient, self).__init__() + self.client = BadHTTPClient() + + class FakeGlanceClient(object): def __init__(self, *args, **kwargs): self.id = 'fake-glance-id' @@ -226,3 +239,4 @@ class FakeJenkins(object): FAKE_CLIENT = FakeClient() +BAD_CLIENT = BadClient() diff --git a/nodepool/provider_manager.py b/nodepool/provider_manager.py index a47cd24ee..ed582dfe0 100644 --- a/nodepool/provider_manager.py +++ b/nodepool/provider_manager.py @@ -27,6 +27,8 @@ import glanceclient import glanceclient.client import keystoneclient.v2_0.client as ksclient import time +import requests.exceptions +import sys from nodeutils import iterate_timeout from task_manager import Task, TaskManager, ManagerStoppedException @@ -251,7 +253,7 @@ class ProviderManager(TaskManager): super(ProviderManager, self).__init__(None, provider.name, provider.rate) self.provider = provider - self._client = self._getClient() + self.resetClient() self._images = {} self._networks = {} self._cloud_metadata_read = False @@ -297,6 +299,24 @@ class ProviderManager(TaskManager): kwargs['timeout'] = self.provider.api_timeout return novaclient.client.Client(*args, **kwargs) + def runTask(self, task): + try: + task.run(self._client) + except requests.exceptions.ProxyError: + # Try to get a new client object if we get a ProxyError + self.log.exception('Resetting client due to ProxyError') + self.resetClient() + try: + task.run(self._client) + except requests.exceptions.ProxyError as e: + # If we get a second ProxyError, then make sure it gets raised + # the same way all other Exceptions from the Task object do. + # This will move the Exception to the main thread. + task.exception(e, sys.exc_info()[2]) + + def resetClient(self): + self._client = self._getClient() + def _getFlavors(self): flavors = self.listFlavors() flavors.sort(lambda a, b: cmp(a['ram'], b['ram'])) diff --git a/nodepool/task_manager.py b/nodepool/task_manager.py index 1221ed8d3..e48de1a9f 100644 --- a/nodepool/task_manager.py +++ b/nodepool/task_manager.py @@ -22,6 +22,7 @@ from six.moves import queue as Queue import logging import time from statsd import statsd +import requests.exceptions class ManagerStoppedException(Exception): @@ -54,6 +55,8 @@ class Task(object): def run(self, client): try: self.done(self.main(client)) + except requests.exceptions.ProxyError as e: + raise e except Exception as e: self.exception(e, sys.exc_info()[2]) @@ -90,7 +93,7 @@ class TaskManager(threading.Thread): self.log.debug("Manager %s running task %s (queue: %s)" % (self.name, task, self.queue.qsize())) start = time.time() - task.run(self._client) + self.runTask(task) last_ts = time.time() dt = last_ts - start self.log.debug("Manager %s ran task %s in %ss" % @@ -110,3 +113,6 @@ class TaskManager(threading.Thread): "Manager %s is no longer running" % self.name) self.queue.put(task) return task.wait() + + def runTask(self, task): + task.run(self._client) diff --git a/nodepool/tests/test_nodepool.py b/nodepool/tests/test_nodepool.py index e79e3f97b..35ee06f48 100644 --- a/nodepool/tests/test_nodepool.py +++ b/nodepool/tests/test_nodepool.py @@ -17,8 +17,12 @@ import fixtures from nodepool import tests from nodepool import nodedb +import nodepool.fakeprovider import nodepool.nodepool +import requests.exceptions +from testtools import ExpectedException + class TestNodepool(tests.DBTestCase): def test_db(self): @@ -243,3 +247,32 @@ class TestNodepool(tests.DBTestCase): # Make sure our old node is in delete state self.assertEqual(len(deleted_nodes), 1) self.assertEqual(node_id, deleted_nodes[0].id) + + def test_proxy_timeout(self): + """Test that we re-run a task after a ProxyError""" + configfile = self.setup_config('node.yaml') + pool = self.useNodepool(configfile, watermark_sleep=1) + pool.start() + self.waitForNodes(pool) + + provider = pool.config.providers['fake-provider'] + manager = pool.getProviderManager(provider) + + # In order to test recovering from a ProxyError from the client + # we are going manually set the client object to be a bad client that + # always raises a ProxyError. If our client reset works correctly + # then we will create a new client object, which in this case would + # be a new fake client in place of the bad client. + manager._client = nodepool.fakeprovider.BAD_CLIENT + + # The only implemented function for the fake and bad clients + # If we don't raise an uncaught exception, we pass + manager.listExtensions() + + # Now let's do it again, but let's prevent the client object from being + # replaced and then assert that we raised the exception that we expect. + manager._client = nodepool.fakeprovider.BAD_CLIENT + manager._getClient = lambda: nodepool.fakeprovider.BAD_CLIENT + + with ExpectedException(requests.exceptions.ProxyError): + manager.listExtensions()