diff --git a/nodepool/driver/openstack/provider.py b/nodepool/driver/openstack/provider.py index a78255b5f..b9a1462d8 100755 --- a/nodepool/driver/openstack/provider.py +++ b/nodepool/driver/openstack/provider.py @@ -151,26 +151,30 @@ class OpenStackProvider(Provider): for node in self._zk.nodeIterator(): if node.provider == self.provider.name: - if pool and not node.pool == pool.name: - continue - provider_pool = self.provider.pools.get(node.pool) - if not provider_pool: - self.log.warning( - "Cannot find provider pool for node %s" % node) - # This node is in a funny state we log it for debugging - # but move on and don't account it as we can't properly - # calculate its cost without pool info. - continue - if node.type[0] not in provider_pool.labels: - self.log.warning( - "Node type is not in provider pool for node %s" % node) - # This node is also in a funny state; the config - # may have changed under it. It should settle out - # eventually when it's deleted. - continue - node_resources = self.quotaNeededByNodeType( - node.type[0], provider_pool) - used_quota.add(node_resources) + try: + if pool and not node.pool == pool.name: + continue + provider_pool = self.provider.pools.get(node.pool) + if not provider_pool: + self.log.warning( + "Cannot find provider pool for node %s" % node) + # This node is in a funny state we log it for debugging + # but move on and don't account it as we can't properly + # calculate its cost without pool info. + continue + if node.type[0] not in provider_pool.labels: + self.log.warning("Node type is not in provider pool " + "for node %s" % node) + # This node is also in a funny state; the config + # may have changed under it. It should settle out + # eventually when it's deleted. + continue + node_resources = self.quotaNeededByNodeType( + node.type[0], provider_pool) + used_quota.add(node_resources) + except Exception: + self.log.exception("Couldn't consider invalid node %s " + "for quota:" % node) return used_quota def unmanagedQuotaUsed(self): diff --git a/nodepool/tests/unit/test_launcher.py b/nodepool/tests/unit/test_launcher.py index ca684a3b2..46f62d6ba 100644 --- a/nodepool/tests/unit/test_launcher.py +++ b/nodepool/tests/unit/test_launcher.py @@ -729,6 +729,39 @@ class TestLauncher(tests.DBTestCase): # retries in config is set to 2, so 2 attempts to create a server self.assertEqual(0, manager.createServer_fails) + def test_node_launch_with_broken_znodes(self): + """Test that node launch still works if there are broken znodes""" + # Create a znode without type + znode = zk.Node() + znode.provider = 'fake-provider' + znode.pool = 'main' + znode.external_id = 'fakeid' + znode.state = zk.READY + + # Create znode without pool + self.zk.storeNode(znode) + znode = zk.Node() + znode.provider = 'fake-provider' + znode.type = ['fake-label'] + znode.external_id = 'fakeid' + znode.state = zk.READY + self.zk.storeNode(znode) + + configfile = self.setup_config('node_launch_retry.yaml') + pool = self.useNodepool(configfile, watermark_sleep=1) + self.useBuilder(configfile) + pool.start() + self.wait_for_config(pool) + self.waitForImage('fake-provider', 'fake-image') + + req = zk.NodeRequest() + req.state = zk.REQUESTED + req.node_types.append('fake-label') + self.zk.storeNodeRequest(req) + + req = self.waitForNodeRequest(req) + self.assertEqual(req.state, zk.FULFILLED) + def test_node_launch_retries_with_external_id(self): configfile = self.setup_config('node_launch_retry.yaml') pool = self.useNodepool(configfile, watermark_sleep=1)