Revert "Cleanup down ports"

The port filter for DOWN ports seems to have no effect. It actually
deleted *all* ports in the tenant.
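
To make the failure mode concrete, here is a minimal illustrative sketch
(not part of this change): if the backend silently ignores the server-side
status filter, list_ports() returns every port in the tenant, and a cleanup
pass that trusts the result will queue all of them for deletion. The client
object and helper name below are hypothetical.

    # Hypothetical sketch only. A client-side re-check guards against a
    # status filter that the backend does not actually apply.
    def list_down_ports(client):
        ports = client.list_ports(filters={'status': 'DOWN'})
        # Keep only ports that really report status DOWN.
        return [p for p in ports if getattr(p, 'status', None) == 'DOWN']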

This reverts commit cdd60504ec.

Change-Id: I48c1430bb768903af467cace1a720e45ecc8e98f
Tobias Henkel 2018-10-30 12:58:40 +01:00
parent 5c87fdb046
commit 7e1b8a7261
GPG Key ID: 03750DEC158E5FA2 (no known key found for this signature in database)
5 changed files with 1 addition and 133 deletions


@@ -329,12 +329,6 @@ Nodepool launcher
   * ready
   * used

.. zuul:stat:: nodepool.provider.<provider>.downPorts
   :type: counter

   Number of ports in the DOWN state that have been removed automatically
   in the cleanup resources phase of the OpenStack driver.

.. zuul:stat:: nodepool.provider.<provider>.nodes.<state>
   :type: gauge
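
For reference, a rough sketch of how a counter like this is typically
emitted with the plain statsd Python client; nodepool's own stats wrapper
may wire this up differently, and the provider name is only an example.

    # Rough sketch with the plain statsd client; names are illustrative.
    import statsd

    client = statsd.StatsClient('localhost', 8125, prefix='nodepool')
    # Count DOWN ports removed in one cleanup pass (statsd counter type).
    client.incr('provider.fake-provider.downPorts', 2)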


@@ -31,7 +31,6 @@ class Dummy(object):
    INSTANCE = 'Instance'
    FLAVOR = 'Flavor'
    LOCATION = 'Server.Location'
    PORT = 'Port'

    def __init__(self, kind, **kw):
        self.__kind = kind
@@ -105,12 +104,6 @@ class FakeOpenStackCloud(object):
        self._server_list = []
        self.max_cores, self.max_instances, self.max_ram = FakeOpenStackCloud.\
            _get_quota()
        self._down_ports = [
            Dummy(Dummy.PORT, id='1a', status='DOWN',
                  device_owner="compute:nova"),
            Dummy(Dummy.PORT, id='2b', status='DOWN',
                  device_owner=None),
        ]

    def _get(self, name_or_id, instance_list):
        self.log.debug("Get %s in %s" % (name_or_id, repr(instance_list)))
@@ -288,20 +281,6 @@ class FakeOpenStackCloud(object):
            total_ram_used=8192 * len(self._server_list)
        )

    def list_ports(self, filters=None):
        if filters and filters.get('status') == 'DOWN':
            return self._down_ports
        return []

    def delete_port(self, port_id):
        tmp_ports = []
        for port in self._down_ports:
            if port.id != port_id:
                tmp_ports.append(port)
            else:
                self.log.debug("Deleted port ID: %s", port_id)
        self._down_ports = tmp_ports


class FakeUploadFailCloud(FakeOpenStackCloud):
    log = logging.getLogger("nodepool.FakeUploadFailCloud")


@@ -26,7 +26,6 @@ from nodepool.driver import Provider
from nodepool.driver.utils import QuotaInformation
from nodepool.nodeutils import iterate_timeout
from nodepool.task_manager import TaskManager
from nodepool import stats
from nodepool import version
from nodepool import zk
@@ -51,10 +50,6 @@ class OpenStackProvider(Provider):
        self._taskmanager = None
        self._current_nodepool_quota = None
        self._zk = None
        self._down_ports = set()
        self._last_port_cleanup = None
        self._port_cleanup_interval_secs = 180
        self._statsd = stats.get_client()

    def start(self, zk_conn):
        if self._use_taskmanager:
@@ -422,21 +417,6 @@ class OpenStackProvider(Provider):
            **meta)
        return image.id

    def listPorts(self, status=None):
        '''
        List known ports.

        :param str status: A valid port status. E.g., 'ACTIVE' or 'DOWN'.
        '''
        if status:
            ports = self._client.list_ports(filters={'status': status})
        else:
            ports = self._client.list_ports()
        return ports

    def deletePort(self, port_id):
        self._client.delete_port(port_id)

    def listImages(self):
        return self._client.list_images()
@@ -464,7 +444,7 @@ class OpenStackProvider(Provider):
        self.log.debug('Deleting server %s' % server_id)
        self.deleteServer(server_id)

    def cleanupLeakedInstances(self):
    def cleanupLeakedResources(self):
        '''
        Delete any leaked server instances.
@@ -512,63 +492,6 @@
                node.state = zk.DELETING
                self._zk.storeNode(node)

    def filterComputePorts(self, ports):
        '''
        Return a list of compute ports (or no device owner).

        We are not interested in ports for routers or DHCP.
        '''
        ret = []
        for p in ports:
            if p.device_owner is None or p.device_owner.startswith("compute:"):
                ret.append(p)
        return ret

    def cleanupLeakedPorts(self):
        if not self._last_port_cleanup:
            self._last_port_cleanup = time.monotonic()
            ports = self.listPorts(status='DOWN')
            ports = self.filterComputePorts(ports)
            self._down_ports = set([p.id for p in ports])
            return

        # Return if not enough time has passed between cleanup
        last_check_in_secs = int(time.monotonic() - self._last_port_cleanup)
        if last_check_in_secs <= self._port_cleanup_interval_secs:
            return

        ports = self.listPorts(status='DOWN')
        ports = self.filterComputePorts(ports)
        current_set = set([p.id for p in ports])
        remove_set = current_set & self._down_ports

        removed_count = 0
        for port_id in remove_set:
            try:
                self.deletePort(port_id)
            except Exception:
                self.log.exception("Exception deleting port %s in %s:",
                                   port_id, self.provider.name)
            else:
                removed_count += 1
                self.log.debug("Removed DOWN port %s in %s",
                               port_id, self.provider.name)

        if self._statsd and removed_count:
            key = 'nodepool.provider.%s.downPorts' % (self.provider.name)
            self._statsd.incr(key, removed_count)

        self._last_port_cleanup = time.monotonic()

        # Rely on OpenStack to tell us the down ports rather than doing our
        # own set adjustment.
        ports = self.listPorts(status='DOWN')
        ports = self.filterComputePorts(ports)
        self._down_ports = set([p.id for p in ports])

    def cleanupLeakedResources(self):
        self.cleanupLeakedInstances()
        self.cleanupLeakedPorts()
        if self.provider.clean_floating_ips:
            self._client.delete_unattached_floating_ips()


@@ -1773,24 +1773,3 @@ class TestLauncher(tests.DBTestCase):
        req3 = self.waitForNodeRequest(req3)
        self.assertEqual(req3.state, zk.FAILED)

    def test_leaked_port_cleanup(self):
        configfile = self.setup_config('node.yaml')
        self.useBuilder(configfile)
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.cleanup_interval = 1
        pool.start()
        self.waitForNodes('fake-label')

        manager = pool.getProviderManager('fake-provider')
        down_ports = manager.listPorts(status='DOWN')
        self.assertEqual(2, len(down_ports))
        self.log.debug("Down ports: %s", down_ports)

        # Change the port cleanup interval to happen quicker
        manager._port_cleanup_interval_secs = 2
        while manager.listPorts(status='DOWN'):
            time.sleep(1)

        self.assertReportedStat('nodepool.provider.fake-provider.downPorts',
                                value='2', kind='c')


@@ -1,7 +0,0 @@
---
features:
  - |
    Added a new routine to the OpenStack driver cleanup resources phase that
    will remove any ports reported to be in the DOWN state. Ports will have
    to be seen as DOWN for at least three minutes before they will be removed.
    The number of ports removed will be reported to statsd.
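
A note on the "at least three minutes" wording: a port is removed only if it
is observed DOWN on two cleanup passes that are more than the 180 second
interval apart; there is no per-port timer. Below is a compact illustrative
sketch of that guard (names hypothetical; the real logic is
cleanupLeakedPorts() shown earlier in this diff).

    # Illustrative sketch only; mirrors the two-pass guard described above.
    import time

    INTERVAL = 180      # seconds, i.e. "at least three minutes"
    seen_down = set()   # port ids observed DOWN on the previous pass
    last_pass = None

    def cleanup_pass(list_down_ids, delete_port):
        global seen_down, last_pass
        now = time.monotonic()
        if last_pass is None:
            # First pass: only record what is DOWN right now.
            seen_down = set(list_down_ids())
            last_pass = now
            return
        if now - last_pass <= INTERVAL:
            return
        # Delete only ports that were DOWN on both passes.
        for port_id in set(list_down_ids()) & seen_down:
            delete_port(port_id)
        seen_down = set(list_down_ids())
        last_pass = now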