From 66b6c27dbfaa10be356c8da204de2575ab41ab15 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Tue, 9 Mar 2021 13:58:01 -0800 Subject: [PATCH] Uniquely identify launchers Previously launchers were identified as: hostname-pid-provider-pool The problem with this setup is that using containers with host networking and namespaced pids you can end up with multiple launchers using the same launcher id for the same provider pool. This problem also arises if you are replacing launcher1.example.com with launcher1.otherexample.com. To fix this we update the launcher ids to be: fqdn-provider-pool-randomuuid These ids are already not expected to survive beyond the lifetime of any single process as they use a pid value. This means we can replace this pid value with a randomly generated uuid value instead. We also use the host fqdn instead of hostname to be less ambiguous in the case where two different hosts with different fqdns share a hostname. Change-Id: I419718e63b31b12d8dfe971031cd8a81ad582480 --- nodepool/launcher.py | 7 ++++--- nodepool/tests/unit/test_zk.py | 10 ++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/nodepool/launcher.py b/nodepool/launcher.py index 84ba2cfe2..9147ad606 100644 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -21,6 +21,7 @@ import os.path import socket import threading import time +import uuid from kazoo import exceptions as kze @@ -137,9 +138,9 @@ class PoolWorker(threading.Thread, stats.StatsReporter): self.request_handlers = [] self.watermark_sleep = nodepool.watermark_sleep self.zk = self.getZK() - self.launcher_id = "%s-%s-%s" % (socket.gethostname(), - os.getpid(), - self.name) + self.launcher_id = "%s-%s-%s" % (socket.getfqdn(), + self.name, + uuid.uuid4().hex) stats.StatsReporter.__init__(self) # --------------------------------------------------------------- diff --git a/nodepool/tests/unit/test_zk.py b/nodepool/tests/unit/test_zk.py index 39930cd5c..f6756f2fd 100644 --- a/nodepool/tests/unit/test_zk.py 
+++ b/nodepool/tests/unit/test_zk.py @@ -13,6 +13,7 @@ import testtools import time +import uuid from nodepool import exceptions as npe from nodepool import tests @@ -564,7 +565,7 @@ class TestZooKeeper(tests.DBTestCase): def test_registerLauncher(self): launcher = zk.Launcher() - launcher.id = "launcher-000-001" + launcher.id = "launcher-Poolworker.provider-main-" + uuid.uuid4().hex self.zk.registerLauncher(launcher) launchers = self.zk.getRegisteredLaunchers() self.assertEqual(1, len(launchers)) @@ -572,7 +573,7 @@ class TestZooKeeper(tests.DBTestCase): def test_registerLauncher_safe_repeat(self): launcher = zk.Launcher() - launcher.id = "launcher-000-001" + launcher.id = "launcher-Poolworker.provider-main-" + uuid.uuid4().hex self.zk.registerLauncher(launcher) self.zk.registerLauncher(launcher) launchers = self.zk.getRegisteredLaunchers() @@ -1014,7 +1015,7 @@ class TestZKModel(tests.BaseTestCase): o.public_ipv6 = '' o.host_id = 'fake-host-id' o.image_id = 'image-id' - o.launcher = 'launcher-id' + o.launcher = 'launcher-Poolworker.provider-main-' + uuid.uuid4().hex o.external_id = 'ABCD' o.hostname = 'xyz' o.comment = 'comment' @@ -1048,6 +1049,7 @@ class TestZKModel(tests.BaseTestCase): def test_Node_fromDict(self): now = int(time.time()) node_id = '123' + launcher_id = 'launcher-Poolworker.provider-main-' + uuid.uuid4().hex d = { 'state': zk.READY, 'state_time': now, @@ -1062,7 +1064,7 @@ class TestZKModel(tests.BaseTestCase): 'public_ipv6': '', 'host_id': 'fake-host-id', 'image_id': 'image-id', - 'launcher': 'launcher-id', + 'launcher': launcher_id, 'external_id': 'ABCD', 'hostname': 'xyz', 'comment': 'comment',