diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst index 474ca86e9..def0b9c9a 100644 --- a/doc/source/configuration.rst +++ b/doc/source/configuration.rst @@ -162,6 +162,11 @@ Options The path to the PEM encoded CA certificate. +.. attr:: zookeeper-timeout + :type: float + :default: 10.0 + + The ZooKeeper session timeout, in seconds. .. attr:: labels :type: list diff --git a/nodepool/builder.py b/nodepool/builder.py index 261d0e884..d14008336 100644 --- a/nodepool/builder.py +++ b/nodepool/builder.py @@ -1397,7 +1397,9 @@ class NodePoolBuilder(object): list(self._config.zookeeper_servers.values()), tls_cert=self._config.zookeeper_tls_cert, tls_key=self._config.zookeeper_tls_key, - tls_ca=self._config.zookeeper_tls_ca) + tls_ca=self._config.zookeeper_tls_ca, + timeout=self._config.zookeeper_timeout, + ) self.log.debug('Starting listener for build jobs') diff --git a/nodepool/cmd/config_validator.py b/nodepool/cmd/config_validator.py index b49fc5321..c06aa7043 100644 --- a/nodepool/cmd/config_validator.py +++ b/nodepool/cmd/config_validator.py @@ -81,6 +81,7 @@ class ConfigValidator: 'chroot': str, }], 'zookeeper-tls': zk_tls, + 'zookeeper-timeout': float, 'providers': list, 'labels': [label], 'diskimages': [diskimage], diff --git a/nodepool/config.py b/nodepool/config.py index 6794a4b8d..a49ffe64c 100644 --- a/nodepool/config.py +++ b/nodepool/config.py @@ -38,6 +38,7 @@ class Config(ConfigValue): self.providers = {} self.provider_managers = {} self.zookeeper_servers = {} + self.zookeeper_timeout = 10.0 self.zookeeper_tls_cert = None self.zookeeper_tls_key = None self.zookeeper_tls_ca = None @@ -56,6 +57,7 @@ class Config(ConfigValue): self.providers == other.providers and self.provider_managers == other.provider_managers and self.zookeeper_servers == other.zookeeper_servers and + self.zookeeper_timeout == other.zookeeper_timeout and self.elements_dir == other.elements_dir and self.images_dir == other.images_dir and self.build_log_dir == other.build_log_dir and @@ -109,6 +111,9 @@ class Config(ConfigValue): name = z.host + '_' + str(z.port) self.zookeeper_servers[name] = z + def setZooKeeperTimeout(self, timeout): + self.zookeeper_timeout = float(timeout) + def setDiskImages(self, diskimages_cfg): if not diskimages_cfg: return @@ -368,6 +373,7 @@ def loadConfig(config_path, env=os.environ): newconfig.setMaxHoldAge(config.get('max-hold-age')) newconfig.setWebApp(config.get('webapp')) newconfig.setZooKeeperServers(config.get('zookeeper-servers')) + newconfig.setZooKeeperTimeout(config.get('zookeeper-timeout', 10.0)) newconfig.setDiskImages(config.get('diskimages')) newconfig.setLabels(config.get('labels')) newconfig.setProviders(config.get('providers')) diff --git a/nodepool/launcher.py b/nodepool/launcher.py index fd19f9e67..82b6789e3 100644 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -946,7 +946,9 @@ class NodePool(threading.Thread): self.zk.connect(configured, tls_cert=config.zookeeper_tls_cert, tls_key=config.zookeeper_tls_key, - tls_ca=config.zookeeper_tls_ca) + tls_ca=config.zookeeper_tls_ca, + timeout=config.zookeeper_timeout, + ) else: self.log.debug("Detected ZooKeeper server changes") self.zk.resetHosts(configured) diff --git a/nodepool/tests/fixtures/node.yaml b/nodepool/tests/fixtures/node.yaml index 24bf3b998..4a379d762 100644 --- a/nodepool/tests/fixtures/node.yaml +++ b/nodepool/tests/fixtures/node.yaml @@ -13,6 +13,8 @@ zookeeper-tls: cert: {zookeeper_cert} key: {zookeeper_key} +zookeeper-timeout: 20.0 + labels: - name: fake-label min-ready: 1 diff --git a/nodepool/zk.py b/nodepool/zk.py index 1307941d0..b83b37167 100644 --- a/nodepool/zk.py +++ b/nodepool/zk.py @@ -1012,7 +1012,7 @@ class ZooKeeper(object): self._became_lost = False def connect(self, host_list, read_only=False, tls_cert=None, - tls_key=None, tls_ca=None): + tls_key=None, tls_ca=None, timeout=10.0): ''' Establish a connection with ZooKeeper cluster. @@ -1030,8 +1030,11 @@ class ZooKeeper(object): ''' if self.client is None: hosts = buildZooKeeperHosts(host_list) - args = dict(hosts=hosts, - read_only=read_only) + args = dict( + hosts=hosts, + read_only=read_only, + timeout=timeout, + ) args['use_ssl'] = True if not (tls_key and tls_cert and tls_ca): diff --git a/releasenotes/notes/zk-timeout-bede88920e248c96.yaml b/releasenotes/notes/zk-timeout-bede88920e248c96.yaml new file mode 100644 index 000000000..cb8062802 --- /dev/null +++ b/releasenotes/notes/zk-timeout-bede88920e248c96.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + It is now possible to configure the :attr:`zookeeper-timeout`. which can + help to avoid zookeeper session losses on busy systems.