19f80b7d89
An indentation error caused the operator to only create the last provider listed in the nodepool config. Additionally, add the nodepool provider to the nodepool launcher deployment spec. This is needed to separate the pods for different nodepool launchers (otherwise, the deployments may consider pods for one launcher to belong to another). Change-Id: I76bbae948d1a633028b3565d512fb48d68111dd7
458 lines
17 KiB
Python
458 lines
17 KiB
Python
# Copyright 2021 Acme Gating, LLC
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import kopf
|
|
import copy
|
|
import base64
|
|
import hashlib
|
|
|
|
import jinja2
|
|
import pykube
|
|
import yaml
|
|
|
|
from . import objects
|
|
from . import utils
|
|
from . import certmanager
|
|
from . import pxc
|
|
from . import zookeeper
|
|
|
|
|
|
class Zuul:
|
|
def __init__(self, namespace, name, logger, spec):
|
|
self.api = pykube.HTTPClient(pykube.KubeConfig.from_env())
|
|
self.namespace = namespace
|
|
self.name = name
|
|
self.log = logger
|
|
self.spec = copy.deepcopy(dict(spec))
|
|
self.zuul_conf_sha = None
|
|
|
|
db_secret = spec.get('database', {}).get('secretName')
|
|
if db_secret:
|
|
self.db_secret = db_secret
|
|
self.manage_db = False
|
|
else:
|
|
self.db_secret = 'zuul-db'
|
|
self.manage_db = True
|
|
|
|
self.nodepool_secret = spec.get('launcher', {}).get('config', {}).\
|
|
get('secretName')
|
|
|
|
zk_spec = self.spec.setdefault('zookeeper', {})
|
|
zk_str = spec.get('zookeeper', {}).get('hosts')
|
|
if zk_str:
|
|
self.manage_zk = False
|
|
else:
|
|
zk_str = f'zookeeper.{self.namespace}:2281'
|
|
zk_spec['hosts'] = zk_str
|
|
zk_spec['secretName'] = 'zookeeper-client-tls'
|
|
self.manage_zk = True
|
|
|
|
zk_spec['tls_ca'] = '/tls/client/ca.crt'
|
|
zk_spec['tls_cert'] = '/tls/client/tls.crt'
|
|
zk_spec['tls_key'] = '/tls/client/tls.key'
|
|
|
|
self.tenant_secret = spec.get('scheduler', {}).\
|
|
get('config', {}).get('secretName')
|
|
|
|
self.spec.setdefault('scheduler', {})['tenant_config'] = \
|
|
'/etc/zuul/tenant/main.yaml'
|
|
|
|
self.spec.setdefault('executor', {}).setdefault('count', 1)
|
|
self.spec.setdefault('executor', {}).setdefault(
|
|
'terminationGracePeriodSeconds', 21600)
|
|
self.spec.setdefault('merger', {}).setdefault('count', 0)
|
|
self.spec.setdefault('web', {}).setdefault('count', 1)
|
|
self.spec.setdefault('fingergw', {}).setdefault('count', 1)
|
|
self.spec.setdefault('preview', {}).setdefault('count', 0)
|
|
registry = self.spec.setdefault('registry', {})
|
|
registry.setdefault('count', 0)
|
|
registry.setdefault('volumeSize', '80Gi')
|
|
registry_tls = registry.setdefault('tls', {})
|
|
self.manage_registry_cert = ('secretName' not in registry_tls)
|
|
registry_tls.setdefault('secretName', 'zuul-registry-tls')
|
|
|
|
self.spec.setdefault('imagePrefix', 'docker.io/zuul')
|
|
self.spec.setdefault('zuulImageVersion', 'latest')
|
|
self.spec.setdefault('zuulPreviewImageVersion', 'latest')
|
|
self.spec.setdefault('zuulRegistryImageVersion', 'latest')
|
|
self.spec.setdefault('nodepoolImageVersion', 'latest')
|
|
|
|
default_env = {
|
|
'KUBECONFIG': '/etc/kubernetes/kube.config'
|
|
}
|
|
env = self.spec.setdefault('env', [])
|
|
for default_key, default_value in default_env.items():
|
|
# Don't allow the user to override our defaults
|
|
for item in env:
|
|
if item.get('name') == default_key:
|
|
env.remove(item)
|
|
break
|
|
# Set our defaults
|
|
env.append({'name': default_key,
|
|
'value': default_value})
|
|
|
|
self.cert_manager = certmanager.CertManager(
|
|
self.api, self.namespace, self.log)
|
|
self.installing_cert_manager = False
|
|
|
|
def install_cert_manager(self):
|
|
if self.cert_manager.is_installed():
|
|
return
|
|
self.installing_cert_manager = True
|
|
self.cert_manager.install()
|
|
|
|
def wait_for_cert_manager(self):
|
|
if not self.installing_cert_manager:
|
|
return
|
|
self.log.info("Waiting for Cert-Manager")
|
|
self.cert_manager.wait_for_webhook()
|
|
|
|
def create_cert_manager_ca(self):
|
|
self.cert_manager.create_ca()
|
|
|
|
def install_zk(self):
|
|
if not self.manage_zk:
|
|
self.log.info("ZK is externally managed")
|
|
return
|
|
self.zk = zookeeper.ZooKeeper(self.api, self.namespace, self.log)
|
|
self.zk.create()
|
|
|
|
def wait_for_zk(self):
|
|
if not self.manage_zk:
|
|
return
|
|
self.log.info("Waiting for ZK cluster")
|
|
self.zk.wait_for_cluster()
|
|
|
|
# A two-part process for PXC so that this can run while other
|
|
# installations are happening.
|
|
def install_db(self):
|
|
if not self.manage_db:
|
|
self.log.info("DB is externally managed")
|
|
return
|
|
|
|
small = self.spec.get('database', {}).get('allowUnsafeConfig', False)
|
|
|
|
self.log.info("DB is internally managed")
|
|
self.pxc = pxc.PXC(self.api, self.namespace, self.log)
|
|
if not self.pxc.is_installed():
|
|
self.log.info("Installing PXC operator")
|
|
self.pxc.create_operator()
|
|
|
|
self.log.info("Creating PXC cluster")
|
|
self.pxc.create_cluster(small)
|
|
|
|
def wait_for_db(self):
|
|
if not self.manage_db:
|
|
return
|
|
self.log.info("Waiting for PXC cluster")
|
|
self.pxc.wait_for_cluster()
|
|
|
|
dburi = self.get_db_uri()
|
|
if not dburi:
|
|
self.log.info("Creating database")
|
|
self.pxc.create_database()
|
|
|
|
def get_db_uri(self):
|
|
try:
|
|
obj = objects.Secret.objects(self.api).\
|
|
filter(namespace=self.namespace).\
|
|
get(name=self.db_secret)
|
|
uri = base64.b64decode(obj.obj['data']['dburi']).decode('utf8')
|
|
return uri
|
|
except pykube.exceptions.ObjectDoesNotExist:
|
|
return None
|
|
|
|
def get_keystore_password(self):
|
|
secret_name = 'zuul-keystore'
|
|
secret_key = 'password'
|
|
try:
|
|
obj = objects.Secret.objects(self.api).\
|
|
filter(namespace=self.namespace).\
|
|
get(name=secret_name)
|
|
pw = base64.b64decode(obj.obj['data'][secret_key]).decode('utf8')
|
|
return pw
|
|
except pykube.exceptions.ObjectDoesNotExist:
|
|
pw = utils.generate_password(512)
|
|
utils.update_secret(self.api, self.namespace, secret_name,
|
|
string_data={secret_key: pw})
|
|
return pw
|
|
|
|
def write_zuul_conf(self):
|
|
dburi = self.get_db_uri()
|
|
self.spec.setdefault('database', {})['dburi'] = dburi
|
|
|
|
for volume in self.spec.get('jobVolumes', []):
|
|
key = f"{volume['context']}_{volume['access']}_paths"
|
|
paths = self.spec['executor'].get(key, '')
|
|
if paths:
|
|
paths += ':'
|
|
paths += volume['path']
|
|
self.spec['executor'][key] = paths
|
|
|
|
connections = self.spec['connections']
|
|
|
|
# Copy in any information from connection secrets
|
|
for connection_name, connection in connections.items():
|
|
if 'secretName' in connection:
|
|
obj = objects.Secret.objects(self.api).\
|
|
filter(namespace=self.namespace).\
|
|
get(name=connection['secretName'])
|
|
for k, v in obj.obj['data'].items():
|
|
if k == 'sshkey':
|
|
v = f'/etc/zuul/connections/{connection_name}/sshkey'
|
|
else:
|
|
v = base64.b64decode(v)
|
|
connection[k] = v
|
|
|
|
kw = {'connections': connections,
|
|
'spec': self.spec,
|
|
'keystore_password': self.get_keystore_password()}
|
|
|
|
env = jinja2.Environment(
|
|
loader=jinja2.PackageLoader('zuul_operator', 'templates'))
|
|
tmpl = env.get_template('zuul.conf')
|
|
text = tmpl.render(**kw)
|
|
|
|
# Create a sha of the zuul.conf so that we can set it as an
|
|
# annotation on objects which should be recreated when it
|
|
# changes.
|
|
m = hashlib.sha256()
|
|
m.update(text.encode('utf8'))
|
|
self.zuul_conf_sha = m.hexdigest()
|
|
|
|
utils.update_secret(self.api, self.namespace, 'zuul-config',
|
|
string_data={'zuul.conf': text})
|
|
|
|
def parse_zk_string(self, hosts):
|
|
if '/' in hosts:
|
|
hosts, chroot = hosts.split('/', 1)
|
|
else:
|
|
chroot = None
|
|
hosts = hosts.split(',')
|
|
ret = []
|
|
for entry in hosts:
|
|
host, port = entry.rsplit(':', 1)
|
|
server = {'host': host,
|
|
'port': port}
|
|
if chroot:
|
|
server['chroot'] = chroot
|
|
ret.append(server)
|
|
return ret
|
|
|
|
def write_nodepool_conf(self):
|
|
self.nodepool_provider_secrets = {}
|
|
# load nodepool config
|
|
|
|
if not self.nodepool_secret:
|
|
self.log.warning("No nodepool config secret found")
|
|
|
|
try:
|
|
obj = objects.Secret.objects(self.api).\
|
|
filter(namespace=self.namespace).\
|
|
get(name=self.nodepool_secret)
|
|
except pykube.exceptions.ObjectDoesNotExist:
|
|
self.log.error("Nodepool config secret not found")
|
|
return None
|
|
|
|
# Shard the config so we can create a deployment + secret for
|
|
# each provider.
|
|
nodepool_yaml = yaml.safe_load(base64.b64decode(
|
|
obj.obj['data']['nodepool.yaml']))
|
|
|
|
nodepool_yaml['zookeeper-servers'] = self.parse_zk_string(
|
|
self.spec['zookeeper']['hosts'])
|
|
nodepool_yaml['zookeeper-tls'] = {
|
|
'cert': '/tls/client/tls.crt',
|
|
'key': '/tls/client/tls.key',
|
|
'ca': '/tls/client/ca.crt',
|
|
}
|
|
for provider in nodepool_yaml['providers']:
|
|
self.log.info("Configuring provider %s", provider.get('name'))
|
|
|
|
secret_name = f"nodepool-config-{self.name}-{provider['name']}"
|
|
|
|
provider_yaml = nodepool_yaml.copy()
|
|
provider_yaml['providers'] = [provider]
|
|
|
|
text = yaml.dump(provider_yaml)
|
|
utils.update_secret(self.api, self.namespace, secret_name,
|
|
string_data={'nodepool.yaml': text})
|
|
self.nodepool_provider_secrets[provider['name']] = secret_name
|
|
|
|
def create_nodepool(self):
|
|
# Create secrets
|
|
self.write_nodepool_conf()
|
|
|
|
# Create providers
|
|
for provider_name, secret_name in\
|
|
self.nodepool_provider_secrets.items():
|
|
kw = {
|
|
'instance_name': self.name,
|
|
'provider_name': provider_name,
|
|
'nodepool_config_secret_name': secret_name,
|
|
'external_config': self.spec.get('externalConfig', {}),
|
|
'spec': self.spec,
|
|
}
|
|
utils.apply_file(self.api, 'nodepool-launcher.yaml',
|
|
namespace=self.namespace, **kw)
|
|
|
|
# Get current providers
|
|
providers = objects.Deployment.objects(self.api).filter(
|
|
namespace=self.namespace,
|
|
selector={'app.kubernetes.io/instance': self.name,
|
|
'app.kubernetes.io/component': 'nodepool-launcher',
|
|
'app.kubernetes.io/name': 'nodepool',
|
|
'app.kubernetes.io/part-of': 'zuul'})
|
|
|
|
new_providers = set(self.nodepool_provider_secrets.keys())
|
|
old_providers = set([x.labels['operator.zuul-ci.org/nodepool-provider']
|
|
for x in providers])
|
|
# delete any unecessary provider deployments and secrets
|
|
for unused_provider in old_providers - new_providers:
|
|
self.log.info("Deleting unused provider %s", unused_provider)
|
|
|
|
deployment_name = "nodepool-launcher-"\
|
|
f"{self.name}-{unused_provider}"
|
|
secret_name = f"nodepool-config-{self.name}-{unused_provider}"
|
|
|
|
try:
|
|
obj = objects.Deployment.objects(self.api).filter(
|
|
namespace=self.namespace).get(deployment_name)
|
|
obj.delete()
|
|
except pykube.exceptions.ObjectDoesNotExist:
|
|
pass
|
|
|
|
try:
|
|
obj = objects.Secret.objects(self.api).filter(
|
|
namespace=self.namespace).get(secret_name)
|
|
obj.delete()
|
|
except pykube.exceptions.ObjectDoesNotExist:
|
|
pass
|
|
|
|
def write_registry_conf(self):
|
|
config_secret = self.spec['registry'].get('config', {}).\
|
|
get('secretName')
|
|
if not config_secret:
|
|
raise kopf.PermanentError("No registry config secret found")
|
|
|
|
try:
|
|
obj = objects.Secret.objects(self.api).\
|
|
filter(namespace=self.namespace).\
|
|
get(name=config_secret)
|
|
except pykube.exceptions.ObjectDoesNotExist:
|
|
raise kopf.TemporaryError("Registry config secret not found")
|
|
|
|
# Shard the config so we can create a deployment + secret for
|
|
# each provider.
|
|
registry_yaml = yaml.safe_load(base64.b64decode(
|
|
obj.obj['data']['registry.yaml']))
|
|
|
|
reg = registry_yaml['registry']
|
|
if 'public-url' not in reg:
|
|
reg['public-url'] = 'https://zuul-registry'
|
|
reg['address'] = '0.0.0.0'
|
|
reg['port'] = 9000
|
|
reg['tls-cert'] = '/tls/tls.crt'
|
|
reg['tls-key'] = '/tls/tls.key'
|
|
reg['secret'] = utils.generate_password(56)
|
|
reg['storage'] = {
|
|
'driver': 'filesystem',
|
|
'root': '/storage',
|
|
}
|
|
|
|
text = yaml.dump(registry_yaml)
|
|
utils.update_secret(self.api, self.namespace,
|
|
'zuul-registry-generated-config',
|
|
string_data={'registry.yaml': text})
|
|
|
|
def create_registry(self):
|
|
self.write_registry_conf()
|
|
kw = {
|
|
'instance_name': self.name,
|
|
'spec': self.spec,
|
|
'manage_registry_cert': self.manage_registry_cert,
|
|
}
|
|
utils.apply_file(self.api, 'zuul-registry.yaml',
|
|
namespace=self.namespace, **kw)
|
|
|
|
def create_zuul(self):
|
|
if self.spec['registry']['count']:
|
|
self.create_registry()
|
|
|
|
kw = {
|
|
'zuul_conf_sha': self.zuul_conf_sha,
|
|
'zuul_tenant_secret': self.tenant_secret,
|
|
'instance_name': self.name,
|
|
'connections': self.spec['connections'],
|
|
'executor_ssh_secret': self.spec['executor'].get(
|
|
'sshkey', {}).get('secretName'),
|
|
'spec': self.spec,
|
|
'manage_zk': self.manage_zk,
|
|
'manage_db': self.manage_db,
|
|
}
|
|
utils.apply_file(self.api, 'zuul.yaml', namespace=self.namespace, **kw)
|
|
self.create_nodepool()
|
|
|
|
def smart_reconfigure(self):
|
|
self.log.info("Smart reconfigure")
|
|
try:
|
|
obj = objects.Secret.objects(self.api).\
|
|
filter(namespace=self.namespace).\
|
|
get(name=self.tenant_secret)
|
|
tenant_config = base64.b64decode(
|
|
obj.obj['data']['main.yaml'])
|
|
except pykube.exceptions.ObjectDoesNotExist:
|
|
self.log.error("Tenant config secret not found")
|
|
return
|
|
|
|
m = hashlib.sha256()
|
|
m.update(tenant_config)
|
|
conf_sha = m.hexdigest()
|
|
|
|
expected = f"{conf_sha} /etc/zuul/tenant/main.yaml"
|
|
|
|
for obj in objects.Pod.objects(self.api).filter(
|
|
namespace=self.namespace,
|
|
selector={'app.kubernetes.io/instance': 'zuul',
|
|
'app.kubernetes.io/component': 'zuul-scheduler',
|
|
'app.kubernetes.io/name': 'zuul'}):
|
|
self.log.info("Waiting for config to update on %s",
|
|
obj.name)
|
|
|
|
delay = 10
|
|
retries = 30
|
|
timeout = delay * retries
|
|
command = [
|
|
'/usr/bin/timeout',
|
|
str(timeout),
|
|
'/bin/sh',
|
|
'-c',
|
|
f'while !( echo -n "{expected}" | sha256sum -c - );'
|
|
f'do sleep {delay}; done'
|
|
]
|
|
resp = utils.pod_exec(self.namespace, obj.name, command)
|
|
self.log.debug("Response: %s", resp)
|
|
|
|
if '/etc/zuul/tenant/main.yaml: OK' in resp:
|
|
self.log.info("Issuing smart-reconfigure on %s", obj.name)
|
|
command = [
|
|
'zuul-scheduler',
|
|
'smart-reconfigure',
|
|
]
|
|
resp = utils.pod_exec(self.namespace, obj.name, command)
|
|
self.log.debug("Response: %s", resp)
|
|
else:
|
|
self.log.error("Tenant config file never updated on %s",
|
|
obj.name)
|