[mariadb] Add cluster wait job
Add a job that waits until the initial bootstrapping of the cluster has completed. This is required to pause database creation and initialization while the cluster is not yet fully bootstrapped. Change-Id: I705df1a1b1a34f464dc36a36dd7964f8a7bf72d9
This commit is contained in:
parent
243289aae3
commit
9e5fea6e18
@ -15,7 +15,7 @@ apiVersion: v1
|
||||
appVersion: v10.6.7
|
||||
description: OpenStack-Helm MariaDB
|
||||
name: mariadb
|
||||
version: 0.2.49
|
||||
version: 0.2.50
|
||||
home: https://mariadb.com/kb/en/
|
||||
icon: http://badges.mariadb.org/mariadb-badge-180x60.png
|
||||
sources:
|
||||
|
190
mariadb/templates/bin/_mariadb-wait-for-cluster.py.tpl
Normal file
190
mariadb/templates/bin/_mariadb-wait-for-cluster.py.tpl
Normal file
@ -0,0 +1,190 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import datetime
|
||||
from enum import Enum
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
import pymysql
|
||||
import pykube
|
||||
|
||||
# --- Environment-driven configuration --------------------------------------

# MariaDB connection parameters (injected by the job's pod spec).
MARIADB_HOST = os.getenv("MARIADB_HOST")
MARIADB_PASSWORD = os.getenv("MARIADB_PASSWORD")
MARIADB_REPLICAS = os.getenv("MARIADB_REPLICAS")

# Verbosity of this script's logger.
MARIADB_CLUSTER_STATE_LOG_LEVEL = os.getenv("MARIADB_CLUSTER_STATE_LOG_LEVEL", "INFO")

# Number of consecutive healthy checks that mark the cluster "stable",
# the pause (seconds) between those healthy checks, and the pause between
# checks while the cluster is still unhealthy.
MARIADB_CLUSTER_STABILITY_COUNT = int(
    os.getenv("MARIADB_CLUSTER_STABILITY_COUNT", "30")
)
MARIADB_CLUSTER_STABILITY_WAIT = int(os.getenv("MARIADB_CLUSTER_STABILITY_WAIT", "4"))
MARIADB_CLUSTER_CHECK_WAIT = int(os.getenv("MARIADB_CLUSTER_CHECK_WAIT", "30"))

# Configmap that persists the cluster bootstrap state, and the pykube
# per-request timeout used when talking to the Kubernetes API.
MARIADB_CLUSTER_STATE_CONFIGMAP = os.getenv("MARIADB_CLUSTER_STATE_CONFIGMAP")
MARIADB_CLUSTER_STATE_CONFIGMAP_NAMESPACE = os.getenv(
    "MARIADB_CLUSTER_STATE_CONFIGMAP_NAMESPACE", "openstack"
)
MARIADB_CLUSTER_STATE_PYKUBE_REQUEST_TIMEOUT = int(
    os.getenv("MARIADB_CLUSTER_STATE_PYKUBE_REQUEST_TIMEOUT", 60)
)

# Log to stdout so messages end up in the pod log.
log_level = MARIADB_CLUSTER_STATE_LOG_LEVEL
logging.basicConfig(
    stream=sys.stdout,
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
LOG = logging.getLogger("mariadb-cluster-wait")
LOG.setLevel(log_level)
|
||||
|
||||
|
||||
def login():
    """Create and return an authenticated pykube HTTP client.

    Credentials are resolved from the environment (service-account token
    or kubeconfig) via ``pykube.KubeConfig.from_env()``.
    """
    kube_config = pykube.KubeConfig.from_env()
    api_client = pykube.HTTPClient(
        config=kube_config,
        timeout=MARIADB_CLUSTER_STATE_PYKUBE_REQUEST_TIMEOUT,
    )
    LOG.info(f"Created k8s api client from context {kube_config.current_context}")
    return api_client
|
||||
|
||||
|
||||
# Module-level Kubernetes API client and a handle to the configmap that
# records the cluster's bootstrap state.
api = login()
_state_query = pykube.ConfigMap.objects(api).filter(
    namespace=MARIADB_CLUSTER_STATE_CONFIGMAP_NAMESPACE
)
cluster_state_map = _state_query.get_by_name(MARIADB_CLUSTER_STATE_CONFIGMAP)
|
||||
|
||||
|
||||
def get_current_state(cluster_state_map):
    """Return the recorded initial-bootstrap state from the configmap data.

    Falls back to "False" when the key has never been written.

    The original referenced an undefined module constant
    (MARIADB_CLUSTER_STATE_INITIAL_BOOTSTRAP_COMPLETED_KEY) and discarded
    the looked-up value; use the literal key (it must stay in sync with
    initalClusterState.initial_state_key) and return the result.
    """
    return cluster_state_map.get("initial-bootstrap-completed.cluster", "False")
|
||||
|
||||
|
||||
def retry(times, exceptions):
    """Decorator: re-invoke the wrapped callable when it raises.

    The callable is attempted up to ``times`` times; each failure matching
    ``exceptions`` is logged and retried.  After the retries are exhausted
    one final call is made whose outcome (return value or exception) is
    propagated unchanged to the caller.
    """
    def decorator(func):
        def newfn(*args, **kwargs):
            for attempt in range(1, times + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    LOG.exception(
                        f"Exception thrown when attempting to run {func}, attempt {attempt} of {times}"
                    )
            # Last chance: let any exception bubble up to the caller.
            return func(*args, **kwargs)
        return newfn
    return decorator
|
||||
|
||||
|
||||
class initalClusterState:
    """Persist and query whether the first Galera bootstrap ever finished.

    State is stored under ``initial_state_key`` in the cluster state
    configmap.  (Class name keeps the original, misspelled identifier so
    existing references continue to work.)
    """

    # Configmap data key marking completion of the initial bootstrap.
    initial_state_key = "initial-bootstrap-completed.cluster"

    @retry(times=100, exceptions=(Exception))
    def __init__(self, api, namespace, name):
        self.namespace = namespace
        self.name = name
        query = pykube.ConfigMap.objects(api).filter(namespace=self.namespace)
        self.cm = query.get_by_name(self.name)

    def get_default(self):
        """Infer the state when the key is absent from the configmap.

        Deployments that predate this job have no key in the configmap even
        though the cluster was bootstrapped long ago.  If the configmap is
        older than one hour, assume the initial bootstrap already completed,
        record that, and report "COMPLETED"; otherwise report
        "NOT_COMPLETED".  This avoids requiring manual intervention on
        updates/restarts of existing environments.
        """
        created = datetime.datetime.strptime(
            self.cm.obj["metadata"]["creationTimestamp"], "%Y-%m-%dT%H:%M:%SZ"
        )
        # creationTimestamp is UTC ("Z" suffix), so compare against naive UTC.
        age = datetime.datetime.utcnow() - created
        if age > datetime.timedelta(seconds=3600):
            self.complete()
            return "COMPLETED"
        return "NOT_COMPLETED"

    @property
    @retry(times=10, exceptions=(Exception))
    def is_completed(self):
        """Current completion state, refreshed from the API server.

        Returns the raw configmap value when the key exists (a non-empty
        string, hence truthy); otherwise a bool derived from get_default().
        """
        self.cm.reload()
        data = self.cm.obj["data"]
        if self.initial_state_key in data:
            return data[self.initial_state_key]
        return self.get_default() == "COMPLETED"

    @retry(times=100, exceptions=(Exception))
    def complete(self):
        """Persist the COMPLETED marker into the configmap."""
        self.cm.patch({"data": {self.initial_state_key: "COMPLETED"}})
|
||||
|
||||
|
||||
# Short-circuit: when a previous run already recorded a completed initial
# bootstrap there is nothing to wait for.
ics = initalClusterState(
    api, MARIADB_CLUSTER_STATE_CONFIGMAP_NAMESPACE, MARIADB_CLUSTER_STATE_CONFIGMAP
)

if ics.is_completed:
    LOG.info("The initial bootstrap was completed, skipping wait...")
    sys.exit(0)

LOG.info("Checking for mariadb cluster state.")
|
||||
|
||||
|
||||
def is_mariadb_stabe():
    """Check that Galera reports a healthy, fully synced primary cluster.

    Queries ``SHOW GLOBAL STATUS`` and compares the wsrep variables against
    the expected healthy values (including the full expected cluster size).
    Returns True only when every variable matches; False on any mismatch or
    error.  (Function name keeps the original, misspelled identifier because
    the polling loop calls it by this name.)
    """
    wsrep_OK = {
        "wsrep_ready": "ON",
        "wsrep_connected": "ON",
        "wsrep_cluster_status": "Primary",
        "wsrep_local_state_comment": "Synced",
        "wsrep_cluster_size": str(MARIADB_REPLICAS),
    }
    try:
        wsrep_vars = ",".join("'" + var + "'" for var in wsrep_OK)
        # Close the connection after use -- the original leaked one
        # connection per poll iteration.
        connection = pymysql.connect(
            host=MARIADB_HOST,
            password=MARIADB_PASSWORD,
            read_default_file="/etc/mysql/admin_user.cnf",
        )
        try:
            with connection.cursor() as cursor:
                cursor.execute(
                    f"SHOW GLOBAL STATUS WHERE Variable_name IN ({wsrep_vars})"
                )
                status = cursor.fetchall()
        finally:
            connection.close()
        diff = set(status).difference(set(wsrep_OK.items()))
        if diff:
            LOG.error(f"The wsrep is not OK: {diff}")
            # Explicit False (the original fell through and returned None).
            return False
        # Typo fix: original logged "wspep".
        LOG.info("The wsrep is ready")
        return True
    except Exception as e:
        LOG.error(f"Got exception while checking state. {e}")
        return False
|
||||
|
||||
|
||||
# Poll until the cluster reports healthy MARIADB_CLUSTER_STABILITY_COUNT
# times in a row, then record completion in the configmap and exit 0.
count = 0
ready = False
stable_for = 1

while True:
    if not is_mariadb_stabe():
        # Any unhealthy observation restarts the stability window.
        LOG.info("Resetting stable_for count.")
        stable_for = 0
        LOG.info(f"Sleeping for {MARIADB_CLUSTER_CHECK_WAIT}")
        time.sleep(MARIADB_CLUSTER_CHECK_WAIT)
        continue

    stable_for += 1
    LOG.info(
        f"The cluster is stable for {stable_for} out of {MARIADB_CLUSTER_STABILITY_COUNT}"
    )
    if stable_for == MARIADB_CLUSTER_STABILITY_COUNT:
        ics.complete()
        sys.exit(0)
    LOG.info(f"Sleeping for {MARIADB_CLUSTER_STABILITY_WAIT}")
    time.sleep(MARIADB_CLUSTER_STABILITY_WAIT)
|
@ -57,4 +57,6 @@ data:
|
||||
mariadb_controller.py: |
|
||||
{{ tuple "bin/_mariadb_controller.py.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
{{- end }}
|
||||
mariadb-wait-for-cluster.py: |
|
||||
{{ tuple "bin/_mariadb-wait-for-cluster.py.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
{{- end }}
|
||||
|
123
mariadb/templates/job-cluster-wait.yaml
Normal file
123
mariadb/templates/job-cluster-wait.yaml
Normal file
@ -0,0 +1,123 @@
|
||||
{{/*
Copyright 2019 Mirantis inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}

{{- if .Values.manifests.job_cluster_wait }}
{{- $envAll := . }}

{{- $serviceAccountName := print .Release.Name "-cluster-wait" }}
{{ tuple $envAll "cluster_wait" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
---
# Role granting the wait job read/write access to configmaps so it can
# record the cluster bootstrap state.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ $envAll.Release.Name }}-{{ $serviceAccountName }}-pod
  namespace: {{ $envAll.Release.Namespace }}
rules:
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - update
      - patch
      - get
      - list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ $envAll.Release.Name }}-{{ $serviceAccountName }}-pod
  namespace: {{ $envAll.Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ $envAll.Release.Name }}-{{ $serviceAccountName }}-pod
subjects:
  - kind: ServiceAccount
    name: {{ $serviceAccountName }}
    namespace: {{ $envAll.Release.Namespace }}
---
# Job that blocks until the Galera cluster is fully bootstrapped and stable.
apiVersion: batch/v1
kind: Job
metadata:
  name: "{{.Release.Name}}-cluster-wait"
  labels:
{{ tuple $envAll "mariadb" "cluster-wait" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
  annotations:
{{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
spec:
  backoffLimit: {{ .Values.jobs.cluster_wait.clusterCheckRetries }}
  template:
    metadata:
      labels:
{{ tuple $envAll "mariadb" "cluster-wait" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
    spec:
{{ dict "envAll" $envAll "application" "cluster_wait" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 6 }}
      serviceAccountName: {{ $serviceAccountName }}
      restartPolicy: OnFailure
      nodeSelector:
        {{ .Values.labels.job.node_selector_key }}: {{ .Values.labels.job.node_selector_value }}
      initContainers:
{{ tuple $envAll "cluster_wait" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
      containers:
        - name: {{.Release.Name}}-mariadb-cluster-wait
{{ tuple $envAll "mariadb_scripted_test" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ dict "envAll" $envAll "application" "cluster_wait" "container" "mariadb_cluster_wait" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
          env:
            - name: MARIADB_HOST
              value: {{ tuple "oslo_db" "internal" $envAll | include "helm-toolkit.endpoints.endpoint_host_lookup" }}
            - name: MARIADB_REPLICAS
              value: {{ .Values.pod.replicas.server | quote }}
            - name: MARIADB_CLUSTER_CHECK_WAIT
              value: {{ .Values.jobs.cluster_wait.clusterCheckWait | quote }}
            - name: MARIADB_CLUSTER_STABILITY_COUNT
              value: {{ .Values.jobs.cluster_wait.clusterStabilityCount | quote }}
            - name: MARIADB_CLUSTER_STABILITY_WAIT
              value: {{ .Values.jobs.cluster_wait.clusterStabilityWait | quote }}
            - name: MARIADB_CLUSTER_STATE_CONFIGMAP
              value: {{ printf "%s-%s" .Release.Name "mariadb-state" | quote }}
            - name: MARIADB_CLUSTER_STATE_CONFIGMAP_NAMESPACE
              value: {{ $envAll.Release.Namespace }}
            - name: MARIADB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: mariadb-dbadmin-password
                  key: MYSQL_DBADMIN_PASSWORD
          command:
            - /tmp/mariadb-wait-for-cluster.py
          volumeMounts:
            - name: pod-tmp
              mountPath: /tmp
            - name: mariadb-bin
              mountPath: /tmp/mariadb-wait-for-cluster.py
              subPath: mariadb-wait-for-cluster.py
              readOnly: true
            - name: mariadb-secrets
              mountPath: /etc/mysql/admin_user.cnf
              subPath: admin_user.cnf
              readOnly: true
      volumes:
        - name: pod-tmp
          emptyDir: {}
        - name: mariadb-bin
          configMap:
            name: mariadb-bin
            defaultMode: 0555
        - name: mariadb-secrets
          secret:
            secretName: mariadb-secrets
            defaultMode: 0444
{{- end }}
|
@ -130,6 +130,16 @@ pod:
|
||||
controller:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
cluster_wait:
|
||||
pod:
|
||||
runAsUser: 65534
|
||||
runAsNonRoot: true
|
||||
container:
|
||||
mariadb_cluster_wait:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
affinity:
|
||||
anti:
|
||||
type:
|
||||
@ -238,6 +248,10 @@ dependencies:
|
||||
service: oslo_db
|
||||
controller:
|
||||
services: null
|
||||
cluster_wait:
|
||||
services:
|
||||
- endpoint: internal
|
||||
service: oslo_db
|
||||
volume:
|
||||
# this value is used for single pod deployments of mariadb to prevent losing all data
|
||||
# if the pod is restarted
|
||||
@ -254,6 +268,11 @@ volume:
|
||||
size: 5Gi
|
||||
|
||||
jobs:
|
||||
cluster_wait:
|
||||
clusterCheckWait: 30
|
||||
clusterCheckRetries: 30
|
||||
clusterStabilityCount: 30
|
||||
clusterStabilityWait: 4
|
||||
exporter_create_sql_user:
|
||||
backoffLimit: 87600
|
||||
activeDeadlineSeconds: 3600
|
||||
@ -672,4 +691,5 @@ manifests:
|
||||
statefulset: true
|
||||
deployment_controller: true
|
||||
service_master: true
|
||||
job_cluster_wait: false
|
||||
...
|
||||
|
@ -65,4 +65,5 @@ mariadb:
|
||||
- 0.2.47 Deploy exporter as sidecar
|
||||
- 0.2.48 Switch to mariadb controller deployment
|
||||
- 0.2.49 Remove ingress deployment
|
||||
- 0.2.50 Add cluster-wait job
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user