Merge "Prevent mariadb from split brain while cluster is in reboot state"
This commit is contained in:
commit
6dd39da6ad
@ -15,7 +15,7 @@ apiVersion: v1
|
||||
appVersion: v10.2.31
|
||||
description: OpenStack-Helm MariaDB
|
||||
name: mariadb
|
||||
version: 0.2.0
|
||||
version: 0.2.1
|
||||
home: https://mariadb.com/kb/en/
|
||||
icon: http://badges.mariadb.org/mariadb-badge-180x60.png
|
||||
sources:
|
||||
|
@ -17,6 +17,7 @@ limitations under the License.
|
||||
import errno
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import select
|
||||
import signal
|
||||
import subprocess # nosec
|
||||
@ -58,6 +59,8 @@ kubernetes_version = kubernetes.client.VersionApi().get_code().git_version
|
||||
logger.info("Kubernetes API Version: {0}".format(kubernetes_version))
|
||||
k8s_api_instance = kubernetes.client.CoreV1Api()
|
||||
|
||||
# Setup secrets generator
|
||||
secretsGen = secrets.SystemRandom()
|
||||
|
||||
def check_env_var(env_var):
|
||||
"""Check if an env var exists.
|
||||
@ -325,26 +328,33 @@ def safe_update_configmap(configmap_dict, configmap_patch):
|
||||
# ensure nothing else has modified the configmap since we read it.
|
||||
configmap_patch['metadata']['resourceVersion'] = configmap_dict[
|
||||
'metadata']['resource_version']
|
||||
try:
|
||||
api_response = k8s_api_instance.patch_namespaced_config_map(
|
||||
name=state_configmap_name,
|
||||
namespace=pod_namespace,
|
||||
body=configmap_patch)
|
||||
return True
|
||||
except kubernetes.client.rest.ApiException as error:
|
||||
if error.status == 409:
|
||||
# This status code indicates a collision trying to write to the
|
||||
# config map while another instance is also trying the same.
|
||||
logger.warning("Collision writing configmap: {0}".format(error))
|
||||
# This often happens when the replicas were started at the same
|
||||
# time, and tends to be persistent. Sleep briefly to break the
|
||||
# synchronization.
|
||||
time.sleep(1)
|
||||
return True
|
||||
else:
|
||||
logger.error("Failed to set configmap: {0}".format(error))
|
||||
return error
|
||||
|
||||
# Retry up to 8 times in case of 409 only. Each retry has a ~1 second
|
||||
# sleep in between, so we do not want to exceed the roughly 10 second
|
||||
# write interval per cm update.
|
||||
for i in range(8):
|
||||
try:
|
||||
api_response = k8s_api_instance.patch_namespaced_config_map(
|
||||
name=state_configmap_name,
|
||||
namespace=pod_namespace,
|
||||
body=configmap_patch)
|
||||
return True
|
||||
except kubernetes.client.rest.ApiException as error:
|
||||
if error.status == 409:
|
||||
# This status code indicates a collision trying to write to the
|
||||
# config map while another instance is also trying the same.
|
||||
logger.warning("Collision writing configmap: {0}".format(error))
|
||||
# This often happens when the replicas were started at the same
|
||||
# time, and tends to be persistent. Sleep with some random
|
||||
# jitter value briefly to break the synchronization.
|
||||
naptime = secretsGen.uniform(0.8,1.2)
|
||||
time.sleep(naptime)
|
||||
else:
|
||||
logger.error("Failed to set configmap: {0}".format(error))
|
||||
return error
|
||||
logger.info("Retry writing configmap attempt={0} sleep={1}".format(
|
||||
i+1, naptime))
|
||||
return True
|
||||
|
||||
def set_configmap_annotation(key, value):
|
||||
"""Update a configmap's annotations via patching.
|
||||
@ -843,6 +853,14 @@ def run_mysqld(cluster='existing'):
|
||||
"This is a fresh node joining the cluster for the 1st time, not attempting to set admin passwords"
|
||||
)
|
||||
|
||||
# Node ready to start MariaDB, update cluster state to live and remove
|
||||
# reboot node info, if set previously.
|
||||
if cluster == 'new':
|
||||
set_configmap_annotation(
|
||||
key='openstackhelm.openstack.org/cluster.state', value='live')
|
||||
set_configmap_annotation(
|
||||
key='openstackhelm.openstack.org/reboot.node', value='')
|
||||
|
||||
logger.info("Launching MariaDB")
|
||||
run_cmd_with_logging(mysqld_cmd, logger)
|
||||
|
||||
|
@ -16,4 +16,5 @@ mariadb:
|
||||
- 0.1.13 Fix race condition for grastate.dat
|
||||
- 0.1.14 Update mysqld-exporter image to v0.12.1
|
||||
- 0.2.0 Uplift mariadb version and ubuntu release
|
||||
- 0.2.1 Prevent potential splitbrain issue if cluster is in reboot state
|
||||
...
|
||||
|
Loading…
x
Reference in New Issue
Block a user