diff --git a/ceph/templates/bin/_check_zombie_mons.py.tpl b/ceph/templates/bin/_check_zombie_mons.py.tpl
new file mode 100644
index 0000000000..55e801eb29
--- /dev/null
+++ b/ceph/templates/bin/_check_zombie_mons.py.tpl
@@ -0,0 +1,69 @@
+#!/usr/bin/python2
+"""Detect and remove zombie ceph monitors.
+
+Compares the monitors listed in the ceph monmap with the mon pods reported by
+kubectl. A monitor without a matching entry is removed from the cluster with
+"ceph mon remove"; a matching entry whose IP differs gets its pod deleted.
+"""
+import re
+import os
+import subprocess
+import json
+
+# Matches a monmap line shaped like "0: 10.0.0.1:6789/0 mon.a";
+# group(1) is the IP address and group(2) the monitor name.
+MON_REGEX = r"^\d+: ([0-9\.]*):\d+/\d* mon\.([^ ]*)$"
+# The kubectl template prints a JSON object. The "if $i" guard emits the comma only
+# between entries, so no trailing comma is produced for json.loads to reject.
+# Keys are node names when K8S_HOST_NETWORK is greater than 0, pod names otherwise.
+# "-l" takes a single comma-separated selector; a repeated flag replaces the earlier one.
+if int(os.getenv('K8S_HOST_NETWORK', 0)) > 0:
+    kubectl_command = 'kubectl get pods --namespace=${CLUSTER} -l component=mon,application=ceph -o template --template="{ {{"{{"}}range \$i, \$v := .items{{"}}"}} {{"{{"}} if \$i{{"}}"}} , {{"{{"}} end {{"}}"}} \\"{{"{{"}}\$v.spec.nodeName{{"}}"}}\\": \\"{{"{{"}}\$v.status.podIP{{"}}"}}\\" {{"{{"}}end{{"}}"}} }"'
+else:
+    kubectl_command = 'kubectl get pods --namespace=${CLUSTER} -l component=mon,application=ceph -o template --template="{ {{"{{"}}range \$i, \$v := .items{{"}}"}} {{"{{"}} if \$i{{"}}"}} , {{"{{"}} end {{"}}"}} \\"{{"{{"}}\$v.metadata.name{{"}}"}}\\": \\"{{"{{"}}\$v.status.podIP{{"}}"}}\\" {{"{{"}}end{{"}}"}} }"'
+
+monmap_command = "ceph --cluster=${CLUSTER} mon getmap > /tmp/monmap && monmaptool -f /tmp/monmap --print"
+
+
+def extract_mons_from_monmap():
+    """Return a dict of monitor name -> IP parsed from the monmaptool output."""
+    monmap = subprocess.check_output(monmap_command, shell=True)
+    mons = {}
+    for line in monmap.split("\n"):
+        m = re.match(MON_REGEX, line)
+        if m is not None:
+            mons[m.group(2)] = m.group(1)
+    return mons
+
+
+def extract_mons_from_kubeapi():
+    """Return the JSON object printed by kubectl_command, decoded into a dict."""
+    kubemap = subprocess.check_output(kubectl_command, shell=True)
+    return json.loads(kubemap)
+
+
+current_mons = extract_mons_from_monmap()
+expected_mons = extract_mons_from_kubeapi()
+
+print "current mons:", current_mons
+print "expected mons:", expected_mons
+
+# Initialised once, before the loop: resetting it on every iteration made the summary
+# reflect only the last monitor and left the name undefined for an empty monmap.
+removed_mon = False
+for mon in current_mons:
+    if mon not in expected_mons:
+        print "removing zombie mon ", mon
+        subprocess.call(["ceph", "--cluster", os.environ["CLUSTER"], "mon", "remove", mon])
+        removed_mon = True
+    elif current_mons[mon] != expected_mons[mon]:  # check if for some reason the ip of the mon changed
+        # NOTE(review): with K8S_HOST_NETWORK the keys are node names, so a pod with
+        # this name may not exist - confirm the delete works in that mode.
+        print "ip change dedected for pod ", mon
+        subprocess.call(["kubectl", "--namespace", os.environ["CLUSTER"], "delete", "pod", mon])
+        removed_mon = True
+        print "deleted mon %s via the kubernetes api" % mon
+
+
+if not removed_mon:
+    print "no zombie mons found ..."
diff --git a/ceph/templates/configmap-bin.yaml b/ceph/templates/configmap-bin.yaml
index 7ae4de1b4a..0cb339cb4e 100644
--- a/ceph/templates/configmap-bin.yaml
+++ b/ceph/templates/configmap-bin.yaml
@@ -68,3 +68,5 @@ data:
 {{ tuple "bin/_watch_mon_health.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
   variables_entrypoint.sh: |
 {{ tuple "bin/_variables_entrypoint.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
+  check_zombie_mons.py: |
+{{ tuple "bin/_check_zombie_mons.py.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
diff --git a/ceph/templates/deployment-moncheck.yaml b/ceph/templates/deployment-moncheck.yaml
index df3d17efdc..2d4e46441b 100644
--- a/ceph/templates/deployment-moncheck.yaml
+++ b/ceph/templates/deployment-moncheck.yaml
@@ -89,6 +89,10 @@ spec:
               mountPath: /var/lib/ceph/bootstrap-rgw/ceph.keyring
               subPath: ceph.keyring
               readOnly: false
+            - name: ceph-bin
+              mountPath: /check_zombie_mons.py
+              subPath: check_zombie_mons.py
+              readOnly: true
       volumes:
         - name: ceph-etc
           configMap: