Elasticsearch: Add job to wait for Elasticsearch cluster formation

This adds a job that will query the Elasticsearch HTTP cat API to
determine whether the desired number of nodes have been discovered
via the Zen discovery mechanism to be included in the cluster.
This aims to address issues seen when upgrading Elasticsearch,
where the snapshot repository job may trigger due to endpoints
from older pods being present. This new job will be the dependency
required by the snapshot repository job to ensure the ES cluster
has the desired number of nodes before attempting to register a
snapshot repository or interact with the cluster.

Change-Id: I94fbbfdec7ca66d04acca9558e56dca3b2bc7d52
This commit is contained in:
Steve Wilkerson 2019-04-01 13:46:39 -05:00
parent 8673bdda53
commit 8193542c98
5 changed files with 201 additions and 15 deletions

View File

@ -0,0 +1,110 @@
#!/bin/bash
{{/*
Copyright 2019 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
function check_master_nodes() {
  # Poll the _cat/nodes API until the expected number of master nodes
  # (pod.replicas.master from values) appear in the cluster, retrying
  # every 10 seconds. Blocks until the count matches.
  numMasterNodes=0
  expectedMasterNodes={{ .Values.pod.replicas.master | int64 }}
  while [ "$numMasterNodes" -ne "$expectedMasterNodes" ]
  do
    # Credentials are fed to curl via a stdin config (-K-) so they do not
    # appear in the process list.
    currentMasterNodes=$(curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
    "${ELASTICSEARCH_HOST}/_cat/nodes?format=json&pretty" | jq -r '.[] | select(.name|test("elasticsearch-master.")) | .name')
    # Unquoted on purpose: wc -w counts the whitespace-separated node names.
    numMasterNodes=$(echo $currentMasterNodes | wc -w)
    if [ "$numMasterNodes" -ne "$expectedMasterNodes" ]
    then
      if [ "$numMasterNodes" -eq 0 ]
      then
        # Fixed: message previously said "data nodes" and "account for".
        echo "No Elasticsearch master nodes accounted for: 0/${expectedMasterNodes}"
      else
        echo "Not all Elasticsearch master nodes accounted for and ready: (${numMasterNodes} / ${expectedMasterNodes})"
        echo "$currentMasterNodes"
      fi
      # Sleep on every retry (the original skipped the sleep when zero
      # nodes were found, busy-looping against the ES endpoint).
      echo "Sleeping for 10 seconds before next check"
      echo ""
      sleep 10
    fi
  done
  echo "All Elasticsearch master nodes accounted for and ready: (${numMasterNodes} / ${expectedMasterNodes})"
  echo "$currentMasterNodes"
  echo ""
}
function check_data_nodes() {
  # Poll the _cat/nodes API until the expected number of data nodes
  # (pod.replicas.data from values) appear in the cluster, retrying
  # every 10 seconds. Blocks until the count matches.
  numDataNodes=0
  expectedDataNodes={{ .Values.pod.replicas.data | int64 }}
  while [ "$numDataNodes" -ne "$expectedDataNodes" ]
  do
    # Credentials are fed to curl via a stdin config (-K-) so they do not
    # appear in the process list.
    currentDataNodes=$(curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
    "${ELASTICSEARCH_HOST}/_cat/nodes?format=json&pretty" | jq -r '.[] | select(.name|test("elasticsearch-data.")) | .name')
    # Unquoted on purpose: wc -w counts the whitespace-separated node names.
    numDataNodes=$(echo $currentDataNodes | wc -w)
    if [ "$numDataNodes" -ne "$expectedDataNodes" ]
    then
      if [ "$numDataNodes" -eq 0 ]
      then
        echo "No Elasticsearch data nodes accounted for: 0/${expectedDataNodes}"
      else
        echo "Not all Elasticsearch data nodes accounted for and ready: (${numDataNodes} / ${expectedDataNodes})"
        echo "$currentDataNodes"
      fi
      # Sleep on every retry (the original skipped the sleep when zero
      # nodes were found, busy-looping against the ES endpoint).
      echo "Sleeping for 10 seconds before next check"
      echo ""
      sleep 10
    fi
  done
  echo "All Elasticsearch data nodes accounted for and ready: (${numDataNodes} / ${expectedDataNodes})"
  echo "$currentDataNodes"
  echo ""
}
function check_client_nodes() {
  # Poll the _cat/nodes API until the expected number of client nodes
  # (pod.replicas.client from values) appear in the cluster, retrying
  # every 10 seconds. Blocks until the count matches.
  numClientNodes=0
  expectedClientNodes={{ .Values.pod.replicas.client | int64 }}
  while [ "$numClientNodes" -ne "$expectedClientNodes" ]
  do
    # Credentials are fed to curl via a stdin config (-K-) so they do not
    # appear in the process list.
    currentClientNodes=$(curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
    "${ELASTICSEARCH_HOST}/_cat/nodes?format=json&pretty" | jq -r '.[] | select(.name|test("elasticsearch-client.")) | .name')
    # Unquoted on purpose: wc -w counts the whitespace-separated node names.
    numClientNodes=$(echo $currentClientNodes | wc -w)
    if [ "$numClientNodes" -ne "$expectedClientNodes" ]
    then
      if [ "$numClientNodes" -eq 0 ]
      then
        # Fixed: message previously said "data nodes" and "account for".
        echo "No Elasticsearch client nodes accounted for: 0/${expectedClientNodes}"
      else
        echo "Not all Elasticsearch client nodes accounted for and ready: (${numClientNodes} / ${expectedClientNodes})"
        echo "$currentClientNodes"
      fi
      # Sleep on every retry (the original skipped the sleep when zero
      # nodes were found, busy-looping against the ES endpoint).
      echo "Sleeping for 10 seconds before next check"
      echo ""
      sleep 10
    fi
  done
  echo "All Elasticsearch client nodes accounted for and ready: (${numClientNodes} / ${expectedClientNodes})"
  echo "$currentClientNodes"
  echo ""
}
function check_cluster_health() {
  # Fetch and print the cluster health summary from the _cat/health API.
  # Credentials go to curl via stdin config (-K-) to stay out of ps output.
  clusterHealth=$(curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
  "${ELASTICSEARCH_HOST}/_cat/health?format=json&pretty")
  printf '%s\n' "Elasticsearch cluster health is:" "$clusterHealth"
}
# Give service endpoints a moment to settle, then verify each node role has
# its full complement before reporting overall cluster health.
sleep 10
for clusterCheck in check_data_nodes check_client_nodes check_master_nodes check_cluster_health; do
  "$clusterCheck"
done

View File

@ -38,6 +38,8 @@ data:
{{ tuple "bin/_register-repository.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
curator.sh: |
{{ tuple "bin/_curator.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
es-cluster-wait.sh: |
{{ tuple "bin/_es-cluster-wait.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
image-repo-sync.sh: |
{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }}
{{- end }}

View File

@ -0,0 +1,72 @@
{{/*
Copyright 2019 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{/*
Job that runs es-cluster-wait.sh (mounted from the elasticsearch-bin
configmap) to block until the Elasticsearch cluster has formed with the
desired number of nodes. Other jobs (e.g. the snapshot repository job)
list this job as a dependency. Rendered only when
manifests.job_cluster_wait is enabled.
*/}}
{{- if .Values.manifests.job_cluster_wait }}
{{- $envAll := . }}
{{/* Secret holding the ELASTICSEARCH_USERNAME/PASSWORD credentials. */}}
{{- $esUserSecret := .Values.secrets.elasticsearch.user }}
{{- $serviceAccountName := "elasticsearch-cluster-wait" }}
{{ tuple $envAll "es_cluster_wait" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
---
apiVersion: batch/v1
kind: Job
metadata:
name: elasticsearch-cluster-wait
annotations:
{{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
spec:
template:
metadata:
labels:
{{ tuple $envAll "elasticsearch" "es_cluster_wait" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
spec:
serviceAccountName: {{ $serviceAccountName }}
restartPolicy: OnFailure
nodeSelector:
{{ .Values.labels.job.node_selector_key }}: {{ .Values.labels.job.node_selector_value | quote }}
initContainers:
{{ tuple $envAll "es_cluster_wait" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
containers:
- name: elasticsearch-cluster-wait
{{ tuple $envAll "es_cluster_wait" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.jobs.es_cluster_wait | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
{{/* Credentials and endpoint consumed by es-cluster-wait.sh. */}}
env:
- name: ELASTICSEARCH_USERNAME
valueFrom:
secretKeyRef:
name: {{ $esUserSecret }}
key: ELASTICSEARCH_USERNAME
- name: ELASTICSEARCH_PASSWORD
valueFrom:
secretKeyRef:
name: {{ $esUserSecret }}
key: ELASTICSEARCH_PASSWORD
- name: ELASTICSEARCH_HOST
value: {{ tuple "elasticsearch" "internal" "http" . | include "helm-toolkit.endpoints.host_and_port_endpoint_uri_lookup" }}
command:
- /tmp/es-cluster-wait.sh
volumeMounts:
- name: elasticsearch-bin
mountPath: /tmp/es-cluster-wait.sh
subPath: es-cluster-wait.sh
readOnly: true
volumes:
- name: elasticsearch-bin
configMap:
name: elasticsearch-bin
defaultMode: 0555
{{- end }}

View File

@ -29,6 +29,7 @@ images:
prometheus_elasticsearch_exporter: docker.io/justwatch/elasticsearch_exporter:1.0.1
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.1
snapshot_repository: docker.io/port/ceph-config-helper:v1.10.3
es_cluster_wait: docker.io/port/ceph-config-helper:v1.10.3
image_repo_sync: docker.io/docker:17.07.0
pull_policy: "IfNotPresent"
local_registry:
@ -77,6 +78,10 @@ dependencies:
elasticsearch_master:
services: null
jobs: null
es_cluster_wait:
services:
- endpoint: internal
service: elasticsearch
image_repo_sync:
services:
- endpoint: internal
@ -86,15 +91,10 @@ dependencies:
- endpoint: internal
service: elasticsearch
snapshot_repository:
services:
- endpoint: internal
service: elasticsearch
- endpoint: data
service: elasticsearch
- endpoint: discovery
service: elasticsearch
services: null
jobs:
- elasticsearch-s3-bucket
- elasticsearch-cluster-wait
s3_user:
services:
- endpoint: internal
@ -103,13 +103,7 @@ dependencies:
jobs:
- elasticsearch-s3-user
tests:
services:
- endpoint: internal
service: elasticsearch
- endpoint: data
service: elasticsearch
- endpoint: discovery
service: elasticsearch
services: null
jobs:
- elasticsearch-register-snapshot-repository
@ -214,6 +208,13 @@ pod:
limits:
memory: "1024Mi"
cpu: "2000m"
es_cluster_wait:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
storage_init:
requests:
memory: "128Mi"
@ -685,6 +686,7 @@ manifests:
deployment_client: true
deployment_master: true
ingress: true
job_cluster_wait: true
job_image_repo_sync: true
job_snapshot_repository: true
job_s3_user: true

View File

@ -1,5 +1,5 @@
{{/*
Copyright 2017 The Openstack-Helm Authors.
Copyright 2019 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.