From 8193542c98b27a030db859fa19cf1ae2dc614996 Mon Sep 17 00:00:00 2001
From: Steve Wilkerson
Date: Mon, 1 Apr 2019 13:46:39 -0500
Subject: [PATCH] Elasticsearch: Add job to wait for Elasticsearch cluster formation

This adds a job that will query the Elasticsearch HTTP cat API to
determine whether the desired number of nodes have been discovered
via the Zen discovery mechanism to be included in the cluster. This
aims to address issues seen when upgrading Elasticsearch, where the
snapshot repository job may trigger due to endpoints from older pods
being present. This new job will be the dependency required by the
snapshot repository job to ensure the ES cluster has the desired
number of nodes before attempting to register a snapshot repository
or interact with the cluster.

Change-Id: I94fbbfdec7ca66d04acca9558e56dca3b2bc7d52
---
 .../templates/bin/_es-cluster-wait.sh.tpl     |  99 ++++++++++++++++
 elasticsearch/templates/configmap-bin.yaml    |   2 +
 .../templates/job-es-cluster-wait.yaml        |  72 ++++++++++++
 elasticsearch/values.yaml                     |  30 ++---
 rabbitmq/templates/job-cluster-wait.yaml      |   2 +-
 5 files changed, 190 insertions(+), 15 deletions(-)
 create mode 100644 elasticsearch/templates/bin/_es-cluster-wait.sh.tpl
 create mode 100644 elasticsearch/templates/job-es-cluster-wait.yaml

diff --git a/elasticsearch/templates/bin/_es-cluster-wait.sh.tpl b/elasticsearch/templates/bin/_es-cluster-wait.sh.tpl
new file mode 100644
index 000000000..edf6eebf0
--- /dev/null
+++ b/elasticsearch/templates/bin/_es-cluster-wait.sh.tpl
@@ -0,0 +1,99 @@
+#!/bin/bash
+{{/*
+Copyright 2019 The Openstack-Helm Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+# Blocks until the Elasticsearch cluster lists the expected number of data,
+# client and master nodes, then prints the cluster health.
+#
+# Environment read by this script:
+#   ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD - credentials given to curl
+#   ELASTICSEARCH_HOST - base URL of the Elasticsearch HTTP API
+
+# Prints the names of the nodes returned by the _cat/nodes API whose name
+# matches the regex "elasticsearch-<role>.".
+#   $1 - node role (master, data or client)
+function list_nodes() {
+  local role=$1
+  # The credentials reach curl as a config file on stdin (-K-) rather than as
+  # a command line argument.
+  curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
+    "${ELASTICSEARCH_HOST}/_cat/nodes?format=json&pretty" \
+    | jq -r --arg role "$role" \
+      '.[] | select(.name|test("elasticsearch-" + $role + ".")) | .name'
+}
+
+# Polls every 10 seconds until the number of nodes found for the given role
+# equals the expected count.
+#   $1 - node role (master, data or client)
+#   $2 - expected number of nodes
+function wait_for_nodes() {
+  local role=$1
+  local expectedNodes=$2
+  local currentNodes=""
+  local numNodes=0
+  while [ "$numNodes" -ne "$expectedNodes" ]
+  do
+    currentNodes=$(list_nodes "$role")
+    numNodes=$(echo $currentNodes | wc -w)
+    if [ "$numNodes" -ne "$expectedNodes" ]
+    then
+      if [ "$numNodes" -eq 0 ]
+      then
+        echo "No Elasticsearch ${role} nodes accounted for: 0/${expectedNodes}"
+      else
+        echo "Not all Elasticsearch ${role} nodes accounted for and ready: (${numNodes} / ${expectedNodes})"
+        echo "$currentNodes"
+      fi
+      # Sleep after every unsuccessful check, including the zero-node case, so
+      # the API is never polled in a tight loop.
+      echo "Sleeping for 10 seconds before next check"
+      echo ""
+      sleep 10
+    fi
+  done
+  echo "All Elasticsearch ${role} nodes accounted for and ready: (${numNodes} / ${expectedNodes})"
+  echo "$currentNodes"
+  echo ""
+}
+
+# Waits for the configured number of master nodes (pod.replicas.master).
+function check_master_nodes() {
+  wait_for_nodes master {{ .Values.pod.replicas.master | int64 }}
+}
+
+# Waits for the configured number of data nodes (pod.replicas.data).
+function check_data_nodes() {
+  wait_for_nodes data {{ .Values.pod.replicas.data | int64 }}
+}
+
+# Waits for the configured number of client nodes (pod.replicas.client).
+function check_client_nodes() {
+  wait_for_nodes client {{ .Values.pod.replicas.client | int64 }}
+}
+
+# Prints the response of the _cat/health API.
+function check_cluster_health() {
+  clusterHealth=$(curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
+    "${ELASTICSEARCH_HOST}/_cat/health?format=json&pretty")
+  echo "Elasticsearch cluster health is:"
+  echo "$clusterHealth"
+}
+
+sleep 10
+check_data_nodes
+check_client_nodes
+check_master_nodes
+check_cluster_health
diff --git a/elasticsearch/templates/configmap-bin.yaml b/elasticsearch/templates/configmap-bin.yaml
index 543e9461f..99c9c6366 100644
--- a/elasticsearch/templates/configmap-bin.yaml
+++ b/elasticsearch/templates/configmap-bin.yaml
@@ -38,6 +38,8 @@ data:
 {{ tuple "bin/_register-repository.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
   curator.sh: |
 {{ tuple "bin/_curator.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
+  es-cluster-wait.sh: |
+{{ tuple "bin/_es-cluster-wait.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
   image-repo-sync.sh: |
 {{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }}
 {{- end }}
diff --git a/elasticsearch/templates/job-es-cluster-wait.yaml b/elasticsearch/templates/job-es-cluster-wait.yaml
new file mode 100644
index 000000000..e9e8a47c1
--- /dev/null
+++ b/elasticsearch/templates/job-es-cluster-wait.yaml
@@ -0,0 +1,72 @@
+{{/*
+Copyright 2017 The Openstack-Helm Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+{{- if .Values.manifests.job_cluster_wait }}
+{{- $envAll := . }}
+
+{{- $esUserSecret := .Values.secrets.elasticsearch.user }}
+
+{{- $serviceAccountName := "elasticsearch-cluster-wait" }}
+{{ tuple $envAll "es_cluster_wait" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: elasticsearch-cluster-wait
+  annotations:
+    {{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
+spec:
+  template:
+    metadata:
+      labels:
+{{ tuple $envAll "elasticsearch" "es_cluster_wait" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
+    spec:
+      serviceAccountName: {{ $serviceAccountName }}
+      restartPolicy: OnFailure
+      nodeSelector:
+        {{ .Values.labels.job.node_selector_key }}: {{ .Values.labels.job.node_selector_value | quote }}
+      initContainers:
+{{ tuple $envAll "es_cluster_wait" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
+      containers:
+        - name: elasticsearch-cluster-wait
+{{ tuple $envAll "es_cluster_wait" | include "helm-toolkit.snippets.image" | indent 10 }}
+{{ tuple $envAll $envAll.Values.pod.resources.jobs.es_cluster_wait | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
+          env:
+            - name: ELASTICSEARCH_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: {{ $esUserSecret }}
+                  key: ELASTICSEARCH_USERNAME
+            - name: ELASTICSEARCH_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: {{ $esUserSecret }}
+                  key: ELASTICSEARCH_PASSWORD
+            - name: ELASTICSEARCH_HOST
+              value: {{ tuple "elasticsearch" "internal" "http" . | include "helm-toolkit.endpoints.host_and_port_endpoint_uri_lookup" }}
+          command:
+            - /tmp/es-cluster-wait.sh
+          volumeMounts:
+            - name: elasticsearch-bin
+              mountPath: /tmp/es-cluster-wait.sh
+              subPath: es-cluster-wait.sh
+              readOnly: true
+      volumes:
+        - name: elasticsearch-bin
+          configMap:
+            name: elasticsearch-bin
+            defaultMode: 0555
+{{- end }}
diff --git a/elasticsearch/values.yaml b/elasticsearch/values.yaml
index 55e3585b0..7a77adacb 100644
--- a/elasticsearch/values.yaml
+++ b/elasticsearch/values.yaml
@@ -29,6 +29,7 @@ images:
     prometheus_elasticsearch_exporter: docker.io/justwatch/elasticsearch_exporter:1.0.1
     dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.1
     snapshot_repository: docker.io/port/ceph-config-helper:v1.10.3
+    es_cluster_wait: docker.io/port/ceph-config-helper:v1.10.3
     image_repo_sync: docker.io/docker:17.07.0
   pull_policy: "IfNotPresent"
   local_registry:
@@ -77,6 +78,10 @@ dependencies:
     elasticsearch_master:
       services: null
       jobs: null
+    es_cluster_wait:
+      services:
+        - endpoint: internal
+          service: elasticsearch
     image_repo_sync:
       services:
         - endpoint: internal
@@ -86,15 +91,10 @@ dependencies:
         - endpoint: internal
           service: elasticsearch
     snapshot_repository:
-      services:
-        - endpoint: internal
-          service: elasticsearch
-        - endpoint: data
-          service: elasticsearch
-        - endpoint: discovery
-          service: elasticsearch
+      services: null
       jobs:
         - elasticsearch-s3-bucket
+        - elasticsearch-cluster-wait
     s3_user:
       services:
         - endpoint: internal
@@ -103,13 +103,7 @@ dependencies:
       jobs:
         - elasticsearch-s3-user
     tests:
-      services:
-        - endpoint: internal
-          service: elasticsearch
-        - endpoint: data
-          service: elasticsearch
-        - endpoint: discovery
-          service: elasticsearch
+      services: null
       jobs:
         - elasticsearch-register-snapshot-repository
 
@@ -214,6 +208,13 @@ pod:
         limits:
           memory: "1024Mi"
           cpu: "2000m"
+      es_cluster_wait:
+        requests:
+          memory: "128Mi"
+          cpu: "100m"
+        limits:
+          memory: "1024Mi"
+          cpu: "2000m"
       storage_init:
         requests:
           memory: "128Mi"
@@ -685,6 +686,7 @@ manifests:
   deployment_client: true
   deployment_master: true
   ingress: true
+  job_cluster_wait: true
   job_image_repo_sync: true
   job_snapshot_repository: true
   job_s3_user: true
diff --git a/rabbitmq/templates/job-cluster-wait.yaml b/rabbitmq/templates/job-cluster-wait.yaml
index fe91b2d68..8f77f6692 100644
--- a/rabbitmq/templates/job-cluster-wait.yaml
+++ b/rabbitmq/templates/job-cluster-wait.yaml
@@ -1,5 +1,5 @@
 {{/*
-Copyright 2017 The Openstack-Helm Authors.
+Copyright 2019 The Openstack-Helm Authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.