Elasticsearch - Cluster Wait Function Improvements

This change modifies the cluster wait function to
check the cluster health status explicitly.

Once a status of at least "yellow" has been reached,
the Elasticsearch cluster should be able to facilitate
the API calls required by the other jobs of this chart.

Change-Id: I2660422a8e8122186d648042f5422ca9a82d23c7
This commit is contained in:
Steven Fitzpatrick 2020-07-10 14:31:22 -05:00
parent 2b4cf6a2d9
commit 57b1f3905b
3 changed files with 10 additions and 95 deletions

View File

@ -13,96 +13,16 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
#######################################
# Block until the number of nodes listed by _cat/nodes whose name matches
# the regex "elasticsearch-master." equals the expected master replica count
# (rendered by Helm from .Values.pod.replicas.master).
# Globals:   ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD,
#            ELASTICSEARCH_HOST (read)
# Arguments: none
# Outputs:   progress messages and the matched node names on stdout
# Note:      polls every 10 seconds with no retry limit of its own.
#######################################
function check_master_nodes() {
  local numMasterNodes=0
  local expectedMasterNodes={{ .Values.pod.replicas.master | int64 }}
  local currentMasterNodes=""
  while [ "$numMasterNodes" -ne "$expectedMasterNodes" ]
  do
    # Credentials are handed to curl as a config read from stdin (-K-);
    # -s keeps the transfer progress meter out of the log on every poll.
    currentMasterNodes=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
      "${ELASTICSEARCH_HOST}/_cat/nodes?format=json&pretty" \
      | jq -r '.[] | select(.name|test("elasticsearch-master.")) | .name')
    # One node name per word; an empty result counts as 0.
    numMasterNodes=$(wc -w <<< "$currentMasterNodes")
    if [ "$numMasterNodes" -ne "$expectedMasterNodes" ]
    then
      if [ "$numMasterNodes" -eq 0 ]
      then
        echo "No Elasticsearch master nodes accounted for: 0/${expectedMasterNodes}"
      else
        echo "Not all Elasticsearch master nodes accounted for and ready: (${numMasterNodes} / ${expectedMasterNodes})"
        echo "$currentMasterNodes"
      fi
      echo "Sleeping for 10 seconds before next check"
      echo ""
      sleep 10
    fi
  done
  echo "All Elasticsearch master nodes accounted for and ready: (${numMasterNodes} / ${expectedMasterNodes})"
  echo "$currentMasterNodes"
  echo ""
}
#######################################
# Block until the number of nodes listed by _cat/nodes whose name matches
# the regex "elasticsearch-data." equals the expected data replica count
# (rendered by Helm from .Values.pod.replicas.data).
# Globals:   ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD,
#            ELASTICSEARCH_HOST (read)
# Arguments: none
# Outputs:   progress messages and the matched node names on stdout
# Note:      polls every 10 seconds with no retry limit of its own.
#######################################
function check_data_nodes() {
  local numDataNodes=0
  local expectedDataNodes={{ .Values.pod.replicas.data | int64 }}
  local currentDataNodes=""
  while [ "$numDataNodes" -ne "$expectedDataNodes" ]
  do
    # Credentials are handed to curl as a config read from stdin (-K-);
    # -s keeps the transfer progress meter out of the log on every poll.
    currentDataNodes=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
      "${ELASTICSEARCH_HOST}/_cat/nodes?format=json&pretty" \
      | jq -r '.[] | select(.name|test("elasticsearch-data.")) | .name')
    # One node name per word; an empty result counts as 0.
    numDataNodes=$(wc -w <<< "$currentDataNodes")
    if [ "$numDataNodes" -ne "$expectedDataNodes" ]
    then
      if [ "$numDataNodes" -eq 0 ]
      then
        echo "No Elasticsearch data nodes accounted for: 0/${expectedDataNodes}"
      else
        echo "Not all Elasticsearch data nodes accounted for and ready: (${numDataNodes} / ${expectedDataNodes})"
        echo "$currentDataNodes"
      fi
      echo "Sleeping for 10 seconds before next check"
      echo ""
      sleep 10
    fi
  done
  echo "All Elasticsearch data nodes accounted for and ready: (${numDataNodes} / ${expectedDataNodes})"
  echo "$currentDataNodes"
  echo ""
}
#######################################
# Block until the number of nodes listed by _cat/nodes whose name matches
# the regex "elasticsearch-client." equals the expected client replica count
# (rendered by Helm from .Values.pod.replicas.client).
# Globals:   ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD,
#            ELASTICSEARCH_HOST (read)
# Arguments: none
# Outputs:   progress messages and the matched node names on stdout
# Note:      polls every 10 seconds with no retry limit of its own.
#######################################
function check_client_nodes() {
  local numClientNodes=0
  local expectedClientNodes={{ .Values.pod.replicas.client | int64 }}
  local currentClientNodes=""
  while [ "$numClientNodes" -ne "$expectedClientNodes" ]
  do
    # Credentials are handed to curl as a config read from stdin (-K-);
    # -s keeps the transfer progress meter out of the log on every poll.
    currentClientNodes=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
      "${ELASTICSEARCH_HOST}/_cat/nodes?format=json&pretty" \
      | jq -r '.[] | select(.name|test("elasticsearch-client.")) | .name')
    # One node name per word; an empty result counts as 0.
    numClientNodes=$(wc -w <<< "$currentClientNodes")
    if [ "$numClientNodes" -ne "$expectedClientNodes" ]
    then
      if [ "$numClientNodes" -eq 0 ]
      then
        echo "No Elasticsearch client nodes accounted for: 0/${expectedClientNodes}"
      else
        echo "Not all Elasticsearch client nodes accounted for and ready: (${numClientNodes} / ${expectedClientNodes})"
        echo "$currentClientNodes"
      fi
      echo "Sleeping for 10 seconds before next check"
      echo ""
      sleep 10
    fi
  done
  echo "All Elasticsearch client nodes accounted for and ready: (${numClientNodes} / ${expectedClientNodes})"
  echo "$currentClientNodes"
  echo ""
}
#######################################
# Query _cat/health once, print the raw response, and record the cluster
# status it reports.
# Globals:   ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD,
#            ELASTICSEARCH_HOST (read)
#            STATUS (written; intentionally global so callers can read it)
# Arguments: none
# Outputs:   the health response and a "Status: ..." line on stdout
#######################################
function check_cluster_health() {
  local clusterHealth
  # A single request feeds both the printed report and STATUS, so the two
  # can never describe different cluster states. Credentials are handed to
  # curl as a config read from stdin (-K-); -s suppresses the progress meter.
  clusterHealth=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
    "${ELASTICSEARCH_HOST}/_cat/health?format=json&pretty")
  echo "Elasticsearch cluster health is:"
  echo "$clusterHealth"
  # STATUS is left empty when the response body is empty.
  STATUS=$(jq -r '.[].status' <<< "$clusterHealth")
  echo "Status: $STATUS"
}
# Initial delay before the first check.
sleep 10
check_data_nodes
check_client_nodes
check_master_nodes
check_cluster_health
# Only an explicit "yellow" or "green" counts as ready. Comparing against
# "red" alone would also accept an empty or unexpected STATUS (for example
# when the health request fails), and report readiness prematurely.
until [[ "$STATUS" == "yellow" || "$STATUS" == "green" ]]; do
  echo "Waiting for cluster to become ready."
  sleep 30
  check_cluster_health
done
echo "Cluster is ready."

View File

@ -28,7 +28,6 @@ metadata:
annotations:
{{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
spec:
backoffLimit: {{ .Values.jobs.snapshot_repository.backoffLimit }}
template:
metadata:
labels:
@ -38,7 +37,6 @@ spec:
spec:
{{ dict "envAll" $envAll "application" "snapshot_repository" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 6 }}
serviceAccountName: {{ $serviceAccountName }}
activeDeadlineSeconds: {{ .Values.jobs.snapshot_repository.activeDeadlineSeconds }}
restartPolicy: OnFailure
nodeSelector:
{{ .Values.labels.job.node_selector_key }}: {{ .Values.labels.job.node_selector_value | quote }}

View File

@ -420,10 +420,7 @@ jobs:
failed: 1
es_cluster_wait:
backoffLimit: 6
activeDeadlineSeconds: 600
snapshot_repository:
backoffLimit: 6
activeDeadlineSeconds: 600
activeDeadlineSeconds: 1200
verify_repositories:
cron: "*/30 * * * *"
history: