From e89c1c3c06c75ae84b61f384aba5e8a458ad4c57 Mon Sep 17 00:00:00 2001
From: willxz <xz8905@att.com>
Date: Thu, 2 Jul 2020 15:16:37 -0400
Subject: [PATCH] allocate_data_node function improvement

- Remove "if" condition of allocate_data_node
- Dealy 5 seconds for wait_to_join initial check to start
- Set 60 minutes timeout for wait_to_join function

Change-Id: Ie42af89551bd8804b87fe936c676e85130564187
---
 .../templates/bin/_elasticsearch.sh.tpl       | 33 ++++++++++---------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/elasticsearch/templates/bin/_elasticsearch.sh.tpl b/elasticsearch/templates/bin/_elasticsearch.sh.tpl
index 27a0cda22..32656d376 100644
--- a/elasticsearch/templates/bin/_elasticsearch.sh.tpl
+++ b/elasticsearch/templates/bin/_elasticsearch.sh.tpl
@@ -35,28 +35,31 @@ function stop () {
 }
 
 function wait_to_join() {
+  # delay 5 seconds before the first check
+  sleep 5
   joined=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" "${ELASTICSEARCH_ENDPOINT}/_cat/nodes" | grep -w $NODE_NAME || true )
-
+  i=0
   while [ -z "$joined" ]; do
     sleep 5
     joined=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" "${ELASTICSEARCH_ENDPOINT}/_cat/nodes" | grep -w $NODE_NAME || true )
+    i=$((i+1))
+    # Waiting for up to 60 minutes
+    if [ $i -gt 720 ]; then
+      break
+    fi
   done
 }
 
 function allocate_data_node () {
-  if [ -f /data/restarting ]; then
-    rm /data/restarting
-    echo "Node ${NODE_NAME} has restarted. Waiting to rejoin the cluster."
-    wait_to_join
-
-    echo "Re-enabling Replica Shard Allocation"
-    curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" -XPUT -H 'Content-Type: application/json' \
-     "${ELASTICSEARCH_ENDPOINT}/_cluster/settings" -d "{
-      \"persistent\": {
-        \"cluster.routing.allocation.enable\": null
-      }
-    }"
-  fi
+  echo "Node ${NODE_NAME} has started. Waiting to rejoin the cluster."
+  wait_to_join
+  echo "Re-enabling Replica Shard Allocation"
+  curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" -XPUT -H 'Content-Type: application/json' \
+    "${ELASTICSEARCH_ENDPOINT}/_cluster/settings" -d "{
+    \"persistent\": {
+      \"cluster.routing.allocation.enable\": null
+    }
+  }"
 }
 
 function start_master_node () {
@@ -116,12 +119,12 @@ function start_data_node () {
     # although the request itself still returns a 200 OK status. If there are failures, reissue the request.
     # (The only side effect of not doing so is slower start up times. See flush documentation linked above)
 
-    touch /data/restarting
     echo "Node ${NODE_NAME} is ready to shutdown"
     kill -TERM 1
   }
   trap drain_data_node TERM EXIT HUP INT
   wait
+
 }
 
 $COMMAND