diff --git a/neutron/templates/bin/_health-probe.py.tpl b/neutron/templates/bin/_health-probe.py.tpl
index 284163afe6..0aa4a5647d 100644
--- a/neutron/templates/bin/_health-probe.py.tpl
+++ b/neutron/templates/bin/_health-probe.py.tpl
@@ -39,8 +39,10 @@ Usage example for Neutron metadata agent:
 
 import httplib2
 from six.moves import http_client as httplib
+import json
 import os
 import psutil
+import signal
 import socket
 import sys
 
@@ -292,8 +294,48 @@ def test_rpc_liveness():
 
     check_agent_status(transport)
 
+def check_pid_running(pid):
+    """Return True if a process with the given pid currently exists."""
+    return psutil.pid_exists(int(pid))
 
 if __name__ == "__main__":
+
+    # Avoid piling up probes: while the probe recorded in the pidfile is
+    # still alive, report success right away; the third time it is found
+    # alive, terminate it and run a fresh check instead.
+    if "liveness-probe" in ','.join(sys.argv):
+        pidfile = "/tmp/liveness.pid"  #nosec
+    else:
+        pidfile = "/tmp/readiness.pid"  #nosec
+    data = {}
+    if os.path.isfile(pidfile):
+        try:
+            with open(pidfile, 'r') as f:
+                data = json.load(f)
+        except ValueError:
+            # An empty or truncated pidfile (for example a probe killed
+            # mid-write) must not make every later probe crash.
+            data = {}
+        if not isinstance(data, dict):
+            data = {}
+    if 'pid' in data and check_pid_running(data['pid']):
+        if data.get('exit_count', 0) > 1:
+            # Third time in, kill the previous process
+            try:
+                os.kill(int(data['pid']), signal.SIGTERM)
+            except OSError:
+                # Best effort: the previous probe may have exited since
+                # the check above.
+                pass
+        else:
+            data['exit_count'] = data.get('exit_count', 0) + 1
+            with open(pidfile, 'w') as f:
+                json.dump(data, f)
+            sys.exit(0)
+    data['pid'] = os.getpid()
+    data['exit_count'] = 0
+    with open(pidfile, 'w') as f:
+        json.dump(data, f)
+
     if "sriov_agent.ini" in ','.join(sys.argv):
         sriov_readiness_check()
     elif "metadata_agent.ini" not in ','.join(sys.argv):
diff --git a/neutron/templates/daemonset-l2gw-agent.yaml b/neutron/templates/daemonset-l2gw-agent.yaml
index 50daf29a9e..3f673990cd 100644
--- a/neutron/templates/daemonset-l2gw-agent.yaml
+++ b/neutron/templates/daemonset-l2gw-agent.yaml
@@ -81,8 +81,8 @@ spec:
                 - --use-fqdn
                 {{- end }}
             initialDelaySeconds: 30
-            periodSeconds: 15
-            timeoutSeconds: 65
+            periodSeconds: 190
+            timeoutSeconds: 185
           livenessProbe:
             exec:
               command:
@@ -99,8 +99,8 @@ spec:
                 - --use-fqdn
                 {{- end }}
             initialDelaySeconds: 120
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 600
+            timeoutSeconds: 580
           command:
             - /tmp/neutron-l2gw-agent.sh
           volumeMounts:
diff --git a/neutron/values.yaml b/neutron/values.yaml
index 1e61a828e2..8cb00d38bd 100644
--- a/neutron/values.yaml
+++ b/neutron/values.yaml
@@ -345,28 +345,28 @@ pod:
           enabled: true
           params:
             initialDelaySeconds: 30
-            periodSeconds: 15
-            timeoutSeconds: 65
+            periodSeconds: 190
+            timeoutSeconds: 185
         liveness:
           enabled: true
           params:
             initialDelaySeconds: 120
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 600
+            timeoutSeconds: 580
     l3_agent:
       l3_agent:
         readiness:
           enabled: true
           params:
             initialDelaySeconds: 30
-            periodSeconds: 15
-            timeoutSeconds: 65
+            periodSeconds: 190
+            timeoutSeconds: 185
         liveness:
           enabled: true
           params:
             initialDelaySeconds: 120
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 600
+            timeoutSeconds: 580
     lb_agent:
       lb_agent:
         readiness:
@@ -377,14 +377,14 @@ pod:
           enabled: true
           params:
             initialDelaySeconds: 30
-            periodSeconds: 15
-            timeoutSeconds: 65
+            periodSeconds: 190
+            timeoutSeconds: 185
         liveness:
           enabled: true
           params:
             initialDelaySeconds: 120
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 600
+            timeoutSeconds: 580
     ovs_agent:
       ovs_agent:
         readiness:
@@ -394,16 +394,16 @@ pod:
           enabled: true
           params:
             initialDelaySeconds: 120
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 600
+            timeoutSeconds: 580
     sriov_agent:
       sriov_agent:
         readiness:
           enabled: true
           params:
             initialDelaySeconds: 30
-            periodSeconds: 15
-            timeoutSeconds: 65
+            periodSeconds: 190
+            timeoutSeconds: 185
     server:
       server:
         readiness:
diff --git a/nova/templates/bin/_health-probe.py.tpl b/nova/templates/bin/_health-probe.py.tpl
index d78e70139d..d1127fb989 100644
--- a/nova/templates/bin/_health-probe.py.tpl
+++ b/nova/templates/bin/_health-probe.py.tpl
@@ -33,7 +33,10 @@ Usage example for Nova Compute:
 
 """
 
+import json
+import os
 import psutil
+import signal
 import socket
 import sys
 
@@ -218,8 +221,48 @@ def test_rpc_liveness():
 
     check_service_status(transport)
 
+def check_pid_running(pid):
+    """Return True if a process with the given pid currently exists."""
+    return psutil.pid_exists(int(pid))
 
 if __name__ == "__main__":
+
+    # Avoid piling up probes: while the probe recorded in the pidfile is
+    # still alive, report success right away; the third time it is found
+    # alive, terminate it and run a fresh check instead.
+    if "liveness-probe" in ','.join(sys.argv):
+        pidfile = "/tmp/liveness.pid"  #nosec
+    else:
+        pidfile = "/tmp/readiness.pid"  #nosec
+    data = {}
+    if os.path.isfile(pidfile):
+        try:
+            with open(pidfile, 'r') as f:
+                data = json.load(f)
+        except ValueError:
+            # An empty or truncated pidfile (for example a probe killed
+            # mid-write) must not make every later probe crash.
+            data = {}
+        if not isinstance(data, dict):
+            data = {}
+    if 'pid' in data and check_pid_running(data['pid']):
+        if data.get('exit_count', 0) > 1:
+            # Third time in, kill the previous process
+            try:
+                os.kill(int(data['pid']), signal.SIGTERM)
+            except OSError:
+                # Best effort: the previous probe may have exited since
+                # the check above.
+                pass
+        else:
+            data['exit_count'] = data.get('exit_count', 0) + 1
+            with open(pidfile, 'w') as f:
+                json.dump(data, f)
+            sys.exit(0)
+    data['pid'] = os.getpid()
+    data['exit_count'] = 0
+    with open(pidfile, 'w') as f:
+        json.dump(data, f)
+
     test_rpc_liveness()
 
     sys.exit(0)  # return success
diff --git a/nova/templates/daemonset-compute.yaml b/nova/templates/daemonset-compute.yaml
index 43e53d72d7..fd1f37f431 100644
--- a/nova/templates/daemonset-compute.yaml
+++ b/nova/templates/daemonset-compute.yaml
@@ -203,8 +203,8 @@ spec:
                 - --use-fqdn
                 {{- end }}
             initialDelaySeconds: 80
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 190
+            timeoutSeconds: 185
           livenessProbe:
             exec:
               command:
@@ -219,8 +219,8 @@ spec:
                 - --use-fqdn
                 {{- end }}
             initialDelaySeconds: 120
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 600
+            timeoutSeconds: 580
           command:
             - /tmp/nova-compute.sh
           volumeMounts:
diff --git a/nova/templates/deployment-conductor.yaml b/nova/templates/deployment-conductor.yaml
index f927afa6ae..ad511646a1 100644
--- a/nova/templates/deployment-conductor.yaml
+++ b/nova/templates/deployment-conductor.yaml
@@ -69,8 +69,8 @@ spec:
                 - --service-queue-name
                 - conductor
             initialDelaySeconds: 80
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 190
+            timeoutSeconds: 185
           livenessProbe:
             exec:
               command:
@@ -82,8 +82,8 @@ spec:
                 - conductor
                 - --liveness-probe
             initialDelaySeconds: 120
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 600
+            timeoutSeconds: 580
           command:
             - /tmp/nova-conductor.sh
           volumeMounts:
diff --git a/nova/templates/deployment-consoleauth.yaml b/nova/templates/deployment-consoleauth.yaml
index b9cb71732e..ddeea3381b 100644
--- a/nova/templates/deployment-consoleauth.yaml
+++ b/nova/templates/deployment-consoleauth.yaml
@@ -69,8 +69,8 @@ spec:
                 - --service-queue-name
                 - consoleauth
             initialDelaySeconds: 80
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 190
+            timeoutSeconds: 185
           livenessProbe:
             exec:
               command:
@@ -82,8 +82,8 @@ spec:
                 - consoleauth
                 - --liveness-probe
             initialDelaySeconds: 120
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 600
+            timeoutSeconds: 580
           command:
             - /tmp/nova-consoleauth.sh
           volumeMounts:
diff --git a/nova/templates/deployment-scheduler.yaml b/nova/templates/deployment-scheduler.yaml
index cb9e9df35c..f2d5055e72 100644
--- a/nova/templates/deployment-scheduler.yaml
+++ b/nova/templates/deployment-scheduler.yaml
@@ -69,8 +69,8 @@ spec:
                 - --service-queue-name
                 - scheduler
             initialDelaySeconds: 80
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 190
+            timeoutSeconds: 185
           livenessProbe:
             exec:
               command:
@@ -82,8 +82,8 @@ spec:
                 - scheduler
                 - --liveness-probe
             initialDelaySeconds: 120
-            periodSeconds: 90
-            timeoutSeconds: 70
+            periodSeconds: 600
+            timeoutSeconds: 580
           command:
             - /tmp/nova-scheduler.sh
           volumeMounts: