Merge "Fix health-probe concurrency and timings"

This commit is contained in:
Zuul 2020-02-20 22:04:39 +00:00 committed by Gerrit Code Review
commit 8c6269f719
8 changed files with 97 additions and 36 deletions

View File

@ -39,8 +39,10 @@ Usage example for Neutron metadata agent:
import httplib2 import httplib2
from six.moves import http_client as httplib from six.moves import http_client as httplib
import json
import os import os
import psutil import psutil
import signal
import socket import socket
import sys import sys
@ -292,8 +294,36 @@ def test_rpc_liveness():
check_agent_status(transport) check_agent_status(transport)
def check_pid_running(pid):
    """Report whether a process with the given pid currently exists.

    :param pid: process id; anything accepted by ``int()``.
    :returns: the result of ``psutil.pid_exists`` for that pid.
    """
    return psutil.pid_exists(int(pid))
if __name__ == "__main__": if __name__ == "__main__":
if "liveness-probe" in ','.join(sys.argv):
pidfile = "/tmp/liveness.pid" #nosec
else:
pidfile = "/tmp/readiness.pid" #nosec
data = {}
if os.path.isfile(pidfile):
with open(pidfile,'r') as f:
data = json.load(f)
if check_pid_running(data['pid']):
if data['exit_count'] > 1:
# Third time in, kill the previous process
os.kill(int(data['pid']), signal.SIGTERM)
else:
data['exit_count'] = data['exit_count'] + 1
with open(pidfile, 'w') as f:
json.dump(data, f)
sys.exit(0)
data['pid'] = os.getpid()
data['exit_count'] = 0
with open(pidfile, 'w') as f:
json.dump(data, f)
if "sriov_agent.ini" in ','.join(sys.argv): if "sriov_agent.ini" in ','.join(sys.argv):
sriov_readiness_check() sriov_readiness_check()
elif "metadata_agent.ini" not in ','.join(sys.argv): elif "metadata_agent.ini" not in ','.join(sys.argv):

View File

@ -81,8 +81,8 @@ spec:
- --use-fqdn - --use-fqdn
{{- end }} {{- end }}
initialDelaySeconds: 30 initialDelaySeconds: 30
periodSeconds: 15 periodSeconds: 190
timeoutSeconds: 65 timeoutSeconds: 185
livenessProbe: livenessProbe:
exec: exec:
command: command:
@ -99,8 +99,8 @@ spec:
- --use-fqdn - --use-fqdn
{{- end }} {{- end }}
initialDelaySeconds: 120 initialDelaySeconds: 120
periodSeconds: 90 periodSeconds: 600
timeoutSeconds: 70 timeoutSeconds: 580
command: command:
- /tmp/neutron-l2gw-agent.sh - /tmp/neutron-l2gw-agent.sh
volumeMounts: volumeMounts:

View File

@ -345,28 +345,28 @@ pod:
enabled: true enabled: true
params: params:
initialDelaySeconds: 30 initialDelaySeconds: 30
periodSeconds: 15 periodSeconds: 190
timeoutSeconds: 65 timeoutSeconds: 185
liveness: liveness:
enabled: true enabled: true
params: params:
initialDelaySeconds: 120 initialDelaySeconds: 120
periodSeconds: 90 periodSeconds: 600
timeoutSeconds: 70 timeoutSeconds: 580
l3_agent: l3_agent:
l3_agent: l3_agent:
readiness: readiness:
enabled: true enabled: true
params: params:
initialDelaySeconds: 30 initialDelaySeconds: 30
periodSeconds: 15 periodSeconds: 190
timeoutSeconds: 65 timeoutSeconds: 185
liveness: liveness:
enabled: true enabled: true
params: params:
initialDelaySeconds: 120 initialDelaySeconds: 120
periodSeconds: 90 periodSeconds: 600
timeoutSeconds: 70 timeoutSeconds: 580
lb_agent: lb_agent:
lb_agent: lb_agent:
readiness: readiness:
@ -377,14 +377,14 @@ pod:
enabled: true enabled: true
params: params:
initialDelaySeconds: 30 initialDelaySeconds: 30
periodSeconds: 15 periodSeconds: 190
timeoutSeconds: 65 timeoutSeconds: 185
liveness: liveness:
enabled: true enabled: true
params: params:
initialDelaySeconds: 120 initialDelaySeconds: 120
periodSeconds: 90 periodSeconds: 600
timeoutSeconds: 70 timeoutSeconds: 580
ovs_agent: ovs_agent:
ovs_agent: ovs_agent:
readiness: readiness:
@ -394,16 +394,16 @@ pod:
enabled: true enabled: true
params: params:
initialDelaySeconds: 120 initialDelaySeconds: 120
periodSeconds: 90 periodSeconds: 600
timeoutSeconds: 70 timeoutSeconds: 580
sriov_agent: sriov_agent:
sriov_agent: sriov_agent:
readiness: readiness:
enabled: true enabled: true
params: params:
initialDelaySeconds: 30 initialDelaySeconds: 30
periodSeconds: 15 periodSeconds: 190
timeoutSeconds: 65 timeoutSeconds: 185
server: server:
server: server:
readiness: readiness:

View File

@ -33,7 +33,10 @@ Usage example for Nova Compute:
""" """
import json
import os
import psutil import psutil
import signal
import socket import socket
import sys import sys
@ -218,8 +221,36 @@ def test_rpc_liveness():
check_service_status(transport) check_service_status(transport)
def check_pid_running(pid):
    """Report whether a process with the given pid currently exists.

    :param pid: process id; anything accepted by ``int()``.
    :returns: the result of ``psutil.pid_exists`` for that pid.
    """
    return psutil.pid_exists(int(pid))
if __name__ == "__main__":
    # Local import: only needed here to recognise "no such process".
    import errno

    # Liveness and readiness probes keep separate pidfiles so that one kind
    # of probe never throttles or kills the other.
    if "liveness-probe" in ','.join(sys.argv):
        pidfile = "/tmp/liveness.pid" #nosec
    else:
        pidfile = "/tmp/readiness.pid" #nosec

    # The pidfile records the pid of the last probe that actually ran the
    # check and how many later invocations exited early because that probe
    # was still running.
    data = {}
    prev_pid = None
    exit_count = 0
    if os.path.isfile(pidfile):
        try:
            with open(pidfile, 'r') as f:
                data = json.load(f)
            prev_pid = int(data['pid'])
            exit_count = int(data['exit_count'])
        except (IOError, OSError, ValueError, KeyError, TypeError):
            # The file may be empty or half-written (another probe is in the
            # middle of rewriting it) or otherwise malformed.  Treat that as
            # "no previous probe" instead of failing the health check on a
            # bookkeeping error.
            data = {}
            prev_pid = None
            exit_count = 0

    if prev_pid is not None and check_pid_running(prev_pid):
        if exit_count > 1:
            # Third time in, kill the previous process
            try:
                os.kill(prev_pid, signal.SIGTERM)
            except OSError as exc:
                # The previous probe may have exited between the existence
                # check and the signal; anything else is a real error.
                if exc.errno != errno.ESRCH:
                    raise
        else:
            # A previous probe is still in flight: count this skipped run
            # and report success without starting a concurrent check.
            data['exit_count'] = exit_count + 1
            with open(pidfile, 'w') as f:
                json.dump(data, f)
            sys.exit(0)

    # Record this process as the active probe before running the check.
    data = {'pid': os.getpid(), 'exit_count': 0}
    with open(pidfile, 'w') as f:
        json.dump(data, f)

    test_rpc_liveness()

    sys.exit(0)  # return success

View File

@ -203,8 +203,8 @@ spec:
- --use-fqdn - --use-fqdn
{{- end }} {{- end }}
initialDelaySeconds: 80 initialDelaySeconds: 80
periodSeconds: 90 periodSeconds: 190
timeoutSeconds: 70 timeoutSeconds: 185
livenessProbe: livenessProbe:
exec: exec:
command: command:
@ -219,8 +219,8 @@ spec:
- --use-fqdn - --use-fqdn
{{- end }} {{- end }}
initialDelaySeconds: 120 initialDelaySeconds: 120
periodSeconds: 90 periodSeconds: 600
timeoutSeconds: 70 timeoutSeconds: 580
command: command:
- /tmp/nova-compute.sh - /tmp/nova-compute.sh
volumeMounts: volumeMounts:

View File

@ -69,8 +69,8 @@ spec:
- --service-queue-name - --service-queue-name
- conductor - conductor
initialDelaySeconds: 80 initialDelaySeconds: 80
periodSeconds: 90 periodSeconds: 190
timeoutSeconds: 70 timeoutSeconds: 185
livenessProbe: livenessProbe:
exec: exec:
command: command:
@ -82,8 +82,8 @@ spec:
- conductor - conductor
- --liveness-probe - --liveness-probe
initialDelaySeconds: 120 initialDelaySeconds: 120
periodSeconds: 90 periodSeconds: 600
timeoutSeconds: 70 timeoutSeconds: 580
command: command:
- /tmp/nova-conductor.sh - /tmp/nova-conductor.sh
volumeMounts: volumeMounts:

View File

@ -69,8 +69,8 @@ spec:
- --service-queue-name - --service-queue-name
- consoleauth - consoleauth
initialDelaySeconds: 80 initialDelaySeconds: 80
periodSeconds: 90 periodSeconds: 190
timeoutSeconds: 70 timeoutSeconds: 185
livenessProbe: livenessProbe:
exec: exec:
command: command:
@ -82,8 +82,8 @@ spec:
- consoleauth - consoleauth
- --liveness-probe - --liveness-probe
initialDelaySeconds: 120 initialDelaySeconds: 120
periodSeconds: 90 periodSeconds: 600
timeoutSeconds: 70 timeoutSeconds: 580
command: command:
- /tmp/nova-consoleauth.sh - /tmp/nova-consoleauth.sh
volumeMounts: volumeMounts:

View File

@ -69,8 +69,8 @@ spec:
- --service-queue-name - --service-queue-name
- scheduler - scheduler
initialDelaySeconds: 80 initialDelaySeconds: 80
periodSeconds: 90 periodSeconds: 190
timeoutSeconds: 70 timeoutSeconds: 185
livenessProbe: livenessProbe:
exec: exec:
command: command:
@ -82,8 +82,8 @@ spec:
- scheduler - scheduler
- --liveness-probe - --liveness-probe
initialDelaySeconds: 120 initialDelaySeconds: 120
periodSeconds: 90 periodSeconds: 600
timeoutSeconds: 70 timeoutSeconds: 580
command: command:
- /tmp/nova-scheduler.sh - /tmp/nova-scheduler.sh
volumeMounts: volumeMounts: