Merge "Fix health-probe concurrency and timings"

This commit is contained in:
Zuul 2020-02-20 22:04:39 +00:00 committed by Gerrit Code Review
commit 8c6269f719
8 changed files with 97 additions and 36 deletions

View File

@ -39,8 +39,10 @@ Usage example for Neutron metadata agent:
import httplib2
from six.moves import http_client as httplib
import json
import os
import psutil
import signal
import socket
import sys
@ -292,8 +294,36 @@ def test_rpc_liveness():
check_agent_status(transport)
def check_pid_running(pid):
    """Tell whether a process with the given pid currently exists.

    :param pid: process id; converted with ``int()`` before the lookup
    :returns: True when psutil reports the pid as existing, else False
    """
    return bool(psutil.pid_exists(int(pid)))
if __name__ == "__main__":
if "liveness-probe" in ','.join(sys.argv):
pidfile = "/tmp/liveness.pid" #nosec
else:
pidfile = "/tmp/readiness.pid" #nosec
data = {}
if os.path.isfile(pidfile):
with open(pidfile,'r') as f:
data = json.load(f)
if check_pid_running(data['pid']):
if data['exit_count'] > 1:
# Third time in, kill the previous process
os.kill(int(data['pid']), signal.SIGTERM)
else:
data['exit_count'] = data['exit_count'] + 1
with open(pidfile, 'w') as f:
json.dump(data, f)
sys.exit(0)
data['pid'] = os.getpid()
data['exit_count'] = 0
with open(pidfile, 'w') as f:
json.dump(data, f)
if "sriov_agent.ini" in ','.join(sys.argv):
sriov_readiness_check()
elif "metadata_agent.ini" not in ','.join(sys.argv):

View File

@ -81,8 +81,8 @@ spec:
- --use-fqdn
{{- end }}
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 65
periodSeconds: 190
timeoutSeconds: 185
livenessProbe:
exec:
command:
@ -99,8 +99,8 @@ spec:
- --use-fqdn
{{- end }}
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 600
timeoutSeconds: 580
command:
- /tmp/neutron-l2gw-agent.sh
volumeMounts:

View File

@ -345,28 +345,28 @@ pod:
enabled: true
params:
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 65
periodSeconds: 190
timeoutSeconds: 185
liveness:
enabled: true
params:
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 600
timeoutSeconds: 580
l3_agent:
l3_agent:
readiness:
enabled: true
params:
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 65
periodSeconds: 190
timeoutSeconds: 185
liveness:
enabled: true
params:
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 600
timeoutSeconds: 580
lb_agent:
lb_agent:
readiness:
@ -377,14 +377,14 @@ pod:
enabled: true
params:
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 65
periodSeconds: 190
timeoutSeconds: 185
liveness:
enabled: true
params:
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 600
timeoutSeconds: 580
ovs_agent:
ovs_agent:
readiness:
@ -394,16 +394,16 @@ pod:
enabled: true
params:
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 600
timeoutSeconds: 580
sriov_agent:
sriov_agent:
readiness:
enabled: true
params:
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 65
periodSeconds: 190
timeoutSeconds: 185
server:
server:
readiness:

View File

@ -33,7 +33,10 @@ Usage example for Nova Compute:
"""
import json
import os
import psutil
import signal
import socket
import sys
@ -218,8 +221,36 @@ def test_rpc_liveness():
check_service_status(transport)
def check_pid_running(pid):
    """Return True if psutil finds a process for ``pid``, False otherwise.

    :param pid: process id; anything ``int()`` accepts (int or numeric str)
    """
    pid_number = int(pid)
    return bool(psutil.pid_exists(pid_number))
if __name__ == "__main__":
    # One pid file per probe type, so liveness and readiness runs are
    # tracked independently of each other.
    if "liveness-probe" in ','.join(sys.argv):
        pidfile = "/tmp/liveness.pid"  #nosec
    else:
        pidfile = "/tmp/readiness.pid"  #nosec

    # Load the state left behind by the previous probe run, if any.
    # A missing, empty or half-written file (e.g. another probe has just
    # opened it for writing) is treated as "no previous probe" instead of
    # crashing the probe with a traceback.
    try:
        with open(pidfile, 'r') as f:
            data = json.load(f)
    except (IOError, OSError, ValueError):
        data = {}
    if not isinstance(data, dict):
        data = {}

    # Validate the fields this script relies on before using them.
    try:
        prev_pid = int(data['pid'])
        exit_count = int(data['exit_count'])
    except (KeyError, TypeError, ValueError):
        prev_pid = None
        exit_count = 0

    # A recorded pid equal to our own can only be a recycled pid; never
    # treat ourselves as the "previous" probe.
    if (prev_pid is not None and prev_pid != os.getpid()
            and check_pid_running(prev_pid)):
        if exit_count > 1:
            # Third time in, kill the previous process
            try:
                os.kill(prev_pid, signal.SIGTERM)
            except OSError:
                # The previous probe may have exited between the check
                # above and the kill; only propagate the error if the
                # process is in fact still there.
                if check_pid_running(prev_pid):
                    raise
        else:
            # A previous probe is still running: record that this run was
            # skipped and report success without starting a second check.
            data['exit_count'] = exit_count + 1
            with open(pidfile, 'w') as f:
                json.dump(data, f)
            sys.exit(0)

    # Register this process as the active probe before doing the check.
    data['pid'] = os.getpid()
    data['exit_count'] = 0
    with open(pidfile, 'w') as f:
        json.dump(data, f)

    test_rpc_liveness()

    sys.exit(0)  # return success

View File

@ -203,8 +203,8 @@ spec:
- --use-fqdn
{{- end }}
initialDelaySeconds: 80
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 190
timeoutSeconds: 185
livenessProbe:
exec:
command:
@ -219,8 +219,8 @@ spec:
- --use-fqdn
{{- end }}
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 600
timeoutSeconds: 580
command:
- /tmp/nova-compute.sh
volumeMounts:

View File

@ -69,8 +69,8 @@ spec:
- --service-queue-name
- conductor
initialDelaySeconds: 80
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 190
timeoutSeconds: 185
livenessProbe:
exec:
command:
@ -82,8 +82,8 @@ spec:
- conductor
- --liveness-probe
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 600
timeoutSeconds: 580
command:
- /tmp/nova-conductor.sh
volumeMounts:

View File

@ -69,8 +69,8 @@ spec:
- --service-queue-name
- consoleauth
initialDelaySeconds: 80
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 190
timeoutSeconds: 185
livenessProbe:
exec:
command:
@ -82,8 +82,8 @@ spec:
- consoleauth
- --liveness-probe
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 600
timeoutSeconds: 580
command:
- /tmp/nova-consoleauth.sh
volumeMounts:

View File

@ -69,8 +69,8 @@ spec:
- --service-queue-name
- scheduler
initialDelaySeconds: 80
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 190
timeoutSeconds: 185
livenessProbe:
exec:
command:
@ -82,8 +82,8 @@ spec:
- scheduler
- --liveness-probe
initialDelaySeconds: 120
periodSeconds: 90
timeoutSeconds: 70
periodSeconds: 600
timeoutSeconds: 580
command:
- /tmp/nova-scheduler.sh
volumeMounts: