Fix health probe for several conductor workers
It was observed that when increasing the number of conductor workers from the default "1" to a higher value, the readiness probe fails to check rabbitmq connections for conductor processes. This happens because the script tries to obtain rabbitmq connections for the parent conductor process, which in the case of workers>1 doesn't open rabbitmq connections itself but spawns child processes that handle the rabbitmq connections instead. This patch removes the "check-all-pids" option and keeps the logic, but simplifies and speeds up the code: instead of checking all processes when the "check-all-pids" option was set (although, regardless of the "sock_count" value, the check returned a positive result if even a single process had an open connection), processes are now checked one by one until the first one with open rabbitmq connection(s) is found. Change-Id: I72be0bbdefcba77a55b6ceed6e192c9621c069eb
This commit is contained in:
parent
4844a63543
commit
d467d685a3
@ -97,18 +97,10 @@ def check_service_status(transport):
|
|||||||
|
|
||||||
def tcp_socket_status(process, ports):
|
def tcp_socket_status(process, ports):
|
||||||
"""Check the tcp socket status on a process"""
|
"""Check the tcp socket status on a process"""
|
||||||
sock_count = 0
|
|
||||||
parentId = 0
|
|
||||||
for p in psutil.process_iter():
|
for p in psutil.process_iter():
|
||||||
try:
|
try:
|
||||||
with p.oneshot():
|
with p.oneshot():
|
||||||
if process in " ".join(p.cmdline()):
|
if process in " ".join(p.cmdline()):
|
||||||
if parentId == 0:
|
|
||||||
parentId = p.pid
|
|
||||||
else:
|
|
||||||
if p.ppid() == parentId and \
|
|
||||||
not cfg.CONF.check_all_pids:
|
|
||||||
continue
|
|
||||||
pcon = p.connections()
|
pcon = p.connections()
|
||||||
for con in pcon:
|
for con in pcon:
|
||||||
try:
|
try:
|
||||||
@ -117,14 +109,10 @@ def tcp_socket_status(process, ports):
|
|||||||
except IndexError:
|
except IndexError:
|
||||||
continue
|
continue
|
||||||
if rport in ports and status == tcp_established:
|
if rport in ports and status == tcp_established:
|
||||||
sock_count = sock_count + 1
|
return 1
|
||||||
except psutil.Error:
|
except psutil.Error:
|
||||||
continue
|
continue
|
||||||
|
return 0
|
||||||
if sock_count == 0:
|
|
||||||
return 0
|
|
||||||
else:
|
|
||||||
return 1
|
|
||||||
|
|
||||||
|
|
||||||
def configured_port_in_conf():
|
def configured_port_in_conf():
|
||||||
@ -198,8 +186,6 @@ def test_rpc_liveness():
|
|||||||
cfg.CONF.register_cli_opt(cfg.StrOpt('service-queue-name'))
|
cfg.CONF.register_cli_opt(cfg.StrOpt('service-queue-name'))
|
||||||
cfg.CONF.register_cli_opt(cfg.BoolOpt('liveness-probe', default=False,
|
cfg.CONF.register_cli_opt(cfg.BoolOpt('liveness-probe', default=False,
|
||||||
required=False))
|
required=False))
|
||||||
cfg.CONF.register_cli_opt(cfg.BoolOpt('check-all-pids', default=False,
|
|
||||||
required=False))
|
|
||||||
cfg.CONF.register_cli_opt(cfg.BoolOpt('use-fqdn', default=False,
|
cfg.CONF.register_cli_opt(cfg.BoolOpt('use-fqdn', default=False,
|
||||||
required=False))
|
required=False))
|
||||||
|
|
||||||
|
@ -68,7 +68,6 @@ spec:
|
|||||||
- /etc/nova/nova.conf
|
- /etc/nova/nova.conf
|
||||||
- --service-queue-name
|
- --service-queue-name
|
||||||
- scheduler
|
- scheduler
|
||||||
- --check-all-pids
|
|
||||||
initialDelaySeconds: 80
|
initialDelaySeconds: 80
|
||||||
periodSeconds: 90
|
periodSeconds: 90
|
||||||
timeoutSeconds: 70
|
timeoutSeconds: 70
|
||||||
@ -82,7 +81,6 @@ spec:
|
|||||||
- --service-queue-name
|
- --service-queue-name
|
||||||
- scheduler
|
- scheduler
|
||||||
- --liveness-probe
|
- --liveness-probe
|
||||||
- --check-all-pids
|
|
||||||
initialDelaySeconds: 120
|
initialDelaySeconds: 120
|
||||||
periodSeconds: 90
|
periodSeconds: 90
|
||||||
timeoutSeconds: 70
|
timeoutSeconds: 70
|
||||||
|
Loading…
x
Reference in New Issue
Block a user