Fix health probe for several conductor workers

It was observed that when the number of conductor workers
is increased from the default "1" to a higher value,
the readiness probe fails to check rabbitmq connections
for conductor processes. This happens because the script tries
to obtain rabbitmq connections for the parent conductor process,
which in the case of workers>1 doesn't open rabbitmq connections
itself but spawns child processes which handle the rabbitmq
connections instead.
This patch removes the "check-all-pids" option and keeps the logic,
but simplifies and speeds up the code. Previously, all processes
were checked when the "check-all-pids" option was set (even though,
regardless of the "sock_count" value, the check returned a positive
result if even a single process had an open connection). Now the
processes are checked one-by-one until the first one with open
rabbitmq connection(s) is found.

Change-Id: I72be0bbdefcba77a55b6ceed6e192c9621c069eb
This commit is contained in:
Oleksii Grudev 2019-11-13 15:28:14 +02:00 committed by Alex Grudev
parent 4844a63543
commit d467d685a3
2 changed files with 2 additions and 18 deletions

View File

@ -97,18 +97,10 @@ def check_service_status(transport):
def tcp_socket_status(process, ports): def tcp_socket_status(process, ports):
"""Check the tcp socket status on a process""" """Check the tcp socket status on a process"""
sock_count = 0
parentId = 0
for p in psutil.process_iter(): for p in psutil.process_iter():
try: try:
with p.oneshot(): with p.oneshot():
if process in " ".join(p.cmdline()): if process in " ".join(p.cmdline()):
if parentId == 0:
parentId = p.pid
else:
if p.ppid() == parentId and \
not cfg.CONF.check_all_pids:
continue
pcon = p.connections() pcon = p.connections()
for con in pcon: for con in pcon:
try: try:
@ -117,14 +109,10 @@ def tcp_socket_status(process, ports):
except IndexError: except IndexError:
continue continue
if rport in ports and status == tcp_established: if rport in ports and status == tcp_established:
sock_count = sock_count + 1 return 1
except psutil.Error: except psutil.Error:
continue continue
return 0
if sock_count == 0:
return 0
else:
return 1
def configured_port_in_conf(): def configured_port_in_conf():
@ -198,8 +186,6 @@ def test_rpc_liveness():
cfg.CONF.register_cli_opt(cfg.StrOpt('service-queue-name')) cfg.CONF.register_cli_opt(cfg.StrOpt('service-queue-name'))
cfg.CONF.register_cli_opt(cfg.BoolOpt('liveness-probe', default=False, cfg.CONF.register_cli_opt(cfg.BoolOpt('liveness-probe', default=False,
required=False)) required=False))
cfg.CONF.register_cli_opt(cfg.BoolOpt('check-all-pids', default=False,
required=False))
cfg.CONF.register_cli_opt(cfg.BoolOpt('use-fqdn', default=False, cfg.CONF.register_cli_opt(cfg.BoolOpt('use-fqdn', default=False,
required=False)) required=False))

View File

@ -68,7 +68,6 @@ spec:
- /etc/nova/nova.conf - /etc/nova/nova.conf
- --service-queue-name - --service-queue-name
- scheduler - scheduler
- --check-all-pids
initialDelaySeconds: 80 initialDelaySeconds: 80
periodSeconds: 90 periodSeconds: 90
timeoutSeconds: 70 timeoutSeconds: 70
@ -82,7 +81,6 @@ spec:
- --service-queue-name - --service-queue-name
- scheduler - scheduler
- --liveness-probe - --liveness-probe
- --check-all-pids
initialDelaySeconds: 120 initialDelaySeconds: 120
periodSeconds: 90 periodSeconds: 90
timeoutSeconds: 70 timeoutSeconds: 70