From 931e4eba54368c67111bddd878e91e5cd8a6b514 Mon Sep 17 00:00:00 2001 From: Itxaka Date: Wed, 20 Mar 2019 16:55:52 +0100 Subject: [PATCH] Add an option to the health probe to test all pids On some services it looks like the parent pid does not connect to rabbitmq and it is the children that do instead, for example in nova-scheduler from the rocky version onwards. The current health check only checks the main parent pid to see if it has an active connection to the rabbitmq port. This patch adds a flag to allow the health probe to check all processes for the mysql/rabbit connection instead of skipping child processes. It also enables it by default for nova-scheduler as it won't affect older versions that only run 1 process, but will work on later versions where the main process forks. Change-Id: I9677fd2aff11b563ab18059927ca12d5ace107ce --- nova/templates/bin/_health-probe.py.tpl | 4 +++- nova/templates/deployment-scheduler.yaml | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/nova/templates/bin/_health-probe.py.tpl b/nova/templates/bin/_health-probe.py.tpl index 683387476a..9518e7fb9f 100644 --- a/nova/templates/bin/_health-probe.py.tpl +++ b/nova/templates/bin/_health-probe.py.tpl @@ -92,7 +92,7 @@ def tcp_socket_status(process, port): if parentId == 0: parentId = p.pid else: - if p.ppid() == parentId: + if p.ppid() == parentId and not cfg.CONF.check_all_pids: continue pcon = p.connections() for con in pcon: @@ -172,6 +172,8 @@ def test_rpc_liveness(): cfg.CONF.register_cli_opt(cfg.StrOpt('service-queue-name')) cfg.CONF.register_cli_opt(cfg.BoolOpt('liveness-probe', default=False, required=False)) + cfg.CONF.register_cli_opt(cfg.BoolOpt('check-all-pids', default=False, + required=False)) cfg.CONF(sys.argv[1:]) diff --git a/nova/templates/deployment-scheduler.yaml b/nova/templates/deployment-scheduler.yaml index 9611d9509f..c75eb02482 100644 --- a/nova/templates/deployment-scheduler.yaml +++ b/nova/templates/deployment-scheduler.yaml 
@@ -69,6 +69,7 @@ spec: - /etc/nova/nova.conf - --service-queue-name - scheduler + - --check-all-pids initialDelaySeconds: 80 periodSeconds: 90 timeoutSeconds: 70 @@ -82,6 +83,7 @@ spec: - --service-queue-name - scheduler - --liveness-probe + - --check-all-pids initialDelaySeconds: 120 periodSeconds: 90 timeoutSeconds: 70