From d514395d81b459ad2cda56034cadb3d79e4bafc3 Mon Sep 17 00:00:00 2001 From: "Anderson, Craig (ca846m)" Date: Thu, 17 Feb 2022 22:23:52 -0800 Subject: [PATCH] Improve health probe logging for nova and neutron 1. Log specific compute services failing RabbitMQ socket tests in nova health probe 2. Log specific compute services failing database socket tests in nova health probe 3. Make log level configurable for nova and neutron health probes Change-Id: I5e5d909d598af734596eb1732ae42808c1f6cd12 --- neutron/Chart.yaml | 2 +- neutron/templates/bin/_health-probe.py.tpl | 2 +- neutron/values.yaml | 4 ++++ nova/Chart.yaml | 2 +- nova/templates/bin/_health-probe.py.tpl | 9 ++++++--- nova/values.yaml | 4 ++++ releasenotes/notes/neutron.yaml | 1 + releasenotes/notes/nova.yaml | 1 + 8 files changed, 19 insertions(+), 6 deletions(-) diff --git a/neutron/Chart.yaml b/neutron/Chart.yaml index 84c2e83824..33c6a740c0 100644 --- a/neutron/Chart.yaml +++ b/neutron/Chart.yaml @@ -14,7 +14,7 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Neutron name: neutron -version: 0.2.10 +version: 0.2.11 home: https://docs.openstack.org/neutron/latest/ icon: https://www.openstack.org/themes/openstack/images/project-mascots/Neutron/OpenStack_Project_Neutron_vertical.png sources: diff --git a/neutron/templates/bin/_health-probe.py.tpl b/neutron/templates/bin/_health-probe.py.tpl index 3012757c52..faa9702703 100644 --- a/neutron/templates/bin/_health-probe.py.tpl +++ b/neutron/templates/bin/_health-probe.py.tpl @@ -53,7 +53,7 @@ rpc_timeout = int(os.getenv('RPC_PROBE_TIMEOUT', '60')) rpc_retries = int(os.getenv('RPC_PROBE_RETRIES', '2')) rabbit_port = 5672 tcp_established = "ESTABLISHED" -log.logging.basicConfig(level=log.ERROR) +log.logging.basicConfig(level=log.{{ .Values.health_probe.logging.level }}) def _get_hostname(use_fqdn): diff --git a/neutron/values.yaml b/neutron/values.yaml index c72a55b52c..2d7761446b 100644 --- a/neutron/values.yaml +++ b/neutron/values.yaml @@ -2516,6 
+2516,10 @@ network_policy: helm3_hook: true +health_probe: + logging: + level: ERROR + manifests: certificates: false configmap_bin: true diff --git a/nova/Chart.yaml b/nova/Chart.yaml index d6c0e89e50..311370bd88 100644 --- a/nova/Chart.yaml +++ b/nova/Chart.yaml @@ -14,7 +14,7 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Nova name: nova -version: 0.2.29 +version: 0.2.30 home: https://docs.openstack.org/nova/latest/ icon: https://www.openstack.org/themes/openstack/images/project-mascots/Nova/OpenStack_Project_Nova_vertical.png sources: diff --git a/nova/templates/bin/_health-probe.py.tpl b/nova/templates/bin/_health-probe.py.tpl index fa3fd923cc..393d3067d3 100644 --- a/nova/templates/bin/_health-probe.py.tpl +++ b/nova/templates/bin/_health-probe.py.tpl @@ -161,8 +161,10 @@ def test_tcp_socket(service): if service in dict_services: proc = dict_services[service] + transport = oslo_messaging.TransportURL.parse(cfg.CONF) if r_ports and tcp_socket_status(proc, r_ports) == 0: - sys.stderr.write("RabbitMQ socket not established") + sys.stderr.write("RabbitMQ socket not established for service " + "%s with transport %s" % (proc, transport)) # Do not kill the pod if RabbitMQ is not reachable/down if not cfg.CONF.liveness_probe: sys.exit(1) @@ -170,7 +172,8 @@ def test_tcp_socket(service): # let's do the db check if service != "compute": if d_ports and tcp_socket_status(proc, d_ports) == 0: - sys.stderr.write("Database socket not established") + sys.stderr.write("Database socket not established for service " + "%s with transport %s" % (proc, transport)) # Do not kill the pod if database is not reachable/down # there could be no socket as well as typically connections # get closed after an idle timeout @@ -194,7 +197,7 @@ def test_rpc_liveness(): cfg.CONF(sys.argv[1:]) - log.logging.basicConfig(level=log.ERROR) + log.logging.basicConfig(level=log.{{ .Values.health_probe.logging.level }}) try: transport = oslo_messaging.get_transport(cfg.CONF) diff 
--git a/nova/values.yaml b/nova/values.yaml index 72dd0d1130..7d4c1e589c 100644 --- a/nova/values.yaml +++ b/nova/values.yaml @@ -2556,6 +2556,10 @@ network_policy: # set helm3_hook: false when using the helm2 binary. helm3_hook: true +health_probe: + logging: + level: ERROR + manifests: certificates: false configmap_bin: true diff --git a/releasenotes/notes/neutron.yaml b/releasenotes/notes/neutron.yaml index 94ffb92fb9..9c9acfd95e 100644 --- a/releasenotes/notes/neutron.yaml +++ b/releasenotes/notes/neutron.yaml @@ -24,4 +24,5 @@ neutron: - 0.2.8 Add Victoria and Wallaby releases support - 0.2.9 Add option to disable helm.sh/hook annotations - 0.2.10 Update htk requirements repo + - 0.2.11 Improve health probe logging ... diff --git a/releasenotes/notes/nova.yaml b/releasenotes/notes/nova.yaml index fe0af75e39..f51e1942ee 100644 --- a/releasenotes/notes/nova.yaml +++ b/releasenotes/notes/nova.yaml @@ -50,4 +50,5 @@ nova: - 0.2.27 Add tls1.2 minimum version to tls overrides - 0.2.28 Move ssl_minimum_version to console section - 0.2.29 Remove ssh-config + - 0.2.30 Improve health probe logging ...