From 6969a5d596f0c124ae3a776f4ec37a6a7654a3be Mon Sep 17 00:00:00 2001 From: Oleksii Grudev Date: Mon, 10 Feb 2020 17:41:40 +0200 Subject: [PATCH] [neutron] Unhardcode probes timings This patch adds the ability to unhardcode readiness/liveness probe timings for those probes which were still hardcoded. Moreover, it introduces the RPC_PROBE_TIMEOUT and RPC_PROBE_RETRIES variables, which are passed to the health probe script and allow unhardcoding the RPC test timeout and number of retries. Change-Id: I2e48eed26abb82208a4ac4ae596d27ca8db99c90 --- neutron/templates/bin/_health-probe.py.tpl | 6 +- neutron/templates/daemonset-bagpipe-bgp.yaml | 19 +++-- neutron/templates/daemonset-dhcp-agent.yaml | 5 ++ neutron/templates/daemonset-l2gw-agent.yaml | 75 ++++++++++--------- neutron/templates/daemonset-l3-agent.yaml | 5 ++ .../templates/daemonset-metadata-agent.yaml | 5 ++ neutron/templates/daemonset-ovs-agent.yaml | 5 ++ neutron/templates/daemonset-sriov-agent.yaml | 5 ++ neutron/values.yaml | 25 +++++++ 9 files changed, 106 insertions(+), 44 deletions(-) diff --git a/neutron/templates/bin/_health-probe.py.tpl b/neutron/templates/bin/_health-probe.py.tpl index 0aa4a5647d..01ace1e8e1 100644 --- a/neutron/templates/bin/_health-probe.py.tpl +++ b/neutron/templates/bin/_health-probe.py.tpl @@ -51,6 +51,8 @@ from oslo_context import context from oslo_log import log import oslo_messaging +rpc_timeout = int(os.getenv('RPC_PROBE_TIMEOUT', '60')) +rpc_retries = int(os.getenv('RPC_PROBE_RETRIES', '2')) rabbit_port = 5672 tcp_established = "ESTABLISHED" log.logging.basicConfig(level=log.ERROR) @@ -69,8 +71,8 @@ def check_agent_status(transport): topic=cfg.CONF.agent_queue_name, server=_get_hostname(use_fqdn)) client = oslo_messaging.RPCClient(transport, target, - timeout=60, - retry=2) + timeout=rpc_timeout, + retry=rpc_retries) client.call(context.RequestContext(), 'pod_health_probe_method_ignore_errors') except oslo_messaging.exceptions.MessageDeliveryFailure: diff --git 
a/neutron/templates/daemonset-bagpipe-bgp.yaml b/neutron/templates/daemonset-bagpipe-bgp.yaml index ebd02738e4..5131fc4b19 100644 --- a/neutron/templates/daemonset-bagpipe-bgp.yaml +++ b/neutron/templates/daemonset-bagpipe-bgp.yaml @@ -14,6 +14,16 @@ See the License for the specific language governing permissions and limitations under the License. */}} +{{- define "bagpipeBgpLivenessProbeTemplate" }} +tcpSocket: + port: {{ tuple "network" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }} +{{- end }} + +{{- define "bagpipeBgpReadinessProbeTemplate" }} +tcpSocket: + port: {{ tuple "network" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }} +{{- end }} + {{- define "neutron.bagpipe_bgp.daemonset" }} {{- $daemonset := index . 0 }} {{- $configMapName := index . 1 }} @@ -81,13 +91,8 @@ spec: {{ tuple $envAll "neutron_bagpipe_bgp" | include "helm-toolkit.snippets.image" | indent 10 }} {{ tuple $envAll $envAll.Values.pod.resources.agent.bagpipe_bgp | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} {{ dict "envAll" $envAll "application" "neutron_bagpipe_bgp" "container" "neutron_bagpipe_bgp" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }} - readinessProbe: - tcpSocket: - port: {{ tuple "network" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }} - livenessProbe: - tcpSocket: - port: {{ tuple "network" "internal" "api" . 
| include "helm-toolkit.endpoints.endpoint_port_lookup" }} - initialDelaySeconds: 60 +{{ dict "envAll" $envAll "component" "bagpipe_bgp" "container" "bagpipe_bgp" "type" "liveness" "probeTemplate" (include "bagpipeBgpLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} +{{ dict "envAll" $envAll "component" "bagpipe_bgp" "container" "bagpipe_bgp" "type" "readiness" "probeTemplate" (include "bagpipeBgpReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} command: - /tmp/neutron-bagpipe-bgp.sh volumeMounts: diff --git a/neutron/templates/daemonset-dhcp-agent.yaml b/neutron/templates/daemonset-dhcp-agent.yaml index 187e491469..7a7496601d 100644 --- a/neutron/templates/daemonset-dhcp-agent.yaml +++ b/neutron/templates/daemonset-dhcp-agent.yaml @@ -157,6 +157,11 @@ spec: {{ tuple $envAll "neutron_dhcp" | include "helm-toolkit.snippets.image" | indent 10 }} {{ tuple $envAll $envAll.Values.pod.resources.agent.dhcp | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} {{ dict "envAll" $envAll "application" "neutron_dhcp_agent" "container" "neutron_dhcp_agent" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }} + env: + - name: RPC_PROBE_TIMEOUT + value: "{{ .Values.pod.probes.rpc_timeout }}" + - name: RPC_PROBE_RETRIES + value: "{{ .Values.pod.probes.rpc_retries }}" {{ dict "envAll" $envAll "component" "dhcp_agent" "container" "dhcp_agent" "type" "readiness" "probeTemplate" (include "dhcpAgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} {{ dict "envAll" $envAll "component" "dhcp_agent" "container" "dhcp_agent" "type" "liveness" "probeTemplate" (include "dhcpAgentLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} command: diff --git a/neutron/templates/daemonset-l2gw-agent.yaml 
b/neutron/templates/daemonset-l2gw-agent.yaml index 3f673990cd..8affb30bd5 100644 --- a/neutron/templates/daemonset-l2gw-agent.yaml +++ b/neutron/templates/daemonset-l2gw-agent.yaml @@ -14,6 +14,39 @@ See the License for the specific language governing permissions and limitations under the License. */}} +{{- define "l2gwAgentLivenessProbeTemplate" }} +exec: + command: + - python + - /tmp/health-probe.py + - --config-file + - /etc/neutron/neutron.conf + - --config-file + - /etc/neutron/l2gw_agent.ini + - --agent-queue-name + - l2gateway_agent + - --liveness-probe +{{- if .Values.pod.use_fqdn.neutron_agent }} + - --use-fqdn +{{- end }} +{{- end }} + +{{- define "l2gwAgentReadinessProbeTemplate" }} +exec: + command: + - python + - /tmp/health-probe.py + - --config-file + - /etc/neutron/neutron.conf + - --config-file + - /etc/neutron/l2gw_agent.ini + - --agent-queue-name + - l2gateway_agent +{{- if .Values.pod.use_fqdn.neutron_agent }} + - --use-fqdn +{{- end }} +{{- end }} + {{- define "neutron.l2gw_agent.daemonset" }} {{- $daemonset := index . 0 }} {{- $configMapName := index . 
1 }} @@ -66,41 +99,13 @@ spec: {{ tuple $envAll $envAll.Values.pod.resources.agent.l2gw | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} securityContext: privileged: true - readinessProbe: - exec: - command: - - python - - /tmp/health-probe.py - - --config-file - - /etc/neutron/neutron.conf - - --config-file - - /etc/neutron/l2gw_agent.ini - - --agent-queue-name - - l2gateway_agent -{{- if .Values.pod.use_fqdn.neutron_agent }} - - --use-fqdn -{{- end }} - initialDelaySeconds: 30 - periodSeconds: 190 - timeoutSeconds: 185 - livenessProbe: - exec: - command: - - python - - /tmp/health-probe.py - - --config-file - - /etc/neutron/neutron.conf - - --config-file - - /etc/neutron/l2gw_agent.ini - - --agent-queue-name - - l2gateway_agent - - --liveness-probe -{{- if .Values.pod.use_fqdn.neutron_agent }} - - --use-fqdn -{{- end }} - initialDelaySeconds: 120 - periodSeconds: 600 - timeoutSeconds: 580 + env: + - name: RPC_PROBE_TIMEOUT + value: "{{ .Values.pod.probes.rpc_timeout }}" + - name: RPC_PROBE_RETRIES + value: "{{ .Values.pod.probes.rpc_retries }}" +{{ dict "envAll" $envAll "component" "l2gw_agent" "container" "l2gw_agent" "type" "liveness" "probeTemplate" (include "l2gwAgentLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} +{{ dict "envAll" $envAll "component" "l2gw_agent" "container" "l2gw_agent" "type" "readiness" "probeTemplate" (include "l2gwAgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} command: - /tmp/neutron-l2gw-agent.sh volumeMounts: diff --git a/neutron/templates/daemonset-l3-agent.yaml b/neutron/templates/daemonset-l3-agent.yaml index 15861486d1..d85dc557ee 100644 --- a/neutron/templates/daemonset-l3-agent.yaml +++ b/neutron/templates/daemonset-l3-agent.yaml @@ -158,6 +158,11 @@ spec: {{ tuple $envAll "neutron_l3" | include "helm-toolkit.snippets.image" | indent 10 }} {{ tuple $envAll 
$envAll.Values.pod.resources.agent.l3 | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} {{ dict "envAll" $envAll "application" "neutron_l3_agent" "container" "neutron_l3_agent" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }} + env: + - name: RPC_PROBE_TIMEOUT + value: "{{ .Values.pod.probes.rpc_timeout }}" + - name: RPC_PROBE_RETRIES + value: "{{ .Values.pod.probes.rpc_retries }}" {{ dict "envAll" $envAll "component" "l3_agent" "container" "l3_agent" "type" "readiness" "probeTemplate" (include "l3AgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} {{ dict "envAll" $envAll "component" "l3_agent" "container" "l3_agent" "type" "liveness" "probeTemplate" (include "l3AgentLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} command: diff --git a/neutron/templates/daemonset-metadata-agent.yaml b/neutron/templates/daemonset-metadata-agent.yaml index 215508c8a6..84cbe1fd90 100644 --- a/neutron/templates/daemonset-metadata-agent.yaml +++ b/neutron/templates/daemonset-metadata-agent.yaml @@ -115,6 +115,11 @@ spec: - name: neutron-metadata-agent {{ tuple $envAll "neutron_metadata" | include "helm-toolkit.snippets.image" | indent 10 }} {{ tuple $envAll $envAll.Values.pod.resources.agent.metadata | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + env: + - name: RPC_PROBE_TIMEOUT + value: "{{ .Values.pod.probes.rpc_timeout }}" + - name: RPC_PROBE_RETRIES + value: "{{ .Values.pod.probes.rpc_retries }}" {{ dict "envAll" $envAll "component" "metadata_agent" "container" "metadata_agent" "type" "readiness" "probeTemplate" (include "metadataAgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} {{ dict "envAll" $envAll "component" "metadata_agent" "container" "metadata_agent" "type" "liveness" "probeTemplate" (include 
"metadataAgentLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} securityContext: diff --git a/neutron/templates/daemonset-ovs-agent.yaml b/neutron/templates/daemonset-ovs-agent.yaml index 22c13a4aa6..c0f1766aab 100644 --- a/neutron/templates/daemonset-ovs-agent.yaml +++ b/neutron/templates/daemonset-ovs-agent.yaml @@ -179,6 +179,11 @@ spec: - name: neutron-ovs-agent {{ tuple $envAll "neutron_openvswitch_agent" | include "helm-toolkit.snippets.image" | indent 10 }} {{ tuple $envAll $envAll.Values.pod.resources.agent.ovs | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + env: + - name: RPC_PROBE_TIMEOUT + value: "{{ .Values.pod.probes.rpc_timeout }}" + - name: RPC_PROBE_RETRIES + value: "{{ .Values.pod.probes.rpc_retries }}" {{ dict "envAll" $envAll "component" "ovs_agent" "container" "ovs_agent" "type" "readiness" "probeTemplate" (include "ovsAgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} {{ dict "envAll" $envAll "component" "ovs_agent" "container" "ovs_agent" "type" "liveness" "probeTemplate" (include "ovsAgentLivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} {{ dict "envAll" $envAll "application" "neutron_ovs_agent" "container" "neutron_ovs_agent" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }} diff --git a/neutron/templates/daemonset-sriov-agent.yaml b/neutron/templates/daemonset-sriov-agent.yaml index 86b835a1ed..4ebac26798 100644 --- a/neutron/templates/daemonset-sriov-agent.yaml +++ b/neutron/templates/daemonset-sriov-agent.yaml @@ -137,6 +137,11 @@ spec: {{ tuple $envAll "neutron_sriov_agent" | include "helm-toolkit.snippets.image" | indent 10 }} {{ tuple $envAll $envAll.Values.pod.resources.agent.sriov | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} {{ dict "envAll" $envAll "application" 
"neutron_sriov_agent" "container" "neutron_sriov_agent" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }} + env: + - name: RPC_PROBE_TIMEOUT + value: "{{ .Values.pod.probes.rpc_timeout }}" + - name: RPC_PROBE_RETRIES + value: "{{ .Values.pod.probes.rpc_retries }}" {{ dict "envAll" $envAll "component" "sriov_agent" "container" "sriov_agent" "type" "readiness" "probeTemplate" (include "sriovAgentReadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 10 }} command: - /tmp/neutron-sriov-agent.sh diff --git a/neutron/values.yaml b/neutron/values.yaml index 3a9ecd7e1b..3a4691faa2 100644 --- a/neutron/values.yaml +++ b/neutron/values.yaml @@ -339,6 +339,8 @@ pod: use_fqdn: neutron_agent: true probes: + rpc_timeout: 60 + rpc_retries: 2 dhcp_agent: dhcp_agent: readiness: @@ -404,6 +406,29 @@ pod: initialDelaySeconds: 30 periodSeconds: 190 timeoutSeconds: 185 + bagpipe_bgp: + bagpipe_bgp: + readiness: + enabled: true + params: + liveness: + enabled: true + params: + initialDelaySeconds: 60 + l2gw_agent: + l2gw_agent: + readiness: + enabled: true + params: + initialDelaySeconds: 30 + periodSeconds: 15 + timeoutSeconds: 65 + liveness: + enabled: true + params: + initialDelaySeconds: 120 + periodSeconds: 90 + timeoutSeconds: 70 server: server: readiness: