From 0d332a6332be6b000c4f8f81354ea069854e0159 Mon Sep 17 00:00:00 2001 From: Andy Ning Date: Tue, 23 Apr 2024 16:26:54 -0400 Subject: [PATCH] Increase etcd health check timeout Under high load, etcd /health check QGET times out occasionally. This has been observed during IPsec-enabled system deployment when controller-1 is unlocked and drbd is synchronizing. In such cases the etcd /health check times out and causes an uncontrolled swact. This change increases the timeout value from 1s to 5s. Test Plan (DX system): PASS: etcd package build and image build. PASS: controller-0 successfully installed, bootstrapped and unlocked, with IPsec enabled. PASS: controller-1 successfully installed, IPsec configured and enabled, IPsec SAs established between controllers. PASS: After controller-1 is unlocked, verify there is no uncontrolled swact during drbd synchronization, and controller-1 comes up in "enabled" and "available" state. Story: 2010940 Task: 49930 Change-Id: I7ba66599de255c204157de82115a415d5568920d Signed-off-by: Andy Ning --- .../0001-Increate-health-check-timeout.patch | 32 +++++++++++++++++++ .../etcd/debian/deb_folder/patches/series | 1 + 2 files changed, 33 insertions(+) create mode 100644 kubernetes/etcd/debian/deb_folder/patches/0001-Increate-health-check-timeout.patch create mode 100644 kubernetes/etcd/debian/deb_folder/patches/series diff --git a/kubernetes/etcd/debian/deb_folder/patches/0001-Increate-health-check-timeout.patch b/kubernetes/etcd/debian/deb_folder/patches/0001-Increate-health-check-timeout.patch new file mode 100644 index 000000000..dc56a6b35 --- /dev/null +++ b/kubernetes/etcd/debian/deb_folder/patches/0001-Increate-health-check-timeout.patch @@ -0,0 +1,32 @@ +From ec992b6080f5fff7545a2d5026f444674ae1b0f1 Mon Sep 17 00:00:00 2001 +From: Andy Ning +Date: Fri, 19 Apr 2024 11:28:39 -0400 +Subject: [PATCH 1/1] Increate health check timeout + +Under high load, the /health check QGET times out occasionally. +This change increase the timeout value to 5s. 
+ +Signed-off-by: Andy Ning +--- + etcdserver/api/etcdhttp/metrics.go | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/etcdserver/api/etcdhttp/metrics.go b/etcdserver/api/etcdhttp/metrics.go +index e5c062e..d12ead0 100644 +--- a/etcdserver/api/etcdhttp/metrics.go ++++ b/etcdserver/api/etcdhttp/metrics.go +@@ -134,7 +134,10 @@ func checkHealth(srv etcdserver.ServerV2, excludedAlarms AlarmSet) Health { + } + + if h.Health == "true" { +- ctx, cancel := context.WithTimeout(context.Background(), time.Second) ++ time_out := time.Second*5 ++ plog.Debugf("/health check; QGET timeout: %v", time_out) ++ ++ ctx, cancel := context.WithTimeout(context.Background(), time_out) + _, err := srv.Do(ctx, etcdserverpb.Request{Method: "QGET"}) + cancel() + if err != nil { +-- +2.25.1 + diff --git a/kubernetes/etcd/debian/deb_folder/patches/series b/kubernetes/etcd/debian/deb_folder/patches/series new file mode 100644 index 000000000..766ca7647 --- /dev/null +++ b/kubernetes/etcd/debian/deb_folder/patches/series @@ -0,0 +1 @@ +0001-Increate-health-check-timeout.patch