From 09d0409ed4a69f514355925754e832e752f817ca Mon Sep 17 00:00:00 2001
From: Michal Arbet <michal.arbet@ultimum.io>
Date: Fri, 26 Feb 2021 17:50:31 +0100
Subject: [PATCH] Allow user to set sysctl_net_ipv4_tcp_retries2

This patch adds a configuration option for
setting the kernel option sysctl_net_ipv4_tcp_retries2.

More information about the kernel option is available in [1][2],
and the Red Hat recommendation to tune it for DBs and HA is in [3].

[1]: https://pracucci.com/linux-tcp-rto-min-max-and-tcp-retries2.html
[2]: https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/
[3]: https://access.redhat.com/solutions/726753

Closes-Bug: #1917068
Change-Id: Ia0decbbfa4e33b1889b635f8bb1c9094567a2ce6
---
 ansible/roles/haproxy/defaults/main.yml       |  4 ++
 ansible/roles/haproxy/tasks/config-host.yml   |  7 +--
 .../high-availability/haproxy-guide.rst       | 47 +++++++++++++++++++
 .../reference/high-availability/index.rst     | 10 ++++
 doc/source/reference/index.rst                |  1 +
 ...die-after-VIP-switch-5f9e811783c36041.yaml | 13 +++++
 6 files changed, 79 insertions(+), 3 deletions(-)
 create mode 100644 doc/source/reference/high-availability/haproxy-guide.rst
 create mode 100644 doc/source/reference/high-availability/index.rst
 create mode 100644 releasenotes/notes/fix-TCP-connections-refusing-to-die-after-VIP-switch-5f9e811783c36041.yaml

diff --git a/ansible/roles/haproxy/defaults/main.yml b/ansible/roles/haproxy/defaults/main.yml
index ca5a3be975..c596e03152 100644
--- a/ansible/roles/haproxy/defaults/main.yml
+++ b/ansible/roles/haproxy/defaults/main.yml
@@ -90,4 +90,8 @@ haproxy_check_timeout: "10s"
 # Check http://www.haproxy.org/download/1.5/doc/configuration.txt for available options
 haproxy_defaults_balance: "roundrobin"
 
+# Avoid TCP connections refusing to die after VIP switch
+# https://bugs.launchpad.net/kolla-ansible/+bug/1917068
+haproxy_host_ipv4_tcp_retries2: "KOLLA_UNSET"
+
 kolla_externally_managed_cert: False
diff --git a/ansible/roles/haproxy/tasks/config-host.yml b/ansible/roles/haproxy/tasks/config-host.yml
index cad68d2c16..46b262c7a4 100644
--- a/ansible/roles/haproxy/tasks/config-host.yml
+++ b/ansible/roles/haproxy/tasks/config-host.yml
@@ -10,9 +10,10 @@
     sysctl_file: "{{ kolla_sysctl_conf_path }}"
   become: true
   with_items:
-    - { name: "net.ipv4.ip_nonlocal_bind", value: 1}
-    - { name: "net.ipv6.ip_nonlocal_bind", value: 1}
-    - { name: "net.unix.max_dgram_qlen", value: 128}
+    - { name: "net.ipv4.ip_nonlocal_bind", value: 1 }
+    - { name: "net.ipv6.ip_nonlocal_bind", value: 1 }
+    - { name: "net.ipv4.tcp_retries2", value: "{{ haproxy_host_ipv4_tcp_retries2 }}" }
+    - { name: "net.unix.max_dgram_qlen", value: 128 }
   when:
     - set_sysctl | bool
     - item.value != 'KOLLA_SKIP'
diff --git a/doc/source/reference/high-availability/haproxy-guide.rst b/doc/source/reference/high-availability/haproxy-guide.rst
new file mode 100644
index 0000000000..ae7d90171e
--- /dev/null
+++ b/doc/source/reference/high-availability/haproxy-guide.rst
@@ -0,0 +1,47 @@
+.. _haproxy-guide:
+
+=============
+HAProxy Guide
+=============
+
+Kolla Ansible supports a Highly Available (HA) deployment of
+OpenStack and other services. High-availability in Kolla
+is implemented via Keepalived and HAProxy. Keepalived manages virtual IP
+addresses, while HAProxy load-balances traffic to service backends.
+These two components must be installed on the same hosts
+and they are deployed to hosts in the ``haproxy`` group.
+
+Preparation and deployment
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+HAProxy and Keepalived are enabled by default. They may be disabled by
+setting the following in ``/etc/kolla/globals.yml``:
+
+.. code-block:: yaml
+
+   enable_haproxy: "no"
+   enable_keepalived: "no"
+
+Configuration
+~~~~~~~~~~~~~
+
+Failover tuning
+---------------
+
+When a VIP fails over from one host to another, hosts may take some
+time to detect that the connection has been dropped. This can lead
+to service downtime.
+
+To reduce the time taken by the kernel to close dead connections to the
+VIP address, modify the ``net.ipv4.tcp_retries2`` kernel option by setting
+the following in ``/etc/kolla/globals.yml``:
+
+.. code-block:: yaml
+
+   haproxy_host_ipv4_tcp_retries2: 6
+
+This is especially helpful for connections to MariaDB. See
+`here <https://pracucci.com/linux-tcp-rto-min-max-and-tcp-retries2.html>`__,
+`here <https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/>`__ and
+`here <https://access.redhat.com/solutions/726753>`__ for
+further information about this kernel option.
diff --git a/doc/source/reference/high-availability/index.rst b/doc/source/reference/high-availability/index.rst
new file mode 100644
index 0000000000..176b859572
--- /dev/null
+++ b/doc/source/reference/high-availability/index.rst
@@ -0,0 +1,10 @@
+=================
+High-availability
+=================
+
+This section describes high-availability configuration of services.
+
+.. toctree::
+   :maxdepth: 1
+
+   haproxy-guide
diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst
index 358aef91e6..c6631cfd08 100644
--- a/doc/source/reference/index.rst
+++ b/doc/source/reference/index.rst
@@ -17,3 +17,4 @@ Projects Deployment Configuration Reference
    message-queues/index
    deployment-config/index
    deployment-and-bootstrapping/index
+   high-availability/index
diff --git a/releasenotes/notes/fix-TCP-connections-refusing-to-die-after-VIP-switch-5f9e811783c36041.yaml b/releasenotes/notes/fix-TCP-connections-refusing-to-die-after-VIP-switch-5f9e811783c36041.yaml
new file mode 100644
index 0000000000..185ba8eb83
--- /dev/null
+++ b/releasenotes/notes/fix-TCP-connections-refusing-to-die-after-VIP-switch-5f9e811783c36041.yaml
@@ -0,0 +1,13 @@
+---
+features:
+  - |
+    Added a new haproxy configuration variable,
+    ``haproxy_host_ipv4_tcp_retries2``,
+    which allows users to modify this kernel option.
+    This option sets the maximum number of times a TCP packet is retransmitted
+    in the established state before giving up. The default kernel value is 15,
+    which corresponds to a duration of between approximately 13 and 30
+    minutes, depending on the retransmission timeout. This variable can be used
+    to mitigate an issue with stuck connections in case of VIP failover,
+    see `bug 1917068 <https://bugs.launchpad.net/kolla-ansible/+bug/1917068>`__
+    for details.