iptables rules for not tracking GRE/VXLAN traffic
On some busy cloud deployments, it has been reported that the nodes hosting the neutron-openvswitch are getting their nf-conntrack tables full and starting to drop connections. The reason is that GRE/VXLAN use source port randomization, and the number of "unique" flows is high from the nf-conntrack's perspective. This is no surprise since flows are usually identified with their 5-tuple (srcip/[srcport]/dstip/ dstport/tproto) by network elements, and GRE/VXLAN are leveraging this fact to have an even distribution in load-balancing systems in between [1][2]. The randomization causes the nf_conntrack table to be filled with many GRE/VXLAN-related flows, eventually leading to connection drops in a busy environment. As there is no particular reason and benefit to track these flows at the moment, the solution is to exclude GRE/VXLAN traffic from nf-conntrack tracking. This can be done by putting rules with `-j NOTRACK` jump into relevant iptables chains, which many people already use as a solution to this problem. This change incorporates the relevant rules to the charm code, so the rules become present by default. [1] https://www.rfc-editor.org/rfc/rfc8086.html#section-3.2 [2] https://www.rfc-editor.org/rfc/rfc7348.html#section-5 Closes-bug: #1978806 Change-Id: I9f6c7ca5207a3d587cc9cc2995d9938921ad88f1 Signed-off-by: Marcin Wilk <marcin.wilk@canonical.com>
This commit is contained in:
parent
230a5397ed
commit
d6d6570973
@ -83,6 +83,7 @@ from neutron_ovs_utils import (
|
||||
purge_sriov_systemd_files,
|
||||
use_fqdn_hint,
|
||||
deferrable_services,
|
||||
configure_iptables_rules,
|
||||
)
|
||||
|
||||
hooks = Hooks()
|
||||
@ -336,6 +337,13 @@ def update_nrpe_config():
|
||||
nrpe_setup.write()
|
||||
|
||||
|
||||
@hooks.hook('start')
|
||||
def start_charm():
|
||||
# add conntrack related iptables rules upon restart
|
||||
log('Configuring iptables NOTRACK rules for GRE/VXLAN tunnels')
|
||||
configure_iptables_rules()
|
||||
|
||||
|
||||
@hooks.hook('update-status')
|
||||
def dummy_update_status():
|
||||
"""Dummy function to silence missing hook log entry"""
|
||||
|
@ -826,6 +826,9 @@ def configure_ovs():
|
||||
if not init_is_systemd():
|
||||
service_restart('os-charm-phy-nic-mtu')
|
||||
|
||||
# Configure iptables rules to not track GRE/VXLAN connections
|
||||
configure_iptables_rules()
|
||||
|
||||
|
||||
def _get_interfaces_from_mappings(sriov_mappings):
|
||||
"""Returns list of interfaces based on sriov-device-mappings"""
|
||||
@ -1033,3 +1036,69 @@ def _pause_resume_helper(f, configs, exclude_services=None):
|
||||
exclude_services = []
|
||||
f(assess_status_func(configs, exclude_services),
|
||||
services=services(exclude_services), ports=None)
|
||||
|
||||
|
||||
def _run(*args):
|
||||
"""
|
||||
Run external process and return result.
|
||||
|
||||
:param *args: Command name and arguments.
|
||||
:type *args: str
|
||||
:returns: Data about completed process
|
||||
:rtype: subprocess.CompletedProcess
|
||||
:raises: subprocess.CalledProcessError
|
||||
"""
|
||||
cp = subprocess.run(
|
||||
args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True,
|
||||
universal_newlines=True)
|
||||
log(cp,)
|
||||
return cp
|
||||
|
||||
|
||||
def configure_iptables_rules():
|
||||
"""
|
||||
Configure `iptables` NOTRACK rules for GRE/VXLAN traffic in nf_conntrack
|
||||
|
||||
The GRE/VXLAN traffic has randomized source ports[1][2], which
|
||||
plays badly with nf_conntrack in a busy cloud environment. Having
|
||||
randomized source ports change the five-tuple info, resulting to an
|
||||
increase in unique connections that nf_conntrack tracks. That
|
||||
inevitably leads to a full nf_conntrack table and connections to be
|
||||
dropped.
|
||||
|
||||
NOTRACK rules allow nf_conntrack to ignore the said traffic. UDP
|
||||
flows with destination port 4754 (GRE), and 4789 (VXLAN) will not be
|
||||
tracked by nf_conntrack while rules are in effect.
|
||||
|
||||
[1]:(https://www.rfc-editor.org/rfc/rfc8086.html#section-3.2)
|
||||
[2]:(https://www.rfc-editor.org/rfc/rfc7348.html#section-5)
|
||||
"""
|
||||
log("Configuring iptables rules")
|
||||
|
||||
def append_notrack_rule(chain_name, port):
|
||||
"""
|
||||
Append a iptables NOTRACK rule to a chain in raw table.
|
||||
|
||||
:param str chain_name: Chain to append
|
||||
"""
|
||||
args = ['iptables', '-t', 'raw', '-C', chain_name, '-p', 'udp',
|
||||
'--dport', port, '-j', 'NOTRACK']
|
||||
try:
|
||||
_run(*args)
|
||||
log("Rule `{}` append to `{}` chain skipped (already exists)"
|
||||
.format(' '.join(args), chain_name))
|
||||
except subprocess.CalledProcessError as cpe:
|
||||
if cpe.returncode != 1:
|
||||
raise
|
||||
|
||||
# Append command is exactly the same with check
|
||||
# except the check (-C) is replaced with append
|
||||
# (-A)
|
||||
args[3] = '-A'
|
||||
_run(*args)
|
||||
log("Rule `{}` appended to `{}` chain"
|
||||
.format(' '.join(args), chain_name))
|
||||
|
||||
for _chain in ('PREROUTING', 'OUTPUT'):
|
||||
for port in ('4754', '4789'):
|
||||
append_notrack_rule(_chain, port)
|
||||
|
1
hooks/start
Symbolic link
1
hooks/start
Symbolic link
@ -0,0 +1 @@
|
||||
neutron_ovs_hooks.py
|
@ -54,6 +54,7 @@ TO_PATCH = [
|
||||
'is_container',
|
||||
'is_hook_allowed',
|
||||
'update_nrpe_config',
|
||||
'configure_iptables_rules',
|
||||
]
|
||||
NEUTRON_CONF_DIR = "/etc/neutron"
|
||||
|
||||
@ -315,3 +316,7 @@ class NeutronOVSHooksTests(CharmTestCase):
|
||||
self.CONFIGS.complete_contexts.return_value = ['amqp']
|
||||
self._call_hook('amqp-relation-departed')
|
||||
self.CONFIGS.write_all.assert_called_once()
|
||||
|
||||
def test_start(self):
|
||||
self._call_hook('start')
|
||||
self.configure_iptables_rules.assert_called_once()
|
||||
|
@ -590,13 +590,15 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
self.assertTrue(expect[item] == _restart_map[item])
|
||||
self.assertEqual(len(_restart_map.keys()), 3)
|
||||
|
||||
@patch.object(nutils, 'subprocess',
|
||||
return_value=subprocess.CompletedProcess("dummy-args", 0))
|
||||
@patch('charmhelpers.contrib.openstack.context.list_nics',
|
||||
return_value=['eth0'])
|
||||
@patch.object(nutils, 'use_dvr')
|
||||
@patch('charmhelpers.contrib.network.ovs.charm_name')
|
||||
@patch('charmhelpers.contrib.openstack.context.config')
|
||||
def test_configure_ovs_ovs_data_port(
|
||||
self, mock_config, _charm_name, _use_dvr, _nics):
|
||||
self, mock_config, _charm_name, _use_dvr, _nics, mock_subprocess):
|
||||
_use_dvr.return_value = False
|
||||
_charm_name.return_value = "neutron-openvswitch"
|
||||
self.is_linuxbridge_interface.return_value = False
|
||||
@ -643,13 +645,15 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
# Not called since we have a bogus bridge in data-ports
|
||||
self.assertFalse(self.add_bridge_port.called)
|
||||
|
||||
@patch.object(nutils, 'configure_iptables_rules', return_value=None)
|
||||
@patch('charmhelpers.contrib.openstack.context.list_nics',
|
||||
return_value=['eth0', 'br-juju'])
|
||||
@patch('charmhelpers.contrib.network.ovs.charm_name')
|
||||
@patch.object(nutils, 'use_dvr')
|
||||
@patch('charmhelpers.contrib.openstack.context.config')
|
||||
def test_configure_ovs_data_port_with_bridge(
|
||||
self, mock_config, _use_dvr, _charm_name, _nics):
|
||||
self, mock_config, _use_dvr,
|
||||
_charm_name, _nics, mock_conf_iptables):
|
||||
_use_dvr.return_value = False
|
||||
_charm_name.return_value = "neutron-openvswitch"
|
||||
self.is_linuxbridge_interface.return_value = True
|
||||
@ -681,11 +685,13 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
'config is deprecated for removal after 21.10 release of OpenStack'
|
||||
' charms.', level='WARNING')
|
||||
|
||||
@patch.object(nutils, 'subprocess',
|
||||
return_value=subprocess.CompletedProcess("dummy-args", 0))
|
||||
@patch.object(nutils, 'use_dvr')
|
||||
@patch('charmhelpers.contrib.network.ovs.charm_name')
|
||||
@patch('charmhelpers.contrib.openstack.context.config')
|
||||
def test_configure_ovs_starts_service_if_required(
|
||||
self, mock_config, _charm_name, _use_dvr):
|
||||
self, mock_config, _charm_name, _use_dvr, mock_subprocess):
|
||||
_use_dvr.return_value = False
|
||||
_charm_name.return_value = "neutron-openvswitch"
|
||||
mock_config.side_effect = self.test_config.get
|
||||
@ -694,11 +700,13 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
nutils.configure_ovs()
|
||||
self.assertTrue(self.full_restart.called)
|
||||
|
||||
@patch.object(nutils, 'subprocess',
|
||||
return_value=subprocess.CompletedProcess("dummy-args", 0))
|
||||
@patch.object(nutils, 'use_dvr')
|
||||
@patch('charmhelpers.contrib.network.ovs.charm_name')
|
||||
@patch('charmhelpers.contrib.openstack.context.config')
|
||||
def test_configure_ovs_doesnt_restart_service(
|
||||
self, mock_config, _charm_name, _use_dvr):
|
||||
self, mock_config, _charm_name, _use_dvr, mock_subprocess):
|
||||
_use_dvr.return_value = False
|
||||
_charm_name.return_value = "neutron-openvswitch"
|
||||
mock_config.side_effect = self.test_config.get
|
||||
@ -707,11 +715,13 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
nutils.configure_ovs()
|
||||
self.assertFalse(self.full_restart.called)
|
||||
|
||||
@patch.object(nutils, 'subprocess',
|
||||
return_value=subprocess.CompletedProcess("dummy-args", 0))
|
||||
@patch.object(nutils, 'use_dvr')
|
||||
@patch('charmhelpers.contrib.network.ovs.charm_name')
|
||||
@patch('charmhelpers.contrib.openstack.context.config')
|
||||
def test_configure_ovs_ovs_ext_port(
|
||||
self, mock_config, _charm_name, _use_dvr):
|
||||
self, mock_config, _charm_name, _use_dvr, mock_subprocess):
|
||||
_use_dvr.return_value = True
|
||||
_charm_name.return_value = "neutron-openvswitch"
|
||||
mock_config.side_effect = self.test_config.get
|
||||
@ -940,6 +950,8 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
'charm-neutron-openvswitch': br[2]}},
|
||||
linkup=False, promisc=None)], any_order=True)
|
||||
|
||||
@patch.object(nutils, 'subprocess',
|
||||
return_value=subprocess.CompletedProcess("dummy-args", 0))
|
||||
@patch.object(nutils, 'use_hw_offload', return_value=False)
|
||||
@patch.object(nutils, 'parse_bridge_mappings')
|
||||
@patch.object(nutils, 'parse_data_port_mappings')
|
||||
@ -955,7 +967,8 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
_NeutronAPIContext,
|
||||
_parse_data_port_mappings,
|
||||
_parse_bridge_mappings,
|
||||
_use_hw_offload):
|
||||
_use_hw_offload,
|
||||
_mock_subprocess):
|
||||
_NeutronAPIContext.return_value = DummyContext(
|
||||
return_value={'global_physnet_mtu': 1500})
|
||||
return self._run_configure_ovs_dpdk(mock_config, _use_dvr, _charm_name,
|
||||
@ -966,6 +979,8 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
_late_init=False,
|
||||
_test_bonds=False)
|
||||
|
||||
@patch.object(nutils, 'subprocess',
|
||||
return_value=subprocess.CompletedProcess("dummy-args", 0))
|
||||
@patch.object(nutils, 'use_hw_offload', return_value=False)
|
||||
@patch.object(nutils, 'parse_bridge_mappings')
|
||||
@patch.object(nutils, 'parse_data_port_mappings')
|
||||
@ -982,7 +997,8 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
_NeutronAPIContext,
|
||||
_parse_data_port_mappings,
|
||||
_parse_bridge_mappings,
|
||||
_use_hw_offload):
|
||||
_use_hw_offload,
|
||||
_mock_subprocess):
|
||||
_NeutronAPIContext.return_value = DummyContext(
|
||||
return_value={'global_physnet_mtu': 1500})
|
||||
return self._run_configure_ovs_dpdk(mock_config, _use_dvr, _charm_name,
|
||||
@ -993,6 +1009,8 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
_late_init=True,
|
||||
_test_bonds=False)
|
||||
|
||||
@patch.object(nutils, 'subprocess',
|
||||
return_value=subprocess.CompletedProcess("dummy-args", 0))
|
||||
@patch.object(nutils, 'use_hw_offload', return_value=False)
|
||||
@patch.object(nutils, 'parse_bridge_mappings')
|
||||
@patch.object(nutils, 'parse_data_port_mappings')
|
||||
@ -1009,7 +1027,8 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
_NeutronAPIContext,
|
||||
_parse_data_port_mappings,
|
||||
_parse_bridge_mappings,
|
||||
_use_hw_offload):
|
||||
_use_hw_offload,
|
||||
_mock_subprocess):
|
||||
_NeutronAPIContext.return_value = DummyContext(
|
||||
return_value={'global_physnet_mtu': 1500})
|
||||
return self._run_configure_ovs_dpdk(mock_config, _use_dvr, _charm_name,
|
||||
@ -1020,11 +1039,13 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
_late_init=True,
|
||||
_test_bonds=True)
|
||||
|
||||
@patch.object(nutils, 'subprocess',
|
||||
return_value=subprocess.CompletedProcess("dummy-args", 0))
|
||||
@patch.object(nutils, 'use_dvr')
|
||||
@patch('charmhelpers.contrib.network.ovs.charm_name')
|
||||
@patch('charmhelpers.contrib.openstack.context.config')
|
||||
def test_configure_ovs_enable_ipfix(self, mock_config, mock_charm_name,
|
||||
mock_use_dvr):
|
||||
mock_use_dvr, mock_subprocess):
|
||||
mock_use_dvr.return_value = False
|
||||
mock_charm_name.return_value = "neutron-openvswitch"
|
||||
mock_config.side_effect = self.test_config.get
|
||||
@ -1036,11 +1057,13 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
call('br-ex', '127.0.0.1:80'),
|
||||
])
|
||||
|
||||
@patch.object(nutils, 'subprocess',
|
||||
return_value=subprocess.CompletedProcess("dummy-args", 0))
|
||||
@patch.object(nutils, 'use_dvr')
|
||||
@patch('charmhelpers.contrib.network.ovs.charm_name')
|
||||
@patch('charmhelpers.contrib.openstack.context.config')
|
||||
def test_configure_ovs_ensure_ext_port_ignored(
|
||||
self, mock_config, mock_charm_name, mock_use_dvr):
|
||||
self, mock_config, mock_charm_name, mock_use_dvr, mock_subprocess):
|
||||
mock_use_dvr.return_value = True
|
||||
mock_charm_name.return_value = "neutron-openvswitch"
|
||||
mock_config.side_effect = self.test_config.get
|
||||
@ -1056,11 +1079,13 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
self.assertNotIn(call('br-ex', 'p0', ifdata=ANY, portdata=ANY),
|
||||
self.add_bridge_port.call_args_list)
|
||||
|
||||
@patch.object(nutils, 'subprocess',
|
||||
return_value=subprocess.CompletedProcess("dummy-args", 0))
|
||||
@patch.object(nutils, 'use_dvr')
|
||||
@patch('charmhelpers.contrib.network.ovs.charm_name')
|
||||
@patch('charmhelpers.contrib.openstack.context.config')
|
||||
def test_configure_ovs_ensure_ext_port_used(
|
||||
self, mock_config, mock_charm_name, mock_use_dvr):
|
||||
self, mock_config, mock_charm_name, mock_use_dvr, mock_subprocess):
|
||||
mock_use_dvr.return_value = True
|
||||
mock_charm_name.return_value = "neutron-openvswitch"
|
||||
mock_config.side_effect = self.test_config.get
|
||||
@ -1348,3 +1373,57 @@ class TestNeutronOVSUtils(CharmTestCase):
|
||||
call('other_config:max-idle', '30000'),
|
||||
])
|
||||
self.service_restart.assert_not_called()
|
||||
|
||||
@patch.object(nutils, '_run')
|
||||
def test_configure_iptables_rules_not_exist(self, mock_run):
|
||||
# Confirm that iptables rules are being added when it is absent
|
||||
# in the environment
|
||||
|
||||
def dummy_run(*args):
|
||||
if args[3] == '-C':
|
||||
raise subprocess.CalledProcessError(1, "dummy")
|
||||
return subprocess.CompletedProcess("dummy-args", 0)
|
||||
|
||||
mock_run.side_effect = dummy_run
|
||||
nutils.configure_iptables_rules()
|
||||
mock_run.assert_has_calls([
|
||||
call('iptables', '-t', 'raw', '-C', 'PREROUTING',
|
||||
'-p', 'udp', '--dport', '4754', '-j', 'NOTRACK'),
|
||||
call('iptables', '-t', 'raw', '-A', 'PREROUTING',
|
||||
'-p', 'udp', '--dport', '4754', '-j', 'NOTRACK'),
|
||||
call('iptables', '-t', 'raw', '-C', 'PREROUTING',
|
||||
'-p', 'udp', '--dport', '4789', '-j', 'NOTRACK'),
|
||||
call('iptables', '-t', 'raw', '-A', 'PREROUTING',
|
||||
'-p', 'udp', '--dport', '4789', '-j', 'NOTRACK'),
|
||||
call('iptables', '-t', 'raw', '-C', 'OUTPUT',
|
||||
'-p', 'udp', '--dport', '4754', '-j', 'NOTRACK'),
|
||||
call('iptables', '-t', 'raw', '-A', 'OUTPUT',
|
||||
'-p', 'udp', '--dport', '4754', '-j', 'NOTRACK'),
|
||||
call('iptables', '-t', 'raw', '-C', 'OUTPUT',
|
||||
'-p', 'udp', '--dport', '4789', '-j', 'NOTRACK'),
|
||||
call('iptables', '-t', 'raw', '-A', 'OUTPUT',
|
||||
'-p', 'udp', '--dport', '4789', '-j', 'NOTRACK')
|
||||
], any_order=False)
|
||||
self.assertEqual(8, mock_run.call_count)
|
||||
|
||||
@patch.object(nutils, '_run')
|
||||
def test_configure_iptables_rules_already_exist(self, mock_run):
|
||||
# Confirm that iptables rules are not being added when rules
|
||||
# already present in the environment
|
||||
|
||||
def dummy_run(*args):
|
||||
return subprocess.CompletedProcess("dummy-args", 0)
|
||||
|
||||
mock_run.side_effect = dummy_run
|
||||
nutils.configure_iptables_rules()
|
||||
mock_run.assert_has_calls([
|
||||
call('iptables', '-t', 'raw', '-C', "PREROUTING",
|
||||
'-p', 'udp', '--dport', '4754', '-j', 'NOTRACK'),
|
||||
call('iptables', '-t', 'raw', '-C', "PREROUTING", '-p',
|
||||
'udp', '--dport', '4789', '-j', 'NOTRACK'),
|
||||
call('iptables', '-t', 'raw', '-C', "OUTPUT",
|
||||
'-p', 'udp', '--dport', '4754', '-j', 'NOTRACK'),
|
||||
call('iptables', '-t', 'raw', '-C', "OUTPUT", '-p',
|
||||
'udp', '--dport', '4789', '-j', 'NOTRACK'),
|
||||
])
|
||||
self.assertEqual(4, mock_run.call_count)
|
||||
|
Loading…
Reference in New Issue
Block a user