From 78f4d85a31d1001f9f87d55d3f2028d7976b4fe7 Mon Sep 17 00:00:00 2001 From: Kobi Samoray Date: Mon, 23 Aug 2021 11:19:47 +0300 Subject: [PATCH] NSXV: Recover member transaction failures When an Octavia member transaction to NSX fails, sometimes pool member lists are out of sync which results with transaction failures. The following leverages the fact that the Octavia driver nests the whole list of members during every transaction within the pool, and verifies that the pool members are synchronized with NSX Change-Id: I18221dbc564db5b3ef967e64540eba6e0772afae --- .../lbaas/nsx_v/implementation/member_mgr.py | 41 +++++++++++++---- .../nsx_v/test_edge_loadbalancer_driver_v2.py | 46 ++++++++++++++++++- 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/vmware_nsx/services/lbaas/nsx_v/implementation/member_mgr.py b/vmware_nsx/services/lbaas/nsx_v/implementation/member_mgr.py index c0e326ac07..8b7087a730 100644 --- a/vmware_nsx/services/lbaas/nsx_v/implementation/member_mgr.py +++ b/vmware_nsx/services/lbaas/nsx_v/implementation/member_mgr.py @@ -29,6 +29,35 @@ from vmware_nsx.services.lbaas.nsx_v import lbaas_common as lb_common LOG = logging.getLogger(__name__) +def _member_to_backend_member(member): + return { + 'ipAddress': member['address'], + 'weight': member['weight'], + 'port': member['protocol_port'], + 'monitorPort': member['protocol_port'], + 'name': lb_common.get_member_id(member['id']), + 'condition': 'enabled' if member['admin_state_up'] else 'disabled'} + + +def _validate_pool_members(pool, edge_pool): + member_ips = set([m['address'] for m in pool.get('members', [])]) + edge_ips = set( + [m['ipAddress'] for m in edge_pool.get('member', [])]) + + # Remove redundant IPs from edge pool + for ip in edge_ips - member_ips: + for edge_member in edge_pool['member']: + if ip == edge_member['ipAddress']: + edge_pool['member'].remove(edge_member) + + # Add missing members on edge + for ip in member_ips - edge_ips: + for member in pool['members']: + if ip == member['address']: + edge_pool['member'].append( + _member_to_backend_member(member)) + + class EdgeMemberManagerFromDict(base_mgr.EdgeLoadbalancerBaseManager): @log_helpers.log_method_call def __init__(self, vcns_driver): @@ -89,14 +118,8 @@ class EdgeMemberManagerFromDict(base_mgr.EdgeLoadbalancerBaseManager): member['tenant_id']) edge_pool = self.vcns.get_pool(edge_id, edge_pool_id)[1] - edge_member = { - 'ipAddress': member['address'], - 'weight': member['weight'], - 'port': member['protocol_port'], - 'monitorPort': member['protocol_port'], - 'name': lb_common.get_member_id(member['id']), - 'condition': - 'enabled' if member['admin_state_up'] else 'disabled'} + _validate_pool_members(member['pool'], edge_pool) + edge_member = _member_to_backend_member(member) if edge_pool.get('member'): edge_pool['member'].append(edge_member) @@ -144,6 +167,7 @@ class EdgeMemberManagerFromDict(base_mgr.EdgeLoadbalancerBaseManager): with locking.LockManager.get_lock(edge_id): edge_pool = self.vcns.get_pool(edge_id, edge_pool_id)[1] + _validate_pool_members(new_member['pool'], edge_pool) if edge_pool.get('member'): for i, m in enumerate(edge_pool['member']): @@ -222,6 +246,7 @@ class EdgeMemberManagerFromDict(base_mgr.EdgeLoadbalancerBaseManager): edge_pool_id, edge_id) completor(success=True) return + _validate_pool_members(member['pool'], edge_pool) for i, m in enumerate(edge_pool['member']): if m['name'] == lb_common.get_member_id(member['id']): diff --git a/vmware_nsx/tests/unit/nsx_v/test_edge_loadbalancer_driver_v2.py b/vmware_nsx/tests/unit/nsx_v/test_edge_loadbalancer_driver_v2.py index 437bf28469..1d21e49cf9 100644 --- a/vmware_nsx/tests/unit/nsx_v/test_edge_loadbalancer_driver_v2.py +++ b/vmware_nsx/tests/unit/nsx_v/test_edge_loadbalancer_driver_v2.py @@ -655,6 +655,23 @@ class TestEdgeLbaasV2Pool(BaseTestEdgeLbaasV2): self.assertTrue(self.last_completor_succees) +def _nsx_member(ip_address): + return {'ipAddress': ip_address, + 'weight': 1, + 'port': 80, + 'monitorPort': 80, + 'name': 'member-test', + 'condition': 'enabled'} + + +def _lbaas_member(ip_address): + return {'address': ip_address, + 'weight': 1, + 'protocol_port': 80, + 'id': 'test', + 'admin_state_up': True} + + class TestEdgeLbaasV2Member(BaseTestEdgeLbaasV2): def setUp(self): super(TestEdgeLbaasV2Member, self).setUp() @@ -706,7 +723,7 @@ class TestEdgeLbaasV2Member(BaseTestEdgeLbaasV2): edge_pool_def = EDGE_POOL_DEF.copy() edge_pool_def['member'] = [EDGE_MEMBER_DEF] mock_get_pool.return_value = (None, edge_pool_def) - + new_member_dict['pool']['members'] = [{'address': MEMBER_ADDRESS}] self.edge_driver.member.update( self.context, self.member_dict, new_member_dict, self.completor) @@ -754,6 +771,33 @@ class TestEdgeLbaasV2Member(BaseTestEdgeLbaasV2): self.assertTrue(self.last_completor_called) self.assertTrue(self.last_completor_succees) + def _do_member_validation_test(self, in_ips, in_edge_ips, out_edge_ips): + pool = self.pool_dict.copy() + edge_pool = EDGE_POOL_DEF.copy() + + pool['members'] = [_lbaas_member(m) for m in in_ips] + + edge_pool['member'] = [_nsx_member(m) for m in in_edge_ips] + + member_mgr._validate_pool_members(pool, edge_pool) + self.assertEqual(edge_pool['member'], [_nsx_member(m) + for m in out_edge_ips]) + + def test_validate_pool_members_valid_lists(self): + self._do_member_validation_test(['10.0.0.10', '10.0.0.11'], + ['10.0.0.10', '10.0.0.11'], + ['10.0.0.10', '10.0.0.11']) + + def test_validate_pool_members_nsx_extra(self): + self._do_member_validation_test(['10.0.0.10'], + ['10.0.0.10', '10.0.0.11'], + ['10.0.0.10']) + + def test_validate_pool_members_lbaas_extra(self): + self._do_member_validation_test(['10.0.0.10', '10.0.0.11'], + ['10.0.0.10'], + ['10.0.0.10', '10.0.0.11']) + class TestEdgeLbaasV2HealthMonitor(BaseTestEdgeLbaasV2): def setUp(self):