Neutron Host Management Robustness
If a network agent goes down in the middle of processing a reschedule/rebalance
operation, abort the current operation so that resources are not scheduled onto
a down agent. This is necessary because the reschedule/rebalance operations may
take a fair amount of time, and users may lock hosts (bringing down the network
agents) at any time.

Change-Id: Ie933b517e4e4cea3beb7d00e3c65d2f538f14e5c
Story: 2003857
Task: 30500
Signed-off-by: Kevin Smith <kevin.smith@windriver.com>
parent 9b600d7b9e
commit 037727447b
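The change adds an abort flag to both rebalance state machines (DHCPAgentRebalance and L3AgentRebalance, diffed below). The following is a condensed sketch of that flow only, with state names abbreviated and unrelated bookkeeping omitted; it is not the exact class from the diff.

# Condensed sketch of the abort-and-restart flow added by this change.
# State names here (GET_AGENTS, HOLD_OFF) are abbreviations, not the real
# DHCP_REBALANCE_STATE / L3_REBALANCE_STATE constants used in the diff.
GET_AGENTS = 'GET_AGENTS'
HOLD_OFF = 'HOLD_OFF'


class AgentRebalanceSketch(object):
    def __init__(self):
        self.abort = False
        self.state = GET_AGENTS
        self.working_host = None        # host-down currently being processed
        self.host_up_queue = list()
        self.host_down_queue = list()

    def set_abort(self):
        # Called from add_rebalance_work() when a host goes down while a
        # reschedule for a different host is already in progress.
        self.abort = True

    def check_abort(self):
        # Run at the top of every state machine pass, before acting on state.
        if self.abort:
            if self.working_host is not None:
                # A host-down reschedule was in progress: restart it from the
                # beginning so nothing is scheduled onto the newly down agent.
                self.state = GET_AGENTS
            else:
                # A host-up rebalance was in progress: hold off, let the
                # host-down be serviced first, then retrigger the host-up pass.
                self.state = HOLD_OFF
                self.host_up_queue.append('abort-restart')
            self.abort = False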
@@ -60,7 +60,8 @@ def build_get_agents_response():
     return get_agents_response


-def build_get_dhcp_agent_networks_response(agent_id, use_strange_networks=False):
+def build_get_dhcp_agent_networks_response(agent_id,
+                                           use_strange_networks=False):
     get_dhcp_agent_networks_response = dict()
     get_dhcp_agent_networks_response['completed'] = True
     get_dhcp_agent_networks_response['reason'] = ''
@@ -238,7 +239,188 @@ class TestNeutronDHCPRebalance(testcase.NFVTestCase):
                             print("less than 2 agents, nothing to do")
                     break
 
-            if loopcount > MAX_LOOPCOUNT:
+            if loopcount >= MAX_LOOPCOUNT:
+                print("Loopcount exit!!! loopcount:%s" % loopcount)
+
+            assert loopcount < MAX_LOOPCOUNT
+
+    @mock.patch('nfv_vim.nfvi.nfvi_get_dhcp_agent_networks',
+                fake_nfvi_get_dhcp_agent_networks)
+    def test_rebalance_down_host_abort_w_api_calls(self):
+        initial_network_count = 0
+        initial_network_config = list()
+
+        abort_state_list = [DHCP_REBALANCE_STATE.GET_DHCP_AGENTS,
+                            DHCP_REBALANCE_STATE.GET_NETWORKS_HOSTED_ON_AGENT,
+                            DHCP_REBALANCE_STATE.GET_HOST_PHYSICAL_NETWORKS,
+                            DHCP_REBALANCE_STATE.RESCHEDULE_DOWN_AGENT,
+                            DHCP_REBALANCE_STATE.HOLD_OFF,
+                            DHCP_REBALANCE_STATE.DONE]
+
+        for x in range(1, 200):
+            _DHCPRebalance.network_diff_threshold = random.randint(1, 4)
+            add_rebalance_work_dhcp('compute-0', True)
+            loopcount = 0
+            if DEBUG_PRINTING:
+                print("HOST DOWN TEST NUMBER %s" % str(x))
+
+            aborted = False
+            doing_abort = False
+            abort_state = random.randint(0, len(abort_state_list) - 1)
+            while True:
+                loopcount += 1
+
+                old_state = _DHCPRebalance.get_state()
+
+                if old_state == (abort_state_list[abort_state]) and (not aborted):
+                    aborted = True
+                    doing_abort = True
+                    add_rebalance_work_dhcp('compute-1', True)
+                    if DEBUG_PRINTING:
+                        print("host-down adding compute-1 down in state: %s." %
+                              old_state)
+
+                _run_state_machine()
+                new_state = _DHCPRebalance.get_state()
+
+                if doing_abort:
+                    doing_abort = False
+                    if (old_state != DHCP_REBALANCE_STATE.DONE) and \
+                            (old_state != DHCP_REBALANCE_STATE.HOLD_OFF):
+                        if _DHCPRebalance.num_dhcp_agents < 2:
+                            assert(new_state == DHCP_REBALANCE_STATE.DONE)
+                        else:
+                            assert(new_state ==
+                                   DHCP_REBALANCE_STATE.GET_NETWORKS_HOSTED_ON_AGENT)
+
+                if ((old_state ==
+                        DHCP_REBALANCE_STATE.GET_NETWORKS_HOSTED_ON_AGENT) and
+                        (new_state ==
+                         DHCP_REBALANCE_STATE.GET_HOST_PHYSICAL_NETWORKS)):
+                    for idx in range(len(_DHCPRebalance.num_networks_on_agents)):
+                        initial_network_config.append(
+                            _DHCPRebalance.num_networks_on_agents[idx])
+                    initial_network_count = \
+                        sum(_DHCPRebalance.num_networks_on_agents)
+
+                if (_DHCPRebalance.get_state() == DHCP_REBALANCE_STATE.DONE) and \
+                        (len(_DHCPRebalance.host_down_queue) == 0):
+                    final_network_count = \
+                        sum(_DHCPRebalance.num_networks_on_agents)
+                    if DEBUG_PRINTING:
+                        print("network_diff_threshold: %s" %
+                              _DHCPRebalance.network_diff_threshold)
+                        print("initial_network_count: %s, "
+                              "final_network_count: %s" %
+                              (initial_network_count, final_network_count))
+                        print("initial num_networks_on_agents: %s, "
+                              "final num_networks_on_agents: %s" %
+                              (initial_network_config,
+                               _DHCPRebalance.num_networks_on_agents))
+                    del initial_network_config[:]
+                    if len(_DHCPRebalance.num_networks_on_agents) > 2:
+                        num_networks_length = \
+                            len(_DHCPRebalance.num_networks_on_agents)
+                        assert ((num_networks_length == 0) or
+                                _DHCPRebalance.num_networks_on_agents[0] == 0)
+                        assert (initial_network_count == final_network_count)
+                    else:
+                        if DEBUG_PRINTING:
+                            print("less than 2 agents, nothing to do")
+                    break
+
+            if loopcount >= MAX_LOOPCOUNT:
+                print("Loopcount exit!!! loopcount:%s" % loopcount)
+
+            assert loopcount < MAX_LOOPCOUNT
+
+    @mock.patch('nfv_vim.nfvi.nfvi_get_dhcp_agent_networks',
+                fake_nfvi_get_dhcp_agent_networks)
+    def test_rebalance_up_host_abort_randomized_w_api_calls(self):
+        initial_network_count = 0
+        initial_network_config = list()
+
+        abort_state_list = [DHCP_REBALANCE_STATE.GET_DHCP_AGENTS,
+                            DHCP_REBALANCE_STATE.GET_NETWORKS_HOSTED_ON_AGENT,
+                            DHCP_REBALANCE_STATE.GET_HOST_PHYSICAL_NETWORKS,
+                            DHCP_REBALANCE_STATE.RESCHEDULE_NEW_AGENT,
+                            DHCP_REBALANCE_STATE.HOLD_OFF,
+                            DHCP_REBALANCE_STATE.DONE]
+
+        for x in range(1, 200):
+            _DHCPRebalance.network_diff_threshold = random.randint(1, 4)
+            add_rebalance_work_dhcp('compute-0', False)
+
+            aborted = False
+            doing_abort = False
+            abort_state = random.randint(0, len(abort_state_list) - 1)
+
+            loopcount = 0
+            if DEBUG_PRINTING:
+                print("HOST UP TEST NUMBER %s" % str(x))
+
+            while True:
+                loopcount += 1
+
+                old_state = _DHCPRebalance.get_state()
+
+                if old_state == (abort_state_list[abort_state]) and (not aborted):
+                    aborted = True
+                    doing_abort = True
+                    add_rebalance_work_dhcp('compute-1', True)
+                    if DEBUG_PRINTING:
+                        print("host-up adding compute-1 down in state: %s." %
+                              old_state)
+
+                _run_state_machine()
+                new_state = _DHCPRebalance.get_state()
+
+                if doing_abort:
+                    doing_abort = False
+                    if (old_state != DHCP_REBALANCE_STATE.DONE) and \
+                            (old_state != DHCP_REBALANCE_STATE.HOLD_OFF):
+                        assert(new_state ==
+                               DHCP_REBALANCE_STATE.HOLD_OFF)
+
+                if ((old_state ==
+                        DHCP_REBALANCE_STATE.GET_NETWORKS_HOSTED_ON_AGENT) and
+                        ((new_state ==
+                          DHCP_REBALANCE_STATE.GET_HOST_PHYSICAL_NETWORKS) or
+                         (new_state == DHCP_REBALANCE_STATE.DONE))):
+                    # new_state DONE is for already balanced case
+                    for idx in range(len(_DHCPRebalance.num_networks_on_agents)):
+                        initial_network_config.append(
+                            _DHCPRebalance.num_networks_on_agents[idx])
+                    initial_network_count = sum(
+                        _DHCPRebalance.num_networks_on_agents)
+
+                if ((_DHCPRebalance.get_state() == DHCP_REBALANCE_STATE.DONE) and
+                        (len(_DHCPRebalance.host_up_queue) == 0) and
+                        (len(_DHCPRebalance.host_down_queue) == 0)):
+                    final_network_count = sum(
+                        _DHCPRebalance.num_networks_on_agents)
+                    if DEBUG_PRINTING:
+                        print("network_diff_threshold: %s" %
+                              _DHCPRebalance.network_diff_threshold)
+                        print("initial_network_count: %s, "
+                              "final_network_count: %s" %
+                              (initial_network_count, final_network_count))
+                        print("initial num_networks_on_agents: %s, "
+                              "final num_networks_on_agents: %s" %
+                              (initial_network_config,
+                               _DHCPRebalance.num_networks_on_agents))
+                    del initial_network_config[:]
+                    if len(_DHCPRebalance.num_networks_on_agents) > 2:
+                        assert (initial_network_count == final_network_count)
+                        assert (max(_DHCPRebalance.num_networks_on_agents) -
+                                min(_DHCPRebalance.num_networks_on_agents) <=
+                                _DHCPRebalance.network_diff_threshold)
+                    else:
+                        if DEBUG_PRINTING:
+                            print("less than 2 agents, nothing to do")
+                    break
+
+            if loopcount >= MAX_LOOPCOUNT:
                 print("Loopcount exit!!! loopcount:%s" % loopcount)
 
             assert loopcount < MAX_LOOPCOUNT
@@ -298,7 +480,7 @@ class TestNeutronDHCPRebalance(testcase.NFVTestCase):
                             print("less than 2 agents, nothing to do")
                     break
 
-            if loopcount > MAX_LOOPCOUNT:
+            if loopcount >= MAX_LOOPCOUNT:
                 print("Loopcount exit!!! loopcount:%s" % loopcount)
 
             assert loopcount < MAX_LOOPCOUNT
@@ -355,7 +537,7 @@ class TestNeutronDHCPRebalance(testcase.NFVTestCase):
                             print("less than 2 agents, nothing to do")
                     break
 
-            if loopcount > MAX_LOOPCOUNT:
+            if loopcount >= MAX_LOOPCOUNT:
                 print("Loopcount exit!!! loopcount:%s" % loopcount)
 
             assert loopcount < MAX_LOOPCOUNT
@@ -277,7 +277,99 @@ class TestNeutronRebalance2(testcase.NFVTestCase):
                             print("less than 2 agents, nothing to do")
                     break
 
-            if loopcount > MAX_LOOPCOUNT:
+            if loopcount >= MAX_LOOPCOUNT:
+                print("Loopcount exit!!! loopcount:%s" % loopcount)
+
+            assert loopcount < MAX_LOOPCOUNT
+
+    def test_rebalance_down_host_abort_randomized_w_api_calls(self):
+        initial_router_count = 0
+        initial_router_config = list()
+
+        abort_state_list = [L3_REBALANCE_STATE.GET_NETWORK_AGENTS,
+                            L3_REBALANCE_STATE.GET_ROUTERS_HOSTED_ON_AGENT,
+                            L3_REBALANCE_STATE.GET_ROUTER_PORT_NETWORKS,
+                            L3_REBALANCE_STATE.GET_PHYSICAL_NETWORK_FROM_NETWORKS,
+                            L3_REBALANCE_STATE.GET_HOST_PHYSICAL_NETWORKS,
+                            L3_REBALANCE_STATE.RESCHEDULE_DOWN_AGENT,
+                            L3_REBALANCE_STATE.HOLD_OFF,
+                            L3_REBALANCE_STATE.DONE]
+
+        for x in range(1, 10):
+            _L3Rebalance.router_diff_threshold = random.randint(1, 4)
+            add_rebalance_work_l3('compute-0', True)
+
+            aborted = False
+            doing_abort = False
+            abort_state = random.randint(0, len(abort_state_list) - 1)
+
+            loopcount = 0
+            if DEBUG_PRINTING:
+                print("HOST DOWN TEST NUMBER %s" % str(x))
+
+            while True:
+                loopcount += 1
+
+                old_state = _L3Rebalance.get_state()
+
+                if old_state == (abort_state_list[abort_state]) and (not aborted):
+                    aborted = True
+                    doing_abort = True
+                    add_rebalance_work_l3('compute-1', True)
+                    if DEBUG_PRINTING:
+                        print("host-up adding compute-1 down in state: %s." %
+                              old_state)
+
+                _run_state_machine()
+                new_state = _L3Rebalance.get_state()
+
+                if doing_abort:
+                    doing_abort = False
+                    if (old_state != L3_REBALANCE_STATE.DONE) and \
+                            (old_state != L3_REBALANCE_STATE.HOLD_OFF):
+                        if _L3Rebalance.num_l3agents < 2:
+                            assert(new_state == L3_REBALANCE_STATE.DONE)
+                        else:
+                            assert(new_state ==
+                                   L3_REBALANCE_STATE.GET_ROUTERS_HOSTED_ON_AGENT)
+
+                if ((old_state ==
+                        L3_REBALANCE_STATE.GET_ROUTERS_HOSTED_ON_AGENT) and
+                        (new_state ==
+                         L3_REBALANCE_STATE.GET_ROUTER_PORT_NETWORKS)):
+                    for idx in range(len(_L3Rebalance.num_routers_on_agents)):
+                        initial_router_config.append(
+                            _L3Rebalance.num_routers_on_agents[idx])
+                    initial_router_count = \
+                        sum(_L3Rebalance.num_routers_on_agents)
+
+                if (_L3Rebalance.get_state() == L3_REBALANCE_STATE.DONE) and \
+                        (len(_L3Rebalance.host_down_queue) == 0):
+                    final_router_count = \
+                        sum(_L3Rebalance.num_routers_on_agents)
+                    if DEBUG_PRINTING:
+                        print("router_diff_threshold: %s" %
+                              _L3Rebalance.router_diff_threshold)
+                        print("initial_router_count: %s, "
+                              "final_router_count: %s" %
+                              (initial_router_count, final_router_count))
+                        print("initial num_routers_on_agents: %s, "
+                              "final num_routers_on_agents: %s" %
+                              (initial_router_config,
+                               _L3Rebalance.num_routers_on_agents))
+                    del initial_router_config[:]
+                    if len(_L3Rebalance.num_routers_on_agents) > 2:
+                        num_routers_length = \
+                            len(_L3Rebalance.num_routers_on_agents)
+                        assert ((num_routers_length == 0) or
+                                _L3Rebalance.num_routers_on_agents[0] == 0)
+                        assert (initial_router_count == final_router_count)
+                    else:
+                        if DEBUG_PRINTING:
+                            print("less than 2 agents, nothing to do")
+                    break
+
+            if loopcount >= MAX_LOOPCOUNT:
                 print("Loopcount exit!!! loopcount:%s" % loopcount)
 
             assert loopcount < MAX_LOOPCOUNT
@@ -335,7 +427,98 @@ class TestNeutronRebalance2(testcase.NFVTestCase):
                             print("less than 2 agents, nothing to do")
                     break
 
-            if loopcount > MAX_LOOPCOUNT:
+            if loopcount >= MAX_LOOPCOUNT:
+                print("Loopcount exit!!! loopcount:%s" % loopcount)
+
+            assert loopcount < MAX_LOOPCOUNT
+
+    def test_rebalance_up_host_abort_randomized_w_api_calls(self):
+        initial_router_count = 0
+        initial_router_config = list()
+
+        abort_state_list = [L3_REBALANCE_STATE.GET_NETWORK_AGENTS,
+                            L3_REBALANCE_STATE.GET_ROUTERS_HOSTED_ON_AGENT,
+                            L3_REBALANCE_STATE.GET_ROUTER_PORT_NETWORKS,
+                            L3_REBALANCE_STATE.GET_PHYSICAL_NETWORK_FROM_NETWORKS,
+                            L3_REBALANCE_STATE.GET_HOST_PHYSICAL_NETWORKS,
+                            L3_REBALANCE_STATE.RESCHEDULE_NEW_AGENT,
+                            L3_REBALANCE_STATE.HOLD_OFF,
+                            L3_REBALANCE_STATE.DONE]
+
+        for x in range(1, 10):
+            _L3Rebalance.router_diff_threshold = random.randint(1, 4)
+            add_rebalance_work_l3('compute-0', False)
+
+            aborted = False
+            doing_abort = False
+            abort_state = random.randint(0, len(abort_state_list) - 1)
+
+            loopcount = 0
+            if DEBUG_PRINTING:
+                print("HOST UP TEST NUMBER %s" % str(x))
+
+            while True:
+                loopcount += 1
+
+                old_state = _L3Rebalance.get_state()
+
+                if old_state == (abort_state_list[abort_state]) and (not aborted):
+                    aborted = True
+                    doing_abort = True
+                    add_rebalance_work_l3('compute-1', True)
+                    if DEBUG_PRINTING:
+                        print("host-up adding compute-1 down in state: %s." %
+                              old_state)
+
+                _run_state_machine()
+                new_state = _L3Rebalance.get_state()
+
+                if doing_abort:
+                    doing_abort = False
+                    if (old_state != L3_REBALANCE_STATE.DONE) and \
+                            (old_state != L3_REBALANCE_STATE.HOLD_OFF):
+                        assert(new_state ==
+                               L3_REBALANCE_STATE.HOLD_OFF)
+
+                if ((old_state ==
+                        L3_REBALANCE_STATE.GET_ROUTERS_HOSTED_ON_AGENT) and
+                        ((new_state ==
+                          L3_REBALANCE_STATE.GET_ROUTER_PORT_NETWORKS) or
+                         (new_state == L3_REBALANCE_STATE.DONE))):
+                    # new_state DONE is for already balanced case
+                    for idx in range(len(_L3Rebalance.num_routers_on_agents)):
+                        initial_router_config.append(
+                            _L3Rebalance.num_routers_on_agents[idx])
+                    initial_router_count = sum(
+                        _L3Rebalance.num_routers_on_agents)
+
+                if ((_L3Rebalance.get_state() == L3_REBALANCE_STATE.DONE) and
+                        (len(_L3Rebalance.host_up_queue) == 0) and
+                        (len(_L3Rebalance.host_down_queue) == 0)):
+                    final_router_count = sum(
+                        _L3Rebalance.num_routers_on_agents)
+                    if DEBUG_PRINTING:
+                        print("router_diff_threshold: %s" %
+                              _L3Rebalance.router_diff_threshold)
+                        print("initial_router_count: %s, "
+                              "final_router_count: %s" %
+                              (initial_router_count, final_router_count))
+                        print("initial num_routers_on_agents: %s, "
+                              "final num_routers_on_agents: %s" %
+                              (initial_router_config,
+                               _L3Rebalance.num_routers_on_agents))
+                    del initial_router_config[:]
+                    if len(_L3Rebalance.num_routers_on_agents) > 2:
+                        assert (initial_router_count == final_router_count)
+                        assert (max(_L3Rebalance.num_routers_on_agents) -
+                                min(_L3Rebalance.num_routers_on_agents) <=
+                                _L3Rebalance.router_diff_threshold)
+                    else:
+                        if DEBUG_PRINTING:
+                            print("less than 2 agents, nothing to do")
+                    break
+
+            if loopcount >= MAX_LOOPCOUNT:
                 print("Loopcount exit!!! loopcount:%s" % loopcount)
 
             assert loopcount < MAX_LOOPCOUNT
@@ -93,6 +93,32 @@ class DHCPAgentRebalance(object):
         # queues that maintain host names of hosts coming up and going down.
         self.host_up_queue = list()
         self.host_down_queue = list()
+        # whether to abort and restart.
+        self.abort = False
+
+    def set_abort(self):
+        self.abort = True
+
+    def check_abort(self):
+        # if the abort flag is set, it means that there has been a change
+        # in the status of one of the agents; we need to restart the
+        # algorithm.
+        if self.abort:
+            if self.get_working_host() is not None:
+                # We were processing a host down.
+                # Go to GET_DHCP_AGENTS and restart the host down
+                # processing for the current host.
+                self.set_state(DHCP_REBALANCE_STATE.GET_DHCP_AGENTS)
+            else:
+                # We were processing a host up.
+                # Go to HOLD_OFF so we can service the host down
+                # that just came in first.
+                self.set_state(DHCP_REBALANCE_STATE.HOLD_OFF)
+                # enqueue another host up rebalance to trigger host up
+                # rebalancing after processing the host down.
+                self.host_up_queue.append('abort-restart')
+            self.abort = False
+            DLOG.info("Aborting current reschedule and restarting")
 
     def reinit(self):
         self.num_dhcp_agents = 0
@@ -284,7 +310,30 @@ class DHCPAgentRebalance(object):
 
     def add_rebalance_work(self, host_name, host_is_going_down):
         if host_is_going_down:
-            self.host_down_queue.append(host_name)
+            # Only add this host to the queue if it is not
+            # already in it, and we are not in the process of
+            # performing a host down reschedule for that host.
+            if host_name not in self.host_down_queue:
+                if (self.state != DHCP_REBALANCE_STATE.DONE) and \
+                        (self.state != DHCP_REBALANCE_STATE.HOLD_OFF):
+                    # state machine is in progress.
+                    if (self.get_working_host() != host_name):
+                        # We are in the process of rescheduling,
+                        # but not due to processing a down host
+                        # reschedule for the host that is to be queued.
+                        # We need to abort immediately and restart,
+                        # lest we reschedule networks onto a down host.
+                        self.set_abort()
+                        self.host_down_queue.append(host_name)
+                    else:
+                        DLOG.debug("Not adding host down entry as host "
+                                   "down processing for this host already "
+                                   "in progress")
+                else:
+                    # state machine is not in progress.
+                    self.host_down_queue.append(host_name)
+            else:
+                DLOG.debug("Not adding duplicate host down queue entry")
         else:
             self.host_up_queue.append(host_name)
 
@@ -818,6 +867,8 @@ def _run_state_machine():
 
     _DHCPRebalance.state_machine_in_progress = True
 
+    _DHCPRebalance.check_abort()
+
     my_state = _DHCPRebalance.get_state()
     DLOG.debug("Network Rebalance State %s" % my_state)
     if my_state == DHCP_REBALANCE_STATE.GET_DHCP_AGENTS:
@@ -99,6 +99,32 @@ class L3AgentRebalance(object):
         # queues that maintain host names of hosts coming up and going down.
         self.host_up_queue = list()
         self.host_down_queue = list()
+        # whether to abort and restart.
+        self.abort = False
+
+    def set_abort(self):
+        self.abort = True
+
+    def check_abort(self):
+        # if the abort flag is set, it means that there has been a change
+        # in the status of one of the agents; we need to restart the
+        # algorithm.
+        if self.abort:
+            if self.get_working_host() is not None:
+                # We were processing a host down.
+                # Go to GET_NETWORK_AGENTS and restart the host down
+                # processing for the current host.
+                self.set_state(L3_REBALANCE_STATE.GET_NETWORK_AGENTS)
+            else:
+                # We were processing a host up.
+                # Go to HOLD_OFF so we can service the host down
+                # that just came in first.
+                self.set_state(L3_REBALANCE_STATE.HOLD_OFF)
+                # enqueue another host up rebalance to trigger host up
+                # rebalancing after processing the host down.
+                self.host_up_queue.append('abort-restart')
+            self.abort = False
+            DLOG.info("Aborting current reschedule and restarting")
 
     def reinit(self):
         self.num_l3agents = 0
@@ -380,7 +406,30 @@ class L3AgentRebalance(object):
 
     def add_rebalance_work(self, host_name, host_is_going_down):
         if host_is_going_down:
-            self.host_down_queue.append(host_name)
+            # Only add this host to the queue if it is not
+            # already in it, and we are not in the process of
+            # performing a host down reschedule for that host.
+            if host_name not in self.host_down_queue:
+                if (self.state != L3_REBALANCE_STATE.DONE) and \
+                        (self.state != L3_REBALANCE_STATE.HOLD_OFF):
+                    # state machine is in progress.
+                    if (self.get_working_host() != host_name):
+                        # We are in the process of rescheduling,
+                        # but not due to processing a down host
+                        # reschedule for the host that is to be queued.
+                        # We need to abort immediately and restart,
+                        # lest we reschedule routers onto a down host.
+                        self.set_abort()
+                        self.host_down_queue.append(host_name)
+                    else:
+                        DLOG.debug("Not adding host down entry as host "
+                                   "down processing for this host already "
+                                   "in progress")
+                else:
+                    # state machine is not in progress.
+                    self.host_down_queue.append(host_name)
+            else:
+                DLOG.debug("Not adding duplicate host down queue entry")
         else:
             self.host_up_queue.append(host_name)
 
@@ -1009,8 +1058,11 @@ def _run_state_machine():
 
     _L3Rebalance.state_machine_in_progress = True
+
+    _L3Rebalance.check_abort()
+
     my_state = _L3Rebalance.get_state()
     DLOG.debug("Network Rebalance State %s" % my_state)
 
     if my_state == L3_REBALANCE_STATE.GET_NETWORK_AGENTS:
 
         _L3Rebalance.reinit()