Mark pods as not ready when host goes offline

When a host goes offline unexpectedly (e.g. spontaneous reboot or
power failure), it takes a while before Kubernetes detects that
the node is unavailable and marks it not ready, which in turn
causes the pods on that node to be marked not ready. Until that
happens, the endpoints for those pods remain in use, which can
lead to message loss.

In StarlingX, our maintenance component detects that a node is
offline faster than Kubernetes does. To shorten the reaction
time, the VIM is updated to set the status of the pods on an
offline node to not ready as soon as maintenance reports that
the node is offline.

Change-Id: I1a20707981c5517e0d305fd9551bc5e69c0282cd
Story: 2002843
Task: 33695
Signed-off-by: Bart Wensley <barton.wensley@windriver.com>
Bart Wensley 2019-06-13 13:07:06 -05:00
parent ded3f2df13
commit cdd6c334d9
9 changed files with 233 additions and 8 deletions
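For context before the diff: the core of this change is a patch to each pod's
status subresource that forces its Ready condition to False. The following is
a minimal, standalone sketch of that idea using the kubernetes Python client;
the node name, reason string and kubeconfig loading are illustrative only and
differ from how the plugin obtains its client.

    # Illustrative sketch only (not part of this commit): force the Ready
    # condition of every pod on a node to False via the status subresource.
    from kubernetes import client, config

    config.load_kube_config()            # assumes a local kubeconfig
    v1 = client.CoreV1Api()

    node_name = "compute-1"              # example node name
    reason = "NodeOffline"               # reason recorded on the condition

    pods = v1.list_pod_for_all_namespaces(
        field_selector="spec.nodeName=%s" % node_name).items
    for pod in pods:
        body = {"status": {"conditions": [{"type": "Ready",
                                           "status": "False",
                                           "reason": reason}]}}
        v1.patch_namespaced_pod_status(pod.metadata.name,
                                       pod.metadata.namespace, body)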

@@ -134,3 +134,44 @@ def delete_node(node_name):
             raise
     return Result(response)
+
+
+def mark_all_pods_not_ready(node_name, reason):
+    """
+    Mark all pods on a node as not ready
+
+    Note: It would be preferable to mark the node as not ready and have
+    kubernetes then mark the pods as not ready, but this is not supported.
+    """
+    # Get the client.
+    kube_client = get_client()
+
+    # Retrieve the pods on the specified node.
+    response = kube_client.list_namespaced_pod(
+        "", field_selector="spec.nodeName=%s" % node_name)
+
+    pods = response.items
+    if pods is not None:
+        for pod in pods:
+            for condition in pod.status.conditions:
+                if condition.type == "Ready":
+                    if condition.status != "False":
+                        # Update the Ready status to False
+                        body = {"status":
+                                {"conditions":
+                                 [{"type": "Ready",
+                                   "status": "False",
+                                   "reason": reason,
+                                   }]}}
+                        try:
+                            DLOG.debug(
+                                "Marking pod %s in namespace %s not ready" %
+                                (pod.metadata.name, pod.metadata.namespace))
+                            kube_client.patch_namespaced_pod_status(
+                                pod.metadata.name, pod.metadata.namespace, body)
+                        except ApiException:
+                            DLOG.exception(
+                                "Failed to update status for pod %s in "
+                                "namespace %s" % (pod.metadata.name,
+                                                  pod.metadata.namespace))
+                    break
+
+    return
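
As a usage sketch, the helper added above can be exercised directly; the
import path below is an assumption based on the plugin layout implied by the
tests, and the node name and reason are example values.

    # Hypothetical direct invocation; import path and arguments are examples.
    from nfv_plugins.nfvi_plugins.clients import kubernetes_client

    kubernetes_client.mark_all_pods_not_ready("compute-1", "NodeOffline")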

@@ -925,7 +925,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
         callback.close()

     def disable_host_services(self, future, host_uuid,
-                              host_name, host_personality,
+                              host_name, host_personality, host_offline,
                               callback):
         """
         Disable Host Services, notifies kubernetes client to disable services
@@ -955,6 +955,24 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
                            % (host_uuid, host_name))
                 return

+            if host_offline:
+                # If the disabled node is offline, we also mark all
+                # the pods on the node as not ready. This will ensure
+                # kubernetes takes action immediately (e.g. to disable
+                # endpoints associated with the pods) instead of waiting
+                # for a grace period to determine the node is unavailable.
+                future.work(kubernetes_client.mark_all_pods_not_ready,
+                            host_name, "NodeOffline")
+                future.result = (yield)
+
+                if not future.result.is_complete():
+                    DLOG.error("Kubernetes mark_all_pods_not_ready failed, "
+                               "operation did not complete, host_uuid=%s, "
+                               "host_name=%s."
+                               % (host_uuid, host_name))
+                    return
+
             response['completed'] = True
             response['reason'] = ''

@@ -203,3 +203,110 @@ class TestNFVPluginsK8SNodeTaint(testcase.NFVTestCase):
             'NoSchedule',
             self.test_key1,
             self.test_value1) is False
+
+
+@mock.patch('kubernetes.config.load_kube_config', mock_load_kube_config)
+class TestNFVPluginsK8SMarkAllPodsNotReady(testcase.NFVTestCase):
+
+    list_namespaced_pod_result = kubernetes.client.V1PodList(
+        api_version="v1",
+        items=[
+            kubernetes.client.V1Pod(
+                api_version="v1",
+                kind="Pod",
+                metadata=kubernetes.client.V1ObjectMeta(
+                    name="test-pod-not-ready",
+                    namespace="test-namespace-1"),
+                status=kubernetes.client.V1PodStatus(
+                    conditions=[
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="Initialized"),
+                        kubernetes.client.V1PodCondition(
+                            status="False",
+                            type="Ready"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="ContainersReady"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="PodScheduled"),
+                    ]
+                )
+            ),
+            kubernetes.client.V1Pod(
+                api_version="v1",
+                kind="Pod",
+                metadata=kubernetes.client.V1ObjectMeta(
+                    name="test-pod-ready",
+                    namespace="test-namespace-1"),
+                status=kubernetes.client.V1PodStatus(
+                    conditions=[
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="Initialized"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="Ready"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="ContainersReady"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="PodScheduled"),
+                    ]
+                )
+            ),
+            kubernetes.client.V1Pod(
+                api_version="v1",
+                kind="Pod",
+                metadata=kubernetes.client.V1ObjectMeta(
+                    name="test-pod-no-ready-status",
+                    namespace="test-namespace-1"),
+                status=kubernetes.client.V1PodStatus(
+                    conditions=[
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="Initialized"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="ContainersReady"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="PodScheduled"),
+                    ]
+                )
+            ),
+        ]
+    )
+
+    def setUp(self):
+        super(TestNFVPluginsK8SMarkAllPodsNotReady, self).setUp()
+
+        def mock_list_namespaced_pod(obj, namespace, field_selector=""):
+            return self.list_namespaced_pod_result
+
+        self.mocked_list_namespaced_pod = mock.patch(
+            'kubernetes.client.CoreV1Api.list_namespaced_pod',
+            mock_list_namespaced_pod)
+        self.mocked_list_namespaced_pod.start()
+
+        self.mock_patch_namespaced_pod_status = mock.Mock()
+        self.mocked_patch_namespaced_pod_status = mock.patch(
+            'kubernetes.client.CoreV1Api.patch_namespaced_pod_status',
+            self.mock_patch_namespaced_pod_status)
+        self.mocked_patch_namespaced_pod_status.start()
+
+    def tearDown(self):
+        super(TestNFVPluginsK8SMarkAllPodsNotReady, self).tearDown()
+        self.mocked_list_namespaced_pod.stop()
+        self.mocked_patch_namespaced_pod_status.stop()
+
+    def test_mark_pods(self):
+        kubernetes_client.mark_all_pods_not_ready("test_node", "test_reason")
+
+        self.mock_patch_namespaced_pod_status.assert_called_with(
+            "test-pod-ready", "test-namespace-1", mock.ANY)
+        self.mock_patch_namespaced_pod_status.assert_called_once()
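
The test above matches the patch body with mock.ANY. If a stricter check were
wanted, a sketch of the assertion (derived from the body built in
mark_all_pods_not_ready; not part of this commit) could look like:

    # Sketch only: assert the exact body passed for the Ready pod.
    expected_body = {"status":
                     {"conditions":
                      [{"type": "Ready",
                        "status": "False",
                        "reason": "test_reason",
                        }]}}
    self.mock_patch_namespaced_pod_status.assert_called_with(
        "test-pod-ready", "test-namespace-1", expected_body)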

@@ -102,7 +102,7 @@ class HostDirector(object):
             sw_mgmt_director.disable_host_services_failed(host)

     def _nfvi_disable_host_services(self, host_uuid, host_name,
-                                    host_personality, service):
+                                    host_personality, host_offline, service):
         """
         NFVI Disable Host Services
         """
@@ -118,7 +118,7 @@ class HostDirector(object):
                     objects.HOST_SERVICES.GUEST))
         elif service == objects.HOST_SERVICES.CONTAINER:
             nfvi.nfvi_disable_container_host_services(
-                host_uuid, host_name, host_personality,
+                host_uuid, host_name, host_personality, host_offline,
                 self._nfvi_disable_host_services_callback(
                     objects.HOST_SERVICES.CONTAINER))
         else:
@@ -701,7 +701,8 @@ class HostDirector(object):
         for host in host_list:
             self._nfvi_disable_host_services(
-                host.uuid, host.name, host.personality, service)
+                host.uuid, host.name, host.personality, host.is_offline,
+                service)

         if host_operation.is_inprogress():
             self._host_operation = host_operation

@@ -11,6 +11,7 @@ from nfv_vim.host_fsm._host_defs import HOST_STATE
 from nfv_vim.host_fsm._host_tasks import AuditDisabledHostTask
 from nfv_vim.host_fsm._host_tasks import FailHostTask
 from nfv_vim.host_fsm._host_tasks import NotifyDisabledHostTask
+from nfv_vim.host_fsm._host_tasks import OfflineHostTask

 DLOG = debug.debug_get_logger('nfv_vim.state_machine.host')

@@ -77,6 +78,13 @@ class DisabledState(state_machine.State):
                 host.task = AuditDisabledHostTask(host)
                 host.task.start()

+        elif HOST_EVENT.DISABLE == event:
+            if not host.task.inprogress():
+                # This host may have gone offline after being disabled.
+                if host.is_offline():
+                    host.task = OfflineHostTask(host)
+                    host.task.start()
+
         else:
             DLOG.verbose("Ignoring %s event for %s." % (event, host.name))

@@ -970,7 +970,7 @@ class DisableHostServicesTaskWork(state_machine.StateTaskWork):
         elif self._service == objects.HOST_SERVICES.CONTAINER:
             nfvi.nfvi_disable_container_host_services(
                 self._host.uuid, self._host.name, self._host.personality,
-                self._callback())
+                self._host.is_offline(), self._callback())
         else:
             reason = ("Trying to disable unknown "
                       "host service %s" % self._service)

@@ -284,6 +284,56 @@ class DisableHostTask(state_machine.StateTask):
                self._host.fsm.handle_event(HOST_EVENT.TASK_FAILED, event_data)


+class OfflineHostTask(state_machine.StateTask):
+    """
+    Offline Host Task
+    """
+    def __init__(self, host):
+        from nfv_vim import objects
+
+        self._host_reference = weakref.ref(host)
+
+        task_work_list = list()
+        if host.host_service_configured(objects.HOST_SERVICES.CONTAINER):
+            # Only disable the container services if we are not running in a
+            # single controller configuration. In a single controller
+            # configuration we keep the container services running.
+            from nfv_vim import directors
+            sw_mgmt_director = directors.get_sw_mgmt_director()
+            if not sw_mgmt_director.single_controller:
+                task_work_list.append(DisableHostServicesTaskWork(
+                    self, host, objects.HOST_SERVICES.CONTAINER))
+
+        super(OfflineHostTask, self).__init__(
+            'offline-host_%s' % host.name, task_work_list)
+
+    @property
+    def _host(self):
+        """
+        Returns the host
+        """
+        host = self._host_reference()
+        return host
+
+    def complete(self, result, reason):
+        """
+        Offline Host Task Complete
+        """
+        if self.aborted():
+            DLOG.debug("Task (%s) complete, but has been aborted." % self._name)
+        else:
+            DLOG.debug("Task (%s) complete." % self._name)
+
+            event_data = dict()
+            event_data['reason'] = reason
+
+            if state_machine.STATE_TASK_RESULT.SUCCESS == result:
+                self._host.fsm.handle_event(HOST_EVENT.TASK_COMPLETED,
+                                            event_data)
+            else:
+                self._host.fsm.handle_event(HOST_EVENT.TASK_FAILED, event_data)
+
+
 class FailHostTask(state_machine.StateTask):
     """
     Fail Host Task

@@ -96,14 +96,14 @@ def nfvi_upgrade_complete(callback):


 def nfvi_disable_container_host_services(host_uuid, host_name,
-                                         host_personality,
+                                         host_personality, host_offline,
                                          callback):
     """
     Disable container services on a host
     """
     cmd_id = _infrastructure_plugin.invoke_plugin(
         'disable_host_services',
-        host_uuid, host_name, host_personality,
+        host_uuid, host_name, host_personality, host_offline,
         callback=callback)
     return cmd_id

@@ -94,7 +94,7 @@ class NFVIInfrastructureAPI(object):

     @abc.abstractmethod
     def disable_host_services(self, future, host_uuid,
-                              host_name, host_personality,
+                              host_name, host_personality, host_offline,
                               callback):
         """
         Disable infrastructure host services using the plugin