diff --git a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/clients/kubernetes_client.py b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/clients/kubernetes_client.py
index ab40f353..321ab422 100644
--- a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/clients/kubernetes_client.py
+++ b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/clients/kubernetes_client.py
@@ -134,3 +134,44 @@ def delete_node(node_name):
             raise
 
     return Result(response)
+
+
+def mark_all_pods_not_ready(node_name, reason):
+    """
+    Mark all pods on a node as not ready
+    Note: It would be preferable to mark the node as not ready and have
+    kubernetes then mark the pods as not ready, but this is not supported.
+    """
+    # Get the client.
+    kube_client = get_client()
+
+    # Retrieve the pods on the specified node.
+    response = kube_client.list_namespaced_pod(
+        "", field_selector="spec.nodeName=%s" % node_name)
+
+    pods = response.items
+    if pods is not None:
+        for pod in pods:
+            for condition in pod.status.conditions:
+                if condition.type == "Ready":
+                    if condition.status != "False":
+                        # Update the Ready status to False
+                        body = {"status":
+                                {"conditions":
+                                 [{"type": "Ready",
+                                   "status": "False",
+                                   "reason": reason,
+                                   }]}}
+                        try:
+                            DLOG.debug(
+                                "Marking pod %s in namespace %s not ready" %
+                                (pod.metadata.name, pod.metadata.namespace))
+                            kube_client.patch_namespaced_pod_status(
+                                pod.metadata.name, pod.metadata.namespace, body)
+                        except ApiException:
+                            DLOG.exception(
+                                "Failed to update status for pod %s in "
+                                "namespace %s" % (pod.metadata.name,
+                                                  pod.metadata.namespace))
+                    break
+    return
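
For orientation: the status update assembled in mark_all_pods_not_ready() boils down to one call against the Kubernetes Python client. A minimal, standalone sketch of the same call pattern follows; the pod and namespace names are illustrative and not part of this change:

    from kubernetes import client, config

    # Load credentials from a kubeconfig, much as the plugin's get_client()
    # helper is expected to do (in-cluster config is the other common option).
    config.load_kube_config()
    v1 = client.CoreV1Api()

    # Same shape as the body built in mark_all_pods_not_ready().
    body = {"status": {"conditions": [{"type": "Ready",
                                       "status": "False",
                                       "reason": "NodeOffline"}]}}

    # The real code patches every pod selected by spec.nodeName=<node>.
    v1.patch_namespaced_pod_status("example-pod", "default", body)
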
diff --git a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py
index 220372f1..ebec4111 100755
--- a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py
+++ b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py
@@ -925,7 +925,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
             callback.close()
 
     def disable_host_services(self, future, host_uuid,
-                              host_name, host_personality,
+                              host_name, host_personality, host_offline,
                               callback):
         """
         Disable Host Services, notifies kubernetes client to disable services
@@ -955,6 +955,24 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
                            % (host_uuid, host_name))
                 return
 
+            if host_offline:
+                # If the disabled node is offline, we also mark all
+                # the pods on the node as not ready. This will ensure
+                # kubernetes takes action immediately (e.g. to disable
+                # endpoints associated with the pods) instead of waiting
+                # for a grace period to determine the node is unavailable.
+                future.work(kubernetes_client.mark_all_pods_not_ready,
+                            host_name, "NodeOffline")
+
+                future.result = (yield)
+
+                if not future.result.is_complete():
+                    DLOG.error("Kubernetes mark_all_pods_not_ready failed, "
+                               "operation did not complete, host_uuid=%s, "
+                               "host_name=%s."
+                               % (host_uuid, host_name))
+                    return
+
             response['completed'] = True
             response['reason'] = ''
 
diff --git a/nfv/nfv-tests/nfv_unit_tests/tests/test_plugin_kubernetes_client.py b/nfv/nfv-tests/nfv_unit_tests/tests/test_plugin_kubernetes_client.py
index 2f854874..eace3151 100755
--- a/nfv/nfv-tests/nfv_unit_tests/tests/test_plugin_kubernetes_client.py
+++ b/nfv/nfv-tests/nfv_unit_tests/tests/test_plugin_kubernetes_client.py
@@ -203,3 +203,110 @@ class TestNFVPluginsK8SNodeTaint(testcase.NFVTestCase):
                               'NoSchedule',
                               self.test_key1,
                               self.test_value1) is False
+
+
+@mock.patch('kubernetes.config.load_kube_config', mock_load_kube_config)
+class TestNFVPluginsK8SMarkAllPodsNotReady(testcase.NFVTestCase):
+
+    list_namespaced_pod_result = kubernetes.client.V1PodList(
+        api_version="v1",
+        items=[
+            kubernetes.client.V1Pod(
+                api_version="v1",
+                kind="Pod",
+                metadata=kubernetes.client.V1ObjectMeta(
+                    name="test-pod-not-ready",
+                    namespace="test-namespace-1"),
+                status=kubernetes.client.V1PodStatus(
+                    conditions=[
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="Initialized"),
+                        kubernetes.client.V1PodCondition(
+                            status="False",
+                            type="Ready"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="ContainersReady"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="PodScheduled"),
+                    ]
+                )
+            ),
+            kubernetes.client.V1Pod(
+                api_version="v1",
+                kind="Pod",
+                metadata=kubernetes.client.V1ObjectMeta(
+                    name="test-pod-ready",
+                    namespace="test-namespace-1"),
+                status=kubernetes.client.V1PodStatus(
+                    conditions=[
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="Initialized"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="Ready"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="ContainersReady"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="PodScheduled"),
+                    ]
+                )
+            ),
+            kubernetes.client.V1Pod(
+                api_version="v1",
+                kind="Pod",
+                metadata=kubernetes.client.V1ObjectMeta(
+                    name="test-pod-no-ready-status",
+                    namespace="test-namespace-1"),
+                status=kubernetes.client.V1PodStatus(
+                    conditions=[
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="Initialized"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="ContainersReady"),
+                        kubernetes.client.V1PodCondition(
+                            status="True",
+                            type="PodScheduled"),
+                    ]
+                )
+            ),
+        ]
+    )
+
+    def setUp(self):
+        super(TestNFVPluginsK8SMarkAllPodsNotReady, self).setUp()
+
+        def mock_list_namespaced_pod(obj, namespace, field_selector=""):
+            return self.list_namespaced_pod_result
+
+        self.mocked_list_namespaced_pod = mock.patch(
+            'kubernetes.client.CoreV1Api.list_namespaced_pod',
+            mock_list_namespaced_pod)
+        self.mocked_list_namespaced_pod.start()
+
+        self.mock_patch_namespaced_pod_status = mock.Mock()
+        self.mocked_patch_namespaced_pod_status = mock.patch(
+            'kubernetes.client.CoreV1Api.patch_namespaced_pod_status',
+            self.mock_patch_namespaced_pod_status)
+        self.mocked_patch_namespaced_pod_status.start()
+
+    def tearDown(self):
+        super(TestNFVPluginsK8SMarkAllPodsNotReady, self).tearDown()
+
+        self.mocked_list_namespaced_pod.stop()
+        self.mocked_patch_namespaced_pod_status.stop()
+
+    def test_mark_pods(self):
+
+        kubernetes_client.mark_all_pods_not_ready("test_node", "test_reason")
+
+        self.mock_patch_namespaced_pod_status.assert_called_with(
+            "test-pod-ready", "test-namespace-1", mock.ANY)
+        self.mock_patch_namespaced_pod_status.assert_called_once()
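
The new test case never talks to an API server: both CoreV1Api methods used by mark_all_pods_not_ready() are replaced with mock.patch. Reduced to a self-contained sketch with simplified names (not the actual test module), the pattern is:

    import mock

    from kubernetes import client

    fake_pods = client.V1PodList(api_version="v1", items=[])

    with mock.patch('kubernetes.client.CoreV1Api.list_namespaced_pod',
                    return_value=fake_pods), \
            mock.patch('kubernetes.client.CoreV1Api.'
                       'patch_namespaced_pod_status') as patched:
        # Code that instantiates CoreV1Api() inside this block sees the fakes.
        pods = client.CoreV1Api().list_namespaced_pod(
            "", field_selector="spec.nodeName=worker-0")
        assert pods.items == []
        patched.assert_not_called()
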
diff --git a/nfv/nfv-vim/nfv_vim/directors/_host_director.py b/nfv/nfv-vim/nfv_vim/directors/_host_director.py
index a4a02039..226c81f3 100755
--- a/nfv/nfv-vim/nfv_vim/directors/_host_director.py
+++ b/nfv/nfv-vim/nfv_vim/directors/_host_director.py
@@ -102,7 +102,7 @@ class HostDirector(object):
             sw_mgmt_director.disable_host_services_failed(host)
 
     def _nfvi_disable_host_services(self, host_uuid, host_name,
-                                    host_personality, service):
+                                    host_personality, host_offline, service):
         """
         NFVI Disable Host Services
         """
@@ -118,7 +118,7 @@ class HostDirector(object):
                     objects.HOST_SERVICES.GUEST))
         elif service == objects.HOST_SERVICES.CONTAINER:
             nfvi.nfvi_disable_container_host_services(
-                host_uuid, host_name, host_personality,
+                host_uuid, host_name, host_personality, host_offline,
                 self._nfvi_disable_host_services_callback(
                     objects.HOST_SERVICES.CONTAINER))
         else:
@@ -701,7 +701,8 @@ class HostDirector(object):
 
         for host in host_list:
             self._nfvi_disable_host_services(
-                host.uuid, host.name, host.personality, service)
+                host.uuid, host.name, host.personality, host.is_offline,
+                service)
 
         if host_operation.is_inprogress():
             self._host_operation = host_operation
diff --git a/nfv/nfv-vim/nfv_vim/host_fsm/_host_state_disabled.py b/nfv/nfv-vim/nfv_vim/host_fsm/_host_state_disabled.py
index 47a92735..b8efffc0 100755
--- a/nfv/nfv-vim/nfv_vim/host_fsm/_host_state_disabled.py
+++ b/nfv/nfv-vim/nfv_vim/host_fsm/_host_state_disabled.py
@@ -11,6 +11,7 @@ from nfv_vim.host_fsm._host_defs import HOST_STATE
 from nfv_vim.host_fsm._host_tasks import AuditDisabledHostTask
 from nfv_vim.host_fsm._host_tasks import FailHostTask
 from nfv_vim.host_fsm._host_tasks import NotifyDisabledHostTask
+from nfv_vim.host_fsm._host_tasks import OfflineHostTask
 
 DLOG = debug.debug_get_logger('nfv_vim.state_machine.host')
 
@@ -77,6 +78,13 @@ class DisabledState(state_machine.State):
                 host.task = AuditDisabledHostTask(host)
                 host.task.start()
 
+        elif HOST_EVENT.DISABLE == event:
+            if not host.task.inprogress():
+                # This host may have gone offline after being disabled.
+                if host.is_offline():
+                    host.task = OfflineHostTask(host)
+                    host.task.start()
+
         else:
             DLOG.verbose("Ignoring %s event for %s." % (event, host.name))
 
diff --git a/nfv/nfv-vim/nfv_vim/host_fsm/_host_task_work.py b/nfv/nfv-vim/nfv_vim/host_fsm/_host_task_work.py
index 42f13c7c..b8e0bfc5 100755
--- a/nfv/nfv-vim/nfv_vim/host_fsm/_host_task_work.py
+++ b/nfv/nfv-vim/nfv_vim/host_fsm/_host_task_work.py
@@ -970,7 +970,7 @@ class DisableHostServicesTaskWork(state_machine.StateTaskWork):
         elif self._service == objects.HOST_SERVICES.CONTAINER:
             nfvi.nfvi_disable_container_host_services(
                 self._host.uuid, self._host.name, self._host.personality,
-                self._callback())
+                self._host.is_offline(), self._callback())
         else:
             reason = ("Trying to disable unknown "
                       "host service %s" % self._service)
diff --git a/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py b/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py
index 52ce67db..21457233 100755
--- a/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py
+++ b/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py
@@ -284,6 +284,56 @@ class DisableHostTask(state_machine.StateTask):
             self._host.fsm.handle_event(HOST_EVENT.TASK_FAILED, event_data)
 
 
+class OfflineHostTask(state_machine.StateTask):
+    """
+    Offline Host Task
+    """
+    def __init__(self, host):
+        from nfv_vim import objects
+
+        self._host_reference = weakref.ref(host)
+
+        task_work_list = list()
+        if host.host_service_configured(objects.HOST_SERVICES.CONTAINER):
+            # Only disable the container services if we are not running in a
+            # single controller configuration. In a single controller
+            # configuration we keep the container services running.
+            from nfv_vim import directors
+            sw_mgmt_director = directors.get_sw_mgmt_director()
+            if not sw_mgmt_director.single_controller:
+                task_work_list.append(DisableHostServicesTaskWork(
+                    self, host, objects.HOST_SERVICES.CONTAINER))
+
+        super(OfflineHostTask, self).__init__(
+            'offline-host_%s' % host.name, task_work_list)
+
+    @property
+    def _host(self):
+        """
+        Returns the host
+        """
+        host = self._host_reference()
+        return host
+
+    def complete(self, result, reason):
+        """
+        Offline Host Task Complete
+        """
+        if self.aborted():
+            DLOG.debug("Task (%s) complete, but has been aborted." % self._name)
+        else:
+            DLOG.debug("Task (%s) complete." % self._name)
+
+        event_data = dict()
+        event_data['reason'] = reason
+
+        if state_machine.STATE_TASK_RESULT.SUCCESS == result:
+            self._host.fsm.handle_event(HOST_EVENT.TASK_COMPLETED,
+                                        event_data)
+        else:
+            self._host.fsm.handle_event(HOST_EVENT.TASK_FAILED, event_data)
+
+
 class FailHostTask(state_machine.StateTask):
     """
     Fail Host Task
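
OfflineHostTask holds its host through weakref.ref and exposes it via a _host property, mirroring the existing host tasks, so a lingering task never keeps a deleted host object alive. A small generic illustration of that pattern (stand-in classes, not the VIM ones):

    import weakref


    class Host(object):
        def __init__(self, name):
            self.name = name


    class Task(object):
        def __init__(self, host):
            self._host_reference = weakref.ref(host)

        @property
        def _host(self):
            # Resolves to None once the host has been garbage collected.
            return self._host_reference()


    host = Host("compute-0")
    task = Task(host)
    assert task._host is host
    del host
    # With CPython reference counting the host is reclaimed immediately,
    # so the weak reference now resolves to None.
    assert task._host is None
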
diff --git a/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py b/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py
index 5844038f..348f64d9 100755
--- a/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py
+++ b/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py
@@ -96,14 +96,14 @@ def nfvi_upgrade_complete(callback):
 
 
 def nfvi_disable_container_host_services(host_uuid, host_name,
-                                         host_personality,
+                                         host_personality, host_offline,
                                          callback):
     """
     Disable container services on a host
     """
     cmd_id = _infrastructure_plugin.invoke_plugin(
         'disable_host_services',
-        host_uuid, host_name, host_personality,
+        host_uuid, host_name, host_personality, host_offline,
         callback=callback)
     return cmd_id
 
diff --git a/nfv/nfv-vim/nfv_vim/nfvi/api/v1/_nfvi_infrastructure_api.py b/nfv/nfv-vim/nfv_vim/nfvi/api/v1/_nfvi_infrastructure_api.py
index 4395318e..98322ec8 100755
--- a/nfv/nfv-vim/nfv_vim/nfvi/api/v1/_nfvi_infrastructure_api.py
+++ b/nfv/nfv-vim/nfv_vim/nfvi/api/v1/_nfvi_infrastructure_api.py
@@ -94,7 +94,7 @@ class NFVIInfrastructureAPI(object):
 
     @abc.abstractmethod
    def disable_host_services(self, future, host_uuid,
-                              host_name, host_personality,
+                              host_name, host_personality, host_offline,
                               callback):
         """
         Disable infrastructure host services using the plugin
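
End to end, the change threads a single host_offline boolean from the host FSM, through the NFVI module wrapper and plugin API above, down to the kubernetes client. A condensed, hypothetical sketch of that plumbing with simplified stand-ins for the callback-based plugin call (none of these names are the real classes):

    def disable_host_services(host_uuid, host_name, host_personality,
                              host_offline, callback):
        # Stand-in for the plugin entry point: only when the host is offline
        # are its pods force-marked not ready.
        if host_offline:
            print("mark_all_pods_not_ready(%s, 'NodeOffline')" % host_name)
        callback(dict(completed=True, reason=''))


    class FakeHost(object):
        uuid = "uuid-1234"
        name = "compute-0"
        personality = "worker"

        def is_offline(self):
            return True


    host = FakeHost()
    disable_host_services(host.uuid, host.name, host.personality,
                          host.is_offline(),
                          callback=lambda resp: print("done:", resp))
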