From 28d9e9be13805a90fa7b4a2c824cd10c8b7ea278 Mon Sep 17 00:00:00 2001 From: Adil Date: Fri, 4 Jun 2021 13:17:23 -0300 Subject: [PATCH] Update in NVIDIA T4 GPU Support Updated documentation in Node Management Guide Input from Jira comments fixed formatting issues This is a new review: https://review.opendev.org/c/starlingx/docs/+/794859 Previous review merged: https://review.opendev.org/c/starlingx/docs/+/785251 Signed-off-by: Adil Change-Id: I9aef3f5487e6e28986521c88748bb09b8c465f84 --- ...vidia-gpu-operator-for-pci-passthrough.rst | 23 +++++++++---------- .../delete-the-gpu-operator.rst | 2 +- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/doc/source/node_management/kubernetes/hardware_acceleration_devices/configure-nvidia-gpu-operator-for-pci-passthrough.rst b/doc/source/node_management/kubernetes/hardware_acceleration_devices/configure-nvidia-gpu-operator-for-pci-passthrough.rst index 17f9ca2f2..0abc22071 100644 --- a/doc/source/node_management/kubernetes/hardware_acceleration_devices/configure-nvidia-gpu-operator-for-pci-passthrough.rst +++ b/doc/source/node_management/kubernetes/hardware_acceleration_devices/configure-nvidia-gpu-operator-for-pci-passthrough.rst @@ -92,16 +92,14 @@ Use the following steps to configure the GPU Operator container: .. code-block:: none ~(keystone_admin)]$ kubectl get pods –A - NAMESPACE NAME READY STATUS RESTART AGE - default g-node.. 1/1 Running 1 7h54m - default g-node.. 1/1 Running 1 7h54m - default gpu-ope. 1/1 Running 1 7h54m - gpu-operator-resources gpu-.. 1/1 Running 4 28m - gpu-operator-resources nvidia.. 1/1 Running 0 28m - gpu-operator-resources nvidia.. 1/1 Running 0 28m - gpu-operator-resources nvidia.. 1/1 Running 0 28m - gpu-operator-resources nvidia.. 0/1 Completed 0 7h53m - gpu-operator-resources nvidia.. 
1/1 Running 0 28m + NAMESPACE NAME READY STATUS RESTARTS AGE + default gpu-operator-596c49cb9b-2tdlw 1/1 Running 1 24h + default gpu-operator-node-feature-discovery-master-7f87b4d6bb-wsbn4 1/1 Running 2 24h + default gpu-operator-node-feature-discovery-worker-hqzvw 1/1 Running 4 24h + gpu-operator-resources nvidia-container-toolkit-daemonset-8f7nl 1/1 Running 0 14h + gpu-operator-resources nvidia-device-plugin-daemonset-g9lmk 1/1 Running 0 14h + gpu-operator-resources nvidia-device-plugin-validation 0/1 Completed 0 24h + gpu-operator-resources nvidia-driver-daemonset-9mnwr 1/1 Running 0 14h The plugin validation pod is marked completed. @@ -126,8 +124,9 @@ Use the following steps to configure the GPU Operator container: runtimeClassName: nvidia containers: - name: nvidia-usage-example-pod - image: nvidia/samples:cuda10.2-vectorAdd - imagePullPolicy: IfNotPresent command: [ "/bin/bash", "-c", "--" ] + image: nvidia/samples:cuda10.2-vectorAdd + imagePullPolicy: IfNotPresent + command: [ "/bin/bash", "-c", "--" ] args: [ "while true; do sleep 300000; done;" ] resources: requests: diff --git a/doc/source/node_management/kubernetes/hardware_acceleration_devices/delete-the-gpu-operator.rst b/doc/source/node_management/kubernetes/hardware_acceleration_devices/delete-the-gpu-operator.rst index 6791f6caa..b1858766d 100644 --- a/doc/source/node_management/kubernetes/hardware_acceleration_devices/delete-the-gpu-operator.rst +++ b/doc/source/node_management/kubernetes/hardware_acceleration_devices/delete-the-gpu-operator.rst @@ -20,7 +20,7 @@ Ensure that all user generated pods with access to `nvidia.com/gpu` resources ar .. code-block:: none - ~(keystone_admin)]$ helm delete --purge gpu-operator + ~(keystone_admin)]$ helm delete gpu-operator ~(keystone_admin)]$ kubectl delete runtimeclasses.node.k8s.io nvidia #. Remove the GPU Operator, and remove the service parameter platform