Add GPU reporting to idrac-wsman inspect interface
This patch implements reporting of the number of NVIDIA Tesla T4 devices connected to a system by discovering such devices and exposing the count through the 'pci_gpu_devices' capability. Change-Id: If713895f05f08a9827c4c085108abb3e388b2a2e Story: 2008118 Task: 40839 Depends-On: https://review.opendev.org/#/c/750364/
This commit is contained in:
parent
e2d0f3fd07
commit
101fc29686
@ -259,6 +259,7 @@ The inspection discovers the following properties:
|
|||||||
Extra capabilities:
|
Extra capabilities:
|
||||||
|
|
||||||
* ``boot_mode``: UEFI or BIOS boot mode.
|
* ``boot_mode``: UEFI or BIOS boot mode.
|
||||||
|
* ``pci_gpu_devices``: number of supported GPU devices connected to the bare metal node.
|
||||||
|
|
||||||
It also creates baremetal ports for each NIC port detected in the system.
|
It also creates baremetal ports for each NIC port detected in the system.
|
||||||
The ``idrac-wsman`` inspect interface discovers which NIC ports are
|
The ``idrac-wsman`` inspect interface discovers which NIC ports are
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
proliantutils>=2.10.0
|
proliantutils>=2.10.0
|
||||||
pysnmp>=4.3.0,<5.0.0
|
pysnmp>=4.3.0,<5.0.0
|
||||||
python-scciclient>=0.8.0
|
python-scciclient>=0.8.0
|
||||||
python-dracclient>=3.1.0,<6.0.0
|
python-dracclient>=5.1.0,<6.0.0
|
||||||
python-xclarityclient>=0.1.6
|
python-xclarityclient>=0.1.6
|
||||||
|
|
||||||
# The Redfish hardware type uses the Sushy library
|
# The Redfish hardware type uses the Sushy library
|
||||||
|
@ -49,6 +49,8 @@ class DracRedfishInspect(redfish_inspect.RedfishInspect):
|
|||||||
|
|
||||||
class DracWSManInspect(base.InspectInterface):
|
class DracWSManInspect(base.InspectInterface):
|
||||||
|
|
||||||
|
_GPU_SUPPORTED_LIST = {"TU104GL [Tesla T4]"}
|
||||||
|
|
||||||
def get_properties(self):
|
def get_properties(self):
|
||||||
"""Return the properties of the interface.
|
"""Return the properties of the interface.
|
||||||
|
|
||||||
@ -98,9 +100,12 @@ class DracWSManInspect(base.InspectInterface):
|
|||||||
properties['cpu_arch'] = 'x86_64' if cpus[0].arch64 else 'x86'
|
properties['cpu_arch'] = 'x86_64' if cpus[0].arch64 else 'x86'
|
||||||
|
|
||||||
bios_settings = client.list_bios_settings()
|
bios_settings = client.list_bios_settings()
|
||||||
|
video_controllers = client.list_video_controllers()
|
||||||
current_capabilities = node.properties.get('capabilities', '')
|
current_capabilities = node.properties.get('capabilities', '')
|
||||||
new_capabilities = {
|
new_capabilities = {
|
||||||
'boot_mode': bios_settings["BootMode"].current_value.lower()}
|
'boot_mode': bios_settings["BootMode"].current_value.lower(),
|
||||||
|
'pci_gpu_devices': self._calculate_gpus(video_controllers)}
|
||||||
|
|
||||||
capabilties = utils.get_updated_capabilities(current_capabilities,
|
capabilties = utils.get_updated_capabilities(current_capabilities,
|
||||||
new_capabilities)
|
new_capabilities)
|
||||||
properties['capabilities'] = capabilties
|
properties['capabilities'] = capabilties
|
||||||
@ -190,6 +195,23 @@ class DracWSManInspect(base.InspectInterface):
|
|||||||
else:
|
else:
|
||||||
return cpu.cores
|
return cpu.cores
|
||||||
|
|
||||||
|
def _calculate_gpus(self, video_controllers):
    """Find actual GPU count.

    Counts the video controllers whose ``description`` is one of the
    entries in ``self._GPU_SUPPORTED_LIST``. Controllers with any other
    description (for example an integrated graphics adapter) are ignored.

    :param video_controllers: list of video controllers; each item is
        read through its ``description`` attribute.

    :returns: total number of supported GPU devices, 0 when none match
        or the list is empty.
    """
    supported = self._GPU_SUPPORTED_LIST
    # A direct membership test replaces the inner loop over the supported
    # descriptions; each controller still contributes at most one.
    return sum(1 for controller in video_controllers
               if controller.description in supported)
|
||||||
|
|
||||||
def _get_pxe_dev_nics(self, client, nics, node):
|
def _get_pxe_dev_nics(self, client, nics, node):
|
||||||
"""Get a list of pxe device interfaces.
|
"""Get a list of pxe device interfaces.
|
||||||
|
|
||||||
|
@ -135,6 +135,23 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
|||||||
'PxeDev4Interface': None}
|
'PxeDev4Interface': None}
|
||||||
nic_settings = {'LegacyBootProto': {'current_value': 'PXE'},
|
nic_settings = {'LegacyBootProto': {'current_value': 'PXE'},
|
||||||
'FQDD': 'NIC.Embedded.1-1-1'}
|
'FQDD': 'NIC.Embedded.1-1-1'}
|
||||||
|
video_controllers = [
|
||||||
|
{'id': 'Video.Embedded.1-1',
|
||||||
|
'description': 'Integrated Matrox G200eW3 Graphics Controller',
|
||||||
|
'function_number': 0,
|
||||||
|
'manufacturer': 'Matrox Electronics Systems Ltd.',
|
||||||
|
'pci_device_id': '0536',
|
||||||
|
'pci_vendor_id': '102B',
|
||||||
|
'pci_subdevice_id': '0737',
|
||||||
|
'pci_subvendor_id': '1028'},
|
||||||
|
{'id': 'Video.Slot.7-1',
|
||||||
|
'description': 'TU104GL [Tesla T4]',
|
||||||
|
'function_number': 0,
|
||||||
|
'manufacturer': 'NVIDIA Corporation',
|
||||||
|
'pci_device_id': '1EB8',
|
||||||
|
'pci_vendor_id': '10DE',
|
||||||
|
'pci_subdevice_id': '12A2',
|
||||||
|
'pci_subvendor_id': '10DE'}]
|
||||||
|
|
||||||
self.memory = [test_utils.dict_to_namedtuple(values=m) for m in memory]
|
self.memory = [test_utils.dict_to_namedtuple(values=m) for m in memory]
|
||||||
self.cpus = [test_utils.dict_to_namedtuple(values=c) for c in cpus]
|
self.cpus = [test_utils.dict_to_namedtuple(values=c) for c in cpus]
|
||||||
@ -146,6 +163,8 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
|||||||
self.bios_boot_settings = test_utils.dict_of_object(bios_boot_settings)
|
self.bios_boot_settings = test_utils.dict_of_object(bios_boot_settings)
|
||||||
self.uefi_boot_settings = test_utils.dict_of_object(uefi_boot_settings)
|
self.uefi_boot_settings = test_utils.dict_of_object(uefi_boot_settings)
|
||||||
self.nic_settings = test_utils.dict_of_object(nic_settings)
|
self.nic_settings = test_utils.dict_of_object(nic_settings)
|
||||||
|
self.video_controllers = [test_utils.dict_to_namedtuple(values=vc)
|
||||||
|
for vc in video_controllers]
|
||||||
|
|
||||||
def test_get_properties(self):
|
def test_get_properties(self):
|
||||||
expected = drac_common.COMMON_PROPERTIES
|
expected = drac_common.COMMON_PROPERTIES
|
||||||
@ -161,7 +180,7 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
|||||||
'local_gb': 1116,
|
'local_gb': 1116,
|
||||||
'cpus': 18,
|
'cpus': 18,
|
||||||
'cpu_arch': 'x86_64',
|
'cpu_arch': 'x86_64',
|
||||||
'capabilities': 'boot_mode:uefi'}
|
'capabilities': 'boot_mode:uefi,pci_gpu_devices:1'}
|
||||||
mock_client = mock.Mock()
|
mock_client = mock.Mock()
|
||||||
mock_get_drac_client.return_value = mock_client
|
mock_get_drac_client.return_value = mock_client
|
||||||
mock_client.list_memory.return_value = self.memory
|
mock_client.list_memory.return_value = self.memory
|
||||||
@ -169,6 +188,8 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
|||||||
mock_client.list_virtual_disks.return_value = self.virtual_disks
|
mock_client.list_virtual_disks.return_value = self.virtual_disks
|
||||||
mock_client.list_nics.return_value = self.nics
|
mock_client.list_nics.return_value = self.nics
|
||||||
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
||||||
|
mock_client.list_video_controllers.return_value = \
|
||||||
|
self.video_controllers
|
||||||
|
|
||||||
with task_manager.acquire(self.context, self.node.uuid,
|
with task_manager.acquire(self.context, self.node.uuid,
|
||||||
shared=True) as task:
|
shared=True) as task:
|
||||||
@ -191,6 +212,8 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
|||||||
mock_client.list_virtual_disks.side_effect = (
|
mock_client.list_virtual_disks.side_effect = (
|
||||||
drac_exceptions.BaseClientException('boom'))
|
drac_exceptions.BaseClientException('boom'))
|
||||||
mock_client.list_bios_settings.return_value = self.bios_boot_settings
|
mock_client.list_bios_settings.return_value = self.bios_boot_settings
|
||||||
|
mock_client.list_video_controllers.return_value = \
|
||||||
|
self.video_controllers
|
||||||
|
|
||||||
with task_manager.acquire(self.context, self.node.uuid,
|
with task_manager.acquire(self.context, self.node.uuid,
|
||||||
shared=True) as task:
|
shared=True) as task:
|
||||||
@ -207,7 +230,7 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
|||||||
'local_gb': 279,
|
'local_gb': 279,
|
||||||
'cpus': 18,
|
'cpus': 18,
|
||||||
'cpu_arch': 'x86_64',
|
'cpu_arch': 'x86_64',
|
||||||
'capabilities': 'boot_mode:uefi'}
|
'capabilities': 'boot_mode:uefi,pci_gpu_devices:1'}
|
||||||
mock_client = mock.Mock()
|
mock_client = mock.Mock()
|
||||||
mock_get_drac_client.return_value = mock_client
|
mock_get_drac_client.return_value = mock_client
|
||||||
mock_client.list_memory.return_value = self.memory
|
mock_client.list_memory.return_value = self.memory
|
||||||
@ -216,6 +239,8 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
|||||||
mock_client.list_physical_disks.return_value = self.physical_disks
|
mock_client.list_physical_disks.return_value = self.physical_disks
|
||||||
mock_client.list_nics.return_value = self.nics
|
mock_client.list_nics.return_value = self.nics
|
||||||
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
||||||
|
mock_client.list_video_controllers.return_value = \
|
||||||
|
self.video_controllers
|
||||||
|
|
||||||
with task_manager.acquire(self.context, self.node.uuid,
|
with task_manager.acquire(self.context, self.node.uuid,
|
||||||
shared=True) as task:
|
shared=True) as task:
|
||||||
@ -239,12 +264,94 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
|||||||
mock_client.list_physical_disks.return_value = self.physical_disks
|
mock_client.list_physical_disks.return_value = self.physical_disks
|
||||||
mock_client.list_nics.return_value = self.nics
|
mock_client.list_nics.return_value = self.nics
|
||||||
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
||||||
|
mock_client.list_video_controllers.return_value = \
|
||||||
|
self.video_controllers
|
||||||
|
|
||||||
with task_manager.acquire(self.context, self.node.uuid,
|
with task_manager.acquire(self.context, self.node.uuid,
|
||||||
shared=True) as task:
|
shared=True) as task:
|
||||||
self.assertRaises(exception.HardwareInspectionFailure,
|
self.assertRaises(exception.HardwareInspectionFailure,
|
||||||
task.driver.inspect.inspect_hardware, task)
|
task.driver.inspect.inspect_hardware, task)
|
||||||
|
|
||||||
|
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
                   autospec=True)
@mock.patch.object(objects.Port, 'create', spec_set=True, autospec=True)
def test_inspect_hardware_no_supported_gpu(self, mock_port_create,
                                           mock_get_drac_client):
    """Inspection reports zero GPUs when no controller is a supported model.

    The mocked client returns an integrated Matrox adapter and a Tesla
    V100; the test expects ``pci_gpu_devices:0`` in the node capabilities,
    a MANAGEABLE return state and two created ports.
    """
    controllers = [
        {'id': 'Video.Embedded.1-1',
         'description': 'Integrated Matrox G200eW3 Graphics Controller',
         'function_number': 0,
         'manufacturer': 'Matrox Electronics Systems Ltd.',
         'pci_device_id': '0536',
         'pci_vendor_id': '102B',
         'pci_subdevice_id': '0737',
         'pci_subvendor_id': '1028'},
        {'id': 'Video.Slot.7-1',
         # Stray trailing ']' removed so the fixture carries the real
         # device name; it still does not match the Tesla T4 entry.
         'description': 'GV100GL [Tesla V100 PCIe 16GB]',
         'function_number': 0,
         'manufacturer': 'NVIDIA Corporation',
         'pci_device_id': '1DB4',
         'pci_vendor_id': '10DE',
         'pci_subdevice_id': '1214',
         'pci_subvendor_id': '10DE'}]

    expected_node_properties = {
        'memory_mb': 32768,
        'local_gb': 279,
        'cpus': 18,
        'cpu_arch': 'x86_64',
        'capabilities': 'boot_mode:uefi,pci_gpu_devices:0'}
    mock_client = mock.Mock()
    mock_get_drac_client.return_value = mock_client
    mock_client.list_memory.return_value = self.memory
    mock_client.list_cpus.return_value = self.cpus
    mock_client.list_virtual_disks.return_value = []
    mock_client.list_physical_disks.return_value = self.physical_disks
    mock_client.list_nics.return_value = self.nics
    mock_client.list_bios_settings.return_value = self.uefi_boot_settings
    video_controllers = [test_utils.dict_to_namedtuple(values=vc)
                         for vc in controllers]
    mock_client.list_video_controllers.return_value = video_controllers

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=True) as task:
        return_value = task.driver.inspect.inspect_hardware(task)

    self.node.refresh()
    self.assertEqual(expected_node_properties, self.node.properties)
    self.assertEqual(states.MANAGEABLE, return_value)
    self.assertEqual(2, mock_port_create.call_count)
|
||||||
|
|
||||||
|
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
                   autospec=True)
@mock.patch.object(objects.Port, 'create', spec_set=True, autospec=True)
def test_inspect_hardware_no_gpu(self, mock_port_create,
                                 mock_get_drac_client):
    """Inspection reports zero GPUs when no video controllers are listed.

    The mocked client returns an empty video-controller list; the test
    expects ``pci_gpu_devices:0`` in the node capabilities, a MANAGEABLE
    return state and two created ports.
    """
    # Configure every canned client response in one place.
    client = mock.Mock(**{
        'list_memory.return_value': self.memory,
        'list_cpus.return_value': self.cpus,
        'list_virtual_disks.return_value': [],
        'list_physical_disks.return_value': self.physical_disks,
        'list_nics.return_value': self.nics,
        'list_bios_settings.return_value': self.uefi_boot_settings,
        'list_video_controllers.return_value': [],
    })
    mock_get_drac_client.return_value = client

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=True) as task:
        result = task.driver.inspect.inspect_hardware(task)

    wanted = {
        'memory_mb': 32768,
        'local_gb': 279,
        'cpus': 18,
        'cpu_arch': 'x86_64',
        'capabilities': 'boot_mode:uefi,pci_gpu_devices:0',
    }
    self.node.refresh()
    self.assertEqual(wanted, self.node.properties)
    self.assertEqual(states.MANAGEABLE, result)
    self.assertEqual(2, mock_port_create.call_count)
|
||||||
|
|
||||||
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
|
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
|
||||||
autospec=True)
|
autospec=True)
|
||||||
@mock.patch.object(objects.Port, 'create', spec_set=True, autospec=True)
|
@mock.patch.object(objects.Port, 'create', spec_set=True, autospec=True)
|
||||||
@ -255,7 +362,7 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
|||||||
'local_gb': 1116,
|
'local_gb': 1116,
|
||||||
'cpus': 18,
|
'cpus': 18,
|
||||||
'cpu_arch': 'x86_64',
|
'cpu_arch': 'x86_64',
|
||||||
'capabilities': 'boot_mode:uefi'}
|
'capabilities': 'boot_mode:uefi,pci_gpu_devices:1'}
|
||||||
mock_client = mock.Mock()
|
mock_client = mock.Mock()
|
||||||
mock_get_drac_client.return_value = mock_client
|
mock_get_drac_client.return_value = mock_client
|
||||||
mock_client.list_memory.return_value = self.memory
|
mock_client.list_memory.return_value = self.memory
|
||||||
@ -263,6 +370,8 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
|||||||
mock_client.list_virtual_disks.return_value = self.virtual_disks
|
mock_client.list_virtual_disks.return_value = self.virtual_disks
|
||||||
mock_client.list_nics.return_value = self.nics
|
mock_client.list_nics.return_value = self.nics
|
||||||
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
||||||
|
mock_client.list_video_controllers.return_value = \
|
||||||
|
self.video_controllers
|
||||||
|
|
||||||
mock_port_create.side_effect = exception.MACAlreadyExists("boom")
|
mock_port_create.side_effect = exception.MACAlreadyExists("boom")
|
||||||
|
|
||||||
|
@ -0,0 +1,8 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
Adds support to the ``idrac-wsman`` inspect interface for reporting the
|
||||||
|
number of GPU devices connected to a system. This information is advertised
|
||||||
|
through capability ``pci_gpu_devices``, which can be used to make
|
||||||
|
scheduling decisions for the node. Currently, only NVIDIA Tesla T4 GPU devices
|
||||||
|
are reported.
|
Loading…
Reference in New Issue
Block a user