# Copyright 2014 Intel Corporation.
# All Rights Reserved.
#
# Author: Zhai Edwin
# Author: Gao Fengqian
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Node manager engine to collect power and temperature of compute node.

Intel Node Manager Technology enables the datacenter IT to monitor and control
actual server power, thermal and compute utilization behavior through industry
defined standard IPMI. This file provides Node Manager engine to get simple
system power and temperature data based on ipmitool.
"""

import binascii
import collections
import os
import tempfile
import time

from oslo.config import cfg

from ceilometer.ipmi.platform import exception as nmexcept
from ceilometer.ipmi.platform import ipmitool
from ceilometer.openstack.common.gettextutils import _

OPTS = [
    cfg.IntOpt('node_manager_init_retry',
               default=3,
               help='Number of retries upon Intel Node '
                    'Manager initialization failure')
]


CONF = cfg.CONF
CONF.register_opts(OPTS, group='ipmi')

# Plain ipmitool sub-commands.
IPMICMD = {"sdr_dump": "sdr dump",
           "sdr_info": "sdr info",
           "sensor_dump": "sdr -v"}

# Raw IPMI requests, passed to ipmitool as "raw <bytes...>".
IPMIRAWCMD = {"get_device_id": "raw 0x06 0x01",
              "init_sensor_agent": "raw 0x0a 0x2c 0x01",
              "init_complete": "raw 0x0a 0x2c 0x00",
              "init_sensor_agent_status": "raw 0x0a 0x2c 0x00",
              "read_power_all": "raw 0x2e 0xc8 0x57 0x01 0x00 0x01 0x00 0x00",
              "read_temperature_all":
              "raw 0x2e 0xc8 0x57 0x01 0x00 0x02 0x00 0x00"}

MANUFACTURER_ID_INTEL = ['57', '01', '00']
# Hex text of the leading bytes of the Intel NM discovery OEM SDR record body:
# OEM ID (57 01 00) followed by 0Dh and 01h.
INTEL_PREFIX = '5701000d01'

# The template dicts are made according to the spec. They contain the expected
# length of each item, and they can be used to parse the output of an IPMI
# command.
ONE_RETURN_TEMPLATE = {"ret": 1}

BMC_INFO_TEMPLATE = collections.OrderedDict()
BMC_INFO_TEMPLATE['Device_ID'] = 1
BMC_INFO_TEMPLATE['Device_Revision'] = 1
BMC_INFO_TEMPLATE['Firmware_Revision_1'] = 1
BMC_INFO_TEMPLATE['Firmware_Revision_2'] = 1
BMC_INFO_TEMPLATE['IPMI_Version'] = 1
BMC_INFO_TEMPLATE['Additional_Device_support'] = 1
BMC_INFO_TEMPLATE['Manufacturer_ID'] = 3
BMC_INFO_TEMPLATE['Product_ID'] = 2
BMC_INFO_TEMPLATE['Auxiliary_Firmware_Revision'] = 4

NM_STATISTICS_TEMPLATE = collections.OrderedDict()
NM_STATISTICS_TEMPLATE['Manufacturer_ID'] = 3
NM_STATISTICS_TEMPLATE['Current_value'] = 2
NM_STATISTICS_TEMPLATE['Minimum_value'] = 2
NM_STATISTICS_TEMPLATE['Maximum_value'] = 2
NM_STATISTICS_TEMPLATE['Average_value'] = 2
NM_STATISTICS_TEMPLATE['Time_stamp'] = 4
NM_STATISTICS_TEMPLATE['Report_period'] = 4
NM_STATISTICS_TEMPLATE["DomainID_PolicyState"] = 1

# NOTE: the key spellings below (including 'Additinal_Device_support') are
# runtime dictionary keys and are deliberately left untouched.
NM_GET_DEVICE_ID_TEMPLATE = collections.OrderedDict()
NM_GET_DEVICE_ID_TEMPLATE['Device_ID'] = 1
NM_GET_DEVICE_ID_TEMPLATE['Device_revision'] = 1
NM_GET_DEVICE_ID_TEMPLATE['Firmware_revision_1'] = 1
NM_GET_DEVICE_ID_TEMPLATE['Firmware_Revision_2'] = 1
NM_GET_DEVICE_ID_TEMPLATE['IPMI_Version'] = 1
NM_GET_DEVICE_ID_TEMPLATE['Additinal_Device_support'] = 1
NM_GET_DEVICE_ID_TEMPLATE['Manufacturer_ID'] = 3
NM_GET_DEVICE_ID_TEMPLATE['Product_ID_min_version'] = 1
NM_GET_DEVICE_ID_TEMPLATE['Product_ID_major_version'] = 1
NM_GET_DEVICE_ID_TEMPLATE['Implemented_firmware'] = 1
NM_GET_DEVICE_ID_TEMPLATE['Firmware_build_number'] = 1
NM_GET_DEVICE_ID_TEMPLATE['Last_digit_firmware_build_number'] = 1
NM_GET_DEVICE_ID_TEMPLATE['Image_flags'] = 1


def _hex(list=None):
    """Format the return value in list into hex.

    The items are joined in reverse order and the resulting string is parsed
    as a base-16 integer.

    :param list: sequence of hex strings, e.g. ``['57', '01']``. The name
                 shadows the builtin but is kept so keyword callers keep
                 working.
    :return: the integer value, or 0 when the argument is empty or None.
    """
    if not list:
        return 0
    # reversed() leaves the caller's list untouched; the previous in-place
    # list.reverse() flipped the caller's data on every call.
    return int(''.join(reversed(list)), 16)


class NodeManager(object):
    """The python implementation of Intel Node Manager engine using ipmitool

    The class implements the engine to read power and temperature of
    compute node. It uses ipmitool to execute the IPMI command and parse
    the output into dict.

    Methods decorated with ``ipmitool.execute_ipmi_cmd`` only build and return
    the command string; running it and parsing the output according to the
    given template is presumably done by the decorator (defined in
    ``ceilometer.ipmi.platform.ipmitool``, not visible here).
    """
    _inited = False
    _instance = None

    def __new__(cls, *args, **kwargs):
        """Singleton to avoid duplicated initialization."""
        if not cls._instance:
            # Do not forward args/kwargs: object.__new__ rejects extra
            # arguments, and __init__ takes none anyway.
            cls._instance = super(NodeManager, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every NodeManager() call even though __new__
        # returns the shared instance, so initialize only the first time.
        if not (self._instance and self._inited):
            self.nm_support = False
            self.channel_slave = ''
            self._inited = True

            self.nm_support = self.check_node_manager()

    @staticmethod
    def _parse_slave_and_channel(file_path):
        """Parse the dumped file to get slave address and channel number.

        :param file_path: file path of dumped SDR file.
        :return: tuple ``(slave_address, channel)`` of hex strings (two hex
                 digits and one hex digit respectively), or None when the
                 record is absent or truncated.
        """
        # According to Intel Node Manager spec, section 4.5, for Intel NM
        # discovery OEM SDR records are type C0h. It contains manufacture ID
        # and OEM data in the record body.
        # 0-2 bytes are OEM ID, byte 3 is 0Dh and byte 4 is 01h. Byte 5, 6
        # is Intel NM device slave address and channel number/sensor owner LUN.
        prefix = binascii.unhexlify(INTEL_PREFIX)

        # The dump is binary: search the raw bytes as a whole instead of
        # splitting on newline bytes, which could cut a record in two, and
        # instead of matching hex text, which could match on a half byte.
        with open(file_path, 'rb') as bin_fp:
            data = bin_fp.read()

        index = data.find(prefix)
        if index < 0:
            return None

        start = index + len(prefix)
        record = data[start:start + 2]
        if len(record) < 2:
            # The file ends before the slave address / channel bytes.
            return None

        # str() keeps a native string on both Python 2 and Python 3.
        hex_str = str(binascii.hexlify(record).decode('ascii'))
        # Byte 5 is slave address. [7:4] from byte 6 is channel
        # number, so just pick hex_str[2] here.
        return (hex_str[0:2], hex_str[2])

    @ipmitool.execute_ipmi_cmd(BMC_INFO_TEMPLATE)
    def get_device_id(self):
        """IPMI command GET_DEVICE_ID."""
        return IPMIRAWCMD["get_device_id"]

    @ipmitool.execute_ipmi_cmd(ONE_RETURN_TEMPLATE)
    def _init_sensor_agent(self):
        """Run initialization agent."""
        return IPMIRAWCMD["init_sensor_agent"]

    @ipmitool.execute_ipmi_cmd(ONE_RETURN_TEMPLATE)
    def _init_sensor_agent_process(self):
        """Check the status of initialization agent."""
        return IPMIRAWCMD["init_sensor_agent_status"]

    @ipmitool.execute_ipmi_cmd()
    def _dump_sdr_file(self, data_file=""):
        """Dump SDR into a file.

        :param data_file: path of the file the SDR is dumped into.
        """
        return IPMICMD["sdr_dump"] + " " + data_file

    @ipmitool.execute_ipmi_cmd(NM_GET_DEVICE_ID_TEMPLATE)
    def _node_manager_get_device_id(self):
        """GET_DEVICE_ID command in Intel Node Manager

        Different from IPMI command GET_DEVICE_ID, it contains more
        information of Intel Node Manager.
        """
        return self.channel_slave + ' ' + IPMIRAWCMD["get_device_id"]

    @ipmitool.execute_ipmi_cmd(NM_STATISTICS_TEMPLATE)
    def _read_power_all(self):
        """Get the power consumption of the whole platform."""
        return self.channel_slave + ' ' + IPMIRAWCMD['read_power_all']

    @ipmitool.execute_ipmi_cmd(NM_STATISTICS_TEMPLATE)
    def _read_temperature_all(self):
        """Get the temperature info of the whole platform."""
        return self.channel_slave + ' ' + IPMIRAWCMD['read_temperature_all']

    def read_power_all(self):
        """Return platform power statistics, or {} without NM support."""
        if self.nm_support:
            return self._read_power_all()
        return {}

    def read_temperature_all(self):
        """Return platform temperature statistics, or {} without NM support."""
        if self.nm_support:
            return self._read_temperature_all()
        return {}

    def init_node_manager(self):
        """Make sure the sensor initialization agent has completed.

        Returns at once when the status query already yields ``['01']``.
        Otherwise the agent is started and the status polled after a one
        second pause, up to ``CONF.ipmi.node_manager_init_retry`` times.

        :raises NodeManagerException: if no attempt reports completion.
        """
        if self._init_sensor_agent_process()['ret'] == ['01']:
            return
        # Run sensor initialization agent
        # NOTE: do not name the loop variable "_"; that would shadow the
        # gettext function used in the raise below.
        for attempt in range(CONF.ipmi.node_manager_init_retry):
            self._init_sensor_agent()
            time.sleep(1)
            if self._init_sensor_agent_process()['ret'] == ['01']:
                return

        raise nmexcept.NodeManagerException(_('Node Manager init failed'))

    def discover_slave_channel(self):
        """Discover target slave address and channel number.

        Dumps the SDR into a temporary file, parses it and stores the
        resulting ipmitool bridging options in ``self.channel_slave``.

        :raises NodeManagerException: if the dump holds no Intel Node
                                      Manager discovery record.
        """
        fd, file_path = tempfile.mkstemp()
        # Only the path is needed; close the descriptor mkstemp() opened so
        # it does not leak.
        os.close(fd)
        try:
            self._dump_sdr_file(data_file=file_path)
            ret = self._parse_slave_and_channel(file_path)
        finally:
            # Best-effort cleanup: a failure to delete the temporary dump
            # must not mask the real result or error.
            try:
                os.remove(file_path)
            except OSError:
                pass

        if ret is None:
            # Raise an exception type check_node_manager() handles instead
            # of failing with a TypeError on ret[0].
            raise nmexcept.NodeManagerException(
                _('Intel Node Manager SDR record not found'))

        slave_address = '0x' + ret[0]
        channel = '0x' + ret[1]
        # String of channel and slave_address
        self.channel_slave = '-b ' + channel + ' -t ' + slave_address

    def node_manager_support(self):
        """Intel Node Manager capability checking

        This function is used to detect if compute node support Intel
        Node Manager or not and parse out the slave address and channel
        number of node manager.

        :return: True if Intel Node Manager is implemented, else False.
        """
        self.manufacturer_id = self.get_device_id()['Manufacturer_ID']
        if MANUFACTURER_ID_INTEL != self.manufacturer_id:
            # If the manufacturer is not Intel, just set False and return.
            return False

        self.discover_slave_channel()
        support = self._node_manager_get_device_id()['Implemented_firmware']
        # According to Intel Node Manager spec, return value of GET_DEVICE_ID,
        # bits 3 to 0 shows if Intel NM implemented or not.
        return (int(support[0], 16) & 0xf) != 0

    def check_node_manager(self):
        """Intel Node Manager init and check

        This function is used to initialize Intel Node Manager and check the
        capability without throwing exception. It's safe to call it on
        non-NodeManager platform.

        :return: True if Intel Node Manager is supported, else False.
        """
        try:
            self.init_node_manager()
            has_nm = self.node_manager_support()
        except (nmexcept.NodeManagerException, nmexcept.IPMIException):
            return False
        return has_nm