Raise/clear OpenStack alarms/logs through FM in pods

If the entity_type of an alarm/log contains "instance", it is
related to a VM instance and should be managed by OpenStack.
Such alarms/logs therefore need to be raised/cleared through
the RESTful API of the FM service running in pods.

changes:
1. point vim webserver to fm in pods
2. change alarm/log interface of nfvi
3. change nfvi alarm/log configuration files

Add a config parameter "fault_management_pod_disabled", read from
the [openstack] section and defaulting to 'True' when absent, to
stop nfvi from raising alarms/logs to the containerized FM. This
parameter is temporary and will be removed in the future.

Depends-On: https://review.opendev.org/658740/
Depends-On: https://review.opendev.org/648814/
Story: 2004008
Task: 28876
Change-Id: I373f1f8cd6a69b96f70c99c46e6580002c09efc0
Signed-off-by: SidneyAn <ran1.an@intel.com>
This commit is contained in:
SidneyAn 2019-06-05 10:55:53 +08:00
parent c80b323895
commit ded3f2df13
7 changed files with 336 additions and 31 deletions

View File

@ -4,4 +4,21 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
[File-Storage] [File-Storage]
file=/var/log/nfv-vim-alarms.log file=/var/log/nfv-vim-alarms.log
[openstack]
username=admin
tenant=admin
authorization_protocol=http
authorization_ip=keystone-api.openstack.svc.cluster.local
authorization_port=5000
user_domain_name=Default
project_domain_name=Default
keyring_service=CGCS
[fm]
region_name=RegionOne
service_name=fm
service_type=alarming
endpoint_type=admin
endpoint_disabled=false

View File

@ -5,6 +5,8 @@
# #
from fm_api import constants as fm_constants from fm_api import constants as fm_constants
from fm_api import fm_api from fm_api import fm_api
import json
from six.moves import http_client as httplib
from nfv_common import debug from nfv_common import debug
@ -12,6 +14,10 @@ import nfv_common.alarm.handlers.v1 as alarm_handlers_v1
import nfv_common.alarm.objects.v1 as alarm_objects_v1 import nfv_common.alarm.objects.v1 as alarm_objects_v1
from nfv_plugins.alarm_handlers import config from nfv_plugins.alarm_handlers import config
from nfv_plugins.nfvi_plugins.openstack import exceptions
from nfv_plugins.nfvi_plugins.openstack import fm
from nfv_plugins.nfvi_plugins.openstack.objects import OPENSTACK_SERVICE
from nfv_plugins.nfvi_plugins.openstack import openstack
DLOG = debug.debug_get_logger('nfv_plugins.alarm_handlers.fm') DLOG = debug.debug_get_logger('nfv_plugins.alarm_handlers.fm')
@ -129,8 +135,15 @@ class FaultManagement(alarm_handlers_v1.AlarmHandler):
_provider = 'Wind River' _provider = 'Wind River'
_signature = 'e33d7cf6-f270-4256-893e-16266ee4dd2e' _signature = 'e33d7cf6-f270-4256-893e-16266ee4dd2e'
_alarm_db = dict() _platform_alarm_db = dict()
_openstack_alarm_db = dict()
_fm_api = None _fm_api = None
_openstack_token = None
_openstack_directory = None
_openstack_fm_endpoint_disabled = False
# This flag is used to disable raising alarm to containerized fm
# and will be removed in future.
_fault_management_pod_disabled = True
@property @property
def name(self): def name(self):
@ -148,9 +161,23 @@ class FaultManagement(alarm_handlers_v1.AlarmHandler):
def signature(self): def signature(self):
return self._signature return self._signature
def raise_alarm(self, alarm_uuid, alarm_data): @property
DLOG.debug("Raising alarm, uuid=%s." % alarm_uuid) def openstack_fm_endpoint_disabled(self):
return self._openstack_fm_endpoint_disabled
@property
def openstack_token(self):
if self._openstack_token is None or \
self._openstack_token.is_expired():
self._openstack_token = openstack.get_token(self._openstack_directory)
if self._openstack_token is None:
raise Exception("OpenStack get-token did not complete.")
return self._openstack_token
def _format_alarm(self, alarm_data):
fault = None
fm_alarm_id = _fm_alarm_id_mapping.get(alarm_data.alarm_type, None) fm_alarm_id = _fm_alarm_id_mapping.get(alarm_data.alarm_type, None)
if fm_alarm_id is not None: if fm_alarm_id is not None:
fm_alarm_type = _fm_alarm_type_mapping[alarm_data.event_type] fm_alarm_type = _fm_alarm_type_mapping[alarm_data.event_type]
@ -167,38 +194,147 @@ class FaultManagement(alarm_handlers_v1.AlarmHandler):
alarm_data.suppression_allowed, alarm_data.suppression_allowed,
fm_uuid, fm_uuid,
timestamp=alarm_data.raised_timestamp) timestamp=alarm_data.raised_timestamp)
return fault
response = self._fm_api.set_fault(fault) def _raise_openstack_alarm(self, format_alarm):
if response is None: if self.openstack_fm_endpoint_disabled:
self._alarm_db[alarm_uuid] = (alarm_data, None) DLOG.error("Openstack fm endpoint is disabled when raise openstack alarm.")
return None
try:
result = fm.raise_alarm(self.openstack_token, format_alarm)
result_data = json.loads(result.result_data)
if result_data is not None:
return result_data["uuid"]
else:
return None
except exceptions.OpenStackRestAPIException as e:
if httplib.UNAUTHORIZED == e.http_status_code:
if self._openstack_token is not None:
self._openstack_token.set_expired()
else:
DLOG.exception("Caught exception while trying to raise openstack alarm, "
"error=%s." % e)
except Exception as e:
DLOG.exception("Caught exception while trying to raise openstack alarm, "
"error=%s." % e)
def raise_alarm(self, alarm_uuid, alarm_data):
DLOG.debug("Raising alarm, uuid=%s." % alarm_uuid)
fault = self._format_alarm(alarm_data)
if fault is not None:
# conditional statement 'self._fault_management_pod_disabled' is used
# to disable raising alarm to containerized fm and will be removed in future.
if "instance" in alarm_data.entity_type and (not self._fault_management_pod_disabled):
fm_uuid = self._raise_openstack_alarm(fault.as_dict())
self._openstack_alarm_db[alarm_uuid] = (alarm_data, fm_uuid)
else:
fm_uuid = self._fm_api.set_fault(fault)
self._platform_alarm_db[alarm_uuid] = (alarm_data, fm_uuid)
if fm_uuid is None:
DLOG.error("Failed to raise alarm, uuid=%s, fm_uuid=%s." DLOG.error("Failed to raise alarm, uuid=%s, fm_uuid=%s."
% (alarm_uuid, fm_uuid)) % (alarm_uuid, fm_uuid))
else: else:
fm_uuid = response
self._alarm_db[alarm_uuid] = (alarm_data, fm_uuid)
DLOG.info("Raised alarm, uuid=%s, fm_uuid=%s." DLOG.info("Raised alarm, uuid=%s, fm_uuid=%s."
% (alarm_uuid, fm_uuid)) % (alarm_uuid, fm_uuid))
else: else:
DLOG.error("Unknown alarm type (%s) given." % alarm_data.alarm_type) DLOG.error("Unknown alarm type (%s) given." % alarm_data.alarm_type)
def _clear_openstack_alarm(self, fm_uuid):
if self.openstack_fm_endpoint_disabled:
DLOG.error("Openstack fm endpoint is disabled when clear openstack alarm.")
return
if fm_uuid is None:
return
try:
fm.clear_alarm(self.openstack_token, fm_uuid)
except exceptions.OpenStackRestAPIException as e:
if httplib.UNAUTHORIZED == e.http_status_code:
if self._openstack_token is not None:
self._openstack_token.set_expired()
else:
DLOG.exception("Caught exception while trying to clear alarm %s, "
"error=%s." % (fm_uuid, e))
except Exception as e:
DLOG.exception("Caught exception while trying to clear alarm %s, "
"error=%s." % (fm_uuid, e))
def _clear_platform_alarm(self, alarm_uuid, alarm_data):
fm_alarm_id = _fm_alarm_id_mapping[alarm_data.alarm_type]
if self._fm_api.clear_fault(fm_alarm_id, alarm_data.entity):
DLOG.info("Cleared alarm, uuid=%s." % alarm_uuid)
else:
DLOG.error("Failed to clear alarm, uuid=%s." % alarm_uuid)
def clear_alarm(self, alarm_uuid): def clear_alarm(self, alarm_uuid):
DLOG.debug("Clearing alarm, uuid=%s." % alarm_uuid) DLOG.debug("Clearing alarm, uuid=%s." % alarm_uuid)
alarm_data, fm_uuid = self._alarm_db.get(alarm_uuid, (None, None)) alarm_data, fm_uuid = self._platform_alarm_db.get(alarm_uuid, (None, None))
if alarm_data is not None: if alarm_data is not None:
fm_alarm_id = _fm_alarm_id_mapping[alarm_data.alarm_type] self._clear_platform_alarm(alarm_uuid, alarm_data)
success = self._fm_api.clear_fault(fm_alarm_id, alarm_data.entity)
if success:
DLOG.info("Cleared alarm, uuid=%s." % alarm_uuid)
else:
DLOG.error("Failed to clear alarm, uuid=%s." % alarm_uuid)
# Always remove the alarm from our alarm db. If we failed to clear # Always remove the alarm from our alarm db. If we failed to clear
# the alarm, the audit will clear it later. # the alarm, the audit will clear it later.
del self._alarm_db[alarm_uuid] del self._platform_alarm_db[alarm_uuid]
def audit_alarms(self): alarm_data, fm_uuid = self._openstack_alarm_db.get(alarm_uuid, (None, None))
DLOG.debug("Auditing alarms.") if alarm_data is not None:
self._clear_openstack_alarm(fm_uuid)
del self._openstack_alarm_db[alarm_uuid]
def _audit_openstack_alarms(self):
DLOG.debug("Auditing openstack alarms.")
if self.openstack_fm_endpoint_disabled:
return
fm_alarms = dict()
try:
result = fm.get_alarms(self.openstack_token, OPENSTACK_SERVICE.FM)
fm_alarms = result.result_data["alarms"]
except exceptions.OpenStackRestAPIException as e:
if httplib.UNAUTHORIZED == e.http_status_code:
if self._openstack_token is not None:
self._openstack_token.set_expired()
else:
DLOG.exception("Caught exception while trying to audit openstack alarms, "
"error=%s." % e)
except Exception as e:
DLOG.exception("Caught exception while trying to audit openstack alarms, "
"error=%s." % e)
# Check for missing alarms needing to be raised
for alarm_uuid, (alarm_data, fm_uuid) in self._openstack_alarm_db.items():
if fm_uuid is None:
self.raise_alarm(alarm_uuid, alarm_data)
else:
for fm_alarm in fm_alarms:
if fm_uuid == fm_alarm["uuid"]:
break
else:
DLOG.info("Re-raise of alarm, uuid=%s." % alarm_uuid)
self.raise_alarm(alarm_uuid, alarm_data)
# Check for stale alarms needing to be cleared
for fm_alarm in fm_alarms:
for alarm_uuid, (alarm_data, fm_uuid) in self._openstack_alarm_db.items():
if fm_uuid == fm_alarm["uuid"]:
break
else:
DLOG.info("Clear stale alarm, fm_uuid=%s, fm_alarm_id=%s, "
"fm_entity_instance_id=%s."
% (fm_alarm["uuid"], fm_alarm["alarm_id"],
fm_alarm["entity_instance_id"]))
self._clear_openstack_alarm(fm_alarm["uuid"])
def _audit_platform_alarms(self):
DLOG.debug("Auditing platform alarms.")
for alarm_type in alarm_objects_v1.ALARM_TYPE: for alarm_type in alarm_objects_v1.ALARM_TYPE:
fm_alarm_id = _fm_alarm_id_mapping.get(alarm_type, None) fm_alarm_id = _fm_alarm_id_mapping.get(alarm_type, None)
if fm_alarm_id is None: if fm_alarm_id is None:
@ -209,7 +345,7 @@ class FaultManagement(alarm_handlers_v1.AlarmHandler):
continue continue
# Check for missing alarms needing to be raised # Check for missing alarms needing to be raised
for alarm_uuid, (alarm_data, fm_uuid) in self._alarm_db.items(): for alarm_uuid, (alarm_data, fm_uuid) in self._platform_alarm_db.items():
if alarm_type == alarm_data.alarm_type: if alarm_type == alarm_data.alarm_type:
if fm_uuid is None: if fm_uuid is None:
self.raise_alarm(alarm_uuid, alarm_data) self.raise_alarm(alarm_uuid, alarm_data)
@ -224,7 +360,7 @@ class FaultManagement(alarm_handlers_v1.AlarmHandler):
# Check for stale alarms needing to be cleared # Check for stale alarms needing to be cleared
for fm_fault in fm_faults: for fm_fault in fm_faults:
for alarm_uuid, (alarm_data, fm_uuid) in self._alarm_db.items(): for alarm_uuid, (alarm_data, fm_uuid) in self._platform_alarm_db.items():
if fm_uuid == fm_fault.uuid: if fm_uuid == fm_fault.uuid:
break break
else: else:
@ -235,11 +371,30 @@ class FaultManagement(alarm_handlers_v1.AlarmHandler):
self._fm_api.clear_fault(fm_fault.alarm_id, self._fm_api.clear_fault(fm_fault.alarm_id,
fm_fault.entity_instance_id) fm_fault.entity_instance_id)
DLOG.debug("Audited alarms.")
def audit_alarms(self):
DLOG.debug("Auditing alarms begin.")
# conditional statement 'self._fault_management_pod_disabled' is used
# to disable raising alarm to containerized fm and will be removed in future.
if not self._fault_management_pod_disabled:
self._audit_openstack_alarms()
self._audit_platform_alarms()
DLOG.debug("Audited alarms end.")
def initialize(self, config_file): def initialize(self, config_file):
config.load(config_file) config.load(config_file)
self._openstack_directory = openstack.get_directory(
config, openstack.SERVICE_CATEGORY.OPENSTACK)
self._fm_api = fm_api.FaultAPIs() self._fm_api = fm_api.FaultAPIs()
DISABLED_LIST = ['Yes', 'yes', 'Y', 'y', 'True', 'true', 'T', 't', '1']
self._openstack_fm_endpoint_disabled = (config.CONF['fm']['endpoint_disabled'] in DISABLED_LIST)
# self._fault_management_pod_disabled is used to disable
# raising alarm to containerized fm and will be removed in future.
self._fault_management_pod_disabled = \
(config.CONF['openstack'].get('fault_management_pod_disabled', 'True') in DISABLED_LIST)
def finalize(self): def finalize(self):
return return

View File

@ -4,4 +4,21 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
[File-Storage] [File-Storage]
file=/var/log/nfv-vim-events.log file=/var/log/nfv-vim-events.log
[openstack]
username=admin
tenant=admin
authorization_protocol=http
authorization_ip=keystone-api.openstack.svc.cluster.local
authorization_port=5000
user_domain_name=Default
project_domain_name=Default
keyring_service=CGCS
[fm]
region_name=RegionOne
service_name=fm
service_type=alarming
endpoint_type=admin
endpoint_disabled=false

View File

@ -3,10 +3,12 @@
# #
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
import json
import six import six
from fm_api import constants as fm_constants from fm_api import constants as fm_constants
from fm_api import fm_api from fm_api import fm_api
from six.moves import http_client as httplib
from nfv_common import debug from nfv_common import debug
@ -14,6 +16,10 @@ import nfv_common.event_log.handlers.v1 as event_log_handlers_v1
import nfv_common.event_log.objects.v1 as event_log_objects_v1 import nfv_common.event_log.objects.v1 as event_log_objects_v1
from nfv_plugins.event_log_handlers import config from nfv_plugins.event_log_handlers import config
from nfv_plugins.nfvi_plugins.openstack import exceptions
from nfv_plugins.nfvi_plugins.openstack import fm
from nfv_plugins.nfvi_plugins.openstack import openstack
DLOG = debug.debug_get_logger('nfv_plugins.event_log_handlers.fm') DLOG = debug.debug_get_logger('nfv_plugins.event_log_handlers.fm')
@ -338,6 +344,12 @@ class EventLogManagement(event_log_handlers_v1.EventLogHandler):
_log_db = dict() _log_db = dict()
_fm_api = None _fm_api = None
_openstack_token = None
_openstack_directory = None
_openstack_fm_endpoint_disabled = False
# _fault_management_pod_disabled is used to disable
# raising alarm to containerized fm and will be removed in future.
_fault_management_pod_disabled = True
@property @property
def name(self): def name(self):
@ -355,9 +367,23 @@ class EventLogManagement(event_log_handlers_v1.EventLogHandler):
def signature(self): def signature(self):
return self._signature return self._signature
def log(self, log_data): @property
DLOG.debug("Generating Customer Log") def openstack_fm_endpoint_disabled(self):
return self._openstack_fm_endpoint_disabled
@property
def openstack_token(self):
if self._openstack_token is None or \
self._openstack_token.is_expired():
self._openstack_token = openstack.get_token(self._openstack_directory)
if self._openstack_token is None:
raise Exception("OpenStack get-token did not complete.")
return self._openstack_token
def _format_log(self, log_data):
format_log = None
fm_event_id = _fm_event_id_mapping.get(log_data.event_id, None) fm_event_id = _fm_event_id_mapping.get(log_data.event_id, None)
if fm_event_id is not None: if fm_event_id is not None:
fm_event_type = _fm_event_type_mapping[log_data.event_type] fm_event_type = _fm_event_type_mapping[log_data.event_type]
@ -366,23 +392,70 @@ class EventLogManagement(event_log_handlers_v1.EventLogHandler):
fm_severity = _fm_event_importance_mapping[log_data.importance] fm_severity = _fm_event_importance_mapping[log_data.importance]
fm_uuid = None fm_uuid = None
fm_reason_text = six.text_type(log_data.reason_text) fm_reason_text = six.text_type(log_data.reason_text)
fault = fm_api.Fault(fm_event_id, fm_event_state, format_log = fm_api.Fault(fm_event_id, fm_event_state,
log_data.entity_type, log_data.entity, log_data.entity_type, log_data.entity,
fm_severity, fm_reason_text, fm_event_type, fm_severity, fm_reason_text, fm_event_type,
fm_probable_cause, "", False, True) fm_probable_cause, "", False, True)
response = self._fm_api.set_fault(fault) return format_log
if response is None:
def _raise_openstack_log(self, format_log):
if self.openstack_fm_endpoint_disabled:
DLOG.error("Openstack fm endpoint is disabled when raise openstack log.")
return None
try:
result = fm.raise_alarm(self.openstack_token, format_log)
result_data = json.loads(result.result_data)
if result_data is not None:
return result_data["uuid"]
else:
return None
except exceptions.OpenStackRestAPIException as e:
if httplib.UNAUTHORIZED == e.http_status_code:
if self._openstack_token is not None:
self._openstack_token.set_expired()
else:
DLOG.exception("Caught exception while trying to get token, "
"error=%s." % e)
except Exception as e:
DLOG.exception("Caught exception while trying to get token, "
"error=%s." % e)
def log(self, log_data):
DLOG.debug("Generating Customer Log")
fault = self._format_log(log_data)
if fault is not None:
fm_uuid = None
# conditional statements self._fault_management_pod_disabled
# is used to disable raising alarm to containerized fm and
# will be removed in future.
if "instance" in log_data.entity_type and (not self._fault_management_pod_disabled):
fm_uuid = self._raise_openstack_log(fault.as_dict())
else:
fm_uuid = self._fm_api.set_fault(fault)
if fm_uuid is None:
DLOG.error("Failed to generate customer log, fm_uuid=%s." DLOG.error("Failed to generate customer log, fm_uuid=%s."
% fm_uuid) % fm_uuid)
else: else:
fm_uuid = response
DLOG.info("Generated customer log, fm_uuid=%s." % fm_uuid) DLOG.info("Generated customer log, fm_uuid=%s." % fm_uuid)
else: else:
DLOG.error("Unknown event id (%s) given." % log_data.event_id) DLOG.error("Unknown event id (%s) given." % log_data.event_id)
def initialize(self, config_file): def initialize(self, config_file):
config.load(config_file) config.load(config_file)
self._openstack_directory = openstack.get_directory(
config, openstack.SERVICE_CATEGORY.OPENSTACK)
self._fm_api = fm_api.FaultAPIs() self._fm_api = fm_api.FaultAPIs()
DISABLED_LIST = ['Yes', 'yes', 'Y', 'y', 'True', 'true', 'T', 't', '1']
self._openstack_fm_endpoint_disabled = (config.CONF['fm']['endpoint_disabled'] in DISABLED_LIST)
# _fault_management_pod_disabled is used to disable raising alarm
# to containerized fm and will be removed in future.
self._fault_management_pod_disabled = \
(config.CONF['openstack'].get('fault_management_pod_disabled', 'True') in DISABLED_LIST)
def finalize(self): def finalize(self):
return return

View File

@ -6,11 +6,16 @@
from nfv_common import debug from nfv_common import debug
from nfv_plugins.nfvi_plugins.openstack.objects import OPENSTACK_SERVICE
from nfv_plugins.nfvi_plugins.openstack.objects import PLATFORM_SERVICE from nfv_plugins.nfvi_plugins.openstack.objects import PLATFORM_SERVICE
from nfv_plugins.nfvi_plugins.openstack.rest_api import rest_api_request from nfv_plugins.nfvi_plugins.openstack.rest_api import rest_api_request
import json
DLOG = debug.debug_get_logger('nfv_plugins.nfvi_plugins.openstack.fm') DLOG = debug.debug_get_logger('nfv_plugins.nfvi_plugins.openstack.fm')
# TODO(add service for following method)
def get_alarms(token, fm_service=PLATFORM_SERVICE.FM): def get_alarms(token, fm_service=PLATFORM_SERVICE.FM):
""" """
@ -82,3 +87,41 @@ def get_alarm_history(token, start=None, end=None, fm_service=PLATFORM_SERVICE.F
response = rest_api_request(token, "GET", api_cmd) response = rest_api_request(token, "GET", api_cmd)
return response return response
def raise_alarm(token, alarm_data="", fm_service=OPENSTACK_SERVICE.FM):
"""
Raise customer alarm to Fault Management
"""
url = token.get_service_url(fm_service)
if url is None:
raise ValueError("OpenStack FM URL is invalid")
api_cmd = url + "/alarms"
api_cmd_headers = dict()
api_cmd_headers['Content-Type'] = "application/json"
json_alarm_data = json.dumps(alarm_data)
response = rest_api_request(token, "POST", api_cmd, api_cmd_headers, json_alarm_data)
return response
def clear_alarm(token, fm_uuid="", fm_service=OPENSTACK_SERVICE.FM):
"""
Clear customer alarm to Fault Management
"""
url = token.get_service_url(fm_service)
if url is None:
raise ValueError("OpenStack FM URL is invalid")
api_cmd = url + "/alarms"
api_cmd_headers = dict()
api_cmd_headers['Content-Type'] = "application/json"
payload = ('{"id": "%s"}' % fm_uuid)
rest_api_request(token, "DELETE", api_cmd, api_cmd_headers, payload)
return

View File

@ -58,7 +58,7 @@ class OpenStackServices(Constants):
NEUTRON = Constant('neutron') NEUTRON = Constant('neutron')
NOVA = Constant('nova') NOVA = Constant('nova')
HEAT = Constant('heat') HEAT = Constant('heat')
FM = Constant('fm')
# OpenStack Services Constant # OpenStack Services Constant
OPENSTACK_SERVICE = OpenStackServices() OPENSTACK_SERVICE = OpenStackServices()

View File

@ -597,7 +597,7 @@ class SimpleHttpServer(object):
config.load(nfvi_config['config_file']) config.load(nfvi_config['config_file'])
_directory = openstack.get_directory(config, _directory = openstack.get_directory(config,
openstack.SERVICE_CATEGORY.PLATFORM) openstack.SERVICE_CATEGORY.OPENSTACK)
_vim_api_ip = vim_api_config['host'] _vim_api_ip = vim_api_config['host']
if ':' in _vim_api_ip: if ':' in _vim_api_ip:
# Wrap IPv6 address for use in URLs # Wrap IPv6 address for use in URLs