Support Heat auto-healing notifier
The auto-healing notifier works together with the loadbalancer_member_health evaluator. Presumably, the end user defines a Heat template that contains an autoscaling group whose members all join an Octavia load balancer in order to expose a service to the outside, so that when the stack scales up or down, Heat makes sure the new members join the load balancer automatically and the old members are removed. This notifier, however, deals with the situation where some member fails: the stack can be recovered by marking the given autoscaling group member unhealthy and then updating the Heat stack in place. Change-Id: I6e92d1fc2125e155bb5068ff2c14fa318b126442
This commit is contained in:
parent
8742b0d540
commit
a8285b564d
@ -15,6 +15,7 @@
|
||||
|
||||
import os
|
||||
|
||||
from heatclient import client as heatclient
|
||||
from keystoneauth1 import exceptions as ka_exception
|
||||
from keystoneauth1.identity.generic import password
|
||||
from keystoneauth1 import loading as ka_loading
|
||||
@ -93,6 +94,19 @@ def url_for(conf, **kwargs):
|
||||
return sess.get_endpoint(**kwargs)
|
||||
|
||||
|
||||
def get_heat_client_from_trust(conf, trust_id):
    """Return a Heat (orchestration) client scoped through a trust.

    :param conf: service configuration; ``conf.service_credentials``
                 supplies the region name used for the endpoint lookup.
    :param trust_id: ID of the trust handed to ``get_trusted_client``.
    :returns: a ``heatclient`` v1 client bound to the trusted session.
    """
    # Reuse the session of the trust-scoped Keystone client so that the
    # Heat client shares its authentication.
    session = get_trusted_client(conf, trust_id).session

    # The orchestration endpoint is always resolved on the internal
    # interface, in the region configured for the service credentials.
    heat_endpoint = session.get_endpoint(
        service_type='orchestration',
        interface="internal",
        region_name=conf.service_credentials.region_name
    )

    return heatclient.Client("1", endpoint=heat_endpoint, session=session)
|
||||
|
||||
|
||||
OPTS = [
|
||||
cfg.StrOpt('region-name',
|
||||
default=os.environ.get('OS_REGION_NAME'),
|
||||
|
114
aodh/notifier/heat.py
Normal file
114
aodh/notifier/heat.py
Normal file
@ -0,0 +1,114 @@
|
||||
# Copyright 2019 Catalyst Cloud Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from oslo_log import log
|
||||
from oslo_utils import uuidutils
|
||||
import six
|
||||
|
||||
from aodh import keystone_client as aodh_keystone
|
||||
from aodh import notifier
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
class TrustHeatAlarmNotifier(notifier.AlarmNotifier):
    """Heat autohealing notifier.

    The auto-healing notifier works together with the
    loadbalancer_member_health evaluator.

    Presumably, the end user defines a Heat template which contains an
    autoscaling group and all the members in the group are joined in an
    Octavia load balancer in order to expose service to the outside, so that
    when the stack scales up or scales down, Heat makes sure the new members
    are joining the load balancer automatically and the old members are
    removed.

    However, this notifier deals with the situation that when some member
    fails, the stack could be recovered by marking the given autoscaling group
    member unhealthy, then updating the Heat stack in place. In order to do
    that, the notifier needs to know:

    - Heat stack ID.
    - Heat autoscaling group ID.
    - The failed Octavia pool members.

    The resource ID in the autoscaling group is saved in the Octavia member
    tags. So, only Octavia stable/stein or later versions are supported.
    """

    def __init__(self, conf):
        super(TrustHeatAlarmNotifier, self).__init__(conf)
        self.conf = conf

    @staticmethod
    def _unhealthy_resource_ids(reason_data):
        """Collect the UUID-like tags of every unhealthy member.

        A missing or ``None`` ``unhealthy_members`` / ``tags`` value is
        treated as an empty list instead of raising ``TypeError``.
        """
        resources = []
        for member in reason_data.get("unhealthy_members") or []:
            for tag in member.get("tags") or []:
                # Only UUID-like tags are treated as Heat resource IDs;
                # any other tag on the member is ignored.
                if uuidutils.is_uuid_like(tag):
                    resources.append(tag)
        return resources

    def notify(self, action, alarm_id, alarm_name, severity, previous, current,
               reason, reason_data):
        """Mark failed group members unhealthy and update the stack.

        :param action: split action URL; its ``username`` part is used as
                       the trust ID and ``geturl()`` is logged.
        :param alarm_id: ID of the alarm, used in log messages.
        :param alarm_name: name of the alarm, used in log messages.
        :param severity: alarm severity, used in log messages.
        :param previous: previous alarm state, used in log messages.
        :param current: current alarm state, used in log messages.
        :param reason: human readable transition reason, logged only.
        :param reason_data: dict expected to carry ``stack_id``, ``asg_id``
                            and ``unhealthy_members`` (a list of dicts with
                            a ``tags`` list).

        Nothing is raised for Heat failures: any exception raised while
        talking to Heat is logged and swallowed. Because all Heat calls
        share one ``try`` block, a failure on one resource skips the
        remaining resources and the stack update.
        """
        # Pass the mapping as a logging argument so that interpolation is
        # deferred to the logging framework (consistent with the other
        # log calls below) instead of being done eagerly with ``%``.
        LOG.info(
            "Notifying alarm %(alarm_name)s %(alarm_id)s of %(severity)s "
            "priority from %(previous)s to %(current)s with action %(action)s"
            " because %(reason)s.",
            {'alarm_name': alarm_name,
             'alarm_id': alarm_id,
             'severity': severity,
             'previous': previous,
             'current': current,
             'action': action.geturl(),
             'reason': reason}
        )

        trust_id = action.username
        stack_id = reason_data.get("stack_id")
        asg_id = reason_data.get("asg_id")

        if not stack_id or not asg_id:
            LOG.warning(
                "stack_id and asg_id must exist to notify alarm %s", alarm_id
            )
            return

        resources = self._unhealthy_resource_ids(reason_data)
        if not resources:
            # No member carries a usable resource ID: nothing to heal, so
            # Heat is not contacted at all.
            return

        try:
            heat_client = aodh_keystone.get_heat_client_from_trust(
                self.conf, trust_id
            )

            for res in resources:
                heat_client.resources.mark_unhealthy(
                    asg_id,
                    res,
                    True,
                    "unhealthy load balancer member"
                )
                LOG.info(
                    "Heat resource %(resource_id)s is marked as unhealthy "
                    "for alarm %(alarm_id)s",
                    {"resource_id": res, "alarm_id": alarm_id}
                )

            # Update the stack once, after all resources are marked.
            heat_client.stacks.update(stack_id, existing=True)
            LOG.info(
                "Heat stack %(stack_id)s is updated for alarm "
                "%(alarm_id)s",
                {"stack_id": stack_id, "alarm_id": alarm_id}
            )
        except Exception as e:
            # Best effort: a Heat failure must not propagate to the caller.
            LOG.exception("Failed to communicate with Heat service, "
                          "error: %s", six.text_type(e))
|
0
aodh/tests/unit/notifier/__init__.py
Normal file
0
aodh/tests/unit/notifier/__init__.py
Normal file
27
aodh/tests/unit/notifier/base.py
Normal file
27
aodh/tests/unit/notifier/base.py
Normal file
@ -0,0 +1,27 @@
|
||||
# Copyright 2019 Catalyst Cloud Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from oslo_config import fixture
|
||||
from oslotest import base
|
||||
|
||||
from aodh import service
|
||||
|
||||
|
||||
class TestNotifierBase(base.BaseTestCase):
    """Shared base for notifier unit tests; exposes ``self.conf``."""

    def setUp(self):
        super(TestNotifierBase, self).setUp()

        # Build the service configuration without reading any CLI
        # arguments or config files.
        service_conf = service.prepare_service(argv=[], config_files=[])

        # Wrap it in the oslo.config fixture and expose the fixture's
        # configuration object to the tests.
        config_fixture = self.useFixture(fixture.Config(service_conf))
        self.conf = config_fixture.conf
|
78
aodh/tests/unit/notifier/test_heat.py
Normal file
78
aodh/tests/unit/notifier/test_heat.py
Normal file
@ -0,0 +1,78 @@
|
||||
# Copyright 2019 Catalyst Cloud Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import mock
|
||||
from oslo_utils import netutils
|
||||
|
||||
from aodh.notifier import heat as heat_notifier
|
||||
from aodh.tests.unit.notifier import base
|
||||
|
||||
|
||||
class TestTrustHeatAlarmNotifier(base.TestNotifierBase):
    # Unit tests for TrustHeatAlarmNotifier.notify with the Heat client
    # factory patched out.

    @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
    def test_notify(self, mock_heatclient):
        # Happy path: the tagged member is marked unhealthy and the stack
        # is updated in place.
        member_tag = "3bd8bc5a-7632-11e9-84cd-00224d6b7bc1"
        reason_data = {
            "stack_id": "fake_stack_id",
            "asg_id": "fake_asg_id",
            "unhealthy_members": [
                {"tags": [member_tag]}
            ]
        }
        action = netutils.urlsplit("trust+autohealer://fake_trust_id:delete@")

        heat_notifier.TrustHeatAlarmNotifier(self.conf).notify(
            action,
            "fake_alarm_id",
            "fake_alarm_name",
            "low",
            "ok",
            "alarm",
            "no good reason",
            reason_data
        )

        mock_heatclient.assert_called_once_with(self.conf, "fake_trust_id")

        heat = mock_heatclient.return_value
        heat.resources.mark_unhealthy.assert_called_once_with(
            "fake_asg_id",
            member_tag,
            True,
            "unhealthy load balancer member"
        )
        heat.stacks.update.assert_called_once_with(
            "fake_stack_id", existing=True
        )

    @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
    def test_notify_stack_id_missing(self, mock_heatclient):
        # Without a stack_id the notifier must return before creating a
        # Heat client.
        reason_data = {
            "asg_id": "fake_asg_id",
            "unhealthy_members": [
                {"tags": ["3bd8bc5a-7632-11e9-84cd-00224d6b7bc1"]}
            ]
        }
        action = netutils.urlsplit("trust+autohealer://fake_trust_id:delete@")

        heat_notifier.TrustHeatAlarmNotifier(self.conf).notify(
            action,
            "fake_alarm_id",
            "fake_alarm_name",
            "low",
            "ok",
            "alarm",
            "no good reason",
            reason_data
        )

        self.assertFalse(mock_heatclient.called)
|
@ -0,0 +1,3 @@
|
||||
features:
|
||||
- Added a new notifier (``trust+heat``) that works together with
|
||||
the ``loadbalancer_member_health`` evaluator for auto-healing purposes.
|
@ -37,3 +37,4 @@ keystoneauth1>=2.1
|
||||
debtcollector>=1.2.0 # Apache-2.0
|
||||
python-octaviaclient>=1.8.0
|
||||
python-dateutil # BSD
|
||||
python-heatclient>=1.17.0
|
||||
|
@ -91,6 +91,7 @@ aodh.notifier =
|
||||
trust+https = aodh.notifier.trust:TrustRestAlarmNotifier
|
||||
zaqar = aodh.notifier.zaqar:ZaqarAlarmNotifier
|
||||
trust+zaqar = aodh.notifier.zaqar:TrustZaqarAlarmNotifier
|
||||
trust+heat = aodh.notifier.heat:TrustHeatAlarmNotifier
|
||||
|
||||
wsgi_scripts =
|
||||
aodh-api = aodh.api.app:build_wsgi_app
|
||||
|
Loading…
Reference in New Issue
Block a user