Heat notifier: notify alarm without Octavia tags
This patch improves the heat notifier by removing the hard requirement for
Octavia tags. When Aodh is deployed in an environment where Octavia doesn't
support tags yet (before stable/stein), the heat notifier falls back to
searching Heat resources by the Octavia member ID.

Note: both approaches need a specific definition in the Heat template:

1. Use Octavia tags.

   pool_member:
     type: OS::Octavia::PoolMember
     properties:
       address: {get_attr: [server, first_address]}
       pool: {get_param: pool_id}
       protocol_port: {get_param: webserver_port}
       tags: [{get_param: "OS::stack_id"}]

2. Resource search.

   outputs:
     OS::stack_id:
       value: { get_resource: pool_member }

Change-Id: Ic2328aa907d23c6b6f827661a3fb3cb26f8496fd
parent 4b93caca42
commit e2d2ebf44e
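The tag-less fallback described in the commit message can be sketched with python-heatclient directly. This is a minimal illustration, not part of the patch: the helper name, the client construction, and the example IDs are assumptions, while the resources.list(), resources.mark_unhealthy() and stacks.update() calls mirror the ones used in the diff below.

    # Minimal sketch of the fallback: find the Heat resource whose physical ID
    # is the Octavia pool member ID, mark it unhealthy, then update the stack.
    def mark_member_unhealthy(heat, stack_id, asg_id, pool_member_id):
        # Search the stack (including nested stacks) by the member ID,
        # mirroring the notifier's fallback path.
        matches = heat.resources.list(
            stack_id, nested_depth=3, filters={"id": pool_member_id})
        if not matches:
            return
        resource_name = matches[0].resource_name
        heat.resources.mark_unhealthy(
            asg_id, resource_name, True, "unhealthy load balancer member")
        heat.stacks.update(stack_id, existing=True)

    # Assumed usage (client setup and IDs are placeholders):
    #   from heatclient import client
    #   heat = client.Client('1', session=keystone_session)
    #   mark_member_unhealthy(heat, "stack-uuid", "asg-resource-id", "member-uuid")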
@@ -39,12 +39,21 @@ class TrustHeatAlarmNotifier(notifier.AlarmNotifier):
     member unhealthy, then update Heat stack in place. In order to do that, the
     notifier needs to know:

-    - Heat stack ID.
+    - Heat top/root stack ID.
     - Heat autoscaling group ID.
     - The failed Octavia pool members.

-    The resource ID in the autoscaling group is saved in the Octavia member
-    tags. So, only Octavia stable/stein or later versions are supported.
+    In order to find which autoscaling group member that the failed pool member
+    belongs to, there are two ways supported(both ways require specific
+    definition in the Heat template):
+
+    1. The autoscaling group member resource ID is saved in the Octavia member
+       tag, the user should define that using 'tags' property of the
+       OS::Octavia::PoolMember resource. So, only Octavia stable/stein or later
+       versions are supported.
+    2. User customizes the autoscaling group member resource identifier
+       according to
+       https://docs.openstack.org/heat/latest/template_guide/composition.html#making-your-template-resource-more-transparent
     """

     def __init__(self, conf):
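The docstring above lists what the notifier needs from the alarm. For reference, a minimal reason_data payload in that shape; the values are placeholders borrowed from the unit tests further down, not a prescribed format.

    # Example alarm reason_data consumed by the notifier (placeholder values).
    reason_data = {
        "stack_id": "fake_stack_id",   # Heat top/root stack ID
        "asg_id": "fake_asg_id",       # autoscaling group resource ID
        "unhealthy_members": [
            # With Octavia >= stable/stein the member can carry the Heat
            # resource ID in "tags"; without tags the notifier falls back
            # to searching by the member "id" below.
            {"id": "3bd8bc5a-7632-11e9-84cd-00224d6b7bc1"}
        ],
    }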
@@ -69,46 +78,61 @@ class TrustHeatAlarmNotifier(notifier.AlarmNotifier):
         trust_id = action.username
         stack_id = reason_data.get("stack_id")
         asg_id = reason_data.get("asg_id")
+        unhealthy_members = reason_data.get("unhealthy_members", [])
+        unhealthy_resources = []

         if not stack_id or not asg_id:
-            LOG.warning(
+            LOG.error(
                 "stack_id and asg_id must exist to notify alarm %s", alarm_id
             )
             return

-        resources = []
-        unhealthy_members = reason_data.get("unhealthy_members", [])
+        heat_client = aodh_keystone.get_heat_client_from_trust(
+            self.conf, trust_id
+        )

         for member in unhealthy_members:
             for tag in member.get("tags", []):
                 if uuidutils.is_uuid_like(tag):
-                    resources.append(tag)
+                    unhealthy_resources.append(tag)

-        if resources:
-            try:
-                heat_client = aodh_keystone.get_heat_client_from_trust(
-                    self.conf, trust_id
+        if not unhealthy_resources:
+            # Fall back to search resource by the pool member ID.
+            for member in unhealthy_members:
+                target_resources = heat_client.resources.list(
+                    stack_id, nested_depth=3, filters={"id": member["id"]})
+                if len(target_resources) > 0:
+                    # There should be only one item.
+                    unhealthy_resources.append(
+                        target_resources[0].resource_name)
+
+        # If we still can't find expected resources, do nothing.
+        if not unhealthy_resources:
+            LOG.warning("No unhealthy resource found for the alarm %s",
+                        alarm_id)
+            return
+
+        try:
+            for res in unhealthy_resources:
+                heat_client.resources.mark_unhealthy(
+                    asg_id,
+                    res,
+                    True,
+                    "unhealthy load balancer member"
                 )
+                LOG.info(
+                    "Heat resource %(resource_id)s is marked as unhealthy "
+                    "for alarm %(alarm_id)s",
+                    {"resource_id": res, "alarm_id": alarm_id}
+                )

-                for res in resources:
-                    heat_client.resources.mark_unhealthy(
-                        asg_id,
-                        res,
-                        True,
-                        "unhealthy load balancer member"
-                    )
-                    LOG.info(
-                        "Heat resource %(resource_id)s is marked as unhealthy "
-                        "for alarm %(alarm_id)s",
-                        {"resource_id": res, "alarm_id": alarm_id}
-                    )
-
-                heat_client.stacks.update(stack_id, existing=True)
-                LOG.info(
-                    "Heat stack %(stack_id)s is updated for alarm "
-                    "%(alarm_id)s",
-                    {"stack_id": stack_id, "alarm_id": alarm_id}
-                )
-            except Exception as e:
-                LOG.exception("Failed to communicate with Heat service, "
-                              "error: %s", six.text_type(e))
+            heat_client.stacks.update(stack_id, existing=True)
+            LOG.info(
+                "Heat stack %(stack_id)s is updated for alarm "
+                "%(alarm_id)s",
+                {"stack_id": stack_id, "alarm_id": alarm_id}
+            )
+        except Exception as e:
+            LOG.exception("Failed to communicate with Heat service for alarm "
+                          "%s, error: %s",
+                          alarm_id, six.text_type(e))
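The trust_id consumed above travels inside the alarm action URL itself. A small sketch of how it is extracted, using the same fake URL as the tests below; oslo_utils is the only dependency assumed here.

    # The trust+autohealer action embeds the Keystone trust ID as the
    # "username" part of the URL; the notifier reads it via action.username.
    from oslo_utils import netutils

    action = netutils.urlsplit("trust+autohealer://fake_trust_id:delete@")
    trust_id = action.username  # -> "fake_trust_id"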
@@ -21,7 +21,7 @@ from aodh.tests.unit.notifier import base

 class TestTrustHeatAlarmNotifier(base.TestNotifierBase):
     @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
-    def test_notify(self, mock_heatclient):
+    def test_notify_with_tags(self, mock_heatclient):
         action = netutils.urlsplit("trust+autohealer://fake_trust_id:delete@")
         alarm_id = "fake_alarm_id"
         alarm_name = "fake_alarm_name"
@@ -55,6 +55,47 @@ class TestTrustHeatAlarmNotifier(base.TestNotifierBase):
             "fake_stack_id", existing=True
         )

+    @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
+    def test_notify_without_tags(self, mock_heatclient):
+        action = netutils.urlsplit("trust+autohealer://fake_trust_id:delete@")
+        alarm_id = "fake_alarm_id"
+        alarm_name = "fake_alarm_name"
+        severity = "low"
+        previous = "ok"
+        current = "alarm"
+        reason = "no good reason"
+        reason_data = {
+            "stack_id": "fake_stack_id",
+            "asg_id": "fake_asg_id",
+            "unhealthy_members": [
+                {"id": "3bd8bc5a-7632-11e9-84cd-00224d6b7bc1"}
+            ]
+        }
+
+        class FakeResource(object):
+            def __init__(self, resource_name):
+                self.resource_name = resource_name
+
+        mock_client = mock_heatclient.return_value
+        mock_client.resources.list.return_value = [
+            FakeResource("fake_resource_name")
+        ]
+
+        notifier = heat_notifier.TrustHeatAlarmNotifier(self.conf)
+        notifier.notify(action, alarm_id, alarm_name, severity, previous,
+                        current, reason, reason_data)
+
+        mock_heatclient.assert_called_once_with(self.conf, "fake_trust_id")
+        mock_client.resources.mark_unhealthy.assert_called_once_with(
+            "fake_asg_id",
+            "fake_resource_name",
+            True,
+            "unhealthy load balancer member"
+        )
+        mock_client.stacks.update.assert_called_once_with(
+            "fake_stack_id", existing=True
+        )
+
     @mock.patch("aodh.keystone_client.get_heat_client_from_trust")
     def test_notify_stack_id_missing(self, mock_heatclient):
         action = netutils.urlsplit("trust+autohealer://fake_trust_id:delete@")