Fix tests that check services' and nodes' alerts
Fix naming of the VIP-name getter method: method "PluginHelper.full_vip_name" was renamed to "PluginHelper.get_vip_resource_name". Fix naming of services on nagios: "global-" prefix was added for plugins with version >=1.0. Fix checking of haproxy backends state: removed checks for services that don't have a haproxy backend. Fixed disk usage percentage to be consistent with alerts' criteria. Temporarily disabled checking of openvswitch-agent, because it is managed by pacemaker. Removed code duplication in tests. Change-Id: If3b77c595fbb5e4348f5f3ebd6e82b445b01062c Closes-Bug: #1627671
This commit is contained in:
parent
bd151c4956
commit
86966e30bd
@ -139,13 +139,10 @@ def manage_service(remote, name, operation="restart"):
|
|||||||
:param operation: type of operation, usually start, stop or restart.
|
:param operation: type of operation, usually start, stop or restart.
|
||||||
:type operation: str
|
:type operation: str
|
||||||
"""
|
"""
|
||||||
|
if remote.execute("ls /etc/init/{}.conf".format(name))["exit_code"] == 0:
|
||||||
if remote.execute("service {} status".format(name))['exit_code'] == 0:
|
|
||||||
service_cmd = 'service {service} {operation}'
|
|
||||||
elif remote.execute("initctl status {}".format(name))['exit_code'] == 0:
|
|
||||||
service_cmd = 'initctl {operation} {service}'
|
service_cmd = 'initctl {operation} {service}'
|
||||||
else:
|
else:
|
||||||
raise Exception('no service handler!')
|
service_cmd = 'service {service} {operation}'
|
||||||
|
|
||||||
remote.check_call(service_cmd.format(service=name, operation=operation))
|
remote.check_call(service_cmd.format(service=name, operation=operation))
|
||||||
|
|
||||||
|
@ -11,16 +11,15 @@
|
|||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
# License for the specific language governing permissions and limitations
|
# License for the specific language governing permissions and limitations
|
||||||
# under the License.
|
# under the License.
|
||||||
import six.moves as sm
|
|
||||||
|
|
||||||
from devops.helpers import helpers
|
from devops.helpers import helpers
|
||||||
from fuelweb_test import logger
|
from fuelweb_test import logger
|
||||||
from proboscis import asserts
|
from proboscis import asserts
|
||||||
|
|
||||||
from selenium.common.exceptions import StaleElementReferenceException
|
from selenium.common import exceptions
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common import by
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as ec
|
||||||
from selenium.webdriver.support.ui import WebDriverWait
|
from selenium.webdriver.support import ui
|
||||||
|
|
||||||
from stacklight_tests import base_test
|
from stacklight_tests import base_test
|
||||||
from stacklight_tests.lma_infrastructure_alerting import (
|
from stacklight_tests.lma_infrastructure_alerting import (
|
||||||
@ -100,8 +99,8 @@ class InfraAlertingPluginApi(base_test.PluginApi):
|
|||||||
link.click()
|
link.click()
|
||||||
driver.switch_to.default_content()
|
driver.switch_to.default_content()
|
||||||
driver.switch_to.frame(driver.find_element_by_name("main"))
|
driver.switch_to.frame(driver.find_element_by_name("main"))
|
||||||
WebDriverWait(driver, 120).until(
|
ui.WebDriverWait(driver, 120).until(
|
||||||
EC.presence_of_element_located((By.XPATH, anchor)))
|
ec.presence_of_element_located((by.By.XPATH, anchor)))
|
||||||
return driver
|
return driver
|
||||||
|
|
||||||
def check_node_in_nagios(self, changed_node, state):
|
def check_node_in_nagios(self, changed_node, state):
|
||||||
@ -117,7 +116,7 @@ class InfraAlertingPluginApi(base_test.PluginApi):
|
|||||||
def node_is_present(self, driver, name):
|
def node_is_present(self, driver, name):
|
||||||
table = self.ui_tester.get_table(driver,
|
table = self.ui_tester.get_table(driver,
|
||||||
"/html/body/div[2]/table/tbody")
|
"/html/body/div[2]/table/tbody")
|
||||||
for ind in sm.xrange(2, self.ui_tester.get_table_size(table) + 1):
|
for ind in range(2, self.ui_tester.get_table_size(table) + 1):
|
||||||
node_name = self.ui_tester.get_table_cell(
|
node_name = self.ui_tester.get_table_cell(
|
||||||
table, ind, 1).text.rstrip()
|
table, ind, 1).text.rstrip()
|
||||||
if name == node_name:
|
if name == node_name:
|
||||||
@ -133,9 +132,12 @@ class InfraAlertingPluginApi(base_test.PluginApi):
|
|||||||
return self.helpers.check_plugin_cannot_be_uninstalled(
|
return self.helpers.check_plugin_cannot_be_uninstalled(
|
||||||
self.settings.name, self.settings.version)
|
self.settings.name, self.settings.version)
|
||||||
|
|
||||||
def get_services_for_node(self, table, node_name, driver,
|
def get_services_for_node(self, node_name, driver,
|
||||||
table_xpath="/html/body/table[3]/tbody"):
|
table_xpath="/html/body/table[3]/tbody"):
|
||||||
services = {}
|
services = {}
|
||||||
|
limit_xpath = "//select[@name='limit']/option[@value='0']"
|
||||||
|
driver.find_element_by_xpath(limit_xpath).click()
|
||||||
|
table = self.ui_tester.get_table(driver, table_xpath)
|
||||||
found_node = False
|
found_node = False
|
||||||
ind = 2
|
ind = 2
|
||||||
while ind < self.ui_tester.get_table_size(table) + 1:
|
while ind < self.ui_tester.get_table_size(table) + 1:
|
||||||
@ -144,6 +146,7 @@ class InfraAlertingPluginApi(base_test.PluginApi):
|
|||||||
if found_node:
|
if found_node:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
ind += 1
|
||||||
continue
|
continue
|
||||||
if self.ui_tester.get_table_cell(
|
if self.ui_tester.get_table_cell(
|
||||||
table, ind, 1).text == node_name:
|
table, ind, 1).text == node_name:
|
||||||
@ -152,7 +155,8 @@ class InfraAlertingPluginApi(base_test.PluginApi):
|
|||||||
services[self.ui_tester.get_table_cell(
|
services[self.ui_tester.get_table_cell(
|
||||||
table, ind, 2).text] = (
|
table, ind, 2).text] = (
|
||||||
self.ui_tester.get_table_cell(table, ind, 3).text)
|
self.ui_tester.get_table_cell(table, ind, 3).text)
|
||||||
except StaleElementReferenceException:
|
except exceptions.StaleElementReferenceException:
|
||||||
|
driver.find_element_by_xpath(limit_xpath).click()
|
||||||
table = self.ui_tester.get_table(driver, table_xpath)
|
table = self.ui_tester.get_table(driver, table_xpath)
|
||||||
ind -= 1
|
ind -= 1
|
||||||
ind += 1
|
ind += 1
|
||||||
@ -162,12 +166,12 @@ class InfraAlertingPluginApi(base_test.PluginApi):
|
|||||||
def check_service_state_on_nagios(self, driver, service_state=None,
|
def check_service_state_on_nagios(self, driver, service_state=None,
|
||||||
node_names=None):
|
node_names=None):
|
||||||
self.open_nagios_page(
|
self.open_nagios_page(
|
||||||
driver, 'Services', "//table[@class='headertable']")
|
driver, "Services", "//table[@class='headertable']")
|
||||||
table = self.ui_tester.get_table(driver, "/html/body/table[3]/tbody")
|
table = self.ui_tester.get_table(driver, "/html/body/table[3]/tbody")
|
||||||
if not node_names:
|
if not node_names:
|
||||||
node_names = [self.ui_tester.get_table_cell(table, 2, 1).text]
|
node_names = [self.ui_tester.get_table_cell(table, 2, 1).text]
|
||||||
for node in node_names:
|
for node in node_names:
|
||||||
node_services = self.get_services_for_node(table, node, driver)
|
node_services = self.get_services_for_node(node, driver)
|
||||||
if service_state:
|
if service_state:
|
||||||
for service in service_state:
|
for service in service_state:
|
||||||
if service_state[service] != node_services[service]:
|
if service_state[service] != node_services[service]:
|
||||||
@ -182,20 +186,10 @@ class InfraAlertingPluginApi(base_test.PluginApi):
|
|||||||
node_names=None):
|
node_names=None):
|
||||||
msg = ("Fail to get expected service states for services: {0} "
|
msg = ("Fail to get expected service states for services: {0} "
|
||||||
"on nodes: {1}")
|
"on nodes: {1}")
|
||||||
|
msg = msg.format(
|
||||||
if not service_state or not node_names:
|
[key for key in service_state]
|
||||||
self.open_nagios_page(
|
if service_state is not None else "all",
|
||||||
driver, 'Services', "//table[@class='headertable']")
|
node_names if node_names is not None else "global-cluster")
|
||||||
table = self.ui_tester.get_table(driver,
|
|
||||||
"/html/body/table[3]/tbody")
|
|
||||||
if not node_names:
|
|
||||||
node_names = [self.ui_tester.get_table_cell(table, 2, 1).text]
|
|
||||||
if not service_state:
|
|
||||||
service_state = dict((key, 'OK') for key in
|
|
||||||
self.get_services_for_node(
|
|
||||||
table, node_names[0], driver))
|
|
||||||
|
|
||||||
msg = msg.format([key for key in service_state], node_names)
|
|
||||||
|
|
||||||
helpers.wait(lambda: self.check_service_state_on_nagios(
|
helpers.wait(lambda: self.check_service_state_on_nagios(
|
||||||
driver, service_state, node_names), timeout=60 * 5,
|
driver, service_state, node_names), timeout=60 * 5,
|
||||||
|
@ -380,16 +380,18 @@ class ToolchainApi(object):
|
|||||||
and Nagios UI.
|
and Nagios UI.
|
||||||
|
|
||||||
:param service_name: name of the service to change state of.
|
:param service_name: name of the service to change state of.
|
||||||
Format [service name, service name
|
Format:
|
||||||
on dashboard] e.g. ['nova-api', 'nova']
|
[service name, service name in influx,
|
||||||
|
service name on nagios, haproxy-backend for service if exist]
|
||||||
|
e.g. ['nova-api', 'nova', 'nova-global', 'nova-api']
|
||||||
:type service_name: list.
|
:type service_name: list.
|
||||||
:param action: action to perform (e.g. stop, start).
|
:param action: action to perform (e.g. stop, start).
|
||||||
:type action: str
|
:type action: str
|
||||||
:param new_state: new state of the service.
|
:param new_state: new state of the service.
|
||||||
:type new_state: str
|
:type new_state: str
|
||||||
:param service_state_in_influx: new state of the service in influx.
|
:param service_state_in_influx: new state of the service in influx.
|
||||||
:type new_state: int
|
:type service_state_in_influx: int
|
||||||
:param down_backends_in_haproxy: amout of backends in 'down' state.
|
:param down_backends_in_haproxy: amount of backends in 'down' state.
|
||||||
:type down_backends_in_haproxy: int
|
:type down_backends_in_haproxy: int
|
||||||
:param toolchain_node: toolchain node with
|
:param toolchain_node: toolchain node with
|
||||||
infrastructure_alerting_ui vip.
|
infrastructure_alerting_ui vip.
|
||||||
@ -411,15 +413,16 @@ class ToolchainApi(object):
|
|||||||
node) as remote:
|
node) as remote:
|
||||||
self.remote_ops.manage_service(remote, service_name[0], action)
|
self.remote_ops.manage_service(remote, service_name[0], action)
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
||||||
nagios_driver, {service_name[1]: new_state})
|
nagios_driver, {service_name[2]: new_state})
|
||||||
self.INFLUXDB_GRAFANA.check_cluster_status(
|
self.INFLUXDB_GRAFANA.check_cluster_status(
|
||||||
service_name[1], service_state_in_influx)
|
service_name[1], service_state_in_influx)
|
||||||
|
if service_name[3]:
|
||||||
self.INFLUXDB_GRAFANA.check_count_of_haproxy_backends(
|
self.INFLUXDB_GRAFANA.check_count_of_haproxy_backends(
|
||||||
service_name[0], expected_count=down_backends_in_haproxy)
|
service_name[3], expected_count=down_backends_in_haproxy)
|
||||||
with self.helpers.fuel_web.get_ssh_for_nailgun_node(
|
with self.helpers.fuel_web.get_ssh_for_nailgun_node(
|
||||||
toolchain_node) as remote:
|
toolchain_node) as remote:
|
||||||
self.checkers.check_local_mail(
|
self.checkers.check_local_mail(
|
||||||
remote, toolchain_node["name"], service_name[1], new_state)
|
remote, toolchain_node["name"], service_name[2], new_state)
|
||||||
|
|
||||||
def change_verify_node_service_state(self, services, state, influx_state,
|
def change_verify_node_service_state(self, services, state, influx_state,
|
||||||
percent, toolchain_node,
|
percent, toolchain_node,
|
||||||
@ -458,11 +461,11 @@ class ToolchainApi(object):
|
|||||||
"/var/lib/mysql/test/bigfile")
|
"/var/lib/mysql/test/bigfile")
|
||||||
|
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
||||||
nagios_driver, {services[0]: 'OK'})
|
nagios_driver, {services[0]: "OK"})
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
||||||
nagios_driver, {services[1]: state},
|
nagios_driver, {services[1]: state},
|
||||||
[controller_nodes[0]['hostname']])
|
[controller_nodes[0]["hostname"]])
|
||||||
self.INFLUXDB_GRAFANA.check_cluster_status(services[0],
|
self.INFLUXDB_GRAFANA.check_cluster_status(services[2],
|
||||||
self.settings.OKAY)
|
self.settings.OKAY)
|
||||||
|
|
||||||
with self.fuel_web.get_ssh_for_nailgun_node(
|
with self.fuel_web.get_ssh_for_nailgun_node(
|
||||||
@ -475,8 +478,8 @@ class ToolchainApi(object):
|
|||||||
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
||||||
nagios_driver, {services[0]: state})
|
nagios_driver, {services[0]: state})
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
||||||
nagios_driver, {services[1]: state}, [node['hostname']])
|
nagios_driver, {services[1]: state}, [node["hostname"]])
|
||||||
self.INFLUXDB_GRAFANA.check_cluster_status(services[0], influx_state)
|
self.INFLUXDB_GRAFANA.check_cluster_status(services[2], influx_state)
|
||||||
|
|
||||||
with self.helpers.fuel_web.get_ssh_for_nailgun_node(
|
with self.helpers.fuel_web.get_ssh_for_nailgun_node(
|
||||||
toolchain_node) as remote:
|
toolchain_node) as remote:
|
||||||
@ -490,13 +493,13 @@ class ToolchainApi(object):
|
|||||||
|
|
||||||
for node in controller_nodes:
|
for node in controller_nodes:
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
||||||
nagios_driver, {services[0]: 'OK'})
|
nagios_driver, {services[0]: "OK"})
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
|
||||||
nagios_driver, {services[1]: 'OK'}, [node['hostname']])
|
nagios_driver, {services[1]: "OK"}, [node["hostname"]])
|
||||||
self.INFLUXDB_GRAFANA.check_cluster_status(services[0],
|
self.INFLUXDB_GRAFANA.check_cluster_status(services[2],
|
||||||
self.settings.OKAY)
|
self.settings.OKAY)
|
||||||
|
|
||||||
with self.helpers.fuel_web.get_ssh_for_nailgun_node(
|
with self.helpers.fuel_web.get_ssh_for_nailgun_node(
|
||||||
toolchain_node) as remote:
|
toolchain_node) as remote:
|
||||||
self.checkers.check_local_mail(
|
self.checkers.check_local_mail(
|
||||||
remote, toolchain_node["name"], services[0], 'OK')
|
remote, toolchain_node["name"], services[0], "OK")
|
||||||
|
@ -236,6 +236,78 @@ class TestFunctionalToolchain(api.ToolchainApi):
|
|||||||
|
|
||||||
self.check_cinder_notifications()
|
self.check_cinder_notifications()
|
||||||
|
|
||||||
|
def _check_services_alerts(self, controllers_count, nagios_status,
|
||||||
|
influx_status, down_haproxy_count):
|
||||||
|
components = {
|
||||||
|
"nova": [("nova-api", "nova-api"), ("nova-scheduler", None)],
|
||||||
|
"cinder": [("cinder-api", "cinder-api"),
|
||||||
|
("cinder-scheduler", None)],
|
||||||
|
"neutron": [
|
||||||
|
("neutron-server", "neutron-api"),
|
||||||
|
# TODO(rpromyshlennikov): temporary fix,
|
||||||
|
# because openvswitch-agent is managed by pacemaker
|
||||||
|
# ("neutron-openvswitch-agent", None)
|
||||||
|
],
|
||||||
|
"glance": [("glance-api", "glance-api")],
|
||||||
|
"heat": [("heat-api", "heat-api")],
|
||||||
|
"keystone": [("apache2", "keystone-public-api")]
|
||||||
|
}
|
||||||
|
|
||||||
|
alerting_plugin = self.LMA_INFRASTRUCTURE_ALERTING
|
||||||
|
services_names_in_nagios = {}
|
||||||
|
for service in components:
|
||||||
|
nagios_service_name = (
|
||||||
|
service
|
||||||
|
if alerting_plugin.settings.version.startswith("0.")
|
||||||
|
else "global-{}".format(service)
|
||||||
|
)
|
||||||
|
services_names_in_nagios[service] = nagios_service_name
|
||||||
|
|
||||||
|
lma_devops_node = self.helpers.get_node_with_vip(
|
||||||
|
self.settings.stacklight_roles,
|
||||||
|
self.helpers.get_vip_resource_name(
|
||||||
|
alerting_plugin.settings.failover_vip))
|
||||||
|
toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
|
||||||
|
lma_devops_node)
|
||||||
|
|
||||||
|
url = alerting_plugin.get_authenticated_nagios_url()
|
||||||
|
with self.ui_tester.ui_driver(url, "Nagios Core",
|
||||||
|
"//frame[2]") as driver:
|
||||||
|
alerting_plugin.open_nagios_page(
|
||||||
|
driver, "Services", "//table[@class='headertable']")
|
||||||
|
controller_nodes = (
|
||||||
|
self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
||||||
|
self.helpers.cluster_id,
|
||||||
|
["controller"])[:controllers_count]
|
||||||
|
)
|
||||||
|
for component in components:
|
||||||
|
for (service, haproxy_backend) in components[component]:
|
||||||
|
logger.info("Checking service {0}".format(service))
|
||||||
|
self.change_verify_service_state(
|
||||||
|
service_name=[
|
||||||
|
service, component,
|
||||||
|
services_names_in_nagios[component],
|
||||||
|
haproxy_backend],
|
||||||
|
action="stop",
|
||||||
|
new_state=nagios_status,
|
||||||
|
service_state_in_influx=influx_status,
|
||||||
|
down_backends_in_haproxy=down_haproxy_count,
|
||||||
|
toolchain_node=toolchain_node,
|
||||||
|
controller_nodes=controller_nodes,
|
||||||
|
nagios_driver=driver)
|
||||||
|
self.change_verify_service_state(
|
||||||
|
service_name=[
|
||||||
|
service, component,
|
||||||
|
services_names_in_nagios[component],
|
||||||
|
haproxy_backend],
|
||||||
|
action="start",
|
||||||
|
new_state="OK",
|
||||||
|
service_state_in_influx=self.settings.OKAY,
|
||||||
|
down_backends_in_haproxy=0,
|
||||||
|
toolchain_node=toolchain_node,
|
||||||
|
controller_nodes=controller_nodes,
|
||||||
|
nagios_driver=driver)
|
||||||
|
|
||||||
@test(depends_on_groups=["deploy_ha_toolchain"],
|
@test(depends_on_groups=["deploy_ha_toolchain"],
|
||||||
groups=["toolchain_warning_alert_service", "service_restart",
|
groups=["toolchain_warning_alert_service", "service_restart",
|
||||||
"toolchain", "functional"])
|
"toolchain", "functional"])
|
||||||
@ -282,54 +354,17 @@ class TestFunctionalToolchain(api.ToolchainApi):
|
|||||||
Duration 45m
|
Duration 45m
|
||||||
"""
|
"""
|
||||||
self.env.revert_snapshot("deploy_ha_toolchain")
|
self.env.revert_snapshot("deploy_ha_toolchain")
|
||||||
|
params = {"controllers_count": 1,
|
||||||
|
"nagios_status": "WARNING",
|
||||||
|
"influx_status": self.settings.WARN,
|
||||||
|
"down_haproxy_count": 1}
|
||||||
|
|
||||||
services = {
|
self._check_services_alerts(**params)
|
||||||
'nova': ['nova-api', 'nova-scheduler'],
|
|
||||||
'cinder': ['cinder-api', 'cinder-scheduler'],
|
|
||||||
'neutron': ['neutron-server', 'neutron-openvswitch-agent'],
|
|
||||||
'glance': ['glance-api'],
|
|
||||||
'heat': ['heat-api'],
|
|
||||||
'keystone': ['apache2']
|
|
||||||
}
|
|
||||||
|
|
||||||
lma_devops_node = self.helpers.get_node_with_vip(
|
|
||||||
self.settings.stacklight_roles,
|
|
||||||
self.helpers.full_vip_name(
|
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.settings.failover_vip))
|
|
||||||
toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
|
|
||||||
lma_devops_node)
|
|
||||||
|
|
||||||
url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
|
|
||||||
with self.ui_tester.ui_driver(url, "Nagios Core",
|
|
||||||
"//frame[2]") as driver:
|
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
|
|
||||||
driver, 'Services', "//table[@class='headertable']")
|
|
||||||
controller_node = (
|
|
||||||
self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
||||||
self.helpers.cluster_id, ['controller'])[0])
|
|
||||||
for key in services:
|
|
||||||
for service in services[key]:
|
|
||||||
self.change_verify_service_state(
|
|
||||||
service_name=[service, key], action='stop',
|
|
||||||
new_state='WARNING',
|
|
||||||
service_state_in_influx=self.settings.WARN,
|
|
||||||
down_backends_in_haproxy=1,
|
|
||||||
toolchain_node=toolchain_node,
|
|
||||||
controller_nodes=[controller_node],
|
|
||||||
nagios_driver=driver)
|
|
||||||
self.change_verify_service_state(
|
|
||||||
service_name=[service, key], action='start',
|
|
||||||
new_state='OK',
|
|
||||||
service_state_in_influx=self.settings.OKAY,
|
|
||||||
down_backends_in_haproxy=0,
|
|
||||||
toolchain_node=toolchain_node,
|
|
||||||
controller_nodes=[controller_node],
|
|
||||||
nagios_driver=driver)
|
|
||||||
|
|
||||||
@test(depends_on_groups=["deploy_ha_toolchain"],
|
@test(depends_on_groups=["deploy_ha_toolchain"],
|
||||||
groups=["toolchain_critical_alert_service", "service_restart",
|
groups=["toolchain_critical_alert_service", "service_restart",
|
||||||
"toolchain", "functional"])
|
"toolchain", "functional"])
|
||||||
# @log_snapshot_after_test
|
@log_snapshot_after_test
|
||||||
def toolchain_critical_alert_service(self):
|
def toolchain_critical_alert_service(self):
|
||||||
"""Verify that the critical alerts for services show up in
|
"""Verify that the critical alerts for services show up in
|
||||||
the Grafana and Nagios UI.
|
the Grafana and Nagios UI.
|
||||||
@ -367,52 +402,39 @@ class TestFunctionalToolchain(api.ToolchainApi):
|
|||||||
Duration 45m
|
Duration 45m
|
||||||
"""
|
"""
|
||||||
self.env.revert_snapshot("deploy_ha_toolchain")
|
self.env.revert_snapshot("deploy_ha_toolchain")
|
||||||
|
params = {"controllers_count": 2,
|
||||||
|
"nagios_status": "CRITICAL",
|
||||||
|
"influx_status": self.settings.CRIT,
|
||||||
|
"down_haproxy_count": 2}
|
||||||
|
|
||||||
services = {
|
self._check_services_alerts(**params)
|
||||||
'nova': ['nova-api', 'nova-scheduler'],
|
|
||||||
'cinder': ['cinder-api', 'cinder-scheduler'],
|
|
||||||
'neutron': ['neutron-server', 'neutron-openvswitch-agent'],
|
|
||||||
'glance': ['glance-api'],
|
|
||||||
'heat': ['heat-api'],
|
|
||||||
'keystone': ['apache2']
|
|
||||||
}
|
|
||||||
|
|
||||||
|
def _check_mysql_alerts_node(
|
||||||
|
self, nagios_status, influx_status, disk_usage_percent):
|
||||||
lma_devops_node = self.helpers.get_node_with_vip(
|
lma_devops_node = self.helpers.get_node_with_vip(
|
||||||
self.settings.stacklight_roles,
|
self.settings.stacklight_roles,
|
||||||
self.helpers.full_vip_name(
|
self.helpers.get_vip_resource_name(
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.settings.failover_vip))
|
"infrastructure_alerting_mgmt_vip"))
|
||||||
toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
|
toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
|
||||||
lma_devops_node)
|
lma_devops_node)
|
||||||
|
nailgun_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
||||||
|
self.helpers.cluster_id, ["controller"])
|
||||||
|
|
||||||
url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
|
alerting_plugin = self.LMA_INFRASTRUCTURE_ALERTING
|
||||||
|
url = alerting_plugin.get_authenticated_nagios_url()
|
||||||
with self.ui_tester.ui_driver(url, "Nagios Core",
|
with self.ui_tester.ui_driver(url, "Nagios Core",
|
||||||
"//frame[2]") as driver:
|
"//frame[2]") as driver:
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
|
alerting_plugin.open_nagios_page(
|
||||||
driver, 'Services', "//table[@class='headertable']")
|
driver, "Services", "//table[@class='headertable']")
|
||||||
controller_nodes = (
|
nagios_service_name = (
|
||||||
self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
"mysql"
|
||||||
self.helpers.cluster_id, ['controller']))
|
if alerting_plugin.settings.version.startswith("0.")
|
||||||
for key in services:
|
else "global-mysql")
|
||||||
for service in services[key]:
|
self.change_verify_node_service_state(
|
||||||
logger.info("Checking service {0}".format(service))
|
[nagios_service_name, "mysql-nodes.mysql-fs", "mysql"],
|
||||||
self.change_verify_service_state(
|
nagios_status,
|
||||||
service_name=[service, key], action='stop',
|
influx_status, disk_usage_percent, toolchain_node,
|
||||||
new_state='CRITICAL',
|
nailgun_nodes[:2], driver)
|
||||||
service_state_in_influx=self.settings.CRIT,
|
|
||||||
down_backends_in_haproxy=2,
|
|
||||||
toolchain_node=toolchain_node,
|
|
||||||
controller_nodes=[controller_nodes[0],
|
|
||||||
controller_nodes[1]],
|
|
||||||
nagios_driver=driver)
|
|
||||||
self.change_verify_service_state(
|
|
||||||
service_name=[service, key], action='start',
|
|
||||||
new_state='OK',
|
|
||||||
service_state_in_influx=self.settings.OKAY,
|
|
||||||
down_backends_in_haproxy=0,
|
|
||||||
toolchain_node=toolchain_node,
|
|
||||||
controller_nodes=[controller_nodes[0],
|
|
||||||
controller_nodes[1]],
|
|
||||||
nagios_driver=driver)
|
|
||||||
|
|
||||||
@test(depends_on_groups=["deploy_ha_toolchain"],
|
@test(depends_on_groups=["deploy_ha_toolchain"],
|
||||||
groups=["toolchain_warning_alert_node", "node_alert_warning",
|
groups=["toolchain_warning_alert_node", "node_alert_warning",
|
||||||
@ -473,24 +495,11 @@ class TestFunctionalToolchain(api.ToolchainApi):
|
|||||||
Duration 15m
|
Duration 15m
|
||||||
"""
|
"""
|
||||||
self.env.revert_snapshot("deploy_ha_toolchain")
|
self.env.revert_snapshot("deploy_ha_toolchain")
|
||||||
|
params = {
|
||||||
lma_devops_node = self.helpers.get_node_with_vip(
|
"nagios_status": "WARNING",
|
||||||
self.settings.stacklight_roles,
|
"influx_status": self.settings.WARN,
|
||||||
self.helpers.full_vip_name("infrastructure_alerting_mgmt_vip"))
|
"disk_usage_percent": 91}
|
||||||
toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
|
self._check_mysql_alerts_node(**params)
|
||||||
lma_devops_node)
|
|
||||||
nailgun_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
||||||
self.helpers.cluster_id, ['controller'])
|
|
||||||
|
|
||||||
url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
|
|
||||||
with self.ui_tester.ui_driver(url, "Nagios Core",
|
|
||||||
"//frame[2]") as driver:
|
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
|
|
||||||
driver, 'Services', "//table[@class='headertable']")
|
|
||||||
self.change_verify_node_service_state(
|
|
||||||
['mysql', 'mysql-nodes.mysql-fs'], 'WARNING',
|
|
||||||
self.settings.WARN, '96', toolchain_node,
|
|
||||||
[nailgun_nodes[0], nailgun_nodes[1]], driver)
|
|
||||||
|
|
||||||
@test(depends_on_groups=["deploy_ha_toolchain"],
|
@test(depends_on_groups=["deploy_ha_toolchain"],
|
||||||
groups=["toolchain_critical_alert_node", "node_alert_critical",
|
groups=["toolchain_critical_alert_node", "node_alert_critical",
|
||||||
@ -550,21 +559,8 @@ class TestFunctionalToolchain(api.ToolchainApi):
|
|||||||
Duration 15m
|
Duration 15m
|
||||||
"""
|
"""
|
||||||
self.env.revert_snapshot("deploy_ha_toolchain")
|
self.env.revert_snapshot("deploy_ha_toolchain")
|
||||||
|
params = {
|
||||||
lma_devops_node = self.helpers.get_node_with_vip(
|
"nagios_status": "CRITICAL",
|
||||||
self.settings.stacklight_roles,
|
"influx_status": self.settings.CRIT,
|
||||||
self.helpers.full_vip_name("infrastructure_alerting_mgmt_vip"))
|
"disk_usage_percent": 96}
|
||||||
toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
|
self._check_mysql_alerts_node(**params)
|
||||||
lma_devops_node)
|
|
||||||
nailgun_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
||||||
self.helpers.cluster_id, ['controller'])
|
|
||||||
|
|
||||||
url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
|
|
||||||
with self.ui_tester.ui_driver(url, "Nagios Core",
|
|
||||||
"//frame[2]") as driver:
|
|
||||||
self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
|
|
||||||
driver, 'Services', "//table[@class='headertable']")
|
|
||||||
self.change_verify_node_service_state(
|
|
||||||
['mysql', 'mysql-nodes.mysql-fs'], 'CRITICAL',
|
|
||||||
self.settings.UNKW, '98', toolchain_node,
|
|
||||||
[nailgun_nodes[0], nailgun_nodes[1]], driver)
|
|
||||||
|
Loading…
Reference in New Issue
Block a user