# Copyright 2016 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from fuelweb_test.helpers.decorators import log_snapshot_after_test
from fuelweb_test import logger
from proboscis import test

from stacklight_tests.toolchain import api

OKAY_STATUS = 0
WARNING_STATUS = 1
UNKNOWN_STATUS = 2
CRITICAL_STATUS = 3
DOWN_STATUS = 4

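# Filesystem fill levels used by the *-fs tests below. They are assumed to sit
# just above the warning/critical usage thresholds of the built-in alarms, so
# filling a partition to these levels should flip the alarm status.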
WARNING_PERCENT = 91
CRITICAL_PERCENT = 96

RABBITMQ_DISK_WARNING_PERCENT = 99.99
RABBITMQ_DISK_CRITICAL_PERCENT = 100
RABBITMQ_MEMORY_WARNING_VALUE = 1.01
RABBITMQ_MEMORY_CRITICAL_VALUE = 1.0001


@test(groups=["plugins"])
|
|
class TestToolchainAlarms(api.ToolchainApi):
|
|
"""Class for testing built-in StackLight Collector alarms.
|
|
"""
|
|
|
|
def _check_filesystem_alarms(self, nailgun_node, filesystem, source,
|
|
filename, node_role, alarm_type="node"):
|
|
self.check_alarms(alarm_type, node_role, source,
|
|
nailgun_node["hostname"], OKAY_STATUS)
|
|
with self.fuel_web.get_ssh_for_nailgun_node(nailgun_node) as remote:
|
|
self.remote_ops.fill_up_filesystem(
|
|
remote, filesystem, WARNING_PERCENT, filename)
|
|
logger.info("Checking {}-warning alarm".format(source))
|
|
self.check_alarms(alarm_type, node_role, source,
|
|
nailgun_node["hostname"], WARNING_STATUS)
|
|
self.remote_ops.clean_filesystem(remote, filename)
|
|
self.check_alarms(alarm_type, node_role,
|
|
source, nailgun_node["hostname"], OKAY_STATUS)
|
|
self.remote_ops.fill_up_filesystem(
|
|
remote, filesystem, CRITICAL_PERCENT, filename)
|
|
logger.info("Checking {}-critical alarm".format(source))
|
|
self.check_alarms(alarm_type, node_role, source,
|
|
nailgun_node["hostname"], CRITICAL_STATUS)
|
|
self.remote_ops.clean_filesystem(remote, filename)
|
|
self.check_alarms(alarm_type, node_role, source,
|
|
nailgun_node["hostname"], OKAY_STATUS)
|
|
|
|
    def _check_rabbit_mq_disk_alarms(self, controller, status, percent):
        cmd = ("rabbitmqctl set_disk_free_limit $(df | grep /dev/dm-4 | "
               "awk '{{ printf(\"%.0f\\n\", 1024 * ((($3 + $4) * "
               "{percent} / 100) - $3))}}')")
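        # The df/awk pipeline derives an absolute free-disk limit equal to
        # `percent` of the partition size minus the space already used (df
        # reports KiB, hence the factor of 1024), so the configured limit
        # lands right around the space actually left free. /dev/dm-4 is
        # assumed to back the RabbitMQ partition on the deployed controllers;
        # the device name is deployment specific.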
self.check_alarms("service", "rabbitmq-cluster", "disk",
|
|
controller["hostname"], OKAY_STATUS)
|
|
with self.fuel_web.get_ssh_for_nailgun_node(controller) as remote:
|
|
default_value = remote.check_call(
|
|
"rabbitmqctl environment | grep disk_free_limit | "
|
|
"sed -r 's/}.+//' | sed 's|.*,||'")['stdout'][0].rstrip()
|
|
remote.check_call(cmd.format(percent=percent))
|
|
self.check_alarms("service", "rabbitmq-cluster", "disk",
|
|
controller["hostname"], status)
|
|
remote.check_call("rabbitmqctl set_disk_free_limit {}".format(
|
|
default_value))
|
|
self.check_alarms("service", "rabbitmq-cluster", "disk",
|
|
controller["hostname"], OKAY_STATUS)
|
|
|
|
    def _check_rabbit_mq_memory_alarms(self, controller, status, value):
        cmd = "rabbitmqctl set_vm_memory_high_watermark absolute \"{memory}\""
        self.check_alarms("service", "rabbitmq-cluster", "memory",
                          controller["hostname"], OKAY_STATUS)
        with self.fuel_web.get_ssh_for_nailgun_node(controller) as remote:
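            # Read back the currently configured memory watermark so it can
            # be restored once the check is done (this parses the
            # `{vm_memory_high_watermark,<value>}` entry printed by
            # `rabbitmqctl environment`).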
            default_value = remote.check_call(
                "rabbitmqctl environment | grep vm_memory_high_watermark | "
                "sed -r 's/}.+//' | sed 's|.*,||'")['stdout'][0].rstrip()
            mem_usage = self.get_rabbitmq_memory_usage()
            remote.check_call(cmd.format(memory=int(mem_usage * value)))
            self.check_alarms("service", "rabbitmq-cluster", "memory",
                              controller["hostname"], status)
            self.set_rabbitmq_memory_watermark(controller, default_value)
            self.check_alarms("service", "rabbitmq-cluster", "memory",
                              controller["hostname"], OKAY_STATUS)

    def _verify_service_alarms(self, trigger_fn, trigger_count,
                               metrics, status):
        """Check services' alarm metrics.

        :param trigger_fn: function that triggers an alarm of the needed
            service
        :type trigger_fn: callable
        :param trigger_count: how many times to call the trigger function
        :type trigger_count: int
        :param metrics: mapping of alarm services to the sources to check
        :type metrics: dict
        :param status: expected value of the alarm metric
        :type status: int (in most cases)
        :return: None
        """
        for _ in range(trigger_count):
            trigger_fn()
        for service, source in metrics.items():
            self.check_alarms("service", service, source, None, status)

@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_mysql_fs_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_mysql_fs_alarms(self):
|
|
"""Check that mysql-fs-warning and mysql-fs-critical alarms work as
|
|
expected.
|
|
|
|
Scenario:
|
|
1. Fill up /var/lib/mysql filesystem to 91 percent.
|
|
2. Check the last value of the warning alarm in InfluxDB.
|
|
3. Clean the filesystem.
|
|
4. Fill up /var/lib/mysql filesystem to 96 percent.
|
|
5. Check the last value of the critical alarm in InfluxDB.
|
|
6. Clean the filesystem.
|
|
|
|
Duration 10m
|
|
"""
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["controller"])[0]
|
|
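        # The filesystem to fill and the test file location come from the
        # plugin settings rather than being hard-coded, so the test also
        # works on releases where /var/lib/mysql/test does not exist (see
        # the related bug #1636261).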
        self._check_filesystem_alarms(
            controller, self.settings.mysql_fs, "mysql-fs",
            self.settings.mysql_fs_alarm_test_file, "mysql-nodes")

@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_rabbitmq_disk_alarm", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_rabbitmq_disk_alarm(self):
|
|
"""Check that rabbitmq-disk-limit-warning and
|
|
rabbitmq-disk-limit-critical alarms work as expected.
|
|
|
|
Scenario:
|
|
1. Check the last value of the okay alarm in InfluxDB.
|
|
2. Set RabbitMQ disk limit to 99.99 percent of available space.
|
|
3. Check the last value of the warning alarm in InfluxDB.
|
|
4. Set RabbitMQ disk limit to the default value.
|
|
5. Check the last value of the okay alarm in InfluxDB.
|
|
6. Set RabbitMQ disk limit to 100 percent of available space.
|
|
7. Check the last value of the critical alarm in InfluxDB.
|
|
8. Set RabbitMQ disk limit to the default value.
|
|
9. Check the last value of the okay alarm in InfluxDB.
|
|
|
|
Duration 10m
|
|
"""
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["controller"])[0]
|
|
self._check_rabbit_mq_disk_alarms(controller, WARNING_STATUS,
|
|
RABBITMQ_DISK_WARNING_PERCENT)
|
|
self._check_rabbit_mq_disk_alarms(controller, CRITICAL_STATUS,
|
|
RABBITMQ_DISK_CRITICAL_PERCENT)
|
|
|
|
@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_rabbitmq_memory_alarm", "toolchain",
|
|
"alarms"])
|
|
@log_snapshot_after_test
|
|
def check_rabbitmq_memory_alarm(self):
|
|
"""Check that rabbitmq-memory-limit-warning and
|
|
rabbitmq-memory-limit-critical alarms work as expected.
|
|
|
|
Scenario:
|
|
1. Check the last value of the okay alarm in InfluxDB.
|
|
2. Set RabbitMQ memory limit to 101 percent of currently
|
|
used memory.
|
|
3. Check the last value of the warning alarm in InfluxDB.
|
|
4. Set RabbitMQ memory limit to the default value.
|
|
5. Check the last value of the okay alarm in InfluxDB.
|
|
6. Set RabbitMQ memory limit to 100.01 percent of currently
|
|
used memory.
|
|
7. Check the last value of the critical alarm in InfluxDB.
|
|
8. Set RabbitMQ memory limit to the default value.
|
|
9. Check the last value of the okay alarm in InfluxDB.
|
|
|
|
Duration 10m
|
|
"""
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["controller"])[0]
|
|
self._check_rabbit_mq_memory_alarms(controller, WARNING_STATUS,
|
|
RABBITMQ_MEMORY_WARNING_VALUE)
|
|
self._check_rabbit_mq_memory_alarms(controller, CRITICAL_STATUS,
|
|
RABBITMQ_MEMORY_CRITICAL_VALUE)
|
|
|
|
@test(depends_on_groups=["deploy_ha_toolchain"],
|
|
groups=["check_rabbitmq_pacemaker_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_rabbitmq_pacemaker_alarms(self):
|
|
"""Check that rabbitmq-pacemaker-* alarms work as expected.
|
|
|
|
Scenario:
|
|
1. Stop one slave RabbitMQ instance.
|
|
2. Check that the status of the RabbitMQ cluster is warning.
|
|
3. Stop the second slave RabbitMQ instance.
|
|
4. Check that the status of the RabbitMQ cluster is critical.
|
|
5. Stop the master RabbitMQ instance.
|
|
6. Check that the status of the RabbitMQ cluster is down.
|
|
7. Clear the RabbitMQ resource.
|
|
8. Check that the status of the RabbitMQ cluster is okay.
|
|
|
|
Duration 10m
|
|
"""
|
|
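        # Helper: ban the master_p_rabbitmq-server Pacemaker resource on the
        # given node (which stops RabbitMQ there), then check the
        # cluster-wide 'pacemaker' alarm status reported to InfluxDB.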
        def ban_and_check_status(node, status, wait=None):
            with self.fuel_web.get_ssh_for_node(node.name) as remote:
                logger.info("Ban rabbitmq resource on {}".format(node.name))
                self.remote_ops.ban_resource(remote,
                                             'master_p_rabbitmq-server',
                                             wait=wait)
            self.check_alarms('service', 'rabbitmq-cluster', 'pacemaker',
                              None, status)

        self.env.revert_snapshot("deploy_ha_toolchain")

        self.check_alarms('service', 'rabbitmq-cluster', 'pacemaker',
                          None, OKAY_STATUS)

        controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.helpers.cluster_id, ["controller"])

        controller = controllers[0]
        controller_node = self.fuel_web.get_devops_node_by_nailgun_node(
            controller)
        rabbitmq_master = self.fuel_web.get_rabbit_master_node(
            controller_node.name)
        rabbitmq_slaves = self.fuel_web.get_rabbit_slaves_node(
            controller_node.name)
        ban_and_check_status(rabbitmq_slaves[0], WARNING_STATUS, 120)
        ban_and_check_status(rabbitmq_slaves[1], CRITICAL_STATUS, 120)
        # Don't wait for the pcs operation to complete as it will fail since
        # the resource isn't running anywhere
        ban_and_check_status(rabbitmq_master, DOWN_STATUS)

        logger.info("Clear rabbitmq resource")
        with self.fuel_web.get_ssh_for_node(rabbitmq_master.name) as remote:
            self.remote_ops.clear_resource(remote,
                                           'master_p_rabbitmq-server',
                                           wait=240)
        self.check_alarms('service', 'rabbitmq-cluster', 'pacemaker',
                          None, OKAY_STATUS)

@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_root_fs_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_root_fs_alarms(self):
|
|
"""Check that root-fs-warning and root-fs-critical alarms work as
|
|
expected.
|
|
|
|
Scenario:
|
|
1. Fill up root filesystem to 91 percent.
|
|
2. Check the last value of the warning alarm in InfluxDB.
|
|
3. Clean the filesystem.
|
|
4. Fill up root filesystem to 96 percent.
|
|
5. Check the last value of the critical alarm in InfluxDB.
|
|
6. Clean the filesystem.
|
|
|
|
Duration 10m
|
|
"""
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["controller"])[0]
|
|
self._check_filesystem_alarms(
|
|
controller, "/$", "root-fs", "/bigfile", "controller")
|
|
|
|
@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_log_fs_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_log_fs_alarms(self):
|
|
"""Check that log-fs-warning and log-fs-critical alarms work as
|
|
expected.
|
|
|
|
Scenario:
|
|
1. Fill up /var/log filesystem to 91 percent.
|
|
2. Check the last value of the warning alarm in InfluxDB.
|
|
3. Clean the filesystem.
|
|
4. Fill up /var/log filesystem to 96 percent.
|
|
5. Check the last value of the critical alarm in InfluxDB.
|
|
6. Clean the filesystem.
|
|
|
|
Duration 10m
|
|
"""
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["controller"])[0]
|
|
self._check_filesystem_alarms(
|
|
controller, "/var/log", "log-fs", "/var/log/bigfile", "controller")
|
|
|
|
@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_nova_fs_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_nova_fs_alarms(self):
|
|
"""Check that nova-fs-warning and nova-fs-critical alarms work as
|
|
expected.
|
|
|
|
Scenario:
|
|
1. Fill up /var/lib/nova filesystem to 91 percent.
|
|
2. Check the last value of the warning alarm in InfluxDB.
|
|
3. Clean the filesystem.
|
|
4. Fill up /var/lib/nova filesystem to 96 percent.
|
|
5. Check the last value of the critical alarm in InfluxDB.
|
|
6. Clean the filesystem.
|
|
|
|
Duration 10m
|
|
"""
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
compute = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["compute"])[0]
|
|
self._check_filesystem_alarms(compute, "/var/lib/nova", "nova-fs",
|
|
"/var/lib/nova/bigfile", "compute")
|
|
|
|
@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_nova_api_logs_errors_alarms",
|
|
"http_logs_errors_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_nova_api_logs_errors_alarms(self):
|
|
"""Check that nova-logs-error and nova-api-http-errors alarms work as
|
|
expected.
|
|
|
|
Scenario:
|
|
1. Rename all nova tables to UPPERCASE.
|
|
2. Run some nova list command repeatedly.
|
|
3. Check the last value of the nova-logs-error alarm in InfluxDB.
|
|
4. Check the last value of the nova-api-http-errors alarm
|
|
in InfluxDB.
|
|
5. Revert all nova tables names to lowercase.
|
|
|
|
Duration 10m
|
|
"""
|
|
def get_servers_list():
|
|
try:
|
|
self.helpers.os_conn.get_servers()
|
|
except Exception:
|
|
pass
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
|
|
controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["controller"])[0]
|
|
|
|
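        # make_logical_db_unavailable renames the service's database tables
        # (see the scenario above), so the repeated API calls are expected to
        # fail, producing log errors and HTTP error responses that should
        # raise the corresponding alarms.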
with self.helpers.make_logical_db_unavailable("nova", controller):
|
|
metrics = {"nova-logs": "error",
|
|
"nova-api": "http_errors"}
|
|
self._verify_service_alarms(
|
|
get_servers_list, 100, metrics, WARNING_STATUS)
|
|
|
|
@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_neutron_api_logs_errors_alarms",
|
|
"http_logs_errors_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_neutron_api_logs_errors_alarms(self):
|
|
"""Check that neutron-logs-error and neutron-api-http-errors
|
|
alarms work as expected.
|
|
|
|
Scenario:
|
|
1. Rename all neutron tables to UPPERCASE.
|
|
2. Run some neutron agents list command repeatedly.
|
|
3. Check the last value of the neutron-logs-error alarm
|
|
in InfluxDB.
|
|
4. Check the last value of the neutron-api-http-errors alarm
|
|
in InfluxDB.
|
|
5. Revert all neutron tables names to lowercase.
|
|
|
|
Duration 10m
|
|
"""
|
|
def get_agents_list():
|
|
try:
|
|
self.helpers.os_conn.list_agents()
|
|
except Exception:
|
|
pass
|
|
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
|
|
controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["controller"])[0]
|
|
|
|
with self.helpers.make_logical_db_unavailable("neutron", controller):
|
|
metrics = {"neutron-logs": "error",
|
|
"neutron-api": "http_errors"}
|
|
self._verify_service_alarms(
|
|
get_agents_list, 100, metrics, WARNING_STATUS)
|
|
|
|
@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_glance_api_logs_errors_alarms",
|
|
"http_logs_errors_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_glance_api_logs_errors_alarms(self):
|
|
"""Check that glance-logs-error and glance-api-http-errors alarms work as
|
|
expected.
|
|
|
|
Scenario:
|
|
1. Rename all glance tables to UPPERCASE.
|
|
2. Run some glance image list command repeatedly.
|
|
3. Check the last value of the glance-logs-error alarm in InfluxDB.
|
|
4. Check the last value of the glance-api-http-errors alarm
|
|
in InfluxDB.
|
|
5. Revert all glance tables names to lowercase.
|
|
|
|
Duration 10m
|
|
"""
|
|
def get_images_list():
|
|
try:
|
|
# NOTE(rpromyshlennikov): List is needed here
|
|
# because glance image list is lazy method
|
|
return list(self.helpers.os_conn.get_image_list())
|
|
except Exception:
|
|
pass
|
|
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
|
|
controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["controller"])[0]
|
|
|
|
with self.helpers.make_logical_db_unavailable("glance", controller):
|
|
metrics = {"glance-logs": "error",
|
|
"glance-api": "http_errors"}
|
|
self._verify_service_alarms(
|
|
get_images_list, 100, metrics, WARNING_STATUS)
|
|
|
|
@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_heat_api_logs_errors_alarms",
|
|
"http_logs_errors_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_heat_api_logs_errors_alarms(self):
|
|
"""Check that heat-logs-error and heat-api-http-errors alarms work as
|
|
expected.
|
|
|
|
Scenario:
|
|
1. Rename all heat tables to UPPERCASE.
|
|
2. Run some heat stack list command repeatedly.
|
|
3. Check the last value of the heat-logs-error alarm in InfluxDB.
|
|
4. Check the last value of the heat-api-http-errors alarm
|
|
in InfluxDB.
|
|
5. Revert all heat tables names to lowercase.
|
|
|
|
Duration 10m
|
|
"""
|
|
def get_stacks_list():
|
|
try:
|
|
with self.fuel_web.get_ssh_for_nailgun_node(
|
|
controller) as remote:
|
|
return remote.execute(
|
|
". openrc && heat stack-list > /dev/null 2>&1")
|
|
except Exception:
|
|
pass
|
|
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
|
|
controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["controller"])[0]
|
|
|
|
with self.helpers.make_logical_db_unavailable("heat", controller):
|
|
metrics = {"heat-logs": "error",
|
|
"heat-api": "http_errors"}
|
|
self._verify_service_alarms(
|
|
get_stacks_list, 100, metrics, WARNING_STATUS)
|
|
|
|
@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_cinder_api_logs_errors_alarms",
|
|
"http_logs_errors_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_cinder_api_logs_errors_alarms(self):
|
|
"""Check that cinder-logs-error and cinder-api-http-errors alarms work as
|
|
expected.
|
|
|
|
Scenario:
|
|
1. Rename all cinder tables to UPPERCASE.
|
|
2. Run some cinder list command repeatedly.
|
|
3. Check the last value of the cinder-logs-error alarm in InfluxDB.
|
|
4. Check the last value of the cinder-api-http-errors alarm
|
|
in InfluxDB.
|
|
5. Revert all cinder tables names to lowercase.
|
|
|
|
Duration 10m
|
|
"""
|
|
|
|
def get_volumes_list():
|
|
try:
|
|
self.helpers.os_conn.cinder.volumes.list()
|
|
except Exception:
|
|
pass
|
|
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
|
|
controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["controller"])[0]
|
|
|
|
with self.helpers.make_logical_db_unavailable("cinder", controller):
|
|
metrics = {"cinder-logs": "error",
|
|
"cinder-api": "http_errors"}
|
|
self._verify_service_alarms(
|
|
get_volumes_list, 100, metrics, WARNING_STATUS)
|
|
|
|
@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_keystone_api_logs_errors_alarms",
|
|
"http_logs_errors_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_keystone_api_logs_errors_alarms(self):
|
|
"""Check that keystone-logs-error, keystone-public-api-http-errors and
|
|
keystone-admin-api-http-errors alarms work as expected.
|
|
|
|
Scenario:
|
|
1. Rename all keystone tables to UPPERCASE.
|
|
2. Run some keystone stack list command repeatedly.
|
|
3. Check the last value of the keystone-logs-error alarm
|
|
in InfluxDB.
|
|
4. Check the last value of the keystone-public-api-http-errors
|
|
alarm in InfluxDB.
|
|
5. Check the last value of the keystone-admin-api-http-errors alarm
|
|
in InfluxDB.
|
|
6. Revert all keystone tables names to lowercase.
|
|
|
|
Duration 10m
|
|
"""
|
|
|
|
def get_users_list(level):
|
|
additional_cmds = {
|
|
"user": ("&& export OS_AUTH_URL="
|
|
"`(echo $OS_AUTH_URL "
|
|
"| sed 's%:5000/%:5000/v2.0%')` "),
|
|
"admin": ("&& export OS_AUTH_URL="
|
|
"`(echo $OS_AUTH_URL "
|
|
"| sed 's%:5000/%:35357/v2.0%')` ")
|
|
}
|
|
|
|
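            # The sed substitutions above point OS_AUTH_URL at the v2.0
            # public endpoint (port 5000) or admin endpoint (port 35357), so
            # the failing requests are attributed to the corresponding
            # keystone-public-api or keystone-admin-api alarm source.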
            def get_users_list_parametrized():
                try:
                    with self.fuel_web.get_ssh_for_nailgun_node(
                            controller) as remote:
                        return remote.execute(
                            ". openrc {additional_cmd}"
                            "&& keystone user-list > /dev/null 2>&1".format(
                                additional_cmd=additional_cmds[level]
                            )
                        )
                except Exception:
                    pass
            return get_users_list_parametrized

        self.env.revert_snapshot("deploy_toolchain")

        controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.helpers.cluster_id, ["controller"])[0]

        with self.helpers.make_logical_db_unavailable("keystone", controller):
            metrics = {"keystone-logs": "error",
                       "keystone-public-api": "http_errors"}
            self._verify_service_alarms(
                get_users_list("user"), 100, metrics, WARNING_STATUS)

            metrics = {"keystone-admin-api": "http_errors"}
            self._verify_service_alarms(
                get_users_list("admin"), 100, metrics, WARNING_STATUS)

@test(depends_on_groups=["deploy_toolchain"],
|
|
groups=["check_swift_api_logs_errors_alarms",
|
|
"http_logs_errors_alarms", "toolchain", "alarms"])
|
|
@log_snapshot_after_test
|
|
def check_swift_api_logs_errors_alarms(self):
|
|
"""Check that swift-logs-error and swift-api-http-error alarms
|
|
work as expected.
|
|
|
|
Scenario:
|
|
1. Stop swift-account service on controller.
|
|
2. Run some swift stack list command repeatedly.
|
|
3. Check the last value of the swift-logs-error alarm
|
|
in InfluxDB.
|
|
4. Check the last value of the swift-api-http-errors alarm
|
|
in InfluxDB.
|
|
5. Start swift-account service on controller.
|
|
|
|
Duration 15m
|
|
"""
|
|
|
|
def get_objects_list():
|
|
try:
|
|
with self.fuel_web.get_ssh_for_nailgun_node(
|
|
controller) as remote:
|
|
return remote.execute(
|
|
". openrc "
|
|
"&& export OS_AUTH_URL="
|
|
"`(echo $OS_AUTH_URL | sed 's%:5000/%:5000/v2.0%')` "
|
|
"&& swift list > /dev/null 2>&1")
|
|
except Exception:
|
|
pass
|
|
|
|
self.env.revert_snapshot("deploy_toolchain")
|
|
|
|
controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
|
|
self.helpers.cluster_id, ["controller"])[0]
|
|
|
|
with self.fuel_web.get_ssh_for_nailgun_node(
|
|
controller) as remote:
|
|
self.remote_ops.manage_service(
|
|
remote, "swift-account", "stop")
|
|
|
|
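        # With the account service stopped, Swift requests are expected to
        # fail and be logged as errors, so far fewer triggers are needed here
        # than for the database-backed services above.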
metrics = {"swift-logs": "error",
|
|
"swift-api": "http_errors"}
|
|
self._verify_service_alarms(
|
|
get_objects_list, 10, metrics, WARNING_STATUS)
|
|
|
|
with self.fuel_web.get_ssh_for_nailgun_node(controller) as remote:
|
|
self.remote_ops.manage_service(
|
|
remote, "swift-account", "start")
|