From 0ef4188eab255e79644d4d94ae96ee0ff2b5e187 Mon Sep 17 00:00:00 2001
From: Rodion Promyshlennikov
Date: Thu, 5 May 2016 15:04:57 +0300
Subject: [PATCH] Add scaling and failover system tests.

Change-Id: I53be9c48af6223101c4754d60bf58c38c32e1597
---
 stacklight_tests/helpers/helpers.py | 57 ++++-
 stacklight_tests/influxdb_grafana/api.py | 10 +-
 .../influxdb_grafana/test_system.py | 203 ++++++++++++++++++
 stacklight_tests/run_tests.py | 2 +
 4 files changed, 265 insertions(+), 7 deletions(-)
 create mode 100644 stacklight_tests/influxdb_grafana/test_system.py

diff --git a/stacklight_tests/helpers/helpers.py b/stacklight_tests/helpers/helpers.py
index f21c51a..88a999e 100644
--- a/stacklight_tests/helpers/helpers.py
+++ b/stacklight_tests/helpers/helpers.py
@@ -15,6 +15,7 @@
 import os
 import urllib2
 
+from devops.helpers import helpers
 from fuelweb_test import logger
 from proboscis import asserts
 
@@ -84,5 +85,57 @@ class PluginHelper(object):
         self.fuel_web.deploy_cluster_wait(self.cluster_id)
 
     def run_ostf(self, *args, **kwargs):
-        kwargs.update({"cluster_id": self.cluster_id})
-        self.fuel_web.run_ostf(*args, **kwargs)
+        self.fuel_web.run_ostf(self.cluster_id, *args, **kwargs)
+
+    def add_node_to_cluster(self, node, redeploy=True, check_services=False):
+        """Add a node to the cluster.
+        :param node: node to add to the cluster
+        :param redeploy: redeploy the cluster or just update its settings
+        :param check_services: run OSTF after the redeploy or not
+        """
+        self.fuel_web.update_nodes(
+            self.cluster_id,
+            node,
+        )
+        if redeploy:
+            self.fuel_web.deploy_cluster_wait(self.cluster_id,
+                                              check_services=check_services)
+
+    def remove_node_from_cluster(self, node, redeploy=True,
+                                 check_services=False):
+        """Remove a node from the cluster.
+        :param node: node to remove from the cluster
+        :param redeploy: redeploy the cluster or just update its settings
+        :param check_services: run OSTF after the redeploy or not
+        """
+        self.fuel_web.update_nodes(
+            self.cluster_id,
+            node,
+            pending_addition=False, pending_deletion=True,
+        )
+        if redeploy:
+            self.fuel_web.deploy_cluster_wait(self.cluster_id,
+                                              check_services=check_services)
+
+    def get_master_node_by_role(self, role_name, excluded_nodes_fqdns=()):
+        nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.cluster_id, role_name)
+        nodes = [node for node in nodes
+                 if node['fqdn'] not in set(excluded_nodes_fqdns)]
+        with self.fuel_web.get_ssh_for_nailgun_node(nodes[0]) as remote:
+            stdout = remote.check_call(
+                'pcs status cluster | grep "Current DC:"')["stdout"][0]
+        for node in nodes:
+            if node['fqdn'] in stdout:
+                return node
+
+    def hard_shutdown_node(self, fqdn):
+        devops_node = self.fuel_web.get_devops_node_by_nailgun_fqdn(
+            fqdn)
+        msg = 'Node {0} has not become offline after hard shutdown'.format(
+            devops_node.name)
+        logger.info('Destroying node %s', devops_node.name)
+        devops_node.destroy()
+        logger.info('Waiting for node %s to go offline', devops_node.name)
+        helpers.wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
+            devops_node)['online'], timeout=60 * 5, timeout_msg=msg)
diff --git a/stacklight_tests/influxdb_grafana/api.py b/stacklight_tests/influxdb_grafana/api.py
index e3f9530..39e021a 100644
--- a/stacklight_tests/influxdb_grafana/api.py
+++ b/stacklight_tests/influxdb_grafana/api.py
@@ -82,17 +82,17 @@ class InfluxdbPluginApi(base_test.PluginApi):
             nodes_count_responsed)
         asserts.assert_equal(nodes_count, nodes_count_responsed, msg)
 
-    def get_influxdb_master_node(self):
+    def get_influxdb_master_node(self, excluded_nodes_fqdns=()):
         influx_master_node = self.helpers.get_master_node_by_role(
-            self.settings.role_name)
+            self.settings.role_name, excluded_nodes_fqdns=excluded_nodes_fqdns)
         return influx_master_node
 
-    def wait_for_rotation_influx_master(self,
-                                        old_master, timeout=5 * 60):
+    def wait_for_rotation_influx_master(self, old_master, timeout=5 * 60):
         logger.info('Wait a influxDB master node rotation')
         msg = "Failed influxDB master rotation from {0}".format(old_master)
         devops_helpers.wait(
-            lambda: old_master != self.get_influxdb_master_node()['fqdn'],
+            lambda: old_master != self.get_influxdb_master_node(
+                excluded_nodes_fqdns=(old_master,))['fqdn'],
             timeout=timeout, timeout_msg=msg)
 
     def wait_plugin_online(self, timeout=5 * 60):
diff --git a/stacklight_tests/influxdb_grafana/test_system.py b/stacklight_tests/influxdb_grafana/test_system.py
new file mode 100644
index 0000000..6851ea6
--- /dev/null
+++ b/stacklight_tests/influxdb_grafana/test_system.py
@@ -0,0 +1,203 @@
+# Copyright 2016 Mirantis, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from proboscis import test
+
+from fuelweb_test.helpers.decorators import log_snapshot_after_test
+
+from stacklight_tests.influxdb_grafana import api
+from stacklight_tests.influxdb_grafana import test_smoke_bvt
+
+
+@test(groups=["plugins"])
+class TestNodesInfluxdbPlugin(api.InfluxdbPluginApi):
+    """Class for system tests for InfluxDB-Grafana plugin."""
+
+    @test(depends_on=[
+        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
+        groups=["check_scaling_influxdb_grafana", "scaling",
+                "influxdb_grafana", "system",
+                "check_add_delete_controller_influxdb_grafana"])
+    @log_snapshot_after_test
+    def add_remove_controller_influxdb_grafana_plugin(self):
+        """Verify that the number of controllers can scale up and down
+
+        Scenario:
+            1. Revert snapshot with 9 deployed nodes in HA configuration
+            2. Remove one controller node and update the cluster
+            3. Check that plugin is working
+            4. Run OSTF
+            5. Add one controller node (return previous state) and
+               update the cluster
+            6. Check that plugin is working
+            7. Run OSTF
+
+        Duration 120m
+        """
+        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")
+
+        manipulated_node = {'slave-03': ['controller']}
+
+        # NOTE(rpromyshlennikov): "check_services=False" and "should_fail=1"
+        # are passed to deploy_cluster_wait and run_ostf because Nova keeps
+        # the removed node in its service list, so exactly one OSTF test is
+        # expected to fail after the node is deleted
+
+        # Remove controller
+        self.helpers.remove_node_from_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        self.helpers.run_ostf(should_fail=1)
+
+        # Add controller
+        self.helpers.add_node_to_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        self.helpers.run_ostf(should_fail=1)
+
+        self.env.make_snapshot("add_remove_controller_influxdb_grafana_plugin")
+
+    @test(depends_on=[
+        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
+        groups=["check_scaling_influxdb_grafana", "scaling",
+                "influxdb_grafana", "system",
+                "check_add_delete_compute_influxdb_grafana"])
+    @log_snapshot_after_test
+    def add_remove_compute_influxdb_grafana_plugin(self):
+        """Verify that the number of computes can scale up and down
+
+        Scenario:
+            1. Revert snapshot with 9 deployed nodes in HA configuration
+            2. Remove one compute node and update the cluster
+            3. Check that plugin is working
+            4. Run OSTF
+            5. Add one compute node (return previous state) and
+               update the cluster
+            6. Check that plugin is working
+            7. Run OSTF
+
+        Duration 120m
+        """
+        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")
+
+        manipulated_node = {'slave-04': ['compute', 'cinder']}
+
+        # NOTE(rpromyshlennikov): "check_services=False" and "should_fail=1"
+        # are passed to deploy_cluster_wait and run_ostf because Nova keeps
+        # the removed node in its service list, so exactly one OSTF test is
+        # expected to fail after the node is deleted
+
+        # Remove compute
+        self.helpers.remove_node_from_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        self.helpers.run_ostf(should_fail=1)
+
+        # Add compute
+        self.helpers.add_node_to_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        self.helpers.run_ostf(should_fail=1)
+
+        self.env.make_snapshot("add_remove_compute_influxdb_grafana_plugin")
+
+    @test(depends_on=[
+        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
+        groups=["check_scaling_influxdb_grafana", "scaling",
+                "influxdb_grafana", "system",
+                "check_add_delete_influxdb_grafana_node"])
+    @log_snapshot_after_test
+    def add_remove_node_with_influxdb_grafana_plugin(self):
+        """Verify that the number of InfluxDB-Grafana nodes
+        can scale up and down
+
+        Scenario:
+            1. Revert snapshot with 9 deployed nodes in HA configuration
+            2. Remove one InfluxDB-Grafana node and update the cluster
+            3. Check that plugin is working
+            4. Run OSTF
+            5. Add one InfluxDB-Grafana node (return previous state) and
+               update the cluster
+            6. Check that plugin is working
+            7. Run OSTF
+
+        Duration 120m
+        """
+        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")
+
+        self.check_influxdb_nodes_count(3)
+
+        manipulated_node = {'slave-07': self.settings.role_name}
+
+        # Remove InfluxDB-Grafana node
+        self.helpers.remove_node_from_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        # NOTE(rpromyshlennikov): this check should not fail,
+        # but it will be fixed in the next releases
+        self.check_influxdb_nodes_count(2)
+
+        self.helpers.run_ostf()
+
+        # Add InfluxDB-Grafana node
+        self.helpers.add_node_to_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        self.check_influxdb_nodes_count(3)
+
+        self.helpers.run_ostf()
+
+        self.env.make_snapshot("add_remove_node_with_influxdb_grafana_plugin")
+
+    @test(depends_on=[
+        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
+        groups=["check_failover_influxdb_grafana", "failover",
+                "influxdb_grafana", "system", "destructive",
+                "check_shutdown_influxdb_grafana_node"])
+    @log_snapshot_after_test
+    def shutdown_node_with_influxdb_grafana_plugin(self):
+        """Verify that failover for the InfluxDB cluster works.
+
+        Scenario:
+            1. Revert snapshot with 9 deployed nodes in HA configuration
+            2. Determine the InfluxDB master node where vip_influxdb is started
+            3. Shut down the InfluxDB master node
+            4. Check that vip_influxdb is started on another node
+            5. Check that plugin is working
+            6. Check that no data is lost after the shutdown
+            7. Run OSTF
+
+        Duration 30m
+        """
+        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")
+
+        master_node_hostname = self.get_influxdb_master_node()['fqdn']
+
+        self.helpers.hard_shutdown_node(master_node_hostname)
+
+        self.wait_for_rotation_influx_master(master_node_hostname)
+
+        self.check_plugin_online()
+
+        # TODO(rpromyshlennikov): check that no data is lost
+
+        self.helpers.run_ostf()
+
+        self.env.make_snapshot("shutdown_node_with_influxdb_grafana_plugin")
diff --git a/stacklight_tests/run_tests.py b/stacklight_tests/run_tests.py
index 318d03c..1796929 100644
--- a/stacklight_tests/run_tests.py
+++ b/stacklight_tests/run_tests.py
@@ -41,7 +41,9 @@ class CloseSSHConnectionsPlugin(plugins.Plugin):
 
 
 def import_tests():
+    from stacklight_tests.influxdb_grafana import test_destructive  # noqa
     from stacklight_tests.influxdb_grafana import test_smoke_bvt  # noqa
+    from stacklight_tests.influxdb_grafana import test_system  # noqa
 
 
 def run_tests():