From 0ef4188eab255e79644d4d94ae96ee0ff2b5e187 Mon Sep 17 00:00:00 2001
From: Rodion Promyshlennikov
Date: Thu, 5 May 2016 15:04:57 +0300
Subject: [PATCH] Add scaling and failover system tests.

Change-Id: I53be9c48af6223101c4754d60bf58c38c32e1597
---
 stacklight_tests/helpers/helpers.py | 57 ++++-
 stacklight_tests/influxdb_grafana/api.py | 10 +-
 .../influxdb_grafana/test_system.py | 203 ++++++++++++++++++
 stacklight_tests/run_tests.py | 2 +
 4 files changed, 265 insertions(+), 7 deletions(-)
 create mode 100644 stacklight_tests/influxdb_grafana/test_system.py

diff --git a/stacklight_tests/helpers/helpers.py b/stacklight_tests/helpers/helpers.py
index f21c51a..88a999e 100644
--- a/stacklight_tests/helpers/helpers.py
+++ b/stacklight_tests/helpers/helpers.py
@@ -15,6 +15,7 @@
 import os
 import urllib2
 
+from devops.helpers import helpers
 from fuelweb_test import logger
 from proboscis import asserts
 
@@ -84,5 +85,57 @@ class PluginHelper(object):
         self.fuel_web.deploy_cluster_wait(self.cluster_id)
 
     def run_ostf(self, *args, **kwargs):
-        kwargs.update({"cluster_id": self.cluster_id})
-        self.fuel_web.run_ostf(*args, **kwargs)
+        self.fuel_web.run_ostf(self.cluster_id, *args, **kwargs)
+
+    def add_node_to_cluster(self, node, redeploy=True, check_services=False):
+        """Add a node to the cluster.
+        :param node: node to add to the cluster
+        :param redeploy: redeploy the cluster or just update its settings
+        :param check_services: run OSTF after the redeploy or not
+        """
+        self.fuel_web.update_nodes(
+            self.cluster_id,
+            node,
+        )
+        if redeploy:
+            self.fuel_web.deploy_cluster_wait(self.cluster_id,
+                                              check_services=check_services)
+
+    def remove_node_from_cluster(self, node, redeploy=True,
+                                 check_services=False):
+        """Remove a node from the cluster.
+        :param node: node to remove from the cluster
+        :param redeploy: redeploy the cluster or just update its settings
+        :param check_services: run OSTF after the redeploy or not
+        """
+        self.fuel_web.update_nodes(
+            self.cluster_id,
+            node,
+            pending_addition=False, pending_deletion=True,
+        )
+        if redeploy:
+            self.fuel_web.deploy_cluster_wait(self.cluster_id,
+                                              check_services=check_services)
+
+    def get_master_node_by_role(self, role_name, excluded_nodes_fqdns=()):
+        nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.cluster_id, role_name)
+        nodes = [node for node in nodes
+                 if node['fqdn'] not in set(excluded_nodes_fqdns)]
+        with self.fuel_web.get_ssh_for_nailgun_node(nodes[0]) as remote:
+            stdout = remote.check_call(
+                'pcs status cluster | grep "Current DC:"')["stdout"][0]
+        for node in nodes:
+            if node['fqdn'] in stdout:
+                return node
+
+    def hard_shutdown_node(self, fqdn):
+        devops_node = self.fuel_web.get_devops_node_by_nailgun_fqdn(
+            fqdn)
+        msg = 'Node {0} has not become offline after hard shutdown'.format(
+            devops_node.name)
+        logger.info('Destroying node %s', devops_node.name)
+        devops_node.destroy()
+        logger.info('Waiting for node %s to go offline', devops_node.name)
+        helpers.wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
+            devops_node)['online'], timeout=60 * 5, timeout_msg=msg)
diff --git a/stacklight_tests/influxdb_grafana/api.py b/stacklight_tests/influxdb_grafana/api.py
index e3f9530..39e021a 100644
--- a/stacklight_tests/influxdb_grafana/api.py
+++ b/stacklight_tests/influxdb_grafana/api.py
@@ -82,17 +82,17 @@ class InfluxdbPluginApi(base_test.PluginApi):
             nodes_count_responsed)
         asserts.assert_equal(nodes_count, nodes_count_responsed, msg)
 
-    def get_influxdb_master_node(self):
+    def get_influxdb_master_node(self, excluded_nodes_fqdns=()):
         influx_master_node = self.helpers.get_master_node_by_role(
-            self.settings.role_name)
+            self.settings.role_name, excluded_nodes_fqdns=excluded_nodes_fqdns)
         return influx_master_node
 
-    def wait_for_rotation_influx_master(self,
-                                        old_master, timeout=5 * 60):
+    def wait_for_rotation_influx_master(self, old_master, timeout=5 * 60):
         logger.info('Wait a influxDB master node rotation')
         msg = "Failed influxDB master rotation from {0}".format(old_master)
         devops_helpers.wait(
-            lambda: old_master != self.get_influxdb_master_node()['fqdn'],
+            lambda: old_master != self.get_influxdb_master_node(
+                excluded_nodes_fqdns=(old_master,))['fqdn'],
             timeout=timeout, timeout_msg=msg)
 
     def wait_plugin_online(self, timeout=5 * 60):
diff --git a/stacklight_tests/influxdb_grafana/test_system.py b/stacklight_tests/influxdb_grafana/test_system.py
new file mode 100644
index 0000000..6851ea6
--- /dev/null
+++ b/stacklight_tests/influxdb_grafana/test_system.py
@@ -0,0 +1,203 @@
+# Copyright 2016 Mirantis, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from proboscis import test
+
+from fuelweb_test.helpers.decorators import log_snapshot_after_test
+
+from stacklight_tests.influxdb_grafana import api
+from stacklight_tests.influxdb_grafana import test_smoke_bvt
+
+
+@test(groups=["plugins"])
+class TestNodesInfluxdbPlugin(api.InfluxdbPluginApi):
+    """Class for system tests for InfluxDB-Grafana plugin."""
+
+    @test(depends_on=[
+        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
+        groups=["check_scaling_influxdb_grafana", "scaling",
+                "influxdb_grafana", "system",
+                "check_add_delete_controller_influxdb_grafana"])
+    @log_snapshot_after_test
+    def add_remove_controller_influxdb_grafana_plugin(self):
+        """Verify that the number of controllers can scale up and down
+
+        Scenario:
+            1. Revert snapshot with 9 deployed nodes in HA configuration
+            2. Remove one controller node and update the cluster
+            3. Check that plugin is working
+            4. Run OSTF
+            5. Add one controller node (return previous state) and
+               update the cluster
+            6. Check that plugin is working
+            7. Run OSTF
+
+        Duration 120m
+        """
+        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")
+
+        manipulated_node = {'slave-03': ['controller']}
+
+        # NOTE(rpromyshlennikov): "check_services=False" and "should_fail=1"
+        # are passed to deploy_cluster_wait and run_ostf because Nova keeps
+        # the removed node in its service list, so exactly one OSTF test is
+        # expected to fail after the node is deleted
+
+        # Remove controller
+        self.helpers.remove_node_from_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        self.helpers.run_ostf(should_fail=1)
+
+        # Add controller
+        self.helpers.add_node_to_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        self.helpers.run_ostf(should_fail=1)
+
+        self.env.make_snapshot("add_remove_controller_influxdb_grafana_plugin")
+
+    @test(depends_on=[
+        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
+        groups=["check_scaling_influxdb_grafana", "scaling",
+                "influxdb_grafana", "system",
+                "check_add_delete_compute_influxdb_grafana"])
+    @log_snapshot_after_test
+    def add_remove_compute_influxdb_grafana_plugin(self):
+        """Verify that the number of computes can scale up and down
+
+        Scenario:
+            1. Revert snapshot with 9 deployed nodes in HA configuration
+            2. Remove one compute node and update the cluster
+            3. Check that plugin is working
+            4. Run OSTF
+            5. Add one compute node (return previous state) and
+               update the cluster
+            6. Check that plugin is working
+            7. Run OSTF
+
+        Duration 120m
+        """
+        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")
+
+        manipulated_node = {'slave-04': ['compute', 'cinder']}
+
+        # NOTE(rpromyshlennikov): "check_services=False" and "should_fail=1"
+        # are passed to deploy_cluster_wait and run_ostf because Nova keeps
+        # the removed node in its service list, so exactly one OSTF test is
+        # expected to fail after the node is deleted
+
+        # Remove compute
+        self.helpers.remove_node_from_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        self.helpers.run_ostf(should_fail=1)
+
+        # Add compute
+        self.helpers.add_node_to_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        self.helpers.run_ostf(should_fail=1)
+
+        self.env.make_snapshot("add_remove_compute_influxdb_grafana_plugin")
+
+    @test(depends_on=[
+        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
+        groups=["check_scaling_influxdb_grafana", "scaling",
+                "influxdb_grafana", "system",
+                "check_add_delete_influxdb_grafana_node"])
+    @log_snapshot_after_test
+    def add_remove_node_with_influxdb_grafana_plugin(self):
+        """Verify that the number of InfluxDB-Grafana nodes
+        can scale up and down
+
+        Scenario:
+            1. Revert snapshot with 9 deployed nodes in HA configuration
+            2. Remove one InfluxDB-Grafana node and update the cluster
+            3. Check that plugin is working
+            4. Run OSTF
+            5. Add one InfluxDB-Grafana node (return previous state) and
+               update the cluster
+            6. Check that plugin is working
+            7. Run OSTF
+
+        Duration 120m
+        """
+        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")
+
+        self.check_influxdb_nodes_count(3)
+
+        manipulated_node = {'slave-07': self.settings.role_name}
+
+        # Remove InfluxDB-Grafana node
+        self.helpers.remove_node_from_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        # NOTE(rpromyshlennikov): this check should not fail,
+        # but it will be fixed in the next releases
+        self.check_influxdb_nodes_count(2)
+
+        self.helpers.run_ostf()
+
+        # Add InfluxDB-Grafana node
+        self.helpers.add_node_to_cluster(manipulated_node)
+
+        self.check_plugin_online()
+
+        self.check_influxdb_nodes_count(3)
+
+        self.helpers.run_ostf()
+
+        self.env.make_snapshot("add_remove_node_with_influxdb_grafana_plugin")
+
+    @test(depends_on=[
+        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
+        groups=["check_failover_influxdb_grafana", "failover",
+                "influxdb_grafana", "system", "destructive",
+                "check_shutdown_influxdb_grafana_node"])
+    @log_snapshot_after_test
+    def shutdown_node_with_influxdb_grafana_plugin(self):
+        """Verify that failover for the InfluxDB cluster works.
+
+        Scenario:
+            1. Revert snapshot with 9 deployed nodes in HA configuration
+            2. Determine the InfluxDB master node where vip_influxdb is started
+            3. Shut down the InfluxDB master node
+            4. Check that vip_influxdb is started on another node
+            5. Check that plugin is working
+            6. Check that no data is lost after the shutdown
+            7. Run OSTF
+
+        Duration 30m
+        """
+        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")
+
+        master_node_hostname = self.get_influxdb_master_node()['fqdn']
+
+        self.helpers.hard_shutdown_node(master_node_hostname)
+
+        self.wait_for_rotation_influx_master(master_node_hostname)
+
+        self.check_plugin_online()
+
+        # TODO(rpromyshlennikov): check that no data is lost
+
+        self.helpers.run_ostf()
+
+        self.env.make_snapshot("shutdown_node_with_influxdb_grafana_plugin")
diff --git a/stacklight_tests/run_tests.py b/stacklight_tests/run_tests.py
index 318d03c..1796929 100644
--- a/stacklight_tests/run_tests.py
+++ b/stacklight_tests/run_tests.py
@@ -41,7 +41,9 @@ class CloseSSHConnectionsPlugin(plugins.Plugin):
 
 
 def import_tests():
+    from stacklight_tests.influxdb_grafana import test_destructive  # noqa
     from stacklight_tests.influxdb_grafana import test_smoke_bvt  # noqa
+    from stacklight_tests.influxdb_grafana import test_system  # noqa
 
 
 def run_tests():