Add scaling and failover system tests.

Change-Id: I53be9c48af6223101c4754d60bf58c38c32e1597
Rodion Promyshlennikov 2016-05-05 15:04:57 +03:00
parent 8c13244e25
commit 0ef4188eab
4 changed files with 265 additions and 7 deletions

View File

@@ -15,6 +15,7 @@
import os
import urllib2
+from devops.helpers import helpers
from fuelweb_test import logger
from proboscis import asserts
@@ -84,5 +85,57 @@ class PluginHelper(object):
        self.fuel_web.deploy_cluster_wait(self.cluster_id)

    def run_ostf(self, *args, **kwargs):
-        self.fuel_web.run_ostf(self.cluster_id, *args, **kwargs)
+        kwargs.update({"cluster_id": self.cluster_id})
+        self.fuel_web.run_ostf(*args, **kwargs)
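After this change, callers no longer pass the cluster id themselves: run_ostf injects it into the keyword arguments before delegating to fuel_web.run_ostf. A minimal sketch of the resulting call pattern (the plugin_helper name is illustrative):

    # The caller passes only OSTF options; the helper supplies cluster_id.
    plugin_helper.run_ostf(should_fail=1)
    # ...which is now equivalent to:
    # fuel_web.run_ostf(cluster_id=plugin_helper.cluster_id, should_fail=1)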
    def add_node_to_cluster(self, node, redeploy=True, check_services=False):
        """Method to add node to cluster.

        :param node: node to add to cluster
        :param redeploy: redeploy or just update settings
        :param check_services: run OSTF after redeploy or not
        """
        self.fuel_web.update_nodes(
            self.cluster_id,
            node,
        )
        if redeploy:
            self.fuel_web.deploy_cluster_wait(self.cluster_id,
                                              check_services=check_services)
    def remove_node_from_cluster(self, node, redeploy=True,
                                 check_services=False):
        """Method to remove node from cluster.

        :param node: node to remove from cluster
        :param redeploy: redeploy or just update settings
        :param check_services: run OSTF after redeploy or not
        """
        self.fuel_web.update_nodes(
            self.cluster_id,
            node,
            pending_addition=False, pending_deletion=True,
        )
        if redeploy:
            self.fuel_web.deploy_cluster_wait(self.cluster_id,
                                              check_services=check_services)
    def get_master_node_by_role(self, role_name, excluded_nodes_fqdns=()):
        nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.cluster_id, role_name)
        nodes = [node for node in nodes
                 if node['fqdn'] not in set(excluded_nodes_fqdns)]
        with self.fuel_web.get_ssh_for_nailgun_node(nodes[0]) as remote:
            stdout = remote.check_call(
                'pcs status cluster | grep "Current DC:"')["stdout"][0]
        for node in nodes:
            if node['fqdn'] in stdout:
                return node
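get_master_node_by_role treats the Pacemaker "Current DC" (Designated Controller) as the master: it greps the "Current DC:" line from pcs status on one candidate node and returns the candidate whose FQDN appears in that line. The matching step in isolation, with illustrative sample data:

    # Sample "pcs status cluster" output line (exact format varies by pcs version)
    stdout = 'Current DC: node-2.test.domain.local - partition with quorum'
    nodes = [{'fqdn': 'node-1.test.domain.local'},
             {'fqdn': 'node-2.test.domain.local'}]
    # The first candidate whose FQDN occurs in the line is taken as the master
    master = next(node for node in nodes if node['fqdn'] in stdout)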
    def hard_shutdown_node(self, fqdn):
        devops_node = self.fuel_web.get_devops_node_by_nailgun_fqdn(fqdn)
        msg = 'Node {0} has not become offline after hard shutdown'.format(
            devops_node.name)
        logger.info('Destroying node %s', devops_node.name)
        devops_node.destroy()
        logger.info('Waiting for node %s to go offline', devops_node.name)
        helpers.wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
            devops_node)['online'], timeout=60 * 5, timeout_msg=msg)
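hard_shutdown_node destroys the VM and then relies on devops.helpers.helpers.wait, which polls a predicate until it returns true or fails with timeout_msg once the timeout expires. The same pattern in isolation (is_offline is a hypothetical stand-in for the nailgun online-status lookup):

    from devops.helpers import helpers

    def is_offline(node_name):
        # Hypothetical stand-in for querying nailgun's 'online' flag
        return True

    helpers.wait(lambda: is_offline('slave-01'),
                 timeout=60 * 5,
                 timeout_msg='Node slave-01 has not become offline')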

View File

@@ -82,17 +82,17 @@ class InfluxdbPluginApi(base_test.PluginApi):
            nodes_count_responsed)
        asserts.assert_equal(nodes_count, nodes_count_responsed, msg)
-    def get_influxdb_master_node(self):
+    def get_influxdb_master_node(self, excluded_nodes_fqdns=()):
         influx_master_node = self.helpers.get_master_node_by_role(
-            self.settings.role_name)
+            self.settings.role_name, excluded_nodes_fqdns=excluded_nodes_fqdns)
         return influx_master_node

-    def wait_for_rotation_influx_master(self,
-                                        old_master, timeout=5 * 60):
+    def wait_for_rotation_influx_master(self, old_master, timeout=5 * 60):
         logger.info('Waiting for InfluxDB master node rotation')
         msg = "Failed InfluxDB master rotation from {0}".format(old_master)
         devops_helpers.wait(
-            lambda: old_master != self.get_influxdb_master_node()['fqdn'],
+            lambda: old_master != self.get_influxdb_master_node(
+                excluded_nodes_fqdns=(old_master,))['fqdn'],
             timeout=timeout, timeout_msg=msg)
def wait_plugin_online(self, timeout=5 * 60):

View File

@@ -0,0 +1,203 @@
# Copyright 2016 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from proboscis import test

from fuelweb_test.helpers.decorators import log_snapshot_after_test

from stacklight_tests.influxdb_grafana import api
from stacklight_tests.influxdb_grafana import test_smoke_bvt


@test(groups=["plugins"])
class TestNodesInfluxdbPlugin(api.InfluxdbPluginApi):
    """Class for system tests for InfluxDB-Grafana plugin."""
    @test(depends_on=[
        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
        groups=["check_scaling_influxdb_grafana", "scaling",
                "influxdb_grafana", "system",
                "check_add_delete_controller_influxdb_grafana"])
    @log_snapshot_after_test
    def add_remove_controller_influxdb_grafana_plugin(self):
        """Verify that the number of controllers can scale up and down.

        Scenario:
            1. Revert snapshot with 9 deployed nodes in HA configuration
            2. Remove one controller node and update the cluster
            3. Check that plugin is working
            4. Run OSTF
            5. Add one controller node (return to the previous state)
               and update the cluster
            6. Check that plugin is working
            7. Run OSTF

        Duration 120m
        """
        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")

        manipulated_node = {'slave-03': ['controller']}

        # NOTE(rpromyshlennikov): "check_services=False" and "should_fail=1"
        # are passed to deploy_cluster_wait and run_ostf because nova keeps
        # a removed node in its service list.

        # Remove controller
        self.helpers.remove_node_from_cluster(manipulated_node)
        self.check_plugin_online()
        self.helpers.run_ostf(should_fail=1)

        # Add controller
        self.helpers.add_node_to_cluster(manipulated_node)
        self.check_plugin_online()
        self.helpers.run_ostf(should_fail=1)

        self.env.make_snapshot("add_remove_controller_influxdb_grafana_plugin")
    @test(depends_on=[
        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
        groups=["check_scaling_influxdb_grafana", "scaling",
                "influxdb_grafana", "system",
                "check_add_delete_compute_influxdb_grafana"])
    @log_snapshot_after_test
    def add_remove_compute_influxdb_grafana_plugin(self):
        """Verify that the number of computes can scale up and down.

        Scenario:
            1. Revert snapshot with 9 deployed nodes in HA configuration
            2. Remove one compute node and update the cluster
            3. Check that plugin is working
            4. Run OSTF
            5. Add one compute node (return to the previous state)
               and update the cluster
            6. Check that plugin is working
            7. Run OSTF

        Duration 120m
        """
        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")

        manipulated_node = {'slave-04': ['compute', 'cinder']}

        # NOTE(rpromyshlennikov): "check_services=False" and "should_fail=1"
        # are passed to deploy_cluster_wait and run_ostf because nova keeps
        # a removed node in its service list.

        # Remove compute
        self.helpers.remove_node_from_cluster(manipulated_node)
        self.check_plugin_online()
        self.helpers.run_ostf(should_fail=1)

        # Add compute
        self.helpers.add_node_to_cluster(manipulated_node)
        self.check_plugin_online()
        self.helpers.run_ostf(should_fail=1)

        self.env.make_snapshot("add_remove_compute_influxdb_grafana_plugin")
    @test(depends_on=[
        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
        groups=["check_scaling_influxdb_grafana", "scaling",
                "influxdb_grafana", "system",
                "check_add_delete_influxdb_grafana_node"])
    @log_snapshot_after_test
    def add_remove_node_with_influxdb_grafana_plugin(self):
        """Verify that the number of InfluxDB-Grafana nodes
        can scale up and down.

        Scenario:
            1. Revert snapshot with 9 deployed nodes in HA configuration
            2. Remove one InfluxDB-Grafana node and update the cluster
            3. Check that plugin is working
            4. Run OSTF
            5. Add one InfluxDB-Grafana node (return to the previous state)
               and update the cluster
            6. Check that plugin is working
            7. Run OSTF

        Duration 120m
        """
        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")

        self.check_influxdb_nodes_count(3)

        manipulated_node = {'slave-07': self.settings.role_name}

        # Remove InfluxDB-Grafana node
        self.helpers.remove_node_from_cluster(manipulated_node)
        self.check_plugin_online()
        # NOTE(rpromyshlennikov): shouldn't fail,
        # but it'll be fixed in next releases
        self.check_influxdb_nodes_count(2)
        self.fuel_web.run_ostf()

        # Add InfluxDB-Grafana node
        self.helpers.add_node_to_cluster(manipulated_node)
        self.check_plugin_online()
        self.check_influxdb_nodes_count(3)
        self.helpers.run_ostf()

        self.env.make_snapshot("add_remove_node_with_influxdb_grafana_plugin")
    @test(depends_on=[
        test_smoke_bvt.TestInfluxdbPlugin.deploy_ha_influxdb_grafana_plugin],
        groups=["check_failover_influxdb_grafana", "failover",
                "influxdb_grafana", "system", "destructive",
                "check_shutdown_influxdb_grafana_node"])
    @log_snapshot_after_test
    def shutdown_node_with_influxdb_grafana_plugin(self):
        """Verify that failover for the InfluxDB cluster works.

        Scenario:
            1. Revert snapshot with 9 deployed nodes in HA configuration
            2. Determine the InfluxDB master node, where vip_influxdb
               was started
            3. Shut down the InfluxDB master node
            4. Check that vip_influxdb was started on another node
            5. Check that plugin is working
            6. Check that no data is lost after the shutdown
            7. Run OSTF

        Duration 30m
        """
        self.env.revert_snapshot("deploy_ha_influxdb_grafana_plugin")

        master_node_hostname = self.get_influxdb_master_node()['fqdn']

        self.helpers.hard_shutdown_node(master_node_hostname)

        self.wait_for_rotation_influx_master(master_node_hostname)

        self.check_plugin_online()

        # TODO(rpromyshlennikov): check no data lost
        self.helpers.run_ostf()

        self.env.make_snapshot("shutdown_node_with_influxdb_grafana_plugin")

View File

@@ -41,7 +41,9 @@ class CloseSSHConnectionsPlugin(plugins.Plugin):
def import_tests():
    from stacklight_tests.influxdb_grafana import test_destructive  # noqa
    from stacklight_tests.influxdb_grafana import test_smoke_bvt  # noqa
    from stacklight_tests.influxdb_grafana import test_system  # noqa


def run_tests():