# Copyright 2016 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from fuelweb_test.helpers.decorators import log_snapshot_after_test
from fuelweb_test import logger
from proboscis import test

from stacklight_tests.toolchain import api


@test(groups=["plugins"])
class TestFunctionalToolchain(api.ToolchainApi):
    """Class for functional testing of plugins toolchain."""

    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_display_grafana_dashboards_toolchain",
                  "toolchain", "functional"])
    @log_snapshot_after_test
    def check_display_grafana_dashboards_toolchain(self):
        """Verify that the dashboards show up in the Grafana UI.

        Scenario:
            1. Revert snapshot with 3 deployed nodes
            2. Open the Grafana URL (open the "Dashboard" tab and click
               the "Grafana" link)
            3. Sign in using the credentials provided during the
               configuration of the environment
            4. Go to the Main dashboard and verify that everything is ok
            5. Repeat the previous step for the following dashboards:
                * Apache
                * Cinder
                * Elasticsearch
                * Glance
                * HAProxy
                * Heat
                * Hypervisor
                * InfluxDB
                * Keystone
                * LMA self-monitoring
                * Memcached
                * MySQL
                * Neutron
                * Nova
                * RabbitMQ
                * System

        Duration 20m
        """

        self.env.revert_snapshot("deploy_toolchain")

        self.check_plugins_online()

        self.INFLUXDB_GRAFANA.check_grafana_dashboards()

    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_nova_metrics_toolchain",
                  "toolchain", "functional"])
    @log_snapshot_after_test
    def check_nova_metrics_toolchain(self):
        """Verify that the Nova metrics are being collected.

        Scenario:
            1. Revert snapshot with 3 deployed nodes
            2. Check that plugins are online
            3. Check Nova metrics in InfluxDB during OSTF tests

        Duration 20m
        """

        self.env.revert_snapshot("deploy_toolchain")

        self.check_plugins_online()

        self.check_nova_metrics()

    @test(depends_on_groups=["deploy_ha_toolchain"],
          groups=["check_nova_logs_in_elasticsearch", "toolchain",
                  "functional", "query_elasticsearch"])
    @log_snapshot_after_test
    def check_nova_logs_in_elasticsearch(self):
        """Check that Nova logs are present in Elasticsearch.

        Scenario:
            1. Revert snapshot with 9 deployed nodes in HA configuration
            2. Query the current Elasticsearch index and check that
               Nova logs are present
            3. Check that Nova logs are collected from all controller and
               compute nodes

        Duration 10m
        """
        self.env.revert_snapshot("deploy_ha_toolchain")

        self.check_plugins_online()

        self.check_nova_logs()

    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_nova_notifications_toolchain", "toolchain",
                  "functional", "query_elasticsearch"])
    @log_snapshot_after_test
    def check_nova_notifications_toolchain(self):
        """Check that Nova notifications are present in Elasticsearch.

        Scenario:
            1. Revert snapshot with 3 deployed nodes
            2. Launch, update, rebuild, resize, power-off, power-on,
               snapshot, suspend, shutdown, and delete an instance
            3. Check that Nova notifications are present in the current
               Elasticsearch index

        Duration 25m
        """
        self.env.revert_snapshot("deploy_toolchain")

        self.check_plugins_online()

        self.check_nova_notifications()

    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_glance_notifications_toolchain", "toolchain",
                  "functional", "query_elasticsearch"])
    @log_snapshot_after_test
    def check_glance_notifications_toolchain(self):
        """Check that Glance notifications are present in Elasticsearch.

        Scenario:
            1. Revert snapshot with 3 deployed nodes
            2. Run the OSTF platform test "Check create, update and delete
               image actions using Glance v2"
            3. Check that Glance notifications are present in the current
               Elasticsearch index

        Duration 25m
        """
        self.env.revert_snapshot("deploy_toolchain")

        self.check_plugins_online()

        self.check_glance_notifications()

    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_keystone_notifications_toolchain", "toolchain",
                  "functional", "query_elasticsearch"])
    @log_snapshot_after_test
    def check_keystone_notifications_toolchain(self):
        """Check that Keystone notifications are present in Elasticsearch.

        Scenario:
            1. Revert snapshot with 3 deployed nodes
            2. Run the OSTF functional test "Create user and authenticate
               with it to Horizon"
            3. Check that Keystone notifications are present in the
               current Elasticsearch index

        Duration 25m
        """
        self.env.revert_snapshot("deploy_toolchain")

        self.check_plugins_online()

        self.check_keystone_notifications()

    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_heat_notifications_toolchain", "toolchain",
                  "functional", "query_elasticsearch"])
    @log_snapshot_after_test
    def check_heat_notifications_toolchain(self):
        """Check that Heat notifications are present in Elasticsearch.

        Scenario:
            1. Revert snapshot with 3 deployed nodes
            2. Run the OSTF Heat platform tests
            3. Check that Heat notifications are present in the current
               Elasticsearch index

        Duration 25m
        """
        self.env.revert_snapshot("deploy_toolchain")

        self.check_plugins_online()

        self.check_heat_notifications()

    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_neutron_notifications_toolchain", "toolchain",
                  "functional", "query_elasticsearch"])
    @log_snapshot_after_test
    def check_neutron_notifications_toolchain(self):
        """Check that Neutron notifications are present in Elasticsearch.

        Scenario:
            1. Revert snapshot with 3 deployed nodes
            2. Run the OSTF functional test "Check network connectivity
               from instance via floating IP"
            3. Check that Neutron notifications are present in the
               current Elasticsearch index

        Duration 25m
        """
        self.env.revert_snapshot("deploy_toolchain")

        self.check_plugins_online()

        self.check_neutron_notifications()

    @test(depends_on_groups=["deploy_toolchain"],
          groups=["check_cinder_notifications_toolchain", "toolchain",
                  "functional", "query_elasticsearch"])
    @log_snapshot_after_test
    def check_cinder_notifications_toolchain(self):
        """Check that Cinder notifications are present in Elasticsearch.

        Scenario:
            1. Revert snapshot with 3 deployed nodes
            2. Create a volume and update it
            3. Check that Cinder notifications are present in the current
               Elasticsearch index

        Duration 25m
        """
        self.env.revert_snapshot("deploy_toolchain")

        self.check_plugins_online()

        self.check_cinder_notifications()

    def _check_services_alerts(self, controllers_count, nagios_status,
                               influx_status, down_haproxy_count):
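        """Stop and restart services and verify the associated alerts.

        For every service listed in ``components``, stop the process on
        ``controllers_count`` controllers, verify the expected Nagios and
        InfluxDB statuses and the number of HAProxy backends reported
        down, then restart the process and verify that everything is
        back to OK.
        """
        # Map each component to (process name, HAProxy backend name)
        # pairs; a backend of None means the process is not load-balanced
        # by HAProxy.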
        components = {
            "nova": [("nova-api", "nova-api"), ("nova-scheduler", None)],
            "cinder": [("cinder-api", "cinder-api"),
                       ("cinder-scheduler", None)],
            "neutron": [
                ("neutron-server", "neutron-api"),
                # TODO(rpromyshlennikov): temporary fix,
                # because openvswitch-agent is managed by pacemaker
                # ("neutron-openvswitch-agent", None)
            ],
            "glance": [("glance-api", "glance-api")],
            "heat": [("heat-api", "heat-api")],
            "keystone": [("apache2", "keystone-public-api")]
        }

        alerting_plugin = self.LMA_INFRASTRUCTURE_ALERTING
        services_names_in_nagios = {}
        services_names_in_influx = {}
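        # The service names depend on the plugin version: judging from the
        # version check below, releases before 1.0 use the bare component
        # name, while later releases prefix the Nagios name with "global-"
        # and add a "-control-plane" suffix for nova, neutron and cinder.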
        for component in components:
            influx_service_name = component
            if alerting_plugin.settings.version.startswith("0."):
                nagios_service_name = component
            else:
                nagios_service_name = "global-{}".format(component)
                if component in ("nova", "neutron", "cinder"):
                    nagios_service_name = "{}-control-plane".format(
                        nagios_service_name)
                    influx_service_name = "{}-control-plane".format(
                        influx_service_name)

            services_names_in_nagios[component] = nagios_service_name
            services_names_in_influx[component] = influx_service_name

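        # Nagios is served behind the infrastructure alerting failover
        # VIP, so find the StackLight node that currently holds it.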
        lma_devops_node = self.helpers.get_node_with_vip(
            self.settings.stacklight_roles,
            self.helpers.get_vip_resource_name(
                alerting_plugin.settings.failover_vip))
        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
            lma_devops_node)

        url = alerting_plugin.get_authenticated_nagios_url()
        with self.ui_tester.ui_driver(url, "Nagios Core",
                                      "//frame[2]") as driver:
            alerting_plugin.open_nagios_page(
                driver, "Services", "//table[@class='headertable']")
            controller_nodes = (
                self.fuel_web.get_nailgun_cluster_nodes_by_roles(
                    self.helpers.cluster_id,
                    ["controller"])[:controllers_count]
            )
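            # Stop each service on the selected controllers, verify the
            # degraded states, then start it again and verify recovery.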
            for component in components:
                for (service, haproxy_backend) in components[component]:
                    logger.info("Checking service {0}".format(service))
                    self.change_verify_service_state(
                        service_name=[
                            service,
                            services_names_in_influx[component],
                            services_names_in_nagios[component],
                            haproxy_backend],
                        action="stop",
                        new_state=nagios_status,
                        service_state_in_influx=influx_status,
                        down_backends_in_haproxy=down_haproxy_count,
                        toolchain_node=toolchain_node,
                        controller_nodes=controller_nodes,
                        nagios_driver=driver)
                    self.change_verify_service_state(
                        service_name=[
                            service,
                            services_names_in_influx[component],
                            services_names_in_nagios[component],
                            haproxy_backend],
                        action="start",
                        new_state="OK",
                        service_state_in_influx=self.settings.OKAY,
                        down_backends_in_haproxy=0,
                        toolchain_node=toolchain_node,
                        controller_nodes=controller_nodes,
                        nagios_driver=driver)

    @test(depends_on_groups=["deploy_ha_toolchain"],
          groups=["toolchain_warning_alert_service", "service_restart",
                  "toolchain", "functional"])
    @log_snapshot_after_test
    def toolchain_warning_alert_service(self):
        """Verify that the warning alerts for services show up in the
        Grafana and Nagios UI.

        Scenario:
            1. Connect to one of the controller nodes using ssh and
               stop the nova-api service.
            2. Wait for at least 1 minute.
            3. On Grafana, check the following items:
                - the box in the upper left corner of the dashboard
                  displays 'WARN' with an orange background,
                - the API panels report 1 entity as down.
            4. On Nagios, check the following items:
                - the 'nova' service is in 'WARNING' state,
                - the local user root on the lma node has received
                  an email about the service being in warning state.
            5. Restart the nova-api service.
            6. Wait for at least 1 minute.
            7. On Grafana, check the following items:
                - the box in the upper left corner of the dashboard
                  displays 'OKAY' with a green background,
                - the API panels report 0 entities as down.
            8. On Nagios, check the following items:
                - the 'nova' service is in 'OK' state,
                - the local user root on the lma node has received
                  an email about the recovery of the service.
            9. Repeat steps 2 to 8 for the following services:
                - Nova (stopping and starting the nova-api and
                  nova-scheduler services respectively).
                - Cinder (stopping and starting the cinder-api and
                  cinder-scheduler services respectively).
                - Neutron (stopping and starting the neutron-server
                  and neutron-openvswitch-agent services respectively).
                - Glance (stopping and starting the glance-api service).
                - Heat (stopping and starting the heat-api service).
                - Keystone (stopping and starting the Apache service).

        Duration 45m
        """
        self.env.revert_snapshot("deploy_ha_toolchain")
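        # A single controller down should only degrade the cluster: one
        # HAProxy backend down and WARNING/WARN level alerts.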
        params = {"controllers_count": 1,
                  "nagios_status": "WARNING",
                  "influx_status": self.settings.WARN,
                  "down_haproxy_count": 1}

        self._check_services_alerts(**params)

    @test(depends_on_groups=["deploy_ha_toolchain"],
          groups=["toolchain_critical_alert_service", "service_restart",
                  "toolchain", "functional"])
    @log_snapshot_after_test
    def toolchain_critical_alert_service(self):
        """Verify that the critical alerts for services show up in
        the Grafana and Nagios UI.

        Scenario:
            1. Open the Nagios URL
            2. Connect to one of the controller nodes using ssh and
               stop the nova-api service.
            3. Connect to a second controller node using ssh and stop
               the nova-api service.
            4. Wait for at least 1 minute.
            5. On Nagios, check the following items:
                - the 'nova' service is in 'CRITICAL' state,
                - the local user root on the lma node has received
                  an email about the service being in critical state.
            6. Restart the nova-api service on both nodes.
            7. Wait for at least 1 minute.
            8. On Nagios, check the following items:
                - the 'nova' service is in 'OK' state,
                - the local user root on the lma node has received
                  an email about the recovery of the service.
            9. Repeat steps 2 to 8 for the following services:
                - Nova (stopping and starting the nova-api and
                  nova-scheduler services respectively).
                - Cinder (stopping and starting the cinder-api and
                  cinder-scheduler services respectively).
                - Neutron (stopping and starting the neutron-server
                  and neutron-openvswitch-agent services respectively).
                - Glance (stopping and starting the glance-api service).
                - Heat (stopping and starting the heat-api service).
                - Keystone (stopping and starting the Apache service).

        Duration 45m
        """
        self.env.revert_snapshot("deploy_ha_toolchain")
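        # With the service stopped on two controllers the alerts escalate
        # to CRITICAL/CRIT and two HAProxy backends are reported down.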
        params = {"controllers_count": 2,
                  "nagios_status": "CRITICAL",
                  "influx_status": self.settings.CRIT,
                  "down_haproxy_count": 2}

        self._check_services_alerts(**params)

    def _check_mysql_alerts_node(
            self, nagios_status, influx_status, disk_usage_percent):
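        """Verify the node-level MySQL alerts raised when the MySQL
        filesystem fills up on two controllers (see the scenarios of the
        calling tests for the exact steps).
        """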
        lma_devops_node = self.helpers.get_node_with_vip(
            self.settings.stacklight_roles,
            self.helpers.get_vip_resource_name(
                "infrastructure_alerting_mgmt_vip"))
        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
            lma_devops_node)
        nailgun_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.helpers.cluster_id, ["controller"])

        alerting_plugin = self.LMA_INFRASTRUCTURE_ALERTING
        url = alerting_plugin.get_authenticated_nagios_url()
        with self.ui_tester.ui_driver(url, "Nagios Core",
                                      "//frame[2]") as driver:
            alerting_plugin.open_nagios_page(
                driver, "Services", "//table[@class='headertable']")
            nagios_service_name = (
                "mysql"
                if alerting_plugin.settings.version.startswith("0.")
                else "global-mysql")
            self.change_verify_node_service_state(
                [nagios_service_name, "mysql-nodes.mysql-fs", "mysql"],
                nagios_status,
                influx_status, disk_usage_percent, toolchain_node,
                nailgun_nodes[:2], driver)

    @test(depends_on_groups=["deploy_ha_toolchain"],
          groups=["toolchain_warning_alert_node", "node_alert_warning",
                  "toolchain", "functional"])
    @log_snapshot_after_test
    def toolchain_warning_alert_node(self):
        """Verify that the warning alerts for nodes show up in the
        Grafana and Nagios UI.

        Scenario:
            1. Open the Nagios URL
            2. Open the Grafana URL
            3. Connect to one of the controller nodes using ssh and run:
                   fallocate -l $(df | grep /dev/mapper/mysql-root
                   | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) * 96
                   / 100) - $3))}') /var/lib/mysql/test
            4. Wait for at least 1 minute.
            5. On Grafana, check the following items:
                - the box in the upper left corner of the dashboard
                  displays 'OKAY' with a green background.
            6. On Nagios, check the following items:
                - the 'mysql' service is in 'OK' state,
                - the 'mysql-nodes.mysql-fs' service is in 'WARNING'
                  state for the node.
            7. Connect to a second controller node using ssh and run:
                   fallocate -l $(df | grep /dev/mapper/mysql-root
                   | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) * 96
                   / 100) - $3))}') /var/lib/mysql/test
            8. Wait for at least 1 minute.
            9. On Grafana, check the following items:
                - the box in the upper left corner of the dashboard
                  displays 'WARN' with an orange background,
                - an annotation telling that the service went from 'OKAY'
                  to 'WARN' is displayed.
            10. On Nagios, check the following items:
                - the 'mysql' service is in 'WARNING' state,
                - the 'mysql-nodes.mysql-fs' service is in 'WARNING'
                  state for the 2 nodes,
                - the local user root on the lma node has received an
                  email about the service being in warning state.
            11. Run the following command on both controller nodes:
                   rm /var/lib/mysql/test
            12. Wait for at least 1 minute.
            13. On Grafana, check the following items:
                - the box in the upper left corner of the dashboard
                  displays 'OKAY' with a green background,
                - an annotation telling that the service went from 'WARN'
                  to 'OKAY' is displayed.
            14. On Nagios, check the following items:
                - the 'mysql' service is in 'OK' state,
                - the 'mysql-nodes.mysql-fs' service is in 'OK' state
                  for the 2 nodes,
                - the local user root on the lma node has received an
                  email about the recovery of the service.

        Duration 15m
        """
        self.env.revert_snapshot("deploy_ha_toolchain")
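        # Filling the MySQL filesystem to 91% usage is expected to raise
        # WARNING-level alerts.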
        params = {
            "nagios_status": "WARNING",
            "influx_status": self.settings.WARN,
            "disk_usage_percent": 91}
        self._check_mysql_alerts_node(**params)

    @test(depends_on_groups=["deploy_ha_toolchain"],
          groups=["toolchain_critical_alert_node", "node_alert_critical",
                  "toolchain", "functional"])
    @log_snapshot_after_test
    def toolchain_critical_alert_node(self):
        """Verify that the critical alerts for nodes show up in the
        Grafana and Nagios UI.

        Scenario:
            1. Open the Nagios URL
            2. Open the Grafana URL
            3. Connect to one of the controller nodes using ssh and run:
                   fallocate -l $(df | grep /dev/mapper/mysql-root
                   | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) * 98
                   / 100) - $3))}') /var/lib/mysql/test
            4. Wait for at least 1 minute.
            5. On Grafana, check the following items:
                - the box in the upper left corner of the dashboard
                  displays 'OKAY' with a green background.
            6. On Nagios, check the following items:
                - the 'mysql' service is in 'OK' state,
                - the 'mysql-nodes.mysql-fs' service is in 'CRITICAL'
                  state for the node.
            7. Connect to a second controller node using ssh and run:
                   fallocate -l $(df | grep /dev/mapper/mysql-root
                   | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) * 98
                   / 100) - $3))}') /var/lib/mysql/test
            8. Wait for at least 1 minute.
            9. On Grafana, check the following items:
                - the box in the upper left corner of the dashboard
                  displays 'CRIT' with an orange background,
                - an annotation telling that the service went from 'OKAY'
                  to 'CRIT' is displayed.
            10. On Nagios, check the following items:
                - the 'mysql' service is in 'CRITICAL' state,
                - the 'mysql-nodes.mysql-fs' service is in 'CRITICAL'
                  state for the 2 nodes,
                - the local user root on the lma node has received an
                  email about the service being in critical state.
            11. Run the following command on both controller nodes:
                   rm /var/lib/mysql/test
            12. Wait for at least 1 minute.
            13. On Grafana, check the following items:
                - the box in the upper left corner of the dashboard
                  displays 'OKAY' with a green background,
                - an annotation telling that the service went from 'CRIT'
                  to 'OKAY' is displayed.
            14. On Nagios, check the following items:
                - the 'mysql' service is in 'OK' state,
                - the 'mysql-nodes.mysql-fs' service is in 'OK' state
                  for the 2 nodes,
                - the local user root on the lma node has received an
                  email about the recovery of the service.

        Duration 15m
        """
        self.env.revert_snapshot("deploy_ha_toolchain")
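        # Filling the MySQL filesystem to 96% usage is expected to raise
        # CRITICAL-level alerts.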
        params = {
            "nagios_status": "CRITICAL",
            "influx_status": self.settings.CRIT,
            "disk_usage_percent": 96}
        self._check_mysql_alerts_node(**params)