From de8eb398667eca9dd12086d58ab4fabeccfd9e7e Mon Sep 17 00:00:00 2001
From: Duncan Martin Walker <Duncan.Walker02@bbc.co.uk>
Date: Thu, 23 Jan 2020 10:03:37 +0000
Subject: [PATCH] Updated log elk_metrics_7x collection for elastic beats

Introduced the logging script from openstack-ansible-ops to allow for the
collection of elasticsearch and elastic beat logs.

Change-Id: I4971156f6cd3592693058b707f2d05a26fa02882
---
 elk_metrics_7x/tests/post-run.yml |  17 ++
 scripts/log-collect.sh            | 286 ++++++++++++++++++++++++++++++
 2 files changed, 303 insertions(+)
 create mode 100755 scripts/log-collect.sh

diff --git a/elk_metrics_7x/tests/post-run.yml b/elk_metrics_7x/tests/post-run.yml
index d38a34cf..2d38425c 100644
--- a/elk_metrics_7x/tests/post-run.yml
+++ b/elk_metrics_7x/tests/post-run.yml
@@ -18,6 +18,23 @@
 - name: Run post tasks
   hosts: "all"
   tasks:
+    - name: Run log collection script
+      command: scripts/log-collect.sh
+      become: yes
+      become_user: root
+      args:
+        chdir: "src/opendev.org/openstack/openstack-ansible-ops"
+      environment:
+        # ZUUL_PROJECT is used by the log collection functions to enable
+        # log collection configuration specific to OpenStack CI
+        ZUUL_PROJECT: "{{ zuul.project.short_name }}"
+        TEST_EXIT_CODE: "{{ zuul_success | lower }}"
+        RUN_ARA: "true"
+        # Some Zuul environments (such as OpenStack CI) use html based ara reports
+        ARA_REPORT_TYPE: "{{ ara_report_type | default('database') }}"
+        WORKING_DIR: "{{ ansible_user_dir }}/src/opendev.org/openstack/openstack-ansible-ops/"
+        LOGGING_DIR: "/tmp/elk-metrics-7x-logs"
+
     - name: Copy logs back to the executor
       synchronize:
         src: "/tmp/elk-metrics-7x-logs"
diff --git a/scripts/log-collect.sh b/scripts/log-collect.sh
new file mode 100755
index 00000000..a65c3eeb
--- /dev/null
+++ b/scripts/log-collect.sh
@@ -0,0 +1,286 @@
+#!/usr/bin/env bash
+
+# Copyright 2016, Rackspace US, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# WARNING:
+# This file is use by all OpenStack-Ansible roles for testing purposes.
+# Any changes here will affect all OpenStack-Ansible role repositories
+# with immediate effect.
+
+# PURPOSE:
+# This script collects, renames and compresses the logs produced in
+# a role test if the host is in OpenStack-CI.
+
+## Vars ----------------------------------------------------------------------
+export WORKING_DIR=${WORKING_DIR:-$(pwd)}
+export RUN_ARA=${RUN_ARA:-false}
+export ARA_REPORT_TYPE=${ARA_REPORT_TYPE:-"database"}
+export TESTING_HOME=${TESTING_HOME:-$HOME}
+export TS=$(date +"%H-%M-%S")
+
+export RSYNC_CMD="rsync --archive --copy-links --ignore-errors --quiet --no-perms --no-owner --no-group --whole-file --inplace"
+
+# NOTE(cloudnull): This is a very simple list of common directories in /etc we
+#                  wish to search for when storing gate artifacts. When adding
+#                  things to this list please alphabetize the entries so it's
+#                  easy for folks to find and adjust items as needed.
+COMMON_ETC_LOG_NAMES="apt \
+    apache2 \
+    auditbeat \
+    auditd \
+    calico \
+    ceph \
+    elasticsearch \
+    etcd \
+    filebeat \
+    haproxy \
+    heartbeat-elastic \
+    httpd \
+    kibana \
+    memcached \
+    metricbeat \
+    mongodb \
+    my.cnf \
+    mariadb \
+    netplan \
+    network \
+    nginx \
+    openstack_deploy \
+    packetbeat \
+    pip.conf \
+    qpid-dispatch \
+    rabbitmq \
+    repo \
+    resolv.conf \
+    rsyslog \
+    sasl2 \
+    sysconfig/network-scripts \
+    sysconfig/network \
+    systemd \
+    uwsgi \
+    yum \
+    yum.repos.d \
+    zypp"
+
+COMMON_ETC_LOG_NAMES+=" $(awk -F'os_' '/name.*os_.*/ {print $2}' $(dirname $(readlink -f ${BASH_SOURCE[0]}))/../ansible-role-requirements.yml | tr '\n' ' ')"
+
+## Functions -----------------------------------------------------------------
+
+function repo_information {
+    [[ "${1}" != "host" ]] && lxc_cmd="lxc-attach --name ${1} --" || lxc_cmd=""
+    echo "Collecting list of installed packages and enabled repositories for \"${1}\""
+    # Redhat package debugging
+    if eval sudo ${lxc_cmd} which yum &>/dev/null || eval sudo ${lxc_cmd} which dnf &>/dev/null; then
+        # Prefer dnf over yum for CentOS.
+        eval sudo ${lxc_cmd} which dnf &>/dev/null && RHT_PKG_MGR='dnf' || RHT_PKG_MGR='yum'
+        eval sudo ${lxc_cmd} $RHT_PKG_MGR repolist -v > "${LOGGING_DIR}/redhat-rpm-repolist-${1}-${TS}.txt" || true
+        eval sudo ${lxc_cmd} $RHT_PKG_MGR list installed > "${LOGGING_DIR}/redhat-rpm-list-installed-${1}-${TS}.txt" || true
+
+    # SUSE package debugging
+    elif eval sudo ${lxc_cmd} which zypper &>/dev/null; then
+        eval sudo ${lxc_cmd} zypper lr -d > "${LOGGING_DIR}/suse-zypper-repolist-${1}-${TS}.txt" || true
+        eval sudo ${lxc_cmd} zypper --disable-repositories pa -i > "${LOGGING_DIR}/suse-zypper-list-installed-${1}-${TS}.txt" || true
+
+    # Ubuntu package debugging
+    elif eval sudo ${lxc_cmd} which apt-get &> /dev/null; then
+        eval sudo ${lxc_cmd} apt-cache policy | grep http | awk '{print $1" "$2" "$3}' | sort -u > "${LOGGING_DIR}/ubuntu-apt-repolist-${1}-${TS}.txt" || true
+        eval sudo ${lxc_cmd} apt list --installed > "${LOGGING_DIR}/ubuntu-apt-list-installed-${1}-${TS}.txt" || true
+
+    # Gentoo package debugging
+    elif eval sudo ${lxc_cmd} which emerge &> /dev/null; then
+        # list installed packages
+        eval sudo ${lxc_cmd} equery list "*" > "${LOGGING_DIR}/gentoo-portage-list-installed-${1}-${TS}.txt" || true
+        # list only packages called for install (not dependancies)
+        eval sudo ${lxc_cmd} cat /var/lib/portage/world > "${LOGGING_DIR}/gentoo-portage-list-manual-installed-${1}-${TS}.txt" || true
+    fi
+
+}
+
+function store_artifacts {
+    # Store known artifacts only if they exist. If the target directory does
+    # exist, it will be created.
+    # USAGE: store_artifacts /src/to/artifacts /path/to/store
+    if sudo test -e "${1}"; then
+        if [[ ! -d "${2}" ]]; then
+            mkdir -vp "${2}"
+        fi
+        echo "Running artifact sync for \"${1}\" to \"${2}\""
+        sudo ${RSYNC_CMD} ${1} ${2} || true
+    fi
+}
+
+function store_journal_artifacts {
+    # Store lines from a known unit's journal as a plain-text log file.
+    # USAGE: store_journal_artifacts UNIT_TO_MATCH /path/to/store
+    if [ $? == 0 ]; then
+        if [[ ! -d "${2}" ]]; then
+            mkdir -vp "${2}"
+        fi
+        if [[ ${3:-false} != false ]]; then
+            if [[ -f "${3}/system.journal" ]]; then
+                SYSTEMD_UNITS=$(sudo journalctl --file="${3}/system.journal" -F _SYSTEMD_UNIT | grep "${service}")
+                for service_unit in $(echo -e "${SYSTEMD_UNITS}"); do
+                    echo "Pulling journal for ${service_unit}"
+                    sudo journalctl --file="${3}/system.journal" \
+                                    --unit="${service_unit}" | sudo tee "${2}/${service_unit}.journal-${TS}.log" &>/dev/null
+                done
+            fi
+        else
+            SYSTEMD_UNITS=$(sudo journalctl --output=json-pretty -F _SYSTEMD_UNIT | grep "${service}")
+            for service_unit in $(echo -e "${SYSTEMD_UNITS}"); do
+                echo "Pulling journal for ${service_unit}"
+                sudo journalctl --unit="${service_unit}" | sudo tee "${2}/${service_unit}.journal-${TS}.log" &>/dev/null
+            done
+        fi
+    fi
+}
+
+function find_files {
+    find "${LOGGING_DIR}/" -type f \
+        ! -name "*.gz" \
+        ! -name '*.html' \
+        ! -name '*.subunit' \
+        ! -name "*.journal" \
+        ! -name 'ansible.sqlite' | egrep -v 'stackviz|ara-report'
+}
+
+function rename_files {
+    find_files |\
+        while read filename; do \
+            mv ${filename} ${filename}.txt || echo "WARNING: Could not rename ${filename}"; \
+        done
+}
+
+## Main ----------------------------------------------------------------------
+
+echo "#### BEGIN LOG COLLECTION ###"
+
+mkdir -vp "${LOGGING_DIR}"
+
+# Gather basic logs
+store_artifacts /openstack/log/ansible-logging/ "${LOGGING_DIR}/ansible"
+store_artifacts /openstack/log/ "${LOGGING_DIR}/openstack"
+store_artifacts /var/log/ "${LOGGING_DIR}/host"
+
+# Build the ARA static html report if required
+if [[ "$ARA_REPORT_TYPE" == "html" ]]; then
+    echo "Generating ARA static html report."
+    /opt/ansible-runtime/bin/ara generate html "${LOGGING_DIR}/ara-report"
+fi
+
+# Store the ara sqlite database in the openstack-ci expected path
+store_artifacts "${TESTING_HOME}/.ara/ansible.sqlite" "${LOGGING_DIR}/ara-report/"
+
+# Store netstat report
+store_artifacts /tmp/listening_port_report.txt "${LOGGING_DIR}/host"
+
+# Copy the repo os-releases *.txt files
+# container path
+store_artifacts /openstack/*repo*/repo/os-releases/*/*/*.txt "${WORKING_DIR}/repo"
+
+# metal path
+store_artifacts /var/www/repo/os-releases/*/*/*.txt "${WORKING_DIR}/repo"
+
+# Verify the integrity of the journal files but do not fail if one of them is not usable
+echo "Verifying journal files consistency..."
+find /var/log/journal/ -type f -name "*.journal" -exec bash -c 'sudo journalctl --file={} --verify || true' \;
+
+
+# Gather host etc artifacts
+PIDS=()
+for service in ${COMMON_ETC_LOG_NAMES}; do
+    echo "Running collection for service ${service}"
+    store_artifacts "/etc/${service}" "${LOGGING_DIR}/etc/host/" &
+    pid=$!
+    PIDS[${pid}]=${pid}
+    store_journal_artifacts "${service}" "${LOGGING_DIR}/host" &
+    pid=$!
+    PIDS[${pid}]=${pid}
+done
+echo "Waiting for host collection jobs to finish"
+for job_pid in ${!PIDS[@]}; do
+    wait ${PIDS[$job_pid]} || exit 99
+done
+
+
+# Gather container etc artifacts
+if which lxc-ls &> /dev/null; then
+    for CONTAINER_NAME in $(sudo lxc-ls -1); do
+        CONTAINER_PID=$(sudo lxc-info -p -n ${CONTAINER_NAME} | awk '{print $2}')
+        ETC_DIR="/proc/${CONTAINER_PID}/root/etc"
+        MACHINE_ID="$(sudo cat ${ETC_DIR}/machine-id)"
+        LOG_DIR="/proc/${CONTAINER_PID}/root/var/log"
+        JOURNAL_DIR="/proc/${CONTAINER_PID}/root/run/log/journal/${MACHINE_ID}"
+        repo_information ${CONTAINER_NAME}
+        PIDS=()
+        for service in ${COMMON_ETC_LOG_NAMES}; do
+            echo "Running in container collection for service ${service}"
+            store_artifacts ${ETC_DIR}/${service} "${LOGGING_DIR}/etc/openstack/${CONTAINER_NAME}/" &
+            pid=$!
+            PIDS[${pid}]=${pid}
+            store_artifacts ${LOG_DIR}/${service} "${LOGGING_DIR}/openstack/${CONTAINER_NAME}/" &
+            pid=$!
+            PIDS[${pid}]=${pid}
+            store_journal_artifacts ${service} "${LOGGING_DIR}/openstack/${CONTAINER_NAME}" "${JOURNAL_DIR}" &
+            pid=$!
+            PIDS[${pid}]=${pid}
+        done
+        echo "Waiting for container collection jobs for ${CONTAINER_NAME} to finish"
+        for job_pid in ${!PIDS[@]}; do
+            wait ${PIDS[$job_pid]} || exit 99
+        done
+    done
+fi
+
+
+# Rename all files gathered to have a .txt suffix so that the compressed
+# files are viewable via a web browser in OpenStack-CI.
+rename_files
+
+# If we could not find ARA, assume it was not installed
+# and skip all the related activities.
+if [ "${RUN_ARA}" = true ]; then
+    # Generate the ARA subunit report so that the
+    # results reflect in OpenStack-Health
+    mkdir -vp "${LOGGING_DIR}/ara-data"
+    echo "Generating ARA report subunit report."
+    /opt/ansible-runtime/bin/ara generate subunit "${LOGGING_DIR}/ara-data/testrepository.subunit" || true
+fi
+
+# Get a dmesg output so we can look for kernel failures
+dmesg > "${LOGGING_DIR}/dmesg-${TS}.txt" || true
+
+# Collect job environment
+env > "${LOGGING_DIR}/environment-${TS}.txt"  || true
+
+repo_information host
+
+# Record the active interface configs
+if which ethtool &> /dev/null; then
+    for interface in $(ip -o link | awk -F':' '{print $2}' | sed 's/@.*//g'); do
+        echo "ethtool -k ${interface}"
+        ethtool -k ${interface} > "${LOGGING_DIR}/ethtool-${interface}-${TS}-cfg.txt" || true
+    done
+else
+    echo "No ethtool available" | tee -a "${LOGGING_DIR}/ethtool-${TS}-${interface}-cfg.txt"
+fi
+
+# Ensure that the files are readable by all users, including the non-root
+# OpenStack-CI jenkins user.
+sudo chmod -R ugo+rX "${LOGGING_DIR}"
+sudo chown -R $(whoami) "${LOGGING_DIR}"
+
+echo "#### END LOG COLLECTION ###"
+