openstack-ansible/scripts/scripts-library.sh
Jonathan Rosser dacc9eb13f Convert host and container journal collection to python
This takes only one pass over all the journal files to generate
the output rather than iterating over the whole journal once per
service.

The journal_dump script is run from the ansible-runtime venv and
uses the yaml and systemd-python packages from the venv rather
than system packages.

Change-Id: Ia7ccd5da55119273cf1fac26316ce37aa484d95a
2021-03-23 16:35:36 +00:00

426 lines
15 KiB
Bash
Executable File

#!/usr/bin/env bash
# Copyright 2014, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Vars ----------------------------------------------------------------------
# Horizontal rule printed by info_block and used by print_info to pad headings.
LINE='----------------------------------------------------------------------'
# Values already present in the environment win; otherwise the defaults below
# are used.
ANSIBLE_PARAMETERS=${ANSIBLE_PARAMETERS:-""}
# Epoch seconds; exit_state subtracts this from the current time.
STARTTIME="${STARTTIME:-$(date +%s)}"
COMMAND_LOGS=${COMMAND_LOGS:-"/openstack/log/ansible_cmd_logs"}
# The vars used to prepare the Ansible runtime venv
PIP_COMMAND="/opt/ansible-runtime/bin/pip"
ZUUL_PROJECT="${ZUUL_PROJECT:-}"
GATE_EXIT_LOG_COPY="${GATE_EXIT_LOG_COPY:-false}"
GATE_EXIT_LOG_GZIP="${GATE_EXIT_LOG_GZIP:-true}"
GATE_EXIT_RUN_ARA="${GATE_EXIT_RUN_ARA:-true}"
GATE_EXIT_RUN_DSTAT="${GATE_EXIT_RUN_DSTAT:-true}"
# If this is a gate node from OpenStack-Infra Store all logs into the
# execution directory after gate run.
if [[ -n "${ZUUL_PROJECT}" ]]; then
  GATE_EXIT_LOG_COPY=true
fi
# The default SSHD configuration has MaxSessions = 10. If a deployer changes
# their SSHD config, then the ANSIBLE_FORKS may be set to a higher number. We
# set the value to 10 or the number of CPU's, whichever is less. This is to
# balance between performance gains from the higher number, and CPU
# consumption. If ANSIBLE_FORKS is already set to a value, then we leave it
# alone.
# ref: https://bugs.launchpad.net/openstack-ansible/+bug/1479812
if [[ -z "${ANSIBLE_FORKS:-}" ]]; then
  # grep -c exits non-zero when it counts zero matches and prints nothing when
  # the file cannot be read. "|| true" keeps a caller running under "set -e"
  # alive, and the sanity check below guarantees a positive integer so that
  # ANSIBLE_FORKS never ends up empty or 0.
  CPU_NUM=$(grep -c ^processor /proc/cpuinfo 2>/dev/null) || true
  if ! [[ "${CPU_NUM}" =~ ^[1-9][0-9]*$ ]]; then
    CPU_NUM=1
  fi
  if (( CPU_NUM < 10 )); then
    ANSIBLE_FORKS=${CPU_NUM}
  else
    ANSIBLE_FORKS=10
  fi
fi
## Functions -----------------------------------------------------------------
# Build ansible-runtime venv
#
# Recreates the virtualenv in /opt/ansible-runtime and installs pip tooling,
# the requirements file, the Ansible package and the current checkout into it.
#
# Globals read:    PYTHON_EXEC_PATH, UPPER_CONSTRAINTS_FILE, PIP_COMMAND,
#                  ANSIBLE_PACKAGE
# Globals changed: PIP_OPTS (both constraint files are appended to it)
function build_ansible_runtime_venv {
  local venv_root="/opt/ansible-runtime"
  local host_site_packages="/usr/lib64/python3.6/site-packages"
  local venv_site_packages="${venv_root}/lib64/python3.6/site-packages"

  # All distros have a python-virtualenv > 13.
  # - Centos 8 has 15.1, which holds pip 9.0.1, setuptools 28.8, wheel 0.29
  #   See also: http://mirror.centos.org/centos/7/os/x86_64/Packages/
  # - openSUSE 42.3 has 13.1.2, which holds pip 7.1.2, setuptools 18.2, wheel 0.24.
  #   See also: https://build.opensuse.org/package/show/openSUSE%3ALeap%3A42.3/python-virtualenv
  # - Ubuntu Xenial has 15.0.1, holding pip 8.1.1, setuptools 20.3, wheel 0.29
  #   See also: https://packages.ubuntu.com/xenial/python-virtualenv
  virtualenv --python=${PYTHON_EXEC_PATH} --never-download --clear "${venv_root}"

  # Extend the (global) pip options with the constraint files. PIP_OPTS is a
  # plain string that is deliberately word-split wherever it is used.
  PIP_OPTS="${PIP_OPTS:-} --constraint global-requirement-pins.txt"
  PIP_OPTS="${PIP_OPTS} --constraint ${UPPER_CONSTRAINTS_FILE}"

  # Every option is given on the command line and --isolated makes pip ignore
  # any configuration already on the host. This prevents the repo server's
  # extra constraints, which include a different version of Ansible to the
  # one we want to install, from being applied.

  # Bring pip, setuptools and wheel to the appropriate version first
  ${PIP_COMMAND} install --isolated ${PIP_OPTS} --upgrade pip setuptools wheel

  # Then ansible and the other required packages
  ${PIP_COMMAND} install --isolated ${PIP_OPTS} -r requirements.txt ${ANSIBLE_PACKAGE}

  # Our osa_toolkit code comes from the current checkout
  ${PIP_COMMAND} install -e .

  # In openstack-CI the log collection python script needs systemd-python
  if [[ -e /etc/ci/mirror_info.sh ]]; then
    ${PIP_COMMAND} install --isolated ${PIP_OPTS} systemd-python
  fi

  # Add SELinux support to the venv by copying the host bindings into it
  if [ -d "${host_site_packages}/selinux/" ]; then
    rsync -avX "${host_site_packages}/selinux/" "${venv_site_packages}/selinux/"
    rsync -avX "${host_site_packages}/_selinux.cpython-36m-x86_64-linux-gnu.so" "${venv_site_packages}/"
  fi
}
# If in OpenStack-Infra, set some vars to use the mirror when bootstrapping Ansible
#
# Does nothing unless /etc/ci/mirror_info.sh exists. When it does, the file is
# sourced and PIP_OPTS is exported, built from the NODEPOOL_PYPI_MIRROR,
# NODEPOOL_MIRROR_HOST and NODEPOOL_WHEEL_MIRROR values.
function load_nodepool_pip_opts {
  if [[ ! -e /etc/ci/mirror_info.sh ]]; then
    return 0
  fi
  source /etc/ci/mirror_info.sh
  PIP_OPTS="--index-url ${NODEPOOL_PYPI_MIRROR} --trusted-host ${NODEPOOL_MIRROR_HOST} --extra-index-url ${NODEPOOL_WHEEL_MIRROR}"
  export PIP_OPTS
}
# Determine the distribution we are running on, so that we can configure it
# appropriately.
#
# Sources /etc/os-release into the current shell (errors are discarded) and
# exports DISTRO_ID and DISTRO_NAME taken from its ID and NAME values. Every
# other variable defined by os-release stays set in the calling shell as well.
function determine_distro {
  . /etc/os-release 2>/dev/null
  DISTRO_ID="${ID}"
  DISTRO_NAME="${NAME}"
  export DISTRO_ID DISTRO_NAME
}
# Ensure that ${HOME}/.ssh/id_rsa exists as a full keypair and that its public
# half is listed in ${HOME}/.ssh/authorized_keys.
#
# Globals read: HOME
# Outputs:      the public key on stdout when it had to be appended (tee)
# Returns:      non-zero if the public key cannot be read or written
function ssh_key_create {
  local key_path="${HOME}/.ssh"
  local key_file="${key_path}/id_rsa"
  local authorized_keys="${key_path}/authorized_keys"
  local key_content

  # Ensure that the .ssh directory exists and has the right mode
  if [[ ! -d "${key_path}" ]]; then
    mkdir -p "${key_path}"
    chmod 700 "${key_path}"
  fi

  # Ensure a full keypair exists
  if [[ -f "${key_file}" ]]; then
    # Private key present: regenerate the public key from it when missing
    if [[ ! -f "${key_file}.pub" ]]; then
      ssh-keygen -f "${key_file}" -y > "${key_file}.pub"
    fi
  else
    # Private key missing: a leftover public key is useless, so drop it and
    # generate a fresh keypair
    rm -f "${key_file}.pub"
    ssh-keygen -t rsa -f "${key_file}" -N ''
  fi

  key_content=$(cat "${key_file}.pub") || return
  if [[ -z "${key_content}" ]]; then
    printf 'ssh_key_create: %s is empty\n' "${key_file}.pub" >&2
    return 1
  fi

  # Ensure that the public key is included in the authorized_keys.
  # -F compares the key as a literal string: key material and comments contain
  # characters such as "." and "[" that must not be read as a regular
  # expression. stderr is silenced because authorized_keys may not exist yet.
  if ! grep -qF -- "${key_content}" "${authorized_keys}" 2>/dev/null; then
    printf '%s\n' "${key_content}" | tee -a "${authorized_keys}"
  fi
}
# Print the elapsed run time and a success/failure banner, then terminate the
# shell.
#
# Globals read:    STARTTIME (epoch seconds)
# Globals changed: TOTALSECONDS
# Arguments:       $1 - exit status; "0" is reported as success, anything
#                  else as failure
function exit_state {
  set +x
  TOTALSECONDS="$(( $(date +%s) - STARTTIME ))"
  local elapsed_minutes="$(( TOTALSECONDS / 60 ))"
  info_block "Run Time = ${TOTALSECONDS} seconds || ${elapsed_minutes} minutes"
  case "${1}" in
    0)
      info_block "Status: Success"
      ;;
    *)
      info_block "Status: Failure"
      ;;
  esac
  exit ${1}
}
# Report a successful run and leave the shell through exit_state with
# status 0.
function exit_success {
  # Command tracing is switched off before the final report is produced.
  set +x
  exit_state 0
}
# Failure handler used by the signal/ERR traps: collect instance information,
# print the error details and leave through exit_state with status 1.
#
# Arguments: free-form words describing the failure (the traps in this file
#            pass the line number, the exit status and an optional message)
function exit_fail {
  set +x
  log_instance_info
  # "$*" joins every argument into the single message string. With "$@"
  # inside the quotes only the first argument would be attached to the prefix
  # and the remaining ones would be passed on as separate words.
  info_block "Error Info - $*"
  exit_state 1
}
# Tasks to run when a gate job finishes: remember the test's exit code,
# optionally render the dstat charts and record the instance information.
#
# Globals read:    GATE_EXIT_LOG_COPY, GATE_EXIT_RUN_DSTAT
# Globals changed: TEST_EXIT_CODE (only when not already set)
function gate_job_exit_tasks {
  # Must stay the very first command: $? still holds the status that was
  # current when the function (typically a trap) was entered, i.e. the
  # success/failure of the test.
  TEST_EXIT_CODE=${TEST_EXIT_CODE:-$?}

  # Tracing every command is too verbose here.
  set +x

  # On a gate node from OpenStack-Infra all logs are stored into the
  # execution directory after the gate run.
  if [[ "${GATE_EXIT_LOG_COPY}" == true ]]; then
    if [[ "${GATE_EXIT_RUN_DSTAT}" == true ]]; then
      # Chart generation is best effort only.
      generate_dstat_charts || :
    fi
    # Switch tracing off again; this has to be done here because
    # log_instance_info does set -x.
    set +x
  fi

  # System status & Information
  log_instance_info
}
# Install ARA into the Ansible runtime venv.
#
# ARA gets added to the callback path provided by
# bootstrap-ansible.sh/openstack-ansible.rc. The installation lives *here*
# instead of bootstrap-ansible so that it is used for CI purposes only.
#
# Globals read: PIP_COMMAND and PIP_OPTS, both exported by the
#               bootstrap-ansible script; PIP_OPTS contains the whole set of
#               constraints that need to be applied.
function setup_ara {
  local -a ara_requirements=("ara[server]" "dynaconf<3.1.3")
  ${PIP_COMMAND} install --isolated ${PIP_OPTS} "${ara_requirements[@]}"
}
# Install dstat for the current distribution and start it in the background,
# recording into /openstack/log/instance-info. Does nothing unless
# GATE_EXIT_RUN_DSTAT is "true".
#
# Globals read: GATE_EXIT_RUN_DSTAT, DISTRO_ID
function run_dstat {
  if [ "$GATE_EXIT_RUN_DSTAT" != true ]; then
    return 0
  fi

  local dstat_dir="/openstack/log/instance-info"

  if [[ "${DISTRO_ID}" == "centos" || "${DISTRO_ID}" == "rhel" ]]; then
    dnf -y install dstat
  elif [[ "${DISTRO_ID}" == "ubuntu" ]]; then
    apt-get update
    DEBIAN_FRONTEND=noninteractive apt-get -y install dstat
  elif [[ "${DISTRO_ID}" == "opensuse" ]]; then
    zypper -n install -l dstat
  fi

  # https://stackoverflow.com/a/20338327 executing in ()& decouples the dstat
  # process from scripts-library to prevent hung builds if dstat fails to exit
  # for any reason.
  (dstat -tcmsdn --top-cpu --top-mem --top-bio --nocolor --output "${dstat_dir}/dstat.csv" \
    < /dev/null > "${dstat_dir}/dstat.log" 2>&1 &)
}
# Stop the running dstat process(es) and render the recorded CSV into
# /openstack/log/instance-info/dstat.html with the dstat_graph tooling, which
# is cloned into /opt/dstat_graph when it is not there yet.
#
# Returns: non-zero when the tooling cannot be fetched or entered, or when
#          the page generation fails
function generate_dstat_charts {
  local dstat_pids
  local rc=0

  # Only signal when something matched; a bare "kill" is a usage error.
  dstat_pids=$(pgrep -f dstat) || true
  if [[ -n "${dstat_pids}" ]]; then
    # Word splitting is intended here: pgrep prints one PID per line.
    kill ${dstat_pids}
  fi

  if [[ ! -d /opt/dstat_graph ]]; then
    git clone https://github.com/Dabz/dstat_graph /opt/dstat_graph || return
  fi

  # Without the tooling directory there is nothing to run; never execute
  # ./generate_page.sh relative to some unrelated working directory.
  pushd /opt/dstat_graph || return
  /usr/bin/env bash -e ./generate_page.sh /openstack/log/instance-info/dstat.csv >> /openstack/log/instance-info/dstat.html || rc=$?
  popd
  return "${rc}"
}
# Print a heading of the form "- [ <text> ] -" on its own line, preceded by an
# empty line and padded on the right with what is left of LINE.
#
# Globals read:    LINE
# Globals changed: PROC_NAME
# Arguments:       the words making up the heading text
function print_info {
  PROC_NAME="- [ $@ ] -"
  # Whatever part of the rule extends beyond the heading's own length.
  local filler="${LINE:${#PROC_NAME}}"
  printf "\n%s%s\n" "$PROC_NAME" "$filler"
}
# Print a banner: the print_info heading for the given words, framed by a
# LINE rule above and below.
#
# Globals read: LINE
# Arguments:    forwarded unchanged to print_info
function info_block {
  printf '%s\n' "${LINE}"
  print_info "$@"
  printf '%s\n' "${LINE}"
}
# Install the tools needed to inspect the host, record the instance
# information and optionally run the log collection script.
#
# Globals read: DISTRO_ID (set by determine_distro), GATE_EXIT_RUN_ARA,
#               GATE_LOG_DIR, HOME
# Arguments:    $1 - "true" to also run log-collect.sh (default: false)
# Note:         command tracing (set -x) is switched ON when this returns.
function log_instance_info {
  local script_dir

  # ensure packages are installed to get instance info
  determine_distro
  case "${DISTRO_ID}" in
    ubuntu|debian)
      apt-get update
      DEBIAN_FRONTEND=noninteractive apt-get -y install iproute2 net-tools
      ;;
    centos|rhel)
      dnf -y install iproute
      ;;
  esac
  set +x

  # Get host information post initial setup and reset verbosity
  if [[ ! -d "/openstack/log/instance-info" ]]; then
    mkdir -p "/openstack/log/instance-info"
  fi
  get_instance_info

  # Run log collection when needed
  if [[ "${1:-false}" == "true" ]]; then
    # log-collect.sh lives next to this library. Every expansion is quoted so
    # that a checkout path containing whitespace still resolves correctly.
    script_dir="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
    RUN_ARA="${GATE_EXIT_RUN_ARA}" WORKING_DIR="${GATE_LOG_DIR:-${HOME:-/opt}/osa-logs}" \
      bash -e "${script_dir}/log-collect.sh"
  fi
  set -x
}
# Print every package repository definition found on the host (apt, yum and
# zypper locations). Each existing regular file is written to stdout as an
# empty line, the file name, then the file's content.
function get_repos_info {
  local repo_file
  for repo_file in /etc/apt/sources.list /etc/apt/sources.list.d/* /etc/yum.conf /etc/yum.repos.d/* /etc/zypp/repos.d/*; do
    # Unmatched globs stay as literal patterns; the -f test filters them out.
    if [[ -f "${repo_file}" ]]; then
      printf '\n%s\n' "${repo_file}"
      # Quoted so that file names containing whitespace are read as one path.
      cat -- "${repo_file}"
    fi
  done
}
# Get instance info
#
# Write a snapshot of host diagnostics (DNS, routing, containers, firewall,
# repositories, packages, storage, processes, sockets, ...) into
# /openstack/log/instance-info. Every probe is best effort: a missing tool is
# skipped and a failing command never aborts the collection.
#
# Globals changed: TS (timestamp suffix shared by all files of one call)
# Globals read:    DISTRO_ID (set by determine_distro)
function get_instance_info {
  local info_dir="/openstack/log/instance-info"
  local unit dir_name

  TS="$(date +"%H-%M-%S")"

  # DNS configuration; the resolved statistics are only added when
  # systemd-resolve is available.
  (cat /etc/resolv.conf && \
    command -v systemd-resolve &> /dev/null && \
    systemd-resolve --statistics && \
    cat /etc/systemd/resolved.conf) > \
    "${info_dir}/host_dns_info_${TS}.log" || true

  # Network path towards a public resolver. The first attempt limits the hop
  # count; if that invocation fails it is retried without the limit.
  if command -v tracepath > /dev/null 2>&1; then
    { tracepath "8.8.8.8" -m 5 2>/dev/null || tracepath "8.8.8.8"; } > \
      "${info_dir}/host_tracepath_info_${TS}.log" || true
  fi
  if command -v tracepath6 > /dev/null 2>&1; then
    { tracepath6 "2001:4860:4860::8888" -m 5 2>/dev/null || tracepath6 "2001:4860:4860::8888"; } >> \
      "${info_dir}/host_tracepath_info_${TS}.log" || true
  fi

  # Containers
  if command -v lxc-ls > /dev/null 2>&1; then
    lxc-ls --fancy > \
      "${info_dir}/host_lxc_container_info_${TS}.log" || true
  fi
  if command -v lxc-checkconfig > /dev/null 2>&1; then
    lxc-checkconfig > \
      "${info_dir}/host_lxc_config_info_${TS}.log" || true
  fi
  if command -v machinectl > /dev/null 2>&1; then
    machinectl list > \
      "${info_dir}/host_nspawn_container_info_${TS}.log" || true
    machinectl list-images > \
      "${info_dir}/host_nspawn_container_image_info_${TS}.log" || true
  fi

  # systemd-networkd
  if command -v networkctl > /dev/null 2>&1; then
    networkctl list > \
      "${info_dir}/host_networkd_list_${TS}.log" || true
    networkctl status >> \
      "${info_dir}/host_networkd_status_${TS}.log" || true
    networkctl lldp >> \
      "${info_dir}/host_networkd_lldp_${TS}.log" || true
  fi

  # Firewall
  if command -v iptables > /dev/null 2>&1; then
    (iptables -vnL && iptables -t nat -vnL && iptables -t mangle -vnL) > \
      "${info_dir}/host_firewall_info_${TS}.log" || true
  fi

  # Ansible facts of the local host
  if command -v ansible > /dev/null 2>&1; then
    ANSIBLE_HOST_KEY_CHECKING=False \
      ansible -i "localhost," localhost -m setup > \
      "${info_dir}/host_system_info_${TS}.log" || true
  fi

  get_repos_info > \
    "${info_dir}/host_repo_info_${TS}.log" || true

  # nspawn units. "is-active --quiet" prints nothing and reports through its
  # exit status only, so that status is tested directly; testing the captured
  # (always empty) output would never enter this branch.
  for unit in nspawn-macvlan.service nspawn-networking.slice nspawn.slice; do
    if systemctl is-active --quiet "${unit}" 2>/dev/null; then
      systemctl status "${unit}" > "${info_dir}/${unit}_${TS}.log" || true
      journalctl -u "${unit}" >> "${info_dir}/${unit}_${TS}.log" || true
    fi
  done

  ip route get 1 > "${info_dir}/routes_${TS}.log" || true
  ip link show > "${info_dir}/links_${TS}.log" || true

  # Installed packages
  determine_distro
  case "${DISTRO_ID}" in
    centos|rhel|fedora|opensuse)
      rpm -qa | sort > \
        "${info_dir}/host_packages_info_${TS}.log" || true
      ;;
    ubuntu|debian)
      dpkg-query --list > \
        "${info_dir}/host_packages_info_${TS}.log" || true
      ;;
  esac

  # Storage reports
  if command -v btrfs > /dev/null 2>&1; then
    for dir_name in lxc machines; do
      btrfs filesystem usage "/var/lib/${dir_name}" 2>/dev/null > \
        "${info_dir}/btrfs_${dir_name}_usage_${TS}.log" || true
      btrfs filesystem show "/var/lib/${dir_name}" 2>/dev/null > \
        "${info_dir}/btrfs_${dir_name}_show_${TS}.log" || true
      btrfs filesystem df "/var/lib/${dir_name}" 2>/dev/null > \
        "${info_dir}/btrfs_${dir_name}_df_${TS}.log" || true
      btrfs qgroup show --human-readable -pcre --iec "/var/lib/${dir_name}" 2>/dev/null > \
        "${info_dir}/btrfs_${dir_name}_quotas_${TS}.log" || true
    done
  fi
  if command -v zfs > /dev/null 2>&1; then
    zfs list > "${info_dir}/zfs_lxc_${TS}.log" || true
  fi

  df -h > "${info_dir}/report_fs_df_${TS}.log" || true
  lsmod > "${info_dir}/lsmod_${TS}.log" || true
  free -m > "${info_dir}/free_${TS}.log" || true
  cat /proc/cpuinfo > "${info_dir}/cpuinfo_${TS}.log" || true
  ps -eo user,pid,ppid,lwp,%cpu,%mem,size,rss,cmd > "${info_dir}/ps_${TS}.log" || true

  # Check if system has netstat or iproute2
  if command -v ss >/dev/null; then
    ss -tulpn > "${info_dir}/ss_${TS}.log" || true
  fi
  if command -v netstat >/dev/null; then
    netstat -tulpn > "${info_dir}/netstat_${TS}.log" || true
  fi
}
## Signal traps --------------------------------------------------------------
# Trap all Death Signals and Errors
# The trap commands are single-quoted on purpose: ${LINENO} and $? must be
# expanded when the trap fires so that they describe the failing command. In
# double quotes they are expanded once, when the trap is installed, and would
# always report this line and the status current at that moment.
trap 'exit_fail ${LINENO} $? "Received STOP Signal"' SIGHUP SIGINT SIGTERM
trap 'exit_fail ${LINENO} $?' ERR
## Pre-flight check ----------------------------------------------------------
# Make sure only root can run our script
if [[ "$(id -u)" != "0" ]]; then
  info_block "This script must be run as root"
  exit_state 1
fi
# Check that we are in the root path of the cloned repo
# NOTE(review): the check only trips when ALL of etc/, scripts/ and playbooks/
# are missing from the working directory; a directory holding just one of
# them passes. Confirm whether "any missing" was intended.
if [[ ! -d "etc" && ! -d "scripts" && ! -d "playbooks" ]]; then
  info_block "** ERROR **"
  printf '%s\n' "Please execute this script from the root directory of the cloned source code."
  printf '%s\n\n' "Example: /opt/openstack-ansible/"
  exit_state 1
fi
## Exports -------------------------------------------------------------------
# Put the well known system paths in front of whatever PATH already holds
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH}"
export PATH
# HOME is always pointed at root's home directory, whether or not it was set
HOME="/root"
export HOME
# Pull in the openstack-ansible runtime configuration when it is installed
if [[ -f "/usr/local/bin/openstack-ansible.rc" ]]; then
  . "/usr/local/bin/openstack-ansible.rc"
fi