Convert host and container journal collection to python
This takes only one pass over all the journal files to generate the output, rather than iterating over the whole journal once per service. The journal_dump script is run from the ansible-runtime venv and uses the yaml and systemd-python packages from the venv rather than system packages.

Change-Id: Ia7ccd5da55119273cf1fac26316ce37aa484d95a
commit dacc9eb13f (parent d6c8be2477)
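For context, the single-pass approach described above can be sketched in a few lines using the same systemd.journal.Reader API the new script relies on: read the journal once and route each matching entry to a per-unit output file, instead of invoking journalctl once per service. This is only an illustrative sketch, not part of the change; SERVICE_NAMES and OUT_DIR are made-up placeholders.

#!/usr/bin/python3
# Illustrative sketch only: demultiplex one pass over the journal into per-unit files.
import os
from systemd import journal

SERVICE_NAMES = ['nova', 'neutron']   # hypothetical substrings to match against unit names
OUT_DIR = '/tmp/journal-demux'        # hypothetical output directory

os.makedirs(OUT_DIR, exist_ok=True)
outputs = {}

for entry in journal.Reader():        # a single pass over the whole journal
    unit = entry.get('_SYSTEMD_UNIT')
    message = entry.get('MESSAGE')
    if not unit or message is None:
        continue
    if not any(name in unit for name in SERVICE_NAMES):
        continue
    path = os.path.join(OUT_DIR, unit + '.log')
    if path not in outputs:
        outputs[path] = open(path, 'w')
    outputs[path].write(unit + ' ' + str(message) + '\n')

for f in outputs.values():
    f.close()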
@@ -70,6 +70,7 @@ case ${DISTRO_ID} in
         dnf -y install \
             git curl autoconf gcc gcc-c++ nc \
             python3 python3-devel libselinux-python3 \
+            systemd-devel pkgconf \
             openssl-devel libffi-devel \
             python3-virtualenv rsync
         ;;
@@ -83,6 +84,7 @@ case ${DISTRO_ID} in
             git-core curl gcc netcat \
             python3 python3-dev \
             libssl-dev libffi-dev \
+            libsystemd-dev pkg-config \
             python3-apt virtualenv \
             python3-minimal
         ;;
scripts/journal_dump.py (new executable file, 168 lines)
@@ -0,0 +1,168 @@
#!/usr/bin/python3

import datetime
import multiprocessing
import os
import sys
import signal
import subprocess
import yaml
from systemd import journal
from collections import defaultdict


# ----------------------------------------------------------------------
# load ansible-role-requirements.yml
def get_ansible_role_names():
    with open(str(sys.argv[1]), 'r') as a_r_r_file:
        try:
            a_r_r = yaml.safe_load(a_r_r_file)
        except yaml.YAMLError as exc:
            print(exc)

    role_service_names = []
    role_prefix = "os_"
    for role in a_r_r:
        if role['name'].startswith(role_prefix):
            role_service_names.append(role['name'][len(role_prefix):])

    return role_service_names


# ----------------------------------------------------------------------
# get the list of containers and where their journals are
def get_container_journals():
    journals = []

    try:
        s = subprocess.run(['lxc-ls', '-1'], capture_output=True)
    except FileNotFoundError:
        return journals

    containers = s.stdout.decode('utf-8').splitlines()

    for container_name in containers:
        info = {}
        info['name'] = container_name
        info['subdir'] = "openstack"
        s = subprocess.run(['lxc-info', '--pid', '--no-humanize', container_name], capture_output=True)
        info['pid'] = s.stdout.decode('utf-8').strip()

        if(len(info['pid']) == 0):
            continue

        info['etc_dir'] = "/proc/" + str(info['pid']) + "/root/etc"

        with open(info['etc_dir'] + "/machine-id", 'r') as machine_id_file:
            machine_id = machine_id_file.read().strip()

        info['journal_dir'] = "/proc/" + str(info['pid']) + \
                              "/root/var/log/journal/" + machine_id
        journals.append(info)

    return journals


# ----------------------------------------------------------------------
def demux_one_journal(j):
    print("Gathering journals from " + j['name'])

    # open the journal from a specific directory, or use the host journal
    if 'journal_dir' in j:
        print(" Using journal dir " + j['journal_dir'])
        jreader = journal.Reader(path=j['journal_dir'])
    else:
        print(" Using host journal")
        jreader = journal.Reader()

    # the path to where we will save the journal for this host/container
    j_dir = working_dir + '/logs'
    if 'subdir' in j:
        j_dir = j_dir + '/' + j['subdir']
    d_dir = j_dir
    j_dir = j_dir + '/' + j['name']
    d_dir = d_dir + '/deprecations/' + j['name']

    if not os.path.isdir(j_dir):
        os.makedirs(j_dir)

    output_files = {}

    # for each journal entry, try to match it with the services we care about
    # and split each service out into its own list of journal entries
    for entry in jreader:
        if 'MESSAGE' not in entry:
            continue
        if '_SYSTEMD_UNIT' not in entry:
            continue

        unit = entry['_SYSTEMD_UNIT']
        if not next((s for s in service_names if s in unit), None):
            continue

        # write each matched service journal entry out
        s_name = '/' + unit + '.journal-' + timestamp + '.log'
        j_filename = j_dir + s_name
        message = str(entry['MESSAGE'])
        if j_filename not in output_files:
            output_files[j_filename] = open(j_filename, 'w')
        output_files[j_filename].write(unit + ' ' + message + '\n')

        if 'eprecat' not in message:
            continue

        d_filename = d_dir + s_name
        if not os.path.isdir(d_dir):
            os.makedirs(d_dir)
        with open(d_filename, 'a') as d:
            d.write(unit + ' ' + message + "\n")

    print(''.join([' Written ' + k + '\n' for k in output_files.keys()]))

    return True


# ----------------------------------------------------------------------
def init_signal():
    signal.signal(signal.SIGINT, signal.SIG_IGN)


# ----------------------------------------------------------------------
# always collect the host journal, first in the list as it's probably
# the largest
host_journal = [{}]
host_journal[0]['name'] = 'host'

journals = []
journals = journals + host_journal
journals = journals + get_container_journals()

print(journals)

# common log names are passed as the trailing arguments
if len(sys.argv) > 2:
    common_log_names = sys.argv[2::]
else:
    common_log_names = []

service_names = common_log_names + get_ansible_role_names()
print("Service names to search for " + str(service_names))

if os.getenv('WORKING_DIR') is not None:
    working_dir = os.getenv('WORKING_DIR')
else:
    working_dir = os.getcwd()

if os.getenv('TS') is not None:
    timestamp = os.getenv('TS')
else:
    timestamp = datetime.datetime.now().strftime('%H-%M-%S')

p = multiprocessing.Pool(multiprocessing.cpu_count(), init_signal)
journal_success = p.map(demux_one_journal, journals)
p.close()

success = all(i for i in journal_success)
if success:
    print("Journal collection Success!")
else:
    print("Error during journal collection")
@@ -118,32 +118,6 @@ function store_artifacts {
     fi
 }
 
-function store_journal_artifacts {
-    # Store lines from a known unit's journal as a plain-text log file.
-    # USAGE: store_journal_artifacts UNIT_TO_MATCH /path/to/store
-    if [ $? == 0 ]; then
-        if [[ ! -d "${2}" ]]; then
-            mkdir -vp "${2}"
-        fi
-        if [[ ${3:-false} != false ]]; then
-            if [[ -f "${3}/system.journal" ]]; then
-                SYSTEMD_UNITS=$(sudo journalctl --file="${3}/system.journal" -F _SYSTEMD_UNIT | grep "${service}")
-                for service_unit in $(echo -e "${SYSTEMD_UNITS}"); do
-                    echo "Pulling journal for ${service_unit}"
-                    sudo journalctl --file="${3}/system.journal" \
-                        --unit="${service_unit}" | sudo tee "${2}/${service_unit}.journal-${TS}.log" &>/dev/null
-                done
-            fi
-        else
-            SYSTEMD_UNITS=$(sudo journalctl --output=json-pretty -F _SYSTEMD_UNIT | grep "${service}")
-            for service_unit in $(echo -e "${SYSTEMD_UNITS}"); do
-                echo "Pulling journal for ${service_unit}"
-                sudo journalctl --unit="${service_unit}" | sudo tee "${2}/${service_unit}.journal-${TS}.log" &>/dev/null
-            done
-        fi
-    fi
-}
-
 function find_files {
     find "${WORKING_DIR}/logs/" -type f \
         ! -name "*.gz" \
@@ -190,11 +164,6 @@ store_artifacts /openstack/*repo*/repo/os-releases/*/*/*.txt "${WORKING_DIR}/rep
 # metal path
 store_artifacts /var/www/repo/os-releases/*/*/*.txt "${WORKING_DIR}/repo"
 
-# Verify the integrity of the journal files but do not fail if one of them is not usable
-echo "Verifying journal files consistency..."
-find /var/log/journal/ -type f -name "*.journal" -exec bash -c 'sudo journalctl --file={} --verify || true' \;
-
 # Gather host etc artifacts
 PIDS=()
 for service in ${COMMON_ETC_LOG_NAMES}; do
@@ -202,7 +171,6 @@ for service in ${COMMON_ETC_LOG_NAMES}; do
     store_artifacts "/etc/${service}" "${WORKING_DIR}/logs/etc/host/" &
     pid=$!
     PIDS[${pid}]=${pid}
-    store_journal_artifacts "${service}" "${WORKING_DIR}/logs/host" &
     pid=$!
     PIDS[${pid}]=${pid}
 done
@@ -217,9 +185,7 @@ if which lxc-ls &> /dev/null; then
     for CONTAINER_NAME in $(sudo lxc-ls -1); do
         CONTAINER_PID=$(sudo lxc-info -p -n ${CONTAINER_NAME} | awk '{print $2}')
         ETC_DIR="/proc/${CONTAINER_PID}/root/etc"
-        MACHINE_ID="$(sudo cat ${ETC_DIR}/machine-id)"
         LOG_DIR="/proc/${CONTAINER_PID}/root/var/log"
-        JOURNAL_DIR="/proc/${CONTAINER_PID}/root/run/log/journal/${MACHINE_ID}"
         repo_information ${CONTAINER_NAME}
         PIDS=()
         for service in ${COMMON_ETC_LOG_NAMES}; do
@@ -230,7 +196,6 @@ if which lxc-ls &> /dev/null; then
             store_artifacts ${LOG_DIR}/${service} "${WORKING_DIR}/logs/openstack/${CONTAINER_NAME}/" &
             pid=$!
             PIDS[${pid}]=${pid}
-            store_journal_artifacts ${service} "${WORKING_DIR}/logs/openstack/${CONTAINER_NAME}" "${JOURNAL_DIR}" &
             pid=$!
             PIDS[${pid}]=${pid}
         done
@@ -241,6 +206,9 @@ if which lxc-ls &> /dev/null; then
     done
 fi
 
+# gather host and container journals and deprecation warnings
+__dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+/opt/ansible-runtime/bin/python ${__dir}/journal_dump.py ${__dir}/../ansible-role-requirements.yml ${COMMON_ETC_LOG_NAMES}
 
 # Rename all files gathered to have a .txt suffix so that the compressed
 # files are viewable via a web browser in OpenStack-CI.
@@ -85,6 +85,11 @@ function build_ansible_runtime_venv {
     # Install our osa_toolkit code from the current checkout
     $PIP_COMMAND install -e .
 
+    # If we are in openstack-CI, install systemd-python for the log collection python script
+    if [[ -e /etc/ci/mirror_info.sh ]]; then
+        ${PIP_COMMAND} install --isolated ${PIP_OPTS} systemd-python
+    fi
+
     # Add SELinux support to the venv
     if [ -d "/usr/lib64/python3.6/site-packages/selinux/" ]; then
         rsync -avX /usr/lib64/python3.6/site-packages/selinux/ /opt/ansible-runtime/lib64/python3.6/site-packages/selinux/