Report Tool: Log Event Reporter
The Report tool helps with the debugging process by aggregating relevant log
events and information from a collect bundle and presenting them in output
files. The tool allows users to create plugin files that specify which log
events/information to gather. Users also have the option of running the
information gathering algorithms directly from the command line without
creating a plugin file. The tool features context sensitive help messages
for every algorithm.

Tests:

PASS: Verify substring algorithm is working
PASS: Verify alarm algorithm is working
PASS: Verify system info algorithm is working
PASS: Verify swact activity algorithm is working
PASS: Verify puppet log error algorithm is working
PASS: Verify process failure algorithm is working
PASS: Verify the tool works on different collect bundles
PASS: Verify context sensitive help is working
PASS: Verify running algorithms in command line is working
PASS: Verify plugin file verification is working
PASS: Verify plugins do not take too long to run
PASS: Verify report tool logging logs proper info and errors

Story: 2010166
Task: 45841

Signed-off-by: Yang Lu <yang.lu@windriver.com>
Change-Id: I6f2439e5268a10bf5c70712c8863a6893c1a16b9
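As an illustration of the command-line mode, an algorithm can be run directly
without a plugin file (a sketch using the substring subparser's required
--files/--hosts/--substring flags defined in report.py below):

> report.py substring --files var/log/mtcAgent.log --hosts controllers --substring "operation failed"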
parent c85d0c80da
commit f7f3c59410
tools/collector/report/README (new file, 62 lines)
@@ -0,0 +1,62 @@
Refer to report.py file header for a description of the tool

Example:

Consider the following collect bundle structure

SELECT_NODES_20220527.193605
├── controller-0_20220527.193605
│   ├── etc
│   ├── root
│   └── var
├── controller-1_20220527.193605
│   ├── etc
│   ├── root
│   └── var
├── plugins (where the plugin files will be placed)
│   ├── alarm_plugin_example
│   └── substring_plugin_example
└── report
    ├── tool (where the tool will be placed)
    └── output (where the output files will be placed)

> cat plugins/alarm_plugin_example

algorithm=alarm
alarm_ids=400.,401.
entity_ids = host=controller-0

> cat plugins/substring_plugin_example

algorithm=substring
files=var/log/mtcAgent.log
hosts=controllers
substring=operation failed

> report/tool/report.py --start 20220501 --end 20220530

Running the command above will populate the report folder with output files.
The tool also provides default values; more details are in 'report.py -h'.

The substring algorithm creates an output file for every host of the
specified host type. The files will contain log events within the
provided date range containing the substring 'operation failed'.

The alarm algorithm creates two output files: 'log' and 'alarm'.
'log' contains customer log messages created within the provided date range,
and 'alarm' contains system alarms created within the provided date range.

For more detailed information about an algorithm, use 'report.py <algorithm> -h'.

Here is the report directory after running the above command

report
├── output
│   └── 20220815.140008 (UTC time when the tool was run)
│       ├── alarm
│       ├── controller-0_substring_plugin_example_substring
│       ├── controller-1_substring_plugin_example_substring
│       ├── report.log (log file for report tool)
│       └── log
└── tool (where the report tool is)
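A single plugin can also be run on its own with the --plugin option (a
sketch, assuming the bundle layout above):

> report/tool/report.py --plugin substring_plugin_example --start 20220501 --end 20220530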
tools/collector/report/algorithms.py (new file, 16 lines)
@@ -0,0 +1,16 @@
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################

# Algorithm string constants
ALARM = "alarm"
AUDIT = "audit"
PROCESS_FAILURE = "process_failure"
PUPPET = "puppet"
SUBSTRING = "substring"
SWACT = "swact"
SYSTEM_INFO = "system_info"
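# These constants are matched against a plugin's 'algorithm' label; for
# example, the execution engine below dispatches on
# plugin.state["algorithm"] == algorithms.SUBSTRING.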
tools/collector/report/execution_engine.py (new executable file, 545 lines)
@@ -0,0 +1,545 @@
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# This file contains the ExecutionEngine class.
# The ExecutionEngine class contains all the available algorithms.
#
# The ExecutionEngine class runs plugins and gathers relevant logs and
# information, creating output files in the report directory.
#
########################################################################

import gzip
import logging
import os
import re
import shutil
import subprocess

from datetime import datetime

import algorithms

logger = logging.getLogger(__name__)

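# A minimal usage sketch (hypothetical paths; 'opts' is normally the argparse
# namespace built in report.py, which provides directory/start/end):
#
#     from types import SimpleNamespace
#     opts = SimpleNamespace(directory="/path/to/bundle",
#                            start="2022-05-01T00:00:00",
#                            end="2022-05-30T00:00:00")
#     engine = ExecutionEngine(opts)
#     engine.execute(plugins, output_directory)
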
class ExecutionEngine:
    def __init__(self, opts):
        """Constructor for the ExecutionEngine class

        Parameters:
            opts (dictionary): Options from command line
        """
        self.opts = opts
        self.hosts = {"controllers": {}, "workers": {}, "storages": {}}
        self.active_controller_directory = None

        for folder in (f.path for f in os.scandir(self.opts.directory)):
            database_path = os.path.join(folder, "var", "extra", "database")
            host_info_path = os.path.join(folder, "var", "extra", "host.info")

            if os.path.isdir(database_path) and os.listdir(database_path):
                self.active_controller_directory = folder

            if os.path.exists(host_info_path):
                hostname, subfunction = self._extract_subfunction(host_info_path)
                if "controller" in subfunction:
                    self.hosts["controllers"][hostname] = folder
                elif "worker" in subfunction:
                    self.hosts["workers"][hostname] = folder
                elif "storage" in subfunction:
                    self.hosts["storages"][hostname] = folder

        if not self.active_controller_directory:
            raise ValueError("Active controller not found")

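    # After construction, self.hosts maps host type -> {hostname: folder},
    # e.g. (hypothetical bundle):
    #   {"controllers": {"controller-0": ".../controller-0_20220527.193605"},
    #    "workers": {}, "storages": {}}
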
    def execute(self, plugins, output_directory):
        """Run a list of plugins

        Parameters:
            plugins (Plugin list): List of plugins to run

        Errors:
            FileNotFoundError
        """

        for plugin in plugins:
            logger.info(f"Processing plugin: {os.path.basename(plugin.file)}")
            hosts = {}
            if (
                plugin.state["hosts"] and len(plugin.state["hosts"]) >= 1
            ):  # if host list is given
                for h in plugin.state["hosts"]:
                    if h == "all":
                        hosts.update(self.hosts["workers"])
                        hosts.update(self.hosts["storages"])
                        hosts.update(self.hosts["controllers"])
                    else:
                        hosts.update(self.hosts[h])

                for hostname, folderpath in hosts.items():

                    events = []
                    if plugin.state["algorithm"] == algorithms.SUBSTRING:
                        try:
                            events = self.substring(
                                plugin.state["substring"],
                                [
                                    os.path.join(folderpath, file)
                                    for file in plugin.state["files"]
                                ],
                            )
                        except FileNotFoundError as e:
                            logger.error(e)
                            continue

                        # creating output file
                        output_file = os.path.join(
                            output_directory,
                            f"{hostname}_{os.path.basename(plugin.file)}_{plugin.state['algorithm']}",
                        )
                        logger.info("output at " + output_file)
                        with open(output_file, "w") as file:
                            file.write(
                                f"Date range: {self.opts.start} until {self.opts.end}\n"
                            )
                            file.write(
                                f"substrings: {' '.join(plugin.state['substring'])}\n"
                            )
                            for line in events:
                                file.write(line + "\n")
            else:
                if plugin.state["algorithm"] == algorithms.SYSTEM_INFO:
                    info = self.system_info()
                    system_info_output = os.path.join(output_directory, "system_info")
                    with open(system_info_output, "w") as file:
                        for i in info:
                            file.write(i + "\n")

                        for k, v in self.hosts.items():
                            file.write(f"{k}: {','.join(v.keys())}\n")
                    logger.info("output at " + system_info_output)

                elif plugin.state["algorithm"] == algorithms.AUDIT:
                    hosts = {}
                    hosts.update(self.hosts["workers"])
                    hosts.update(self.hosts["storages"])
                    hosts.update(self.hosts["controllers"])

                    for hostname, folderpath in hosts.items():
                        self._create_output_file(
                            f"{hostname}_audit",
                            output_directory,
                            self.audit(
                                plugin.state["start"],
                                plugin.state["end"],
                                os.path.join(
                                    folderpath, "var", "log", "dcmanager", "audit.log"
                                ),
                            ),
                        )

                elif plugin.state["algorithm"] == algorithms.SWACT:
                    self._create_output_file(
                        "swact_activity", output_directory, self.swact()
                    )

                elif plugin.state["algorithm"] == algorithms.PUPPET:
                    self._create_output_file(
                        "puppet_errors", output_directory, self.puppet()
                    )

                elif plugin.state["algorithm"] == algorithms.PROCESS_FAILURE:
                    self._create_output_file(
                        "process_failures", output_directory, self.process_failure()
                    )

                elif plugin.state["algorithm"] == algorithms.ALARM:
                    alarms, logs = self.alarm(
                        plugin.state["alarm_ids"], plugin.state["entity_ids"]
                    )
                    alarm_output = os.path.join(output_directory, "alarm")
                    log_output = os.path.join(output_directory, "log")
                    os.makedirs(os.path.dirname(log_output), exist_ok=True)

                    # creating output alarm file
                    with open(alarm_output, "w") as file:
                        for k, v in alarms.items():
                            file.write(f"{k} {v['count']}\n")
                        file.write("\n")
                        for k, v in alarms.items():
                            file.write(f"{k}\n")
                            for date in v["dates"]:
                                file.write(f"  {date}\n")

                    # creating output log file
                    with open(log_output, "w") as file:
                        for k, v in logs.items():
                            file.write(f"{k} {v['count']}\n")
                        file.write("\n")
                        for k, v in logs.items():
                            file.write(f"{k}\n")
                            for date in v["dates"]:
                                file.write(f"  {date}\n")
                    logger.info("output at " + alarm_output)
                    logger.info("output at " + log_output)

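    # For the substring algorithm, each per-host output file therefore begins
    # with two header lines, e.g. (sample values):
    #   Date range: 2022-05-01T00:00:00 until 2022-05-30T00:00:00
    #   substrings: operation failed
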
    # Built-in algorithms ------------------------------
    def alarm(self, alarm_ids=None, entity_ids=None):
        """Alarm algorithm
        Gathers list of alarms and customer logs

        Parameters:
            alarm_ids (string list)  : List of alarm id patterns to search for
            entity_ids (string list) : List of entity id patterns to search for
        """
        alarm_ids = alarm_ids or []
        entity_ids = entity_ids or []
        alarm_data = {}
        log_data = {}
        with open(
            os.path.join(
                self.active_controller_directory,
                "var",
                "extra",
                "database",
                "fm.db.sql.txt",
            )
        ) as file:
            start = False
            for line in file:
                # start of event log
                if "COPY event_log" in line:
                    start = True
                elif start and line == "\\.\n":
                    break
                elif start:
                    entry = re.split(r"\t", line)

                    INDEX_ALARM_ID = 5
                    INDEX_ACTION = 6
                    INDEX_ENTITY_ID = 8
                    INDEX_ALARM_DATE = 9
                    INDEX_SEVERITY = 10

                    alarm_id = entry[INDEX_ALARM_ID]
                    entity_id = entry[INDEX_ENTITY_ID]
                    action = entry[INDEX_ACTION]
                    severity = entry[INDEX_SEVERITY]
                    alarm_date = entry[INDEX_ALARM_DATE]

                    entry_date = alarm_date.replace(
                        " ", "T"
                    )  # making time format of alarm the same
                    if self.opts.start <= entry_date <= self.opts.end:
                        # skip entries not matching the user specified
                        # alarm or entity id patterns
                        for id in alarm_ids:
                            if id in alarm_id:
                                break
                        else:
                            if len(alarm_ids) > 0:
                                continue

                        for entity in entity_ids:
                            if entity in entity_id:
                                break
                        else:
                            if len(entity_ids) > 0:
                                continue

                        try:
                            if action == "log":
                                log_info = log_data[
                                    f"{alarm_id} {entity_id} {severity}"
                                ]
                                log_info["count"] += 1
                                log_info["dates"].append(alarm_date)
                            else:
                                alarm_info = alarm_data[
                                    f"{alarm_id} {entity_id} {severity}"
                                ]
                                alarm_info["count"] += 1
                                alarm_info["dates"].append(f"{alarm_date} {action}")
                        except KeyError:
                            if action != "log":
                                alarm_data[f"{alarm_id} {entity_id} {severity}"] = {
                                    "count": 1,
                                    "dates": [f"{alarm_date} {action}"],
                                }
                            else:
                                log_data[f"{alarm_id} {entity_id} {severity}"] = {
                                    "count": 1,
                                    "dates": [alarm_date],
                                }

        for _, v in alarm_data.items():
            v["dates"] = sorted(v["dates"])

        for _, v in log_data.items():
            v["dates"] = sorted(v["dates"])

        return alarm_data, log_data

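    # alarm() returns (alarm_data, log_data): dicts keyed by
    # "<alarm_id> <entity_id> <severity>" with values of the form
    # {"count": <int>, "dates": [<timestamps>]}, e.g. (hypothetical):
    #   {"400.005 host=controller-0 critical": {"count": 2, "dates": [...]}}
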
    def substring(self, substr, files):
        """Substring algorithm
        Looks for substrings within files

        Parameters:
            substr (string list): List of substrings to look for
            files  (string list): List of absolute filepaths to search in

        Errors:
            FileNotFoundError
        """
        CONTINUE_CURRENT = 0  # don't analyze older files, continue with current file
        CONTINUE_CURRENT_OLD = 1  # analyze older files, continue with current file

        data = []
        for file in files:
            if not os.path.exists(file):
                raise FileNotFoundError(f"File not found: {file}")
            cont = True
            # Searching through file
            command = f"""grep -Ea "{'|'.join(s for s in substr)}" {file}"""
            status = self._continue(file)

            if (
                status == CONTINUE_CURRENT or status == CONTINUE_CURRENT_OLD
            ):  # continue with current file
                if status == CONTINUE_CURRENT:
                    cont = False
                self._evaluate_substring(data, command)

            # Searching through rotated log files
            n = 1
            while os.path.exists(f"{file}.{n}.gz") and cont:
                command = f"""zgrep -E "{'|'.join(s for s in substr)}" {file}.{n}.gz"""
                status = self._continue(f"{file}.{n}.gz", compressed=True)

                if status == CONTINUE_CURRENT or status == CONTINUE_CURRENT_OLD:
                    if status == CONTINUE_CURRENT:
                        cont = False
                    self._evaluate_substring(data, command)

                n += 1

        return sorted(data)

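    # For substr=["operation failed"] and file="/x/var/log/mtcAgent.log", the
    # constructed search command is:
    #   grep -Ea "operation failed" /x/var/log/mtcAgent.log
    # with zgrep -E used for the rotated .gz files.
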
    def system_info(self):
        """System info algorithm
        Presents basic information about the system
        """
        data = []
        with open(
            os.path.join(
                self.active_controller_directory, "etc", "platform", "platform.conf"
            )
        ) as file:
            for line in file:
                if "system_mode" in line:
                    data.append(
                        f"System Mode: {re.match('^system_mode=(.*)', line).group(1)}"
                    )
                elif "system_type" in line:
                    data.append(
                        f"System Type: {re.match('^system_type=(.*)', line).group(1)}"
                    )
                elif "distributed_cloud_role" in line:
                    data.append(
                        f"Distributed cloud role: {re.match('^distributed_cloud_role=(.*)', line).group(1)}"
                    )
                elif "sw_version" in line:
                    data.append(
                        f"SW Version: {re.match('^sw_version=(.*)', line).group(1)}"
                    )
        with open(
            os.path.join(self.active_controller_directory, "etc", "build.info")
        ) as file:
            for line in file:
                if "BUILD_TYPE" in line:
                    data.append(
                        f"Build Type: {re.match('^BUILD_TYPE=(.*)', line).group(1)}"
                    )
                elif re.match("^OS=(.*)", line):
                    data.append(f"OS: {re.match('^OS=(.*)', line).group(1)}")

        return data

    def swact(self):
        """Swact activity algorithm
        Presents all swacting activity in the system
        """
        data = []
        sm_files = []
        sm_customer_files = []
        swact_start = None
        swact_in_progress = False
        swact_end = None

        for _, folder in self.hosts["controllers"].items():
            sm_path = os.path.join(folder, "var", "log", "sm.log")
            sm_files.append(sm_path)

        sm_substrings = ["Swact has started,", "Swact update"]
        data = self.substring(sm_substrings, sm_files)

        for i, line in enumerate(data):
            if "Swact has started," in line and not swact_in_progress:
                swact_in_progress = True
                swact_start = datetime.strptime(line[0:19], "%Y-%m-%dT%H:%M:%S")
            elif "Swact update" in line and swact_in_progress:
                swact_in_progress = False
                swact_end = datetime.strptime(line[0:19], "%Y-%m-%dT%H:%M:%S")
                line += f" SWACT TOOK {swact_end - swact_start} \n"
                data[i] = line

        for _, folder in self.hosts["controllers"].items():
            sm_customer_path = os.path.join(folder, "var", "log", "sm-customer.log")
            sm_customer_files.append(sm_customer_path)

        sm_customer_substrings = ["swact"]
        data += self.substring(sm_customer_substrings, sm_customer_files)

        return sorted(data)

    def puppet(self):
        """Puppet error algorithm
        Presents log errors from puppet logs
        """
        data = []
        for _, folder in self.hosts["controllers"].items():
            puppet_folder = os.path.join(folder, "var", "log", "puppet")
            command = f"grep -rh 'Error:' {puppet_folder}"
            self._evaluate_substring(data, command)
        return sorted(data)

    def process_failure(self):
        """Process failure algorithm
        Presents log errors from pmond
        """
        files = []
        for host_type in self.hosts.keys():
            for _, folder in self.hosts[host_type].items():
                pmond = os.path.join(folder, "var", "log", "pmond.log")
                files.append(pmond)
        data = self.substring(["Error :"], files)
        return data

    def audit(self, start, end, audit_log_path):
        """Counts audit events in dcmanager within a specified date range

        Parameters:
            start (string)          : start date in YYYY-MM-DD HH:MM:SS format
            end (string)            : end date in YYYY-MM-DD HH:MM:SS format
            audit_log_path (string) : absolute path of audit log file
        """
        if not shutil.which("lnav"):
            raise ValueError("Lnav program not found")

        SECONDS_PER_HOUR = 3600
        fmt = "%Y-%m-%d %H:%M:%S"

        d1 = datetime.strptime(start, fmt)
        d2 = datetime.strptime(end, fmt)
        seconds = (d2 - d1).total_seconds()

        log_texts = [
            "Triggered subcloud audit%",
            "Trigger patch audit%",
            "Trigger load audit%",
            "Triggered firmware audit%",
            "Triggered kubernetes audit%",
            # Counts sum of audits from all subclouds
        ]
        INDEX_MIDDLE_WORD = 1
        data = ["These rates and totals represent the sum of audits from all subclouds"]

        def command(text):
            return (
                f'lnav -R -n -c ";SELECT count(log_body) AS {text.split(" ")[INDEX_MIDDLE_WORD]}_total'
                f' from openstack_log WHERE (log_time > \\"{start}\\" AND not log_time > \\"{end}\\")'
                f' AND log_body like \\"{text}\\"" "{audit_log_path}"'
            )

        for text in log_texts:
            p = subprocess.Popen(command(text), shell=True, stdout=subprocess.PIPE)
            for line in p.stdout:
                line = line.decode("utf-8").strip()
                if line.isnumeric():
                    data.append(
                        f"rate {round((int(line)/seconds * SECONDS_PER_HOUR), 3)} per hour. total: {line}"
                    )
                else:
                    data.append(line)
        return data

    # -----------------------------------

    def _continue(self, file, compressed=False):
        CONTINUE_CURRENT = 0  # don't analyze older files, continue with current file
        CONTINUE_CURRENT_OLD = 1  # analyze older files, continue with current file
        CONTINUE_OLD = 2  # don't analyze current file, continue to older files

        # check date of first log event and compare with provided start/end dates
        first = ""

        if not compressed:
            with open(file) as f:
                line = f.readline()
                first = line[0:19]
        else:
            with gzip.open(file, "rb") as f:
                line = f.readline().decode("utf-8")
                first = line[0:19]
        try:
            datetime.strptime(first, "%Y-%m-%dT%H:%M:%S")
        except ValueError:
            # first line has no parsable date; search this file and older ones
            return CONTINUE_CURRENT_OLD

        if first < self.opts.start:
            return CONTINUE_CURRENT
        elif first <= self.opts.end:
            return CONTINUE_CURRENT_OLD
        else:
            return CONTINUE_OLD

    def _evaluate_substring(self, data, command):
        p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
        for line in p.stdout:
            line = line.decode("utf-8")
            dates = [line[0:19], line[2:21]]  # different date locations for log events
            for date in dates:
                try:
                    datetime.strptime(date, "%Y-%m-%dT%H:%M:%S")
                    if date > self.opts.start and date < self.opts.end:
                        if line[0] == "|":  # sm-customer.log edge case
                            line = line.replace("|", "").strip()
                            line = re.sub(r"\s+", " ", line)
                        data.append(line)
                    break
                except ValueError:
                    if date == dates[-1]:
                        data.append(line)

    def _extract_subfunction(self, host_info_path):
        GROUP_ONE = 1
        hostname = subfunction = None  # guard against missing fields
        with open(host_info_path) as file:
            for line in file:
                hostname_match = re.match("^hostname => (.+)", line)
                subfunction_match = re.match("^subfunction => (.+)", line)
                if subfunction_match:
                    subfunction = subfunction_match.group(GROUP_ONE)
                if hostname_match:
                    hostname = hostname_match.group(GROUP_ONE)
        return hostname, subfunction

    def _create_output_file(self, filename, directory, events):
        with open(os.path.join(directory, filename), "w") as file:
            for i in events:
                file.write(i + "\n")
        logger.info("output at " + os.path.join(directory, filename))
tools/collector/report/plugin.py (new executable file, 189 lines)
@@ -0,0 +1,189 @@
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# This file contains the Plugin class.
# The Plugin class contains all the labels and information of a plugin.
#
# Plugins contain labels to instruct the execution engine what to search
# for and where to search.
#
########################################################################

import json
import logging
import os

from datetime import datetime

import algorithms

logger = logging.getLogger(__name__)

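# A plugin file is a set of 'label=value' lines; for example, the substring
# plugin from the README above:
#
#     algorithm=substring
#     files=var/log/mtcAgent.log
#     hosts=controllers
#     substring=operation failed
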
class Plugin:
    def __init__(self, file="", opts=None):
        """Constructor for the Plugin class

        Parameters:
            file (string)    : Absolute filepath of the plugin
            opts (dictionary): Options from command line when running algorithm
        """
        self.file = file
        self.opts = opts
        self.state = {
            "algorithm": None,
            "files": [],
            "hosts": [],
            "substring": [],
            "alarm_ids": [],
            "entity_ids": [],
            "start": None,
            "end": None,
        }
        if file:
            self._file_set_attributes()
        elif opts:
            self._opts_set_attributes()

        self.verify()

    def _file_set_attributes(self):
        """Sets plugin attributes from plugin files"""
        with open(self.file) as f:
            for line in f:
                self.extract(line)

    def _opts_set_attributes(self):
        """Sets plugin attributes from command line options"""
        for k, v in self.opts.items():
            self.state[k] = v

    def extract(self, line):
        """Extracts and sets attributes for this plugin

        Parameters:
            line (string): Line from plugin file to extract
        """

        # split string on the first '=': left side is the label,
        # right side is the value
        data = line.strip().split("=", 1)
        if len(data) <= 1:
            raise ValueError("Value not specified for label")
        label = data[0].replace(" ", "")
        value = data[1]
        if label == "algorithm":
            self.state["algorithm"] = value.replace(" ", "")
        elif label == "substring":
            self.state["substring"].append(value)
        elif label == "hosts":
            self.state["hosts"] = value.replace(" ", "").split(",")
        elif label == "alarm_ids":
            self.state["alarm_ids"] = value.replace(" ", "").split(",")
        elif label == "entity_ids":
            self.state["entity_ids"] = value.replace(" ", "").split(",")
        elif label == "files":
            self.state["files"] = value.replace(" ", "").split(",")
        elif label == "start":
            self.state["start"] = value
        elif label == "end":
            self.state["end"] = value
        else:
            logger.warning("unknown label: %s", label)

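    # For example, extract("alarm_ids=400.005,200.004") sets
    # self.state["alarm_ids"] = ["400.005", "200.004"].
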
    def verify(self):
        """Verify if this plugin's attributes are viable

        Errors:
            ValueError if a value is incorrectly set
        """

        plugin_name = os.path.basename(self.file)

        if self.state["algorithm"] == algorithms.SUBSTRING:
            if len(self.state["files"]) == 0:
                raise ValueError(
                    f"plugin: {plugin_name} needs files specified for substring algorithm"
                )
            if len(self.state["hosts"]) == 0:
                raise ValueError(
                    f"plugin: {plugin_name} needs hosts specified for substring algorithm"
                )
            if len(self.state["substring"]) == 0:
                raise ValueError(
                    f"plugin: {plugin_name} needs substring specified for substring algorithm"
                )
        elif self.state["algorithm"] in [
            algorithms.ALARM,
            algorithms.SYSTEM_INFO,
            algorithms.SWACT,
            algorithms.PUPPET,
            algorithms.PROCESS_FAILURE,
        ]:
            if len(self.state["hosts"]) > 0:
                raise ValueError(
                    f"plugin: {plugin_name} should not have hosts specified"
                )
        elif self.state["algorithm"] == algorithms.AUDIT:
            if len(self.state["hosts"]) > 0:
                raise ValueError(
                    f"plugin: {plugin_name} should not have hosts specified"
                )

            try:
                datetime.strptime(self.state["start"], "%Y-%m-%d %H:%M:%S")
            except (TypeError, ValueError):
                raise ValueError(
                    f"plugin: {plugin_name} needs a start time in YYYY-MM-DD HH:MM:SS format"
                )

            try:
                datetime.strptime(self.state["end"], "%Y-%m-%d %H:%M:%S")
            except (TypeError, ValueError):
                raise ValueError(
                    f"plugin: {plugin_name} needs an end time in YYYY-MM-DD HH:MM:SS format"
                )
        else:
            raise ValueError(
                f"plugin: {plugin_name} unknown algorithm {self.state['algorithm']}"
            )

        for host in self.state["hosts"]:
            if host not in ["controllers", "workers", "storages", "all"]:
                raise ValueError(
                    f"host not recognized: '{host}', accepted hosts are "
                    "'controllers', 'workers', 'storages', 'all'"
                )

    def __str__(self) -> str:
        return f"{json.dumps(self.state)} File: {self.file}"
tools/collector/report/report.py (new executable file, 257 lines)
@@ -0,0 +1,257 @@
#!/usr/bin/env python3
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# Description: The Report tool is used to gather relevant log events
#              and information about the system from a collect bundle.
#
# The report tool allows user-created plugins which decide relevance
# for log events. Plugins contain an algorithm label which instructs the
# tool what information to search and how to search for it.
#
# The report tool requires the collect bundle and host tarballs to be
# untarred.
#
# The report tool reads user plugins from a plugins directory in the
# top level of the collect bundle, and outputs files containing
# relevant logs to a report directory in the top level as well.
#
# Typical Usage:
#  command line                      functionality
#  -------------------------------   ----------------------------------
# > report.py                        - Run all plugins in directory
# > report.py [plugin ...]           - Run only specified plugins
# > report.py <algorithm> [labels]   - Run algorithm with labels
# > report.py --help                 - help message
# > report.py <algorithm> --help     - algorithm specific help
#
# See --help output for a complete list of full and abbreviated
# command line options and examples of plugins.
#
# Refer to README file for more usage and output examples
#######################################################################

import argparse
import logging
import os
import time

from datetime import datetime
from datetime import timezone

from execution_engine import ExecutionEngine
from plugin import Plugin


now = datetime.now(timezone.utc)
base_dir = os.path.realpath(__file__)
default_path = os.path.join(os.path.dirname(base_dir), "..", "..")
plugins = []

parser = argparse.ArgumentParser(
    description="Log Event Reporter",
    epilog="Place plugins in 'plugins' directory at top level of collect bundle. "
    "Output files will be placed in 'report' directory."
    "\nThis tool will create a report.log file along with other output files",
)
parser.add_argument(
    "-s",
    "--start",
    default="20000101",
    help="Specify a start date in YYYYMMDD format for analysis (default: 20000101)",
)
parser.add_argument(
    "-e",
    "--end",
    default=datetime.strftime(now, "%Y%m%d"),
    help="Specify an end date in YYYYMMDD format for analysis (default: current date)",
)
parser.add_argument(
    "-p",
    "--plugin",
    default=None,
    nargs="*",
    help="Specify what plugins to run (default: runs every plugin in plugins folder)",
)
parser.add_argument(
    "-d",
    "--directory",
    default=default_path,
    help="Specify top level of collect bundle to analyze (default: two levels above current location)",
)
subparsers = parser.add_subparsers(help="algorithms", dest="algorithm")

# substring algorithm arguments
parser_substring = subparsers.add_parser(
    "substring",
    formatter_class=argparse.RawTextHelpFormatter,
    help="""Searches through specified files for lines containing specified substring.
There will be an output file for each host of the host type specified.""",
    epilog="Plugin file example:\n"
    "   algorithm=substring\n"
    "   files=mtcAgent.log, sm.log\n"
    "   hosts=controllers, workers\n"
    "   substring=Swact in progress\n"
    "   substring=Swact update",
)
substring_required = parser_substring.add_argument_group("required arguments")
substring_required.add_argument(
    "--files",
    required=True,
    nargs="+",
    help="Files to perform substring analysis on (required)",
)
substring_required.add_argument(
    "--substring", nargs="+", required=True, help="Substrings to search for (required)"
)
substring_required.add_argument(
    "--hosts",
    choices=["controllers", "workers", "storages", "all"],
    required=True,
    nargs="+",
    help="Host types to perform analysis on (required)",
)

# alarm algorithm arguments
parser_alarm = subparsers.add_parser(
    "alarm",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Searches through fm.db.sql.txt for alarms and logs. There are 2 output files: 'alarm', and 'log'",
    epilog="Plugin file example:\n"
    "   algorithm=alarm\n"
    "   alarm_ids=400.005,200.004\n"
    "   entity_ids= host=controller-0,host=controller-1\n",
)
parser_alarm.add_argument(
    "--alarm_ids",
    nargs="+",
    required=False,
    default=[],
    help="Alarm id patterns to search for (not required)",
)
parser_alarm.add_argument(
    "--entity_ids",
    nargs="+",
    required=False,
    default=[],
    help="Entity id patterns to search for (not required)",
)

# system info algorithm
parser_system_info = subparsers.add_parser(
    "system_info",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Presents information about the system",
    epilog="Plugin file example:\n   algorithm=system_info\n",
)

# swact activity algorithm
parser_swact = subparsers.add_parser(
    "swact",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Presents system swacting activity",
    epilog="Plugin file example:\n   algorithm=swact\n",
)

# puppet errors algorithm
parser_puppet = subparsers.add_parser(
    "puppet",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Presents any puppet errors",
    epilog="Plugin file example:\n   algorithm=puppet\n",
)

# process failure algorithm
parser_process_failure = subparsers.add_parser(
    "process_failure",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Presents any process failures from pmond.log",
    epilog="Plugin file example:\n   algorithm=process_failure\n",
)

# audit algorithm
parser_audit = subparsers.add_parser(
    "audit",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Presents information about audit events in dcmanager.\n"
    "The rates and totals represent the sum of audits on all subclouds",
    epilog="Plugin file example:\n"
    "   algorithm=audit\n"
    "   start=2022-06-01 04:00:00\n"
    "   end=2022-06-01 10:00:00\n",
)
parser_audit_required = parser_audit.add_argument_group("required arguments")
parser_audit_required.add_argument("--start", required=True)
parser_audit_required.add_argument(
    "--end",
    required=True,
)

args = parser.parse_args()
args.start = datetime.strptime(args.start, "%Y%m%d").strftime("%Y-%m-%dT%H:%M:%S")
args.end = datetime.strptime(args.end, "%Y%m%d").strftime("%Y-%m-%dT%H:%M:%S")

output_directory = os.path.join(
    args.directory, "report", "output", now.strftime("%Y%m%d.%H%M%S")
)

# creating report log
os.makedirs(output_directory)
open(os.path.join(output_directory, "report.log"), "w").close()

# setting up logger
formatter = logging.Formatter("%(message)s")
logger = logging.getLogger()

logging.basicConfig(
    filename=os.path.join(output_directory, "report.log"),
    level=logging.INFO,
    format="%(asctime)s %(levelname)s: %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S",
)
logging.Formatter.converter = time.gmtime

ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)

logger.addHandler(ch)

try:
    engine = ExecutionEngine(args)
except ValueError as e:
    logger.error(str(e))
    raise SystemExit(1)  # cannot continue without an active controller

if args.algorithm:
    plugins.append(Plugin(opts=vars(args)))
else:
    if args.plugin:
        for p in args.plugin:
            path = os.path.join(args.directory, "plugins", p)
            if os.path.exists(path):
                try:
                    plugins.append(Plugin(path))
                except Exception as e:
                    logger.error(str(e))
            else:
                logger.warning(f"{p} plugin does not exist")
    else:
        path = os.path.join(args.directory, "plugins")
        if not os.path.exists(path):
            os.mkdir(path)
            logger.error("Plugins folder is empty")
        else:
            for file in os.listdir(path):
                try:
                    plugins.append(Plugin(os.path.join(path, file)))
                except Exception as e:
                    logger.error(str(e))

engine.execute(plugins, output_directory)