Merge "Report Tool: Log Event Reporter"
This commit is contained in:
commit
f1169f47f7
62
tools/collector/report/README
Normal file
62
tools/collector/report/README
Normal file
@ -0,0 +1,62 @@
|
||||
Refer to report.py file header for a description of the tool
|
||||
|
||||
Example:
|
||||
|
||||
Consider the following collect bundle structure
|
||||
|
||||
SELECT_NODES_20220527.193605
|
||||
├── controller-0_20220527.193605
|
||||
│ ├── etc
|
||||
│ ├── root
|
||||
│ └── var
|
||||
├── controller-1_20220527.193605
|
||||
│ ├── etc
|
||||
│ ├── root
|
||||
│ └── var
|
||||
├── plugins (where the plugin files will be placed)
|
||||
│ ├── alarm_plugin_example
|
||||
│ └── substring_plugin_example
|
||||
├── report
|
||||
└── tool (where the tool will be placed)
|
||||
└── output (where the output files will be placed)
|
||||
|
||||
|
||||
> cat plugins/alarm_plugin_example
|
||||
|
||||
algorithm=alarm
|
||||
alarm_ids=400.,401.
|
||||
entity_ids = host=controller-0
|
||||
|
||||
> cat plugins/substring_plugin_example
|
||||
|
||||
algorithm=substring
|
||||
files=var/log/mtcAgent.log
|
||||
hosts=controllers
|
||||
substring=operation failed
|
||||
|
||||
> report/tool/report.py --start 20220501 --end 20220530
|
||||
|
||||
Running the command above will populate the report folder with output files.
|
||||
The tool also provides default values, more details are in 'report.py -h'.
|
||||
|
||||
The substring algorithm creates an output file for every host of the
|
||||
specified host type. The files will contain log events within the
|
||||
provided date range containing the substring 'operation failed'.
|
||||
|
||||
The alarm algorithm creates two output files: 'log' and 'alarm'
|
||||
'log' contains customer log messages created within the provided date range,
|
||||
and 'alarm' contains system alarms created within the provided date range.
|
||||
|
||||
For more detailed information about an algorithm use 'report.py <algorithm> -h'.
|
||||
|
||||
Here is the report directory after running the above command
|
||||
|
||||
report
|
||||
├── output
|
||||
│   └── 20220815.140008 (time in UTC when the tool was run)
|
||||
│ ├── alarm
|
||||
│ ├── controller-0_substring_plugin_example_substring
|
||||
│ ├── controller-1_substring_plugin_example_substring
|
||||
│ ├── report.log (log file for report tool)
|
||||
│ └── log
|
||||
└── tool (where the report tool is)
|
16
tools/collector/report/algorithms.py
Normal file
16
tools/collector/report/algorithms.py
Normal file
@ -0,0 +1,16 @@
|
||||
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################

# Algorithm string constants
#
# These names are the shared vocabulary between plugin files (the value of
# the 'algorithm=' label) and the execution engine's dispatch logic.
ALARM = "alarm"
AUDIT = "audit"
PROCESS_FAILURE = "process_failure"
PUPPET = "puppet"
SUBSTRING = "substring"
SWACT = "swact"
SYSTEM_INFO = "system_info"
|
545
tools/collector/report/execution_engine.py
Executable file
545
tools/collector/report/execution_engine.py
Executable file
@ -0,0 +1,545 @@
|
||||
########################################################################
|
||||
#
|
||||
# Copyright (c) 2022 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
########################################################################
|
||||
#
|
||||
# This file contains the ExecutionEngine class.
|
||||
# The ExecutionEngine class contains all the available algorithms.
|
||||
#
|
||||
# The ExecutionEngine class runs plugins and gathers relevant logs and
|
||||
# information, creating output files in the report directory.
|
||||
#
|
||||
########################################################################
|
||||
|
||||
import shutil
|
||||
import algorithms
|
||||
import gzip
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExecutionEngine:
    def __init__(self, opts):
        """Constructor for the ExecutionEngine class

        Scans the collect bundle directory, classifying each host folder by
        subfunction (controller / worker / storage) and locating the active
        controller (the host folder with a non-empty var/extra/database).

        Parameters:
            opts (dictionary): Options from command line

        Raises:
            ValueError: if no folder contains a non-empty database directory
        """
        self.opts = opts
        # hostname -> host folder path, keyed by host type
        self.hosts = {"controllers": {}, "workers": {}, "storages": {}}
        self.active_controller_directory = None

        for folder in (f.path for f in os.scandir(self.opts.directory)):
            database_path = os.path.join(folder, "var", "extra", "database")
            host_info_path = os.path.join(folder, "var", "extra", "host.info")

            # a non-empty database directory marks the active controller
            # NOTE(review): if several folders qualify, the last one scanned
            # wins — presumably bundles contain only one; confirm
            if os.path.isdir(database_path) and os.listdir(database_path):
                self.active_controller_directory = folder

            if os.path.exists(host_info_path):
                hostname, subfunction = self._extract_subfunction(host_info_path)
                # substring match: an AIO "controller,worker" host is filed
                # under controllers (first branch taken)
                if "controller" in subfunction:
                    self.hosts["controllers"][hostname] = folder
                elif "worker" in subfunction:
                    self.hosts["workers"][hostname] = folder
                elif "storage" in subfunction:
                    self.hosts["storages"][hostname] = folder

        if not self.active_controller_directory:
            raise ValueError("Active controller not found")
|
||||
|
||||
    def execute(self, plugins, output_directory):
        """Run a list of plugins

        Dispatches each plugin to the algorithm named in its state, writing
        one or more result files into output_directory.

        Parameters:
            plugins (Plugin list)     : List of plugins to run
            output_directory (string) : directory where output files are written

        Errors:
            FileNotFoundError
        """

        for plugin in plugins:
            logger.info(f"Processing plugin: {os.path.basename(plugin.file)}")
            hosts = {}
            if (
                plugin.state["hosts"] and len(plugin.state["hosts"]) >= 1
            ):  # if host list is given
                # expand host-type names into concrete hostname -> folder map
                for h in plugin.state["hosts"]:
                    if h == "all":
                        hosts.update(self.hosts["workers"])
                        hosts.update(self.hosts["storages"])
                        hosts.update(self.hosts["controllers"])
                    else:
                        hosts.update(self.hosts[h])

                # per-host algorithms: one output file per host
                for hostname, folderpath in hosts.items():

                    events = []
                    if plugin.state["algorithm"] == algorithms.SUBSTRING:
                        try:
                            events = self.substring(
                                plugin.state["substring"],
                                [
                                    os.path.join(folderpath, file)
                                    for file in plugin.state["files"]
                                ],
                            )
                        except FileNotFoundError as e:
                            # skip hosts missing the requested log file
                            logger.error(e)
                            continue

                    # creating output file
                    output_file = os.path.join(
                        output_directory,
                        f"{hostname}_{os.path.basename(plugin.file)}_{plugin.state['algorithm']}",
                    )
                    logger.info("output at " + output_file)
                    with open(output_file, "w") as file:
                        file.write(
                            f"Date range: {self.opts.start} until {self.opts.end}\n"
                        )
                        file.write(
                            f"substrings: {' '.join(plugin.state['substring'])}\n"
                        )
                        for line in events:
                            file.write(line + "\n")
            else:
                # system-wide algorithms: no hosts label on the plugin
                if plugin.state["algorithm"] == algorithms.SYSTEM_INFO:
                    info = self.system_info()
                    system_info_output = os.path.join(output_directory, "system_info")
                    with open(system_info_output, "w") as file:
                        for i in info:
                            file.write(i + "\n")

                        # append hostname inventory per host type
                        for k, v in self.hosts.items():
                            file.write(f"{k}: {','.join(v.keys())}\n")
                    logger.info("output at " + system_info_output)

                elif plugin.state["algorithm"] == algorithms.AUDIT:
                    # audit runs against every known host's dcmanager log
                    hosts = {}
                    hosts.update(self.hosts["workers"])
                    hosts.update(self.hosts["storages"])
                    hosts.update(self.hosts["controllers"])

                    for hostname, folderpath in hosts.items():
                        self._create_output_file(
                            f"{hostname}_audit",
                            output_directory,
                            self.audit(
                                plugin.state["start"],
                                plugin.state["end"],
                                os.path.join(
                                    folderpath, "var", "log", "dcmanager", "audit.log"
                                ),
                            ),
                        )

                elif plugin.state["algorithm"] == algorithms.SWACT:
                    self._create_output_file(
                        "swact_activity", output_directory, self.swact()
                    )

                elif plugin.state["algorithm"] == algorithms.PUPPET:
                    self._create_output_file(
                        "puppet_errors", output_directory, self.puppet()
                    )

                elif plugin.state["algorithm"] == algorithms.PROCESS_FAILURE:
                    self._create_output_file(
                        "process_failures", output_directory, self.process_failure()
                    )

                elif plugin.state["algorithm"] == algorithms.ALARM:
                    alarms, logs = self.alarm(
                        plugin.state["alarm_ids"], plugin.state["entity_ids"]
                    )
                    alarm_output = os.path.join(output_directory, "alarm")
                    log_output = os.path.join(output_directory, "log")
                    os.makedirs(os.path.dirname(log_output), exist_ok=True)

                    # creating output alarm file
                    # first a summary (key + count), then the dated detail
                    with open(alarm_output, "w") as file:
                        for k, v in alarms.items():
                            file.write(f"{k} {v['count']}\n")
                        file.write("\n")
                        for k, v in alarms.items():
                            file.write(f"{k}\n")
                            for date in v["dates"]:
                                file.write(f"   {date}\n")

                    # creating output log file
                    with open(log_output, "w") as file:
                        for k, v in logs.items():
                            file.write(f"{k} {v['count']}\n")
                        file.write("\n")
                        for k, v in logs.items():
                            file.write(f"{k}\n")
                            for date in v["dates"]:
                                file.write(f"   {date}\n")
                    logger.info("output at " + alarm_output)
                    logger.info("output at " + log_output)
|
||||
|
||||
# Built-in algorithms ------------------------------
|
||||
def alarm(self, alarm_ids=[], entity_ids=[]):
|
||||
"""Alarm algorithm
|
||||
Gathers list of alarms and customer logs
|
||||
|
||||
Parameters:
|
||||
alarm_ids (string list) : List of alarm id patterns to search for
|
||||
entity_ids (string list): List of entity id patterns to search for
|
||||
"""
|
||||
alarm_data = {}
|
||||
log_data = {}
|
||||
with open(
|
||||
os.path.join(
|
||||
self.active_controller_directory,
|
||||
"var",
|
||||
"extra",
|
||||
"database",
|
||||
"fm.db.sql.txt",
|
||||
)
|
||||
) as file:
|
||||
start = False
|
||||
for line in file:
|
||||
# start of event log
|
||||
if "COPY event_log" in line:
|
||||
start = True
|
||||
elif start and line == "\\.\n":
|
||||
break
|
||||
elif start:
|
||||
entry = re.split(r"\t", line)
|
||||
|
||||
INDEX_ALARM_ID = 5
|
||||
INDEX_ACTION = 6
|
||||
INDEX_ENTITY_ID = 8
|
||||
INDEX_ALARM_DATE = 9
|
||||
INDEX_SEVERITY = 10
|
||||
|
||||
alarm_id = entry[INDEX_ALARM_ID]
|
||||
entity_id = entry[INDEX_ENTITY_ID]
|
||||
action = entry[INDEX_ACTION]
|
||||
severity = entry[INDEX_SEVERITY]
|
||||
alarm_date = entry[INDEX_ALARM_DATE]
|
||||
|
||||
entry_date = alarm_date.replace(
|
||||
" ", "T"
|
||||
) # making time format of alarm the same
|
||||
if self.opts.start <= entry_date and entry_date <= self.opts.end:
|
||||
# if the alarm is not in the user specified list of alarm or entity ids
|
||||
for id in alarm_ids:
|
||||
if id in alarm_id:
|
||||
break
|
||||
else:
|
||||
if len(alarm_ids) > 0:
|
||||
continue
|
||||
|
||||
for entity in entity_ids:
|
||||
if entity in entity_id:
|
||||
break
|
||||
else:
|
||||
if len(entity_ids) > 0:
|
||||
continue
|
||||
|
||||
try:
|
||||
if action == "log":
|
||||
log_info = log_data[
|
||||
f"{alarm_id} {entity_id} {severity}"
|
||||
]
|
||||
log_info["count"] += 1
|
||||
log_info["dates"].append(alarm_date)
|
||||
else:
|
||||
alarm_info = alarm_data[
|
||||
f"{alarm_id} {entity_id} {severity}"
|
||||
]
|
||||
alarm_info["count"] += 1
|
||||
alarm_info["dates"].append(f"{alarm_date} {action}")
|
||||
except KeyError:
|
||||
if entry[6] != "log":
|
||||
alarm_data[f"{alarm_id} {entity_id} {severity}"] = {
|
||||
"count": 1,
|
||||
"dates": [f"{alarm_date} {action}"],
|
||||
}
|
||||
else:
|
||||
log_data[f"{alarm_id} {entity_id} {severity}"] = {
|
||||
"count": 1,
|
||||
"dates": [alarm_date],
|
||||
}
|
||||
|
||||
for _, v in alarm_data.items():
|
||||
v["dates"] = sorted(v["dates"])
|
||||
|
||||
for _, v in log_data.items():
|
||||
v["dates"] = sorted(v["dates"])
|
||||
|
||||
return alarm_data, log_data
|
||||
|
||||
def substring(self, substr, files):
|
||||
"""Substring algorithm
|
||||
Looks for substrings within files
|
||||
|
||||
Parameters:
|
||||
substr (string list): List of substrings to look for
|
||||
files (string list): List of absolute filepaths to search in
|
||||
|
||||
Errors:
|
||||
FileNotFoundError
|
||||
"""
|
||||
CONTINUE_CURRENT = 0 # don't analyze older files, continue with current file
|
||||
CONTINUE_CURRENT_OLD = 1 # analyze older files, continue with current file
|
||||
|
||||
data = []
|
||||
for file in files:
|
||||
if not os.path.exists(file):
|
||||
raise FileNotFoundError(f"File not found: {file}")
|
||||
cont = True
|
||||
# Searching through file
|
||||
command = f"""grep -Ea "{'|'.join(s for s in substr)}" {file}"""
|
||||
status = self._continue(file)
|
||||
|
||||
if (
|
||||
status == CONTINUE_CURRENT or status == CONTINUE_CURRENT_OLD
|
||||
): # continue with current file
|
||||
if status == CONTINUE_CURRENT:
|
||||
cont = False
|
||||
self._evaluate_substring(data, command)
|
||||
|
||||
# Searching through rotated log files
|
||||
n = 1
|
||||
while os.path.exists(f"{file}.{n}.gz") and cont:
|
||||
command = f"""zgrep -E "{'|'.join(s for s in substr)}" {file}.{n}.gz"""
|
||||
status = self._continue(f"{file}.{n}.gz", compressed=True)
|
||||
|
||||
if status == CONTINUE_CURRENT or status == CONTINUE_CURRENT_OLD:
|
||||
if status == CONTINUE_CURRENT:
|
||||
cont = False
|
||||
self._evaluate_substring(data, command)
|
||||
|
||||
n += 1
|
||||
|
||||
return sorted(data)
|
||||
|
||||
def system_info(self):
|
||||
"""System info algorithm
|
||||
Presents basic information about the system
|
||||
"""
|
||||
data = []
|
||||
with open(
|
||||
os.path.join(
|
||||
self.active_controller_directory, "etc", "platform", "platform.conf"
|
||||
)
|
||||
) as file:
|
||||
for line in file:
|
||||
if "system_mode" in line:
|
||||
data.append(
|
||||
f"System Mode: {re.match('^system_mode=(.*)', line).group(1)}"
|
||||
)
|
||||
elif "system_type" in line:
|
||||
data.append(
|
||||
f"System Type: {re.match('^system_type=(.*)', line).group(1)}"
|
||||
)
|
||||
elif "distributed_cloud_role" in line:
|
||||
data.append(
|
||||
f"Distributed cloud role: {re.match('^distributed_cloud_role=(.*)', line).group(1)}"
|
||||
)
|
||||
elif "sw_version" in line:
|
||||
data.append(
|
||||
f"SW Version: {re.match('^sw_version=(.*)', line).group(1)}"
|
||||
)
|
||||
with open(
|
||||
os.path.join(self.active_controller_directory, "etc", "build.info")
|
||||
) as file:
|
||||
for line in file:
|
||||
if "BUILD_TYPE" in line:
|
||||
data.append(
|
||||
f"Build Type: {re.match('^BUILD_TYPE=(.*)', line).group(1)}"
|
||||
)
|
||||
elif re.match("^OS=(.*)", line):
|
||||
data.append(f"OS: {re.match('^OS=(.*)', line).group(1)}")
|
||||
|
||||
return data
|
||||
|
||||
    def swact(self):
        """Swact activity algorithm
        Presents all swacting activity in the system

        Pairs "Swact has started," / "Swact update" lines from each
        controller's sm.log to compute the swact duration, then appends
        all 'swact' mentions from sm-customer.log.

        Returns:
            Sorted list of swact-related log lines; completed swacts carry
            an appended " SWACT TOOK <duration>" annotation
        """
        data = []
        sm_files = []
        sm_customer_files = []
        swact_start = None
        swact_in_progress = False
        swact_end = None

        for _, folder in self.hosts["controllers"].items():
            sm_path = os.path.join(folder, "var", "log", "sm.log")
            sm_files.append(sm_path)

        sm_substrings = ["Swact has started,", "Swact update"]
        data = self.substring(sm_substrings, sm_files)

        # pair up start/update lines in timestamp order; lines are mutated
        # in place to carry the computed duration
        for i, line in enumerate(data):
            if "Swact has started," in line and not swact_in_progress:
                swact_in_progress = True
                # line[0:19] is the ISO timestamp prefix of the log line
                swact_start = datetime.strptime(line[0:19], "%Y-%m-%dT%H:%M:%S")
            elif "Swact update" in line and swact_in_progress:
                swact_in_progress = False
                swact_end = datetime.strptime(line[0:19], "%Y-%m-%dT%H:%M:%S")
                line += f" SWACT TOOK {swact_end - swact_start} \n"
                data[i] = line

        for _, folder in self.hosts["controllers"].items():
            sm_customer_path = os.path.join(folder, "var", "log", "sm-customer.log")
            sm_customer_files.append(sm_customer_path)

        sm_customer_substrings = ["swact"]
        data += self.substring(sm_customer_substrings, sm_customer_files)

        return sorted(data)
|
||||
|
||||
def puppet(self):
|
||||
"""Puppet error algorithm
|
||||
Presents log errors from puppet logs
|
||||
"""
|
||||
data = []
|
||||
for _, folder in self.hosts["controllers"].items():
|
||||
puppet_folder = os.path.join(folder, "var", "log", "puppet")
|
||||
command = f"grep -rh 'Error:' {puppet_folder}"
|
||||
self._evaluate_substring(data, command)
|
||||
return sorted(data)
|
||||
|
||||
def process_failure(self):
|
||||
"""Process failure algorithm
|
||||
Presents log errors from pmond
|
||||
"""
|
||||
data = []
|
||||
files = []
|
||||
for host_type in self.hosts.keys():
|
||||
for _, folder in self.hosts[host_type].items():
|
||||
pmond = os.path.join(folder, "var", "log", "pmond.log")
|
||||
files.append(pmond)
|
||||
data = self.substring(["Error :"], files)
|
||||
return data
|
||||
|
||||
    def audit(self, start, end, audit_log_path):
        """Counts audit events in dcmanager within a specified date range

        Uses the external 'lnav' program to run SQL-style counts over the
        dcmanager audit log, reporting a per-hour rate and total for each
        audit trigger type.

        Parameters:
            start (string) : start date in YYYY-MM-DD HH:MM:SS format
            end (string) : end date in YYYY-MM-DD HH:MM:SS format
            audit_log_path (string) : absolute path of audit log file

        Raises:
            ValueError: if the lnav program is not installed
        """
        if not shutil.which("lnav"):
            raise ValueError("Lnav program not found")

        SECONDS_PER_HOUR = 3600
        fmt = "%Y-%m-%d %H:%M:%S"

        # duration of the requested window, used for the per-hour rate
        d1 = datetime.strptime(start, fmt)
        d2 = datetime.strptime(end, fmt)
        seconds = (d2 - d1).total_seconds()

        # '%' is the SQL LIKE wildcard consumed by lnav below
        log_texts = [
            "Triggered subcloud audit%",
            "Trigger patch audit%",
            "Trigger load audit%",
            "Triggered firmware audit%",
            "Triggered kubernetes audit%",
            # Counts sum of audits from all subclouds
        ]
        # e.g. "Triggered subcloud audit%" -> column alias "subcloud_total"
        INDEX_MIDDLE_WORD = 1
        data = ["These rates and totals represent the sum of audits from all subclouds"]

        def command(text):
            # build the lnav invocation counting matching log bodies in range

            return (
                f'lnav -R -n -c ";SELECT count(log_body) AS {text.split(" ")[INDEX_MIDDLE_WORD]}_total'
                f' from openstack_log WHERE (log_time > \\"{start}\\" AND not log_time > \\"{end}\\")'
                f' AND log_body like \\"{text}\\"" "{audit_log_path}"'
            )

        for text in log_texts:
            p = subprocess.Popen(command(text), shell=True, stdout=subprocess.PIPE)
            for line in p.stdout:
                line = line.decode("utf-8").strip()
                if line.isnumeric():
                    # numeric output is the count; convert to an hourly rate
                    data.append(
                        f"rate {round((int(line)/seconds * SECONDS_PER_HOUR), 3)} per hour. total: {line}"
                    )
                else:
                    # pass through lnav's non-numeric output (e.g. headers)
                    data.append(line)
        return data
|
||||
|
||||
# -----------------------------------
|
||||
|
||||
def _continue(self, file, compressed=False):
|
||||
CONTINUE_CURRENT = 0 # don't analyze older files, continue with current file
|
||||
CONTINUE_CURRENT_OLD = 1 # analyze older files, continue with current file
|
||||
CONTINUE_OLD = 2 # don't analyze current file, continue to older files
|
||||
|
||||
# check date of first log event and compare with provided start end dates
|
||||
first = ""
|
||||
|
||||
if not compressed:
|
||||
with open(file) as f:
|
||||
line = f.readline()
|
||||
first = line[0:19]
|
||||
else:
|
||||
with gzip.open(file, "rb") as f:
|
||||
line = f.readline().decode("utf-8")
|
||||
first = line[0:19]
|
||||
try:
|
||||
datetime.strptime(line[0:19], "%Y-%m-%dT%H:%M:%S")
|
||||
first = line[0:19]
|
||||
except ValueError:
|
||||
return CONTINUE_CURRENT_OLD
|
||||
|
||||
if first < self.opts.start:
|
||||
return CONTINUE_CURRENT
|
||||
elif first < self.opts.end and first > self.opts.start:
|
||||
return CONTINUE_CURRENT_OLD
|
||||
elif first > self.opts.end:
|
||||
return CONTINUE_OLD
|
||||
|
||||
def _evaluate_substring(self, data, command):
|
||||
p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
|
||||
for line in p.stdout:
|
||||
line = line.decode("utf-8")
|
||||
dates = [line[0:19], line[2:21]] # different date locations for log events
|
||||
for date in dates:
|
||||
try:
|
||||
datetime.strptime(date, "%Y-%m-%dT%H:%M:%S")
|
||||
if date > self.opts.start and date < self.opts.end:
|
||||
if line[0] == "|": # sm-customer.log edge case
|
||||
line = line.replace("|", "").strip()
|
||||
line = re.sub("\s+", " ", line)
|
||||
data.append(line)
|
||||
break
|
||||
except ValueError:
|
||||
if date == dates[-1]:
|
||||
data.append(line)
|
||||
|
||||
def _extract_subfunction(self, host_info_path):
|
||||
GROUP_ONE = 1
|
||||
with open(host_info_path) as file:
|
||||
for line in file:
|
||||
hostname_match = re.match("^hostname => (.+)", line)
|
||||
subfunction_match = re.match("^subfunction => (.+)", line)
|
||||
if subfunction_match:
|
||||
subfunction = subfunction_match.group(GROUP_ONE)
|
||||
if hostname_match:
|
||||
hostname = hostname_match.group(GROUP_ONE)
|
||||
return hostname, subfunction
|
||||
|
||||
def _create_output_file(self, filename, directory, events):
|
||||
with open(os.path.join(directory, filename), "w") as file:
|
||||
for i in events:
|
||||
file.write(i + "\n")
|
||||
logger.info("output at " + os.path.join(directory, filename))
|
189
tools/collector/report/plugin.py
Executable file
189
tools/collector/report/plugin.py
Executable file
@ -0,0 +1,189 @@
|
||||
########################################################################
|
||||
#
|
||||
# Copyright (c) 2022 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
########################################################################
|
||||
#
|
||||
# This file contains the Plugin class.
|
||||
# The Plugin class contains all the labels and information of a plugin.
|
||||
#
|
||||
# Plugins contain labels to instruct the execution engine what to search
|
||||
# for and where to search.
|
||||
#
|
||||
########################################################################
|
||||
|
||||
|
||||
from datetime import datetime
|
||||
import algorithms
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Plugin:
    def __init__(self, file="", opts=None):
        """Constructor for the Plugin class

        Parameters:
            file (string)    : Absolute filepath of the plugin
            opts (dictionary): Options from command line when running algorithm

        Errors:
            KeyError / ValueError from parsing and verification propagate
            to the caller unchanged
        """
        self.file = file
        self.opts = opts
        # labels collected from the plugin file or from command line options
        self.state = {
            "algorithm": None,
            "files": [],
            "hosts": [],
            "substring": [],
            "alarm_ids": [],
            "entity_ids": [],
            "start": None,
            "end": None,
        }
        # fix: removed three 'except E as e: raise e' wrappers — they
        # re-raised the exception unchanged and only obscured the flow
        if file:
            self._file_set_attributes()
        elif opts:
            self._opts_set_attributes()

        self.verify()
|
||||
|
||||
def _file_set_attributes(self):
|
||||
"""Sets plugin attributes from plugin files"""
|
||||
with open(self.file) as f:
|
||||
for line in f:
|
||||
try:
|
||||
self.extract(line)
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
def _opts_set_attributes(self):
|
||||
"""Sets plugin attributes from command line options"""
|
||||
for k, v in self.opts.items():
|
||||
self.state[k] = v
|
||||
|
||||
def extract(self, line):
|
||||
"""Extracts and sets attributes for this plugin
|
||||
|
||||
Parameters:
|
||||
line (string): Line from plugin file to extract
|
||||
"""
|
||||
|
||||
# split string from first '=', left side is label right side is value
|
||||
data = line.strip().split("=", 1)
|
||||
if len(data) <= 1:
|
||||
raise ValueError("Value not specified for label")
|
||||
label = data[0]
|
||||
value = data[1]
|
||||
label = label.replace(" ", "")
|
||||
try:
|
||||
if label == "algorithm":
|
||||
self.state["algorithm"] = value.replace(" ", "")
|
||||
elif label == "substring":
|
||||
self.state["substring"].append(data[1])
|
||||
elif label == "hosts":
|
||||
self.state["hosts"] = value.replace(" ", "").split(",")
|
||||
elif label == "alarm_ids":
|
||||
self.state["alarm_ids"] = value.replace(" ", "").split(",")
|
||||
elif label == "entity_ids":
|
||||
self.state["entity_ids"] = value.replace(" ", "").split(",")
|
||||
elif label == "files":
|
||||
self.state["files"] = value.replace(" ", "").split(",")
|
||||
elif label == "start":
|
||||
self.state["start"] = value
|
||||
elif label == "end":
|
||||
self.state["end"] = value
|
||||
else:
|
||||
logger.warning("unknown label: %s", label)
|
||||
|
||||
except KeyError:
|
||||
logger.warning("unknown label: %s", label)
|
||||
|
||||
def verify(self):
|
||||
"""Verify if this plugin's attributes are viable
|
||||
|
||||
Errors:
|
||||
ValueError if a value is incorrectly set
|
||||
"""
|
||||
|
||||
plugin_name = os.path.basename(self.file)
|
||||
|
||||
if self.state["algorithm"] == algorithms.SUBSTRING:
|
||||
if len(self.state["files"]) == 0:
|
||||
raise ValueError(
|
||||
f"plugin: {plugin_name} needs files specified for substring algorithm"
|
||||
)
|
||||
if len(self.state["hosts"]) == 0:
|
||||
raise ValueError(
|
||||
f"plugin: {plugin_name} needs hosts specified for substring algorithm"
|
||||
)
|
||||
if len(self.state["substring"]) == 0:
|
||||
raise ValueError(
|
||||
f"plugin: {plugin_name} need substring specified for substring algorithm"
|
||||
)
|
||||
elif self.state["algorithm"] == algorithms.ALARM:
|
||||
if len(self.state["hosts"]) > 0:
|
||||
raise ValueError(
|
||||
f"plugin: {plugin_name} should not have hosts to be specified"
|
||||
)
|
||||
elif self.state["algorithm"] == algorithms.SYSTEM_INFO:
|
||||
if len(self.state["hosts"]) > 0:
|
||||
raise ValueError(
|
||||
f"plugin: {plugin_name} should not have hosts to be specified"
|
||||
)
|
||||
elif self.state["algorithm"] == algorithms.SWACT:
|
||||
if len(self.state["hosts"]) > 0:
|
||||
raise ValueError(
|
||||
f"plugin: {plugin_name} should not have hosts to be specified"
|
||||
)
|
||||
elif self.state["algorithm"] == algorithms.PUPPET:
|
||||
if len(self.state["hosts"]) > 0:
|
||||
raise ValueError(
|
||||
f"plugin: {plugin_name} should not have hosts to be specified"
|
||||
)
|
||||
elif self.state["algorithm"] == algorithms.PROCESS_FAILURE:
|
||||
if len(self.state["hosts"]) > 0:
|
||||
raise ValueError(
|
||||
f"plugin: {plugin_name} should not have hosts to be specified"
|
||||
)
|
||||
elif self.state["algorithm"] == algorithms.AUDIT:
|
||||
if len(self.state["hosts"]) > 0:
|
||||
raise ValueError(
|
||||
f"plugin: {plugin_name} should not have hosts to be specified"
|
||||
)
|
||||
|
||||
try:
|
||||
datetime.strptime(self.state["start"], "%Y-%m-%d %H:%M:%S")
|
||||
except:
|
||||
raise ValueError(
|
||||
f"plugin : {plugin_name} needs a start time in YYYY-MM-DD HH:MM:SS format"
|
||||
)
|
||||
|
||||
try:
|
||||
datetime.strptime(self.state["end"], "%Y-%m-%d %H:%M:%S")
|
||||
except:
|
||||
raise ValueError(
|
||||
f"plugin : {plugin_name} needs an end time in YYYY-MM-DD HH:MM:SS format"
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"plugin: {plugin_name} unknown algorithm {self.state['algorithm']}"
|
||||
)
|
||||
|
||||
for host in self.state["hosts"]:
|
||||
if host not in ["controllers", "workers", "storages", "all"]:
|
||||
raise ValueError(
|
||||
f"host not recognized: '{host}', accepted hosts are 'controllers', 'workers', 'storages', 'all'"
|
||||
)
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{json.dumps(self.state)} File: {self.file}"
|
257
tools/collector/report/report.py
Executable file
257
tools/collector/report/report.py
Executable file
@ -0,0 +1,257 @@
|
||||
#!/usr/bin/env python3
|
||||
########################################################################
|
||||
#
|
||||
# Copyright (c) 2022 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
########################################################################
|
||||
#
|
||||
# Description: The Report tool is used to gather relevant log events
|
||||
# and information about the system from a collect bundle.
|
||||
#
|
||||
# The report tool allows user created plugins which decides relevance
|
||||
# for log events. Plugins contain an algorithm label which instructs the
|
||||
# tool what information to search and how to search for it.
|
||||
#
|
||||
# The report tool requires the collect bundle and host tarballs to be
|
||||
# untarred.
|
||||
#
|
||||
# The report tool reads user plugins from a plugins directory in the
|
||||
# top level of the collect bundle, and outputs files containing
|
||||
# relevant logs to a report directory in the top level as well.
|
||||
#
|
||||
# Typical Usage:
|
||||
# command line functionality
|
||||
# ------------------------------- ----------------------------------
|
||||
# > report.py - Run all plugins in directory
|
||||
# > report.py [plugin ...] - Run only specified plugins
|
||||
# > report.py <algorithm> [labels] - Run algorithm with labels
|
||||
# > report.py --help - help message
|
||||
# > report.py <algorithm> --help - algorithm specific help
|
||||
#
|
||||
# See --help output for a complete list of full and abbreviated
|
||||
# command line options and examples of plugins.
|
||||
#
|
||||
# Refer to README file for more usage and output examples
|
||||
#######################################################################
|
||||
|
||||
import argparse
import logging
import os
import sys
import time

from cmath import log  # NOTE(review): unused import, looks accidental — candidate for removal
from datetime import datetime
from datetime import timezone

from execution_engine import ExecutionEngine
from plugin import Plugin
|
||||
|
||||
|
||||
# Timestamp used both for the default --end date and the output directory name.
now = datetime.now(timezone.utc)
base_dir = os.path.realpath(__file__)
# Default collect-bundle location: two directory levels above this file.
default_path = os.path.join(os.path.dirname(base_dir), "..", "..")
plugins = []

# Main command-line parser.
# RawDescriptionHelpFormatter preserves the explicit newline in the epilog;
# the default formatter would reflow it into a single paragraph.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="Log Event Reporter",
    epilog="Place plugins in 'plugins' directory at top level of collect bundle. Output files will be placed in 'report' directory."
    "\nThis tool will create a report.log file along with other output files",
)
parser.add_argument(
    "-s",
    "--start",
    default="20000101",
    help="Specify a start date in YYYYMMDD format for analysis (default:20000101)",
)
parser.add_argument(
    "-e",
    "--end",
    default=datetime.strftime(now, "%Y%m%d"),
    help="Specify an end date in YYYYMMDD format for analysis (default: current date)",
)
parser.add_argument(
    "-p",
    "--plugin",
    default=None,
    nargs="*",
    help="Specify what plugins to run (default: runs every plugin in plugins folder)",
)
parser.add_argument(
    "-d",
    "--directory",
    default=default_path,
    help="Specify top level of collect bundle to analyze (default: two levels above current location)",
)

# One optional subcommand per algorithm; 'dest' records which one was used
# so the main flow can build an ad-hoc plugin from the parsed options.
subparsers = parser.add_subparsers(help="algorithms", dest="algorithm")

# substring algorithm arguments
parser_substring = subparsers.add_parser(
    "substring",
    formatter_class=argparse.RawTextHelpFormatter,
    help="""Searches through specified files for lines containing specified substring.
    There will be an output file for each host of the host type specified.""",
    epilog="Plugin file example:\n"
    "  algorithm=substring\n"
    "  files=mtcAgent.log, sm.log\n"
    "  hosts=controllers, workers\n"
    "  substring=Swact in progress\n"
    "  substring=Swact update",
)
substring_required = parser_substring.add_argument_group("required arguments")
substring_required.add_argument(
    "--files",
    required=True,
    nargs="+",
    help="Files to perform substring analysis on (required)",
)
substring_required.add_argument(
    "--substring", nargs="+", required=True, help="Substrings to search for (required)"
)
substring_required.add_argument(
    "--hosts",
    choices=["controllers", "workers", "storages", "all"],
    required=True,
    nargs="+",
    help="Host types to perform analysis on (required)",
)


# alarm algorithm arguments
parser_alarm = subparsers.add_parser(
    "alarm",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Searches through fm.db.sql.txt for alarms and logs. There are 2 output files: 'alarm', and 'log'",
    epilog="Plugin file example:\n"
    "  algorithm=alarm\n"
    "  alarm_ids=400.005,200.004\n"
    "  entity_ids= host=controller-0,host=controller-1\n",
)
parser_alarm.add_argument(
    "--alarm_ids",
    nargs="+",
    required=False,
    default=[],
    help="Alarm id patterns to search for (not required)",
)
parser_alarm.add_argument(
    "--entity_ids",
    nargs="+",
    required=False,
    default=[],
    help="Entity id patterns to search for (not required)",
)

# system info algorithm
parser_system_info = subparsers.add_parser(
    "system_info",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Presents information about the system",
    epilog="Plugin file example:\n" "  algorithm=system_info\n",
)

# swact activity algorithm
parser_swact = subparsers.add_parser(
    "swact",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Presents system swacting activity",
    epilog="Plugin file example:\n" "  algorithm=swact\n",
)

# puppet errors algorithm
parser_puppet = subparsers.add_parser(
    "puppet",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Presents any puppet errors",
    epilog="Plugin file example:\n" "  algorithm=puppet\n",
)

# process failure algorithm
parser_process_failure = subparsers.add_parser(
    "process_failure",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Presents any process failures from pmond.log",
    epilog="Plugin file example:\n" "  algorithm=process_failure\n",
)

# audit algorithm
parser_audit = subparsers.add_parser(
    "audit",
    formatter_class=argparse.RawTextHelpFormatter,
    help="Presents information about audit events in dcmanager.\n"
    "The rates and totals represents the sum of audits on all subclouds ",
    epilog="Plugin file example:\n"
    "  algorithm=audit\n"
    "  start=2022-06-01 10:00:00\n"
    "  end=2022-06-01 04:00:00\n",
)
parser_audit_required = parser_audit.add_argument_group("required arguments")
# NOTE(review): these re-use the top-level --start/--end option names but,
# per the epilog above, take a 'YYYY-MM-DD HH:MM:SS' timestamp rather than
# a YYYYMMDD date — the main flow must not blindly reformat them.
parser_audit_required.add_argument("--start", required=True)
parser_audit_required.add_argument(
    "--end",
    required=True,
)
|
||||
|
||||
|
||||
args = parser.parse_args()

# Normalize the top-level date range into the ISO form the execution engine
# consumes.  The 'audit' subcommand re-uses the --start/--end option names
# with its own 'YYYY-MM-DD HH:MM:SS' format (see its epilog), which would
# make an unconditional YYYYMMDD parse raise ValueError on every audit run;
# values that are not plain YYYYMMDD dates are therefore passed through
# unchanged for the algorithm to consume as-is.
for _attr in ("start", "end"):
    _value = getattr(args, _attr)
    try:
        setattr(
            args,
            _attr,
            datetime.strptime(_value, "%Y%m%d").strftime("%Y-%m-%dT%H:%M:%S"),
        )
    except ValueError:
        pass  # audit-style timestamp; leave untouched

# Timestamped output directory under <bundle>/report/output/.
output_directory = os.path.join(
    args.directory, "report", "output", now.strftime("%Y%m%d.%H%M%S")
)

# creating report log
os.makedirs(output_directory)
open(os.path.join(output_directory, "report.log"), "w").close()

# setting up logger: file handler via basicConfig, timestamps in UTC
formatter = logging.Formatter("%(message)s")
logger = logging.getLogger()

logging.basicConfig(
    filename=os.path.join(output_directory, "report.log"),
    level=logging.INFO,
    format="%(asctime)s %(levelname)s: %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S",
)
logging.Formatter.converter = time.gmtime

# echo log records to the console as well, without the timestamp prefix
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)

try:
    engine = ExecutionEngine(args)
except ValueError as e:
    # Nothing below can run without an engine; the original code fell
    # through here and crashed later with a NameError on 'engine'.
    logger.error(str(e))
    sys.exit(1)

if args.algorithm:
    # An algorithm subcommand was given: build a single ad-hoc plugin
    # from the command-line options instead of reading plugin files.
    plugins.append(Plugin(opts=vars(args)))
elif args.plugin:
    # Run only the plugins the user named in the bundle's plugins dir.
    for p in args.plugin:
        path = os.path.join(args.directory, "plugins", p)
        if os.path.exists(path):
            try:
                plugins.append(Plugin(path))
            except Exception as e:
                logger.error(str(e))
        else:
            logger.warning(f"{p} plugin does not exist")
else:
    # Default: run every plugin file found in the plugins directory,
    # creating the directory (and warning) if it is missing.
    path = os.path.join(args.directory, "plugins")
    if not os.path.exists(path):
        os.mkdir(path)
        logger.error("Plugins folder is empty")
    else:
        for file in os.listdir(path):
            try:
                plugins.append(Plugin(os.path.join(path, file)))
            except Exception as e:
                logger.error(str(e))

engine.execute(plugins, output_directory)
|
Loading…
x
Reference in New Issue
Block a user