Relocate hostdata-collectors to
stx-integ/tools/engtools/hostdata-collectors Move content from stx-utils into stx-integ or stx-update Packages will be relocated to stx-update: enable-dev-patch extras stx-integ: config-files/ io-scheduler filesystem/ filesystem-scripts grub/ grubby logging/ logmgmt tools/ collector monitor-tools tools/engtools/ hostdata-collectors parsers utilities/ build-info branding (formerly wrs-branding) platform-util Change-Id: I9796704d8ffc6590a971af9d41b626189e35ecc4 Story: 2002801 Task: 22687 Signed-off-by: Scott Little <scott.little@windriver.com>
This commit is contained in:
parent
ebc3e9efce
commit
02094afcfd
12
tools/engtools/hostdata-collectors/README
Normal file
12
tools/engtools/hostdata-collectors/README
Normal file
@ -0,0 +1,12 @@
|
||||
The Engineering tools is meant to be installed as a patch. Therefore, the RPM is generated as part
|
||||
of the build but is not included in the image. Assuming your development environment is fully set up,
|
||||
simply run patch-engtools.sh to generate the patch:
|
||||
|
||||
In this directory ($MY_REPO/addons/wr-cgcs/layers/cgcs/middleware/util/recipes-common/engtools/hostdata-collectors),
|
||||
enter the command:
|
||||
>./patch-engtools.sh
|
||||
|
||||
This generates ENGTOOLS-X.patch (X is Tis release version) which can be applied via sw-patch.
|
||||
|
||||
The patch is built with --all-nodes option by default. This can be changed to a combination of the following:
|
||||
--controller, --compute, --storage, --controller-compute, and --compute-lowlatency.
|
@ -0,0 +1,2 @@
|
||||
SRC_DIR="scripts"
|
||||
TIS_PATCH_VER=1
|
101
tools/engtools/hostdata-collectors/centos/collect-engtools.spec
Normal file
101
tools/engtools/hostdata-collectors/centos/collect-engtools.spec
Normal file
@ -0,0 +1,101 @@
|
||||
Summary: Host performance data collection tools package
|
||||
Name: engtools
|
||||
Version: 1.0
|
||||
Release: %{tis_patch_ver}%{?_tis_dist}
|
||||
License: Apache-2.0
|
||||
Group: Tools
|
||||
Packager: Wind River <info@windriver.com>
|
||||
URL: http://www.windriver.com/
|
||||
BuildArch: noarch
|
||||
Source: %{name}-%{version}.tar.gz
|
||||
|
||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
|
||||
|
||||
Requires: iperf3
|
||||
|
||||
%description
|
||||
This package contains data collection tools to monitor host performance.
|
||||
Tools are general purpose engineering and debugging related. Includes
|
||||
overall memory, cpu occupancy, per-task cpu, per-task scheduling, per-task
|
||||
io.
|
||||
|
||||
# Don't try fancy stuff like debuginfo, which is useless on binary-only
|
||||
# packages. Don't strip binary too
|
||||
# Be sure buildpolicy set to do nothing
|
||||
%define __spec_install_post %{nil}
|
||||
%define debug_package %{nil}
|
||||
%define __os_install_post %{_dbpath}/brp-compress
|
||||
%define _binaries_in_noarch_packages_terminate_build 0
|
||||
|
||||
%define local_dir /usr/local
|
||||
%define local_bindir %{local_dir}/bin/
|
||||
%define local_initdir /etc/init.d/
|
||||
%define local_confdir /etc/engtools/
|
||||
%define local_systemddir /etc/systemd/system/
|
||||
|
||||
%prep
|
||||
%setup -q
|
||||
|
||||
%build
|
||||
# Empty section.
|
||||
|
||||
%install
|
||||
mkdir -p %{buildroot}
|
||||
install -d 755 %{buildroot}%{local_bindir}
|
||||
# Installing additional tools, memtop, occtop and schedtop are already in the image
|
||||
install -m 755 buddyinfo.py %{buildroot}%{local_bindir}
|
||||
install -m 755 chewmem %{buildroot}%{local_bindir}
|
||||
# Installing data collection scripts
|
||||
install -m 755 ceph.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 cleanup-engtools.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 collect-engtools.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 diskstats.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 engtools_util.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 filestats.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 iostat.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 linux_benchmark.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 memstats.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 netstats.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 postgres.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 rabbitmq.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 remote/rbzip2-engtools.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 remote/rstart-engtools.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 remote/rstop-engtools.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 remote/rsync-engtools-data.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 slab.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 ticker.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 top.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 vswitch.sh %{buildroot}%{local_bindir}
|
||||
install -m 755 live_stream.py %{buildroot}%{local_bindir}
|
||||
# Installing conf file
|
||||
install -d 755 %{buildroot}%{local_confdir}
|
||||
install -m 644 -p -D cfg/engtools.conf %{buildroot}%{local_confdir}
|
||||
# Installing init script
|
||||
install -d 755 %{buildroot}%{local_initdir}
|
||||
install -m 755 init.d/collect-engtools.sh %{buildroot}%{local_initdir}
|
||||
# Installing service file
|
||||
install -d 755 %{buildroot}%{local_systemddir}
|
||||
install -m 644 -p -D collect-engtools.service %{buildroot}%{local_systemddir}
|
||||
|
||||
%clean
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
|
||||
%files
|
||||
%license LICENSE
|
||||
%defattr(-,root,root,-)
|
||||
%{local_bindir}/*
|
||||
%{local_confdir}/*
|
||||
%{local_initdir}/*
|
||||
%{local_systemddir}/*
|
||||
|
||||
%post
|
||||
/bin/systemctl enable collect-engtools.service > /dev/null 2>&1
|
||||
/bin/systemctl start collect-engtools.service > /dev/null 2>&1
|
||||
|
||||
%preun
|
||||
#/bin/systemctl --no-reload disable collect-engtools.sh.service > /dev/null 2>&1
|
||||
#/bin/systemctl stop collect-engtools.sh.service > /dev/null 2>&1
|
||||
%systemd_preun collect-engtools.service
|
||||
|
||||
%postun
|
||||
%systemd_postun_with_restart collect-engtools.service
|
33
tools/engtools/hostdata-collectors/patch-engtools.sh
Executable file
33
tools/engtools/hostdata-collectors/patch-engtools.sh
Executable file
@ -0,0 +1,33 @@
|
||||
#!/bin/bash
|
||||
# Designer patches:
|
||||
# http://twiki.wrs.com/PBUeng/Patching
|
||||
|
||||
if [ -z $MY_WORKSPACE ] || [ -z $MY_REPO ]; then
|
||||
echo "Some dev environment variables are not set."
|
||||
echo "Refer to http://wiki.wrs.com/PBUeng/CentOSBuildProcess for instructions."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ENGTOOLS=$(ls ${MY_WORKSPACE}/std/rpmbuild/RPMS/engtools*noarch.rpm 2>/dev/null)
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Engtools RPM has not been built. Please run \"build-pkgs engtools\" first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
source ${MY_REPO}/addons/wr-cgcs/layers/cgcs/middleware/recipes-common/build-info/release-info.inc
|
||||
#TiS_REL="16.10"
|
||||
#PATCH_ID="ENGTOOLS-${TiS_REL}"
|
||||
PATCH_ID="ENGTOOLS-${PLATFORM_RELEASE}"
|
||||
|
||||
PWD=$(pwd)
|
||||
|
||||
# Create CGCS Patch
|
||||
cd ${MY_WORKSPACE}
|
||||
PATCH_BUILD=${MY_REPO}/addons/wr-cgcs/layers/cgcs/extras.ND/scripts/patch_build.sh
|
||||
${PATCH_BUILD} --id ${PATCH_ID} --reboot-required=N \
|
||||
--summary "System engineering data collection and analysis tools." \
|
||||
--desc "System engineering data collection and analysis tools." \
|
||||
--all-nodes ${ENGTOOLS} \
|
||||
--warn "Intended for system engineering use only."
|
||||
cd ${PWD}
|
||||
exit 0
|
202
tools/engtools/hostdata-collectors/scripts/LICENSE
Normal file
202
tools/engtools/hostdata-collectors/scripts/LICENSE
Normal file
@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
121
tools/engtools/hostdata-collectors/scripts/buddyinfo.py
Normal file
121
tools/engtools/hostdata-collectors/scripts/buddyinfo.py
Normal file
@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env python
|
||||
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 textwidth=79 autoindent
|
||||
|
||||
"""
|
||||
Python source code
|
||||
Last modified: 15 Feb 2014 - 13:38
|
||||
Last author: lmwangi at gmail com
|
||||
Displays the available memory fragments
|
||||
by querying /proc/buddyinfo
|
||||
Example:
|
||||
# python buddyinfo.py
|
||||
"""
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
import logging
|
||||
|
||||
|
||||
class Logger:
|
||||
def __init__(self, log_level):
|
||||
self.log_level = log_level
|
||||
|
||||
def get_formatter(self):
|
||||
return logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
|
||||
def get_handler(self):
|
||||
return logging.StreamHandler()
|
||||
|
||||
def get_logger(self):
|
||||
"""Returns a Logger instance for the specified module_name"""
|
||||
logger = logging.getLogger('main')
|
||||
logger.setLevel(self.log_level)
|
||||
log_handler = self.get_handler()
|
||||
log_handler.setFormatter(self.get_formatter())
|
||||
logger.addHandler(log_handler)
|
||||
return logger
|
||||
|
||||
|
||||
class BuddyInfo(object):
|
||||
"""BuddyInfo DAO"""
|
||||
def __init__(self, logger):
|
||||
super(BuddyInfo, self).__init__()
|
||||
self.log = logger
|
||||
self.buddyinfo = self.load_buddyinfo()
|
||||
|
||||
def parse_line(self, line):
|
||||
line = line.strip()
|
||||
self.log.debug("Parsing line: %s" % line)
|
||||
parsed_line = re.match("Node\s+(?P<numa_node>\d+).*zone\s+(?P<zone>\w+)\s+(?P<nr_free>.*)", line).groupdict()
|
||||
self.log.debug("Parsed line: %s" % parsed_line)
|
||||
return parsed_line
|
||||
|
||||
def read_buddyinfo(self):
|
||||
buddyhash = defaultdict(list)
|
||||
buddyinfo = open("/proc/buddyinfo").readlines()
|
||||
for line in map(self.parse_line, buddyinfo):
|
||||
numa_node = int(line["numa_node"])
|
||||
zone = line["zone"]
|
||||
free_fragments = map(int, line["nr_free"].split())
|
||||
max_order = len(free_fragments)
|
||||
fragment_sizes = self.get_order_sizes(max_order)
|
||||
usage_in_bytes = [block[0] * block[1] for block in zip(free_fragments, fragment_sizes)]
|
||||
buddyhash[numa_node].append({
|
||||
"zone": zone,
|
||||
"nr_free": free_fragments,
|
||||
"sz_fragment": fragment_sizes,
|
||||
"usage": usage_in_bytes })
|
||||
return buddyhash
|
||||
|
||||
def load_buddyinfo(self):
|
||||
buddyhash = self.read_buddyinfo()
|
||||
self.log.info(buddyhash)
|
||||
return buddyhash
|
||||
|
||||
def page_size(self):
|
||||
return os.sysconf("SC_PAGE_SIZE")
|
||||
|
||||
def get_order_sizes(self, max_order):
|
||||
return [self.page_size() * 2**order for order in range(0, max_order)]
|
||||
|
||||
def __str__(self):
|
||||
ret_string = ""
|
||||
width = 20
|
||||
for node in self.buddyinfo:
|
||||
ret_string += "Node: %s\n" % node
|
||||
for zoneinfo in self.buddyinfo.get(node):
|
||||
ret_string += " Zone: %s\n" % zoneinfo.get("zone")
|
||||
ret_string += " Free KiB in zone: %.2f\n" % (sum(zoneinfo.get("usage")) / (1024.0))
|
||||
ret_string += '\t{0:{align}{width}} {1:{align}{width}} {2:{align}{width}}\n'.format(
|
||||
"Fragment size", "Free fragments", "Total available KiB",
|
||||
width=width,
|
||||
align="<")
|
||||
for idx in range(len(zoneinfo.get("sz_fragment"))):
|
||||
ret_string += '\t{order:{align}{width}} {nr:{align}{width}} {usage:{align}{width}}\n'.format(
|
||||
width=width,
|
||||
align="<",
|
||||
order = zoneinfo.get("sz_fragment")[idx],
|
||||
nr = zoneinfo.get("nr_free")[idx],
|
||||
usage = zoneinfo.get("usage")[idx] / 1024.0)
|
||||
|
||||
return ret_string
|
||||
|
||||
def main():
|
||||
"""Main function. Called when this file is a shell script"""
|
||||
usage = "usage: %prog [options]"
|
||||
parser = optparse.OptionParser(usage)
|
||||
parser.add_option("-s", "--size", dest="size", choices=["B","K","M"],
|
||||
action="store", type="choice", help="Return results in bytes, kib, mib")
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
logger = Logger(logging.DEBUG).get_logger()
|
||||
logger.info("Starting....")
|
||||
logger.info("Parsed options: %s" % options)
|
||||
print logger
|
||||
buddy = BuddyInfo(logger)
|
||||
print buddy
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
60
tools/engtools/hostdata-collectors/scripts/ceph.sh
Normal file
60
tools/engtools/hostdata-collectors/scripts/ceph.sh
Normal file
@ -0,0 +1,60 @@
|
||||
#!/bin/bash
|
||||
# Usage: ceph.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
|
||||
TOOLBIN=$(dirname $0)
|
||||
|
||||
# Initialize tools environment variables, and define common utility functions
|
||||
. ${TOOLBIN}/engtools_util.sh
|
||||
tools_init
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "FATAL, tools_init - could not setup environment"
|
||||
exit $?
|
||||
fi
|
||||
|
||||
# Enable use of INTERVAL_SEC sample interval
|
||||
OPT_USE_INTERVALS=1
|
||||
|
||||
# Print key ceph statistics
|
||||
function print_ceph()
|
||||
{
|
||||
print_separator
|
||||
TOOL_HIRES_TIME
|
||||
|
||||
cmd='ceph -s'
|
||||
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
|
||||
|
||||
cmd='ceph osd tree'
|
||||
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
|
||||
|
||||
cmd='ceph df detail'
|
||||
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# MAIN Program:
|
||||
#-------------------------------------------------------------------------------
|
||||
# Parse input options
|
||||
tools_parse_options "${@}"
|
||||
|
||||
# Set affinity of current script
|
||||
CPULIST=""
|
||||
set_affinity ${CPULIST}
|
||||
|
||||
LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."
|
||||
|
||||
# Print tools generic tools header
|
||||
tools_header
|
||||
|
||||
# Calculate number of sample repeats based on overall interval and sampling interval
|
||||
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
|
||||
|
||||
for ((rep=1; rep <= REPEATS ; rep++))
|
||||
do
|
||||
print_ceph
|
||||
sleep ${INTERVAL_SEC}
|
||||
done
|
||||
print_ceph
|
||||
LOG "done"
|
||||
|
||||
# normal program exit
|
||||
tools_cleanup 0
|
||||
exit 0
|
77
tools/engtools/hostdata-collectors/scripts/cfg/engtools.conf
Normal file
77
tools/engtools/hostdata-collectors/scripts/cfg/engtools.conf
Normal file
@ -0,0 +1,77 @@
|
||||
# engtools configuration
|
||||
|
||||
# You may comment out any unwanted fields under the Intervals section, but do not comment out any other configuration options as the python parsing utility will complain. Please follow the comments
|
||||
|
||||
[LabConfiguration]
|
||||
# Set this option to Y/N depending on the setup of your lab
|
||||
CPE_LAB=N
|
||||
|
||||
[LiveStream]
|
||||
# Set this option to Y/N before patch creation to enable/disable live stats collection
|
||||
ENABLE_LIVE_STREAM=Y
|
||||
|
||||
# Set the duration of the live stream capture utility. Leave blank for continuous collection. Ex: 1s,1m,1h,1d
|
||||
DURATION=
|
||||
|
||||
[StaticCollection]
|
||||
# Set this option to Y/N before patch creation to enable/disable static stats collection
|
||||
ENABLE_STATIC_COLLECTION=Y
|
||||
|
||||
[CollectInternal]
|
||||
# controller external OAM interface used to communicate with remote server. If unset, the first interface from ifconfig will be used
|
||||
CONTROLLER0_EXTERNAL_INTERFACE=
|
||||
CONTROLLER1_EXTERNAL_INTERFACE=
|
||||
|
||||
[RemoteServer]
|
||||
# remote server influx and grafana info
|
||||
INFLUX_IP=128.224.186.61
|
||||
INFLUX_PORT=8086
|
||||
INFLUX_DB=
|
||||
GRAFANA_PORT=3000
|
||||
|
||||
# This key is created through Grafana. If deleted, a new key (with admin privileges) must be created and copied here
|
||||
GRAFANA_API_KEY=eyJrIjoiSkR1SXcxbkVVckd1dW9PMHFKS0EzQ2hQWTd1YUhtSkIiLCJuIjoiZGJfY3JlYXRvciIsImlkIjoxfQ==
|
||||
|
||||
[Intervals]
|
||||
# Set the collection interval (in seconds) to be used in the live_stream.py script. If unset or commented out, that field will not be collected
|
||||
memtop=10
|
||||
memstats=10
|
||||
occtop=10
|
||||
schedtop=10
|
||||
load_avg=3
|
||||
cpu_count=60
|
||||
diskstats=30
|
||||
iostat=10
|
||||
filestats=30
|
||||
netstats=10
|
||||
postgres=30
|
||||
rabbitmq=3600
|
||||
vswitch=30
|
||||
|
||||
[AdditionalOptions]
|
||||
# Set this option to Y/N to enable/disable Openstack API GET/POST collection
|
||||
API_REQUESTS=N
|
||||
|
||||
# Set this option to Y/N to enable/disable the collection of all services and not just the ones listed below. Note that this hasn't been tested thoroughly
|
||||
ALL_SERVICES=N
|
||||
|
||||
# Set this option to Y/N to enable/disable fast postgres connections collection. By default, postgres connections use the same collection interval as postgres DB size (set above), this option will set the collection interval to 0 seconds while not affecting the above postgres collection interval
|
||||
FAST_POSTGRES_CONNECTIONS=N
|
||||
|
||||
# Set this option to Y/N to enable/disable automatic database deletion for InfluxDB and Grafana. As of now, this feature does not work with the engtools patch
|
||||
AUTO_DELETE_DB=N
|
||||
|
||||
[ControllerServices]
|
||||
CONTROLLER_SERVICE_LIST=aodh-api aodh-listener aodh-notifier aodh-evaluator beam.smp ceilometer-api ceilometer-collector ceilometer-agent-notification ceilometer-mem-db ceph-mon ceph-rest-api ceph-alarm-manager cinder-api cinder-volume cinder-scheduler glance-api glance-registry heat-api heat-engine heat-api-cfn heat-api-cloudwatch hbsAgent ironic-api ironic-conductor keystone-all magnum-api magnum-conductor neutron-server nova-api nova-api-proxy nova-compute nova-scheduler nova-conductor nova-console-auth nova-novncproxy nova-placement-api panko-api sysinv-api sysinv-conductor postgres fmManager rabbitmq-server gunicorn postgres snmpd patch-alarm-manager lighttpd sw-patch-controller-daemon nfv-vim nfv-vim-api nfv-vim-webserver slapd mtcAgent guestAgent
|
||||
|
||||
[ComputeServices]
|
||||
COMPUTE_SERVICE_LIST=nova-compute neutron-dhcp-agent neutron-metadata-agent neutron-sriov-nic-agent kvm libvirtd guestServer host_agent
|
||||
|
||||
[StorageServices]
|
||||
STORAGE_SERVICE_LIST=ceph-mon ceph-osd ceph-manager ceph-rest-api
|
||||
|
||||
[RabbitmqServices]
|
||||
RABBITMQ_QUEUE_LIST=notifications.info versioned_notifications.info
|
||||
|
||||
[CommonServices]
|
||||
COMMON_SERVICE_LIST=dnsmasq ceilometer-polling haproxy hwmond pmond rmond fsmond sw-patch-agent sysinv-agent syslog-ng hostwd iscsid io-monitor-manager acpid hbsClient logmgmt mtcClient mtcalarmd mtclogd sshd ntpd smartd sm sm-eru sm-watchdog sm-api ceilometer keyring cinder-rtstool
|
86
tools/engtools/hostdata-collectors/scripts/chewmem
Normal file
86
tools/engtools/hostdata-collectors/scripts/chewmem
Normal file
@ -0,0 +1,86 @@
|
||||
#!/usr/bin/perl
|
||||
# Usage:
|
||||
# ./chewmem.pl <MiB>
|
||||
|
||||
# Description:
|
||||
# This will create a character array requiring "MiB" actual memory.
|
||||
# Summarize high-level memory usage.
|
||||
|
||||
# Ideally we can demonstate creating larger and larger
|
||||
# successful memory allocations until Avail is near 0.
|
||||
# It is very likely to trigger OOM Killer or cause reset
|
||||
# if we run completely out of memory.
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use POSIX qw(strftime);
|
||||
|
||||
# show_memusage() - parse /proc/meminfo and print a one-line memory summary.
# Re-prints the column header every 15 calls via the persistent counter.
sub show_memusage() {
    our $count;
    $::count++; $::count %= 15;

    my $Ki = 1024.0;
    # Collect the kB-valued fields we care about into a hash keyed by name.
    my %mem;

    my $file = '/proc/meminfo';
    open(FILE, $file) || die "Cannot open file: $file ($!)";
    while (my $line = <FILE>) {
        $line =~ s/[\0\e\f\r\a]//g; chomp $line;  # strip control characters if any
        last if ($line =~ /^\s*$/);               # stats end at the first blank line
        if ($line =~ /^(\w+):\s+(\d+)\s+kB/) {
            $mem{$1} = $2;
        }
    }
    close(FILE);

    # "Available" approximates reclaimable memory; "Strict" is the commit headroom.
    my $Avail_MiB  = ($mem{'MemFree'} + $mem{'Cached'} + $mem{'Buffers'} + $mem{'SReclaimable'})/$Ki;
    my $Strict_MiB = ($mem{'CommitLimit'} - $mem{'Committed_AS'})/$Ki;
    my $now = strftime "%Y-%m-%d %H:%M:%S", localtime();
    if ($::count == 1) {
        printf "%19s %6s %6s %6s %6s %6s %6s %6s %6s %6s\n",
            'yyyy-mm-dd hh:mm:ss', 'Tot', 'Free', 'Ca', 'Buf', 'Slab', 'CAS', 'CLim', 'Avail', 'Strict';
    }
    printf "%19s %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f\n",
        $now, $mem{'MemTotal'}/$Ki, $mem{'MemFree'}/$Ki, $mem{'Cached'}/$Ki, $mem{'Buffers'}/$Ki,
        $mem{'Slab'}/$Ki, $mem{'Committed_AS'}/$Ki, $mem{'CommitLimit'}/$Ki, $Avail_MiB, $Strict_MiB;
}
|
||||
|
||||
#-------------------------------------------------------------------------------
# MAIN PROGRAM
# Pin a <MiB>-sized character array in memory, then print the memory summary
# once per second forever (until interrupted or OOM-killed).

# Autoflush both output streams so progress is visible immediately.
select(STDERR); $| = 1;
select(STDOUT); $| = 1;  # leave STDOUT as the selected default

my $MiB = $ARGV[0] ||= 0.0;
# NOTE(review): the string holds MiB/2 bytes of "A"s; presumably the other
# half of the footprint is perl string/SV overhead -- confirm.
my $A = "A" x (1024*1024*$MiB/2);
print "Allocating $MiB MiB character array.\n";
while (1) {
    sleep(1);
    show_memusage();
}
exit 0;

1;
|
@ -0,0 +1,57 @@
|
||||
#!/bin/bash
# Purpose:
# Some of the engtools scripts are not shutting down gracefully.
# Brute-force kill any running collectors and remove their stale pidfiles.

# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
if [ $UID -ne 0 ]; then
    ERRLOG "Require sudo/root access."
    exit 1
fi

# Every engtools collector that may leave a /var/run/<tool>.pid behind.
declare -a TOOLS=(
    'collect-engtools.sh'
    'ceph.sh'
    'diskstats.sh'
    'iostat.sh'
    'rabbitmq.sh'
    'ticker.sh'
    'top.sh'
    'memstats.sh'
    'netstats.sh'
    'postgres.sh'
    'vswitch.sh'
    'filestats.sh'
    'live_stream.py'
)

LOG "Cleanup engtools:"

# Brute force methods (assume trouble with: service collect-engtools.sh stop)
# ( be sure not to clobber /etc/init.d/collect-engtools.sh )
LOG "kill processes brute force"
pids=( $(pidof -x /usr/local/bin/collect-engtools.sh) )
if [ ${#pids[@]} -ne 0 ]; then
    LOG "killing: ${pids[@]}"
    for pid in "${pids[@]}"; do
        LOG "kill: [ ${pid} ] "
        pkill -KILL -P ${pid}   # children first, then the collector itself
        kill -9 ${pid}
    done
    pkill -KILL iostat
    pkill -KILL top
else
    LOG "no pids found"
fi

LOG "remove pidfiles"
for TOOL in "${TOOLS[@]}"; do
    rm -f -v /var/run/${TOOL}.pid
done
LOG "done"

exit 0
|
@ -0,0 +1,14 @@
|
||||
[Unit]
Description=Engineering data collection tools to monitor host performance
After=network.service

[Service]
Type=forking
ExecStart=/etc/init.d/collect-engtools.sh start
ExecStop=/etc/init.d/collect-engtools.sh stop
ExecReload=/etc/init.d/collect-engtools.sh reload
PIDFile=/var/run/collect-engtools.sh.pid
Restart=always

[Install]
WantedBy=multi-user.target
|
334
tools/engtools/hostdata-collectors/scripts/collect-engtools.sh
Normal file
334
tools/engtools/hostdata-collectors/scripts/collect-engtools.sh
Normal file
@ -0,0 +1,334 @@
|
||||
#!/bin/bash
# Usage:
#  collect-engtools.sh [-f] [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]

# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh

# ENABLE DEBUG (0=disable, 1=enable)
OPT_DEBUG=0

# Set options for long soak (vs, shorter collection)
#OPT_SOAK=0 # long soak
OPT_SOAK=1 # few hour soak
#OPT_SOAK=2 # < hour soak

# Define command to set nice + ionice
CMD_IDLE=$( cmd_idle_priority )

# Purge configuration options
PURGE_HEADROOM_MB=100       # - how much data may be created per cycle
PURGE_HEADROOM_PERCENT=15   # - how much remaining space to leave
PURGE_MAXUSAGE_MB=1000      # - maximum size of data collection

# Affine to pinned cores
AFFINE_PINNED=1

# Line-buffer stream output (instead of buffered)
STDBUF="stdbuf -oL"

# Define some common durations (in seconds)
DUR_60MIN_IN_SEC=$[60*60]
DUR_30MIN_IN_SEC=$[30*60]
DUR_15MIN_IN_SEC=$[15*60]
DUR_10MIN_IN_SEC=$[10*60]
DUR_5MIN_IN_SEC=$[5*60]
DUR_1MIN_IN_SEC=$[1*60]

# Global variables shared with the helper functions below
declare -a parallel_outfiles
declare df_size_bytes df_avail_bytes du_used_bytes
declare tgt_avail_bytes tgt_used_bytes
|
||||
|
||||
# do_parallel_commands - launch parallel tools with separate output files
# Reads the global 'tlist' array; each element is a word-list of KEY=VALUE
# settings (tool=, name=, period=|repeat=, interval=). Launches every tool in
# the background and records its output file in the global 'parallel_outfiles'.
function do_parallel_commands()
{
    parallel_outfiles=()
    for elem in "${tlist[@]}"; do
        tool=""; period=""; repeat=""; interval=""
        # Expand the element's KEY=VALUE words into shell variables
        # (tool, name, period, repeat, interval) via indirect expansion.
        my_hash="elem[*]"
        local ${!my_hash}
        if [ -z "${name}" ]; then
            # No output name given (eg., ticker): run without file output.
            ${STDBUF} ${tool} -p ${period} -i ${interval} 2>/dev/null &
            continue
        fi
        outfile="${TOOL_DEST_DIR}/${HOSTNAME}_${timestamp}_${name}"
        parallel_outfiles+=( $outfile )
        LOG "collecting ${tool}, ${interval} second intervals, to: ${outfile}"
        if [ ! -z "${period}" ]; then
            ${STDBUF} ${tool} -p ${period} -i ${interval} > ${outfile} 2>/dev/null &
        elif [ ! -z "${repeat}" ]; then
            ${STDBUF} ${tool} --repeat=${repeat} --delay=${interval} > ${outfile} 2>/dev/null &
        fi
    done
}
|
||||
|
||||
# get_current_avail_usage() - get output destination file-system usage and
# availability.
# - updates globals: df_size_bytes, df_avail_bytes, du_used_bytes
function get_current_avail_usage()
{
    # One POSIX-format df call yields both total size and available space (bytes).
    local -a fs_bytes=( $(df -P --block-size=1 ${TOOL_DEST_DIR} | awk 'NR==2 {print $2, $4}') )
    df_size_bytes=${fs_bytes[0]}
    df_avail_bytes=${fs_bytes[1]}
    # du reports the collection directory's own consumption (bytes).
    du_used_bytes=$(du --block-size=1 ${TOOL_DEST_DIR} | awk 'NR==1 {print $1}')
}
|
||||
|
||||
# purge_oldest_files() - remove oldest files based on file-system available space,
# and maximum collection size
function purge_oldest_files()
{
    # get current file-system usage
    get_current_avail_usage
    msg=$(printf "avail %d MB, headroom %d MB; used %d MB, max %d MB" \
        $[$df_avail_bytes/1024/1024] $[$tgt_avail_bytes/1024/1024] \
        $[$du_used_bytes/1024/1024] $[$tgt_used_bytes/1024/1024])
    LOG "usage: ${msg}"

    # Nothing to purge while both targets are satisfied.
    if [[ $df_avail_bytes -ge $tgt_avail_bytes ]] && \
       [[ $du_used_bytes -le $tgt_used_bytes ]]; then
        return
    fi

    # wait for compression to complete before re-measuring
    wait

    get_current_avail_usage
    if [[ $df_avail_bytes -lt $tgt_avail_bytes ]]; then
        msg=$(printf "purge: avail %d MB < target %d MB" \
            $[$df_avail_bytes/1024/1024] $[$tgt_avail_bytes/1024/1024] )
        LOG "purge: ${msg}"
    fi
    if [[ $du_used_bytes -gt $tgt_used_bytes ]]; then
        msg=$(printf "purge: used %d MB > target %d MB" \
            $[$du_used_bytes/1024/1024] $[$tgt_used_bytes/1024/1024] )
        LOG "purge: ${msg}"
    fi

    # remove files in oldest time sorted order until we meet usage targets,
    # incrementally updating usage as we remove files
    for file in $( ls -rt ${TOOL_DEST_DIR}/${HOSTNAME}_* 2>/dev/null ); do
        if [[ $df_avail_bytes -ge $tgt_avail_bytes ]] && \
           [[ $du_used_bytes -le $tgt_used_bytes ]]; then
            break
        fi

        if [ ${OPT_DEBUG} -eq 1 ]; then
            msg="purge: file=$file"
            [[ $df_avail_bytes -lt $tgt_avail_bytes ]] && msg="${msg}, < AVAIL"
            [[ $du_used_bytes -gt $tgt_used_bytes ]] && msg="${msg}, > MAXUSAGE"
            LOG "${msg}"
        fi

        sz_bytes=$(stat --printf="%s" $file)
        ((df_avail_bytes += sz_bytes))
        ((du_used_bytes -= sz_bytes))
        rm -fv ${file}
    done
}
|
||||
|
||||
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Read configuration variable file if it is present
NAME=collect-engtools.sh
[ -r /etc/default/$NAME ] && . /etc/default/$NAME

# Initialize tool
tools_init

# Parse input options
tools_parse_options "${@}"

# Set affinity of current script
CPULIST=""

# Affine tools to NOVA pinned cores (i.e., non-cpu 0)
# - remove interference with cpu 0
if [ "${AFFINE_PINNED}" -eq 1 ]; then
    NOVA_CONF=/etc/nova/compute_extend.conf
    if [ -f "${NOVA_CONF}" ]; then
        source "${NOVA_CONF}"
        CPULIST=${compute_pinned_cpulist}
    else
        CPULIST=""
    fi
fi
set_affinity ${CPULIST}

# Define output directory
# - only controllers write to the persistent /scratch; all other node types
#   (compute, storage, ...) fall back to /tmp
if [[ "${HOSTNAME}" =~ "controller-" ]]; then
    TOOL_DEST_DIR=/scratch/syseng_data/${HOSTNAME}
else
    TOOL_DEST_DIR=/tmp/syseng_data/${HOSTNAME}
fi
mkdir -p ${TOOL_DEST_DIR}

# Define daemon log output
timestamp=$( date +"%Y-%0m-%0e_%H%M" )
DAEMON_OUT="${TOOL_DEST_DIR}/${HOSTNAME}_${timestamp}_${TOOLNAME}.log"

# Redirect stdout and append to log if not connected to TTY
if test ! -t 1 ; then
    exec 1>> ${DAEMON_OUT}
fi

# Get current availability and usage
get_current_avail_usage

# Calculate disk usage and availability purge targets
df_offset_bytes=$[$PURGE_HEADROOM_MB*1024*1024]
tgt_used_bytes=$[$PURGE_MAXUSAGE_MB*1024*1024]
((tgt_avail_bytes = df_size_bytes/100*PURGE_HEADROOM_PERCENT + df_offset_bytes))

# Set granularity based on duration
if [ $PERIOD_MIN -le 30 ]; then
    GRAN_MIN=5
else
    GRAN_MIN=60
fi

# Adjust repeats and intervals based on GRAN_MIN granularity
PERIOD_MIN=$[($PERIOD_MIN+(GRAN_MIN-1))/GRAN_MIN*GRAN_MIN]
((REPEATS = PERIOD_MIN/GRAN_MIN))
GRAN_MIN_IN_SEC=$[$GRAN_MIN*60]
if [ ${INTERVAL_SEC} -gt ${GRAN_MIN_IN_SEC} ]; then
    INTERVAL_SEC=${GRAN_MIN_IN_SEC}
fi

# Define tools and options
# [ JGAULD - need config file for customization; long soak vs specific tools ]
# [ Ideally sample < 5 second granularity, but files get big, and tool has cpu overhead ]
# [ Need < 5 second granularity to see cache pressure/flush issues ]
# [ Desire 60 sec interval for soak ]
case ${OPT_SOAK} in
    1)
        # Desire 60 second or greater interval for longer term data collections,
        # otherwise collection files get too big.
        schedtop_interval=20
        occtop_interval=60
        memtop_interval=60
        netstats_interval=60
        # JGAULD: temporarily increase frequency to 1 min
        postgres_interval=${DUR_1MIN_IN_SEC}
        #postgres_interval=${DUR_15MIN_IN_SEC}
        rabbitmq_interval=${DUR_15MIN_IN_SEC}
        ceph_interval=${DUR_15MIN_IN_SEC}
        diskstats_interval=${DUR_15MIN_IN_SEC}
        memstats_interval=${DUR_15MIN_IN_SEC}
        filestats_interval=${DUR_15MIN_IN_SEC}
        ;;
    2)
        # Assume much shorter collection (eg, < hours)
        schedtop_interval=2     # i.e., 2 second interval
        occtop_interval=2       # i.e., 2 second interval
        memtop_interval=1       # i.e., 1 second interval
        netstats_interval=30    # i.e., 30 second interval
        postgres_interval=${DUR_5MIN_IN_SEC}
        rabbitmq_interval=${DUR_5MIN_IN_SEC}
        ceph_interval=${DUR_5MIN_IN_SEC}
        diskstats_interval=${DUR_5MIN_IN_SEC}
        memstats_interval=${DUR_5MIN_IN_SEC}
        filestats_interval=${DUR_5MIN_IN_SEC}
        ;;
    *)
        # Assume shorter collection (eg, < a few hours)
        schedtop_interval=5     # i.e., 5 second interval
        occtop_interval=5       # i.e., 5 second interval
        memtop_interval=5       # i.e., 5 second interval
        netstats_interval=30    # i.e., 30 second interval
        postgres_interval=${DUR_5MIN_IN_SEC}
        rabbitmq_interval=${DUR_5MIN_IN_SEC}
        ceph_interval=${DUR_5MIN_IN_SEC}
        diskstats_interval=${DUR_5MIN_IN_SEC}
        memstats_interval=${DUR_5MIN_IN_SEC}
        filestats_interval=${DUR_5MIN_IN_SEC}
        ;;
esac
schedtop_repeat=$[ $PERIOD_MIN * 60 / $schedtop_interval ]
occtop_repeat=$[ $PERIOD_MIN * 60 / $occtop_interval ]
memtop_repeat=$[ $PERIOD_MIN * 60 / $memtop_interval ]
netstats_repeat=$[ $PERIOD_MIN * 60 / $netstats_interval ]

# Disable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=0

# Define parallel engtools configuration
# - tool name, filename, and collection interval attributes
BINDIR=/usr/bin
LBINDIR=/usr/local/bin

# Scan engtools.conf for the static-collection enable flag (Y/N).
while IFS='' read -r line || [[ -n "$line" ]]; do
    if [[ $line =~ 'ENABLE_STATIC_COLLECTION'* ]]; then
        static_collection=${line:25:1}
    fi
done < /etc/engtools/engtools.conf

declare -a tlist
if [[ $static_collection == "Y" ]] || [[ $static_collection == "y" ]]; then
    tlist+=( "tool=${LBINDIR}/top.sh name=top period=${PERIOD_MIN} interval=${DUR_1MIN_IN_SEC}" )
    tlist+=( "tool=${LBINDIR}/iostat.sh name=iostat period=${PERIOD_MIN} interval=${DUR_1MIN_IN_SEC}" )
    tlist+=( "tool=${LBINDIR}/netstats.sh name=netstats period=${PERIOD_MIN} interval=${netstats_interval}" )
    tlist+=( "tool=${BINDIR}/occtop name=occtop repeat=${occtop_repeat} interval=${occtop_interval}" )
    tlist+=( "tool=${BINDIR}/memtop name=memtop repeat=${memtop_repeat} interval=${memtop_interval}" )
    tlist+=( "tool=${BINDIR}/schedtop name=schedtop repeat=${schedtop_repeat} interval=${schedtop_interval}" )
    tlist+=( "tool=${LBINDIR}/diskstats.sh name=diskstats period=${PERIOD_MIN} interval=${diskstats_interval}" )
    tlist+=( "tool=${LBINDIR}/memstats.sh name=memstats period=${PERIOD_MIN} interval=${memstats_interval}" )
    tlist+=( "tool=${LBINDIR}/filestats.sh name=filestats period=${PERIOD_MIN} interval=${filestats_interval}" )
    if [[ "${HOSTNAME}" =~ "controller-" ]]; then
        tlist+=( "tool=${LBINDIR}/ceph.sh name=ceph period=${PERIOD_MIN} interval=${ceph_interval}" )
        tlist+=( "tool=${LBINDIR}/postgres.sh name=postgres period=${PERIOD_MIN} interval=${postgres_interval}" )
#        tlist+=( "tool=${LBINDIR}/rabbitmq.sh name=rabbitmq period=${PERIOD_MIN} interval=${rabbitmq_interval}" )
    elif [[ "${HOSTNAME}" =~ "compute-" ]]; then
        tlist+=( "tool=${LBINDIR}/vswitch.sh name=vswitch period=${PERIOD_MIN} interval=${DUR_1MIN_IN_SEC}" )
    fi
fi

# ticker - shows progress on the screen
tlist+=( "tool=${LBINDIR}/ticker.sh name= period=${PERIOD_MIN} interval=${DUR_1MIN_IN_SEC}" )


#-------------------------------------------------------------------------------
# Main loop
#-------------------------------------------------------------------------------
OPT_DEBUG=0
REP=0
while [[ ${TOOL_USR1_SIGNAL} -eq 0 ]] &&
      [[ ${OPT_FOREVER} -eq 1 || ${REP} -lt ${REPEATS} ]]
do
    # increment loop counter
    ((REP++))

    # purge oldest files to make room for the next collection cycle
    purge_oldest_files

    # define filename timestamp
    timestamp=$( date +"%Y-%0m-%0e_%H%M" )

    # collect tools in parallel to separate output files
    LOG "collecting ${TOOLNAME} at ${timestamp} for ${PERIOD_MIN} mins, repeat=${REP}"
    do_parallel_commands
    wait

    # Compress latest increment
    LOG "compressing: ${parallel_outfiles[@]}"
    ${CMD_IDLE} bzip2 -q -f ${parallel_outfiles[@]} 2>/dev/null &
done

# wait for compression to complete
wait

tools_cleanup 0
exit 0
|
122
tools/engtools/hostdata-collectors/scripts/diskstats.sh
Normal file
122
tools/engtools/hostdata-collectors/scripts/diskstats.sh
Normal file
@ -0,0 +1,122 @@
|
||||
#!/bin/bash
# Usage: diskstats.sh
# Periodic snapshots of filesystem, LVM and drbd usage/status.
TOOLBIN=$(dirname $0)

# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
rc=$?
if [ ${rc} -ne 0 ]; then
    # BUGFIX: capture tools_init's status before echo clobbers $?; the old
    # 'exit $?' always exited 0 even when initialization failed.
    echo "FATAL, tools_init - could not setup environment"
    exit ${rc}
fi

# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
|
||||
|
||||
# Print disk summary
# Emits a timestamped set of labelled command snapshots: df space/inodes for
# local ext2/3/4 + xfs filesystems, then drbd, lvs, pvs, and vgs status.
function print_disk()
{
    print_separator
    TOOL_HIRES_TIME

    # NOTES:
    # --total (grand-total) is a new option, but don't necessarily want to add tmpfs
    # or dummy filesystems.
    # - use -H to print in SI (eg, GB, vs GiB)
    # - can use -a to print all filesystems including dummy filesystems, but then
    # there can be double-counting:
    local -a labels=(
        "Disk space usage ext2,ext3,ext4,xfs,tmpfs (SI):"
        "Disk inodes usage ext2,ext3,ext4,xfs,tmpfs (SI):"
        "drbd disk usage and status:"
        "logical volumes usage and status:"
        "physical volumes usage and status:"
        "volume groups usage and status:"
    )
    local -a cmds=(
        'df -h -H -T --local -t ext2 -t ext3 -t ext4 -t xfs --total'
        'df -h -H -T --local -i -t ext2 -t ext3 -t ext4 -t xfs --total'
        'drbd-overview'
        'lvs'
        'pvs'
        'vgs'
    )
    local idx
    for idx in "${!cmds[@]}"; do
        print_separator
        ${ECHO} "${labels[$idx]}"
        ${ECHO} "# ${cmds[$idx]}" ; ${cmds[$idx]} ; ${ECHO}
    done
}
|
||||
|
||||
# Print disk static summary
# One-time hardware inventory: attached SCSI devices, plus fdisk/parted
# partition listings (the latter two require root and are skipped otherwise).
function print_disk_static()
{
    local cmd

    print_separator
    cmd='cat /proc/scsi/scsi'
    ${ECHO} "Attached devices: ${cmd}"
    ${cmd}
    ${ECHO}

    # fdisk / parted - both require sudo/root
    for cmd in 'fdisk -l' 'parted -l'; do
        print_separator
        if [ $UID -eq 0 ]; then
            ${ECHO} "List disk devices: ${cmd}"
            ${cmd}
        else
            WARNLOG "Skipping cmd=${cmd}, root/sudo passwd required"
        fi
        ${ECHO}
    done
}
|
||||
|
||||
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"

# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}

LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."

# Print tools generic tools header
tools_header

# Print static disk information
print_disk_static

# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))

# Periodic samples, plus one final sample after the last sleep.
rep=1
while [ ${rep} -le ${REPEATS} ]; do
    print_disk
    sleep ${INTERVAL_SEC}
    ((rep++))
done
print_disk
LOG "done"

# normal program exit
tools_cleanup 0
exit 0
|
478
tools/engtools/hostdata-collectors/scripts/engtools_util.sh
Normal file
478
tools/engtools/hostdata-collectors/scripts/engtools_util.sh
Normal file
@ -0,0 +1,478 @@
|
||||
#!/bin/bash
# Common initialization, logging, signal handling, and option parsing for the
# engtools host data collection scripts; sourced by each collector.
TOOLNAME=$(basename $0)
PIDFILE=/var/run/${TOOLNAME}.pid
TOOL_DEBUG=1
# Signal flags polled by the collectors' main loops.
TOOL_EXIT_SIGNAL=0
TOOL_USR1_SIGNAL=0
TOOL_USR2_SIGNAL=0
# Remember whether we are attached to a terminal (daemon-mode detection).
TOOL_TTY=0
if tty 1>/dev/null ; then
    TOOL_TTY=1
fi

# [ JGAULD : SHOULD RENAME TO TOOL_X ]
OPT_USE_INTERVALS=0
OPT_FOREVER=0
PERIOD_MIN=5
INTERVAL_SEC=60
CPULIST=0

# Include lsb functions
if [ -d /lib/lsb ]; then
    . /lib/lsb/init-functions
else
    . /etc/init.d/functions
fi
|
||||
# Lightweight replacement for pidofproc -p <pid>
# check_pidfile -p <pidfile>
# Prints the recorded pid and returns 0 when that process is alive;
# returns 1 when the pidfile is missing, unreadable, empty, or stale.
function check_pidfile ()
{
    local pidfile pid

    OPTIND=1
    while getopts p: opt ; do
        case "$opt" in
        p)
            pidfile="$OPTARG"
            ;;
        esac
    done
    shift $(($OPTIND - 1))

    # BUGFIX: guard the redirect -- a missing/unreadable pidfile previously
    # produced a shell error from 'read' and then fell through returning 0,
    # which made callers (eg tools_init) believe the tool was still running.
    [ -r "${pidfile}" ] || return 1
    read -r pid < "${pidfile}"
    if [ -n "${pid:-}" ]; then
        # BUGFIX: call kill directly; the old $(kill ...) command-substitution
        # form only worked by an obscure empty-expansion status rule.
        if kill -0 "${pid:-}" 2> /dev/null; then
            echo "$pid"
            return 0
        elif ps "${pid:-}" >/dev/null 2>&1; then
            echo "$pid"
            return 0 # program is running, but not owned by this user
        else
            return 1 # program is dead and /var/run pid file exists
        fi
    fi
    # Empty pidfile: treat as stale.
    return 1
}
|
||||
|
||||
# tools_init - initialize tool resources
# - requires root, locates the external binaries used by the collectors,
#   refuses to start when another instance owns ${PIDFILE}, then records our
#   pid and installs the INT/HUP/TERM/EXIT and USR1/USR2 trap handlers.
# Globals set: ECHO DATE CAT ARCH SED GREP WC UNAME SORT TR AWK PKILL LS
# Returns: 0 on success; 1 when not root or an instance is already running.
function tools_init ()
{
    local rc=0
    local error=0
    TOOLNAME=$(basename $0)

    # Check for sufficient priviledges
    if [ $UID -ne 0 ]; then
        ERRLOG "${NAME} requires sudo/root access."
        return 1
    fi

    # Check for essential binaries; echo and date are fatal if missing.
    ECHO=$(which echo 2>/dev/null)
    rc=$?
    if [ $rc -ne 0 ]; then
        ECHO=echo # use bash built-in echo
        ${ECHO} "FATAL, 'echo' not found, rc=$rc";
        error=$rc
    fi
    DATE=$(which date 2>/dev/null)
    rc=$?
    if [ $rc -ne 0 ]; then
        ${ECHO} "FATAL, 'date' not found, rc=$rc";
        error=$rc
    fi

    # Check for standard linux binaries, at least can use LOG functions now
    # - these are used in tools_header
    # Table-driven: each VAR gets the path of its lowercase binary name
    # (replaces eleven copy/pasted which-blocks; same messages and globals).
    local var bin path
    for var in CAT ARCH SED GREP WC UNAME SORT TR AWK PKILL LS; do
        bin=$(printf '%s' "${var}" | tr '[:upper:]' '[:lower:]')
        path=$(which ${bin} 2>/dev/null)
        rc=$?
        printf -v "${var}" '%s' "${path}"
        if [ $rc -ne 0 ]; then
            ERRLOG "'${bin}' not found, rc=$rc";
            error=$rc
        fi
    done

    # The following block is needed for LSB systems such as Windriver Linux.
    # The utility is not available on CentOS so comment it out.
    # Generic utility, but may not be available
    # LSB=$(which lsb_release 2>/dev/null)
    # rc=$?
    # if [ $rc -ne 0 ]; then
    #     WARNLOG "'lsb_release' not found, rc=$rc";
    # fi

    # Let parent program decide what to do with the errors,
    # give ominous warning
    if [ $error -eq 1 ]; then
        WARNLOG "possibly cannot continue, missing linux binaries"
    fi

    # Check if tool was previously running
    if [ -e ${PIDFILE} ]; then
        # [ JGAULD - remove pidofproc() / LSB compatibility issue ]
        if check_pidfile -p "${PIDFILE}" >/dev/null; then
            ERRLOG "${PIDFILE} exists and ${TOOLNAME} is running"
            return 1
        else
            # remove pid file
            WARNLOG "${PIDFILE} exists but ${TOOLNAME} is not running; cleaning up"
            rm -f ${PIDFILE}
        fi
    fi

    # Create pid file
    echo $$ > ${PIDFILE}

    # Setup trap handler - these signals trigger child shutdown and cleanup
    trap tools_exit_handler INT HUP TERM EXIT
    trap tools_usr1_handler USR1
    trap tools_usr2_handler USR2

    return ${rc}
}
|
||||
|
||||
# tools_cleanup() - terminate child processes
# $1 is the exit code; non-zero invocations log and pause briefly so TERM'd
# children can exit before the KILL sweep. Always removes ${PIDFILE} and exits.
function tools_cleanup() {
    # restore signal handling to default behaviour
    trap - INT HUP TERM EXIT
    trap - USR1 USR2

    local verbose_flag=''
    if [ "$1" -ne "0" ]; then
        LOG "cleanup invoked with code: $1"
        if [ ${TOOL_DEBUG} -ne 0 ]; then
            verbose_flag='-v'
        fi
    fi

    # stop all processes launched from this process
    pkill -TERM -P $$
    if [ "$1" -ne "0" ]; then
        sleep 1
    fi

    # OK, if the above didn't work, use force
    pkill -KILL -P $$

    # remove pid file
    if [ -e ${PIDFILE} ]; then
        rm -f ${verbose_flag} ${PIDFILE}
    fi
    exit $1
}
|
||||
|
||||
# tools_exit_handler() - exit handler routine
# Trap target for INT/HUP/TERM/EXIT: flags shutdown, then tools_cleanup
# terminates the children and exits with code 128.
function tools_exit_handler() {
    TOOL_EXIT_SIGNAL=1
    tools_cleanup 128
}
|
||||
# tools_usr1_handler() - USR1 handler routine
# Sets the flag polled by collector main loops to stop collection gracefully.
function tools_usr1_handler() {
    TOOL_USR1_SIGNAL=1
    LOG "caught USR1"
}
|
||||
# tools_usr2_handler() - USR2 handler routine
# Sets TOOL_USR2_SIGNAL for callers that poll it.
function tools_usr2_handler() {
    TOOL_USR2_SIGNAL=1
    # BUGFIX: was logging "caught USR1" (copy/paste from the USR1 handler).
    LOG "caught USR2"
}
|
||||
|
||||
# LOG(), WARNLOG(), ERRLOG() - simple print log functions (not logger)
# Each line is prefixed "<timestamp> <host> <script>(<pid>): <message>";
# LOG_NOCR emits the same without a trailing newline.
function LOG ()
{
    local ts
    ts=$( date +"%Y-%0m-%0e %H:%M:%S" )
    echo "${ts} ${HOSTNAME} $0($$): $@";
}
function LOG_NOCR ()
{
    local ts
    ts=$( date +"%Y-%0m-%0e %H:%M:%S" )
    echo -n "${ts} ${HOSTNAME} $0($$): $@";
}
function WARNLOG () { LOG "WARN $@"; }
function ERRLOG () { LOG "ERROR $@"; }
|
||||
|
||||
# TOOL_HIRES_TIME() - easily parsed date/timestamp and hi-resolution uptime
# ${DATE} is resolved by tools_init; /proc/uptime supplies uptime + idle secs.
function TOOL_HIRES_TIME()
{
    local stamp up
    stamp=$( ${DATE} +"%a %F %H:%M:%S.%N %Z %z" )
    up=$( cat /proc/uptime )
    echo "time: " ${stamp} "uptime: " ${up}
}
|
||||
|
||||
# set_affinity() - set affinity for current script if a CPULIST is defined
# No-op when the cpu list argument is empty; otherwise best-effort taskset.
function set_affinity() {
    local cpus=$1
    if [ -z "${cpus}" ]; then
        return
    fi

    # Pin this shell to the requested cpus; errors are deliberately ignored.
    local TASKSET
    TASKSET=$(which taskset 2>/dev/null)
    if [ -x "${TASKSET}" ]; then
        ${TASKSET} -pc ${cpus} $$ 2>/dev/null
    fi
}
|
||||
|
||||
# cmd_idle_priority() - command to set nice + ionice
# Echoes a command prefix such as "/usr/bin/nice -n 19 /usr/bin/ionice -c 3";
# either part is dropped when its binary is unavailable.
function cmd_idle_priority() {
    local nice_cmd ionice_cmd

    if nice_cmd=$( which nice 2>/dev/null ); then
        nice_cmd="${nice_cmd} -n 19"
    else
        nice_cmd=""
    fi
    if ionice_cmd=$( which ionice 2>/dev/null ); then
        ionice_cmd="${ionice_cmd} -c 3"
    else
        ionice_cmd=""
    fi
    echo "${nice_cmd} ${ionice_cmd}"
}
|
||||
|
||||
|
||||
# print_separator() - print a horizontal separation line of 80 '-' characters
function print_separator () {
    local line
    printf -v line '%80s' ''
    printf '%s\n' "${line// /-}"
}
|
||||
|
||||
# tools_header() - print out common GenWare tools header
# Emits three lines: timestamp + load averages, host + distribution + kernel,
# and arch + processor + speed + online cpu count. Reads /proc/cpuinfo,
# /proc/loadavg and (on non-LSB hosts) /etc/centos-release.
function tools_header() {
    local TOOLNAME=$(basename $0)

    # Get timestamp
    #local tstamp=$( date +"%Y-%0m-%0e %H:%M:%S" 2>/dev/null )
    local tstamp=$( date --rfc-3339=ns | cut -c1-23 2>/dev/null )

    # Linux Generic
    local UPTIME=/proc/uptime

    # Get number of online cpus
    local CPUINFO=/proc/cpuinfo
    local online_cpus=$( cat ${CPUINFO} | grep -i ^processor | wc -l 2>/dev/null )

    # Get load average, run-queue size, and number of threads
    local LOADAVG=/proc/loadavg
    local LDAVG=( `cat ${LOADAVG} | sed -e 's#[/]# #g' 2>/dev/null` )

    # Get current architecture
    local arch=$( uname -m )

    # Determine processor name (there are many different formats... *sigh* )
    # - build up info from multiple lines
    local processor='unk'
    local NAME=$( cat ${CPUINFO} | grep \
        -e '^cpu\W\W:' \
        -e ^'cpu model' \
        -e ^'model name' \
        -e ^'system type' \
        -e ^Processor \
        -e ^[Mm]achine | \
        sort -u | awk 'BEGIN{FS=":";} {print $2;}' | \
        tr '\n' ' ' | tr -s [:blank:] 2>/dev/null )
    if [ ! -z "${NAME}" ]; then
        processor=${NAME}
    fi

    # Determine processor speed (abort grep after first match)
    local speed='unk'
    local BOGO=$( cat ${CPUINFO} | grep -m1 -e ^BogoMIPS -e ^bogomips | \
        awk 'BEGIN{FS=":";} {printf "%.1f", $2;}' 2>/dev/null )
    local MHZ=$( cat ${CPUINFO} | grep -m1 -e ^'cpu MHz' -e ^clock | \
        awk 'BEGIN{FS=":";} {printf "%.1f", $2;}' 2>/dev/null )
    local MHZ2=$( cat ${CPUINFO} | grep -m1 -e ^Cpu0ClkTck -e ^'cycle frequency' | \
        awk 'BEGIN{FS=":";} {printf "%.1f", $2/1.0E6;}' 2>/dev/null )
    if [ ! -z "${MHZ}" ]; then
        speed=${MHZ}
    elif [ ! -z "${MHZ2}" ]; then
        speed=${MHZ2}
    elif [ ! -z ${BOGO} ]; then
        speed=${BOGO}
    fi

    # Determine OS and kernel version
    local os_name=$( uname -s 2>/dev/null )
    local os_release=$( uname -r 2>/dev/null )

    declare -a arr

    local dist_id=""
    # Determine OS distribution ID
    # BUGFIX: '[ lsb_pres == "yes" ]' compared the literal string "lsb_pres"
    # and was always false; test the variable instead.
    # NOTE(review): lsb_pres is not set anywhere in this file -- presumably
    # exported by an LSB-platform wrapper; confirm. Unset falls back to the
    # CentOS branch, matching the old (always-false) behaviour.
    if [ "${lsb_pres:-}" == "yes" ]; then
        arr=( $( lsb_release -i 2>/dev/null ) )
        dist_id=${arr[2]}
    else
        local dist_id=$(cat /etc/centos-release | awk '{print $1}' 2>/dev/null)
    fi

    local dist_rel=""
    if [ "${lsb_pres:-}" == "yes" ]; then
        # Determine OS distribution release
        arr=( $( cat /proc/version | awk '{print $3}' 2>/dev/null ) )
        local dist_rel=${arr[1]}
    else
        local dist_rel=$(cat /etc/centos-release | awk '{print $4}' 2>/dev/null)
    fi
    # Print generic header
    echo "${TOOLNAME} -- ${tstamp} load average:${LDAVG[0]}, ${LDAVG[1]}, ${LDAVG[2]} runq:${LDAVG[3]} nproc:${LDAVG[4]}"
    echo " host:${HOSTNAME} Distribution:${dist_id} ${dist_rel} ${os_name} ${os_release}"
    echo " arch:${arch} processor:${processor} speed:${speed} MHz CPUs:${online_cpus}"
}
|
||||
|
||||
|
||||
|
||||
|
||||
# tools_usage() - show generic tools tool usage
|
||||
# tools_usage() - print the one-line usage summary for the current tool.
# Reads globals: TOOLNAME (tool name for the message) and
# OPT_USE_INTERVALS (1 => the tool accepts a -i <interval_seconds> option).
function tools_usage() {
    if [ "${OPT_USE_INTERVALS}" -ne 1 ]; then
        echo "Usage: ${TOOLNAME} [-f] [-p <period_mins>] [-c <cpulist>] [-h]"
    else
        echo "usage: ${TOOLNAME} [-f] [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]"
    fi
}
|
||||
|
||||
# tools_print_help() - print generic tool help
|
||||
# tools_print_help() - print the generic tool help text: usage line,
# per-option descriptions, and a worked example.
# Reads globals: OPT_USE_INTERVALS, TOOLNAME, DEFAULT_PERIOD_MIN,
# DEFAULT_INTERVAL_SEC (the last two presumably set by the sourced
# engtools environment -- TODO confirm).
function tools_print_help() {
    tools_usage
    echo
    echo "Options:";
    echo " -f : collect forever : default: none"
    echo " -p <period_minutes> : overall collection period (minutes) : default: ${DEFAULT_PERIOD_MIN}"
    # -i only applies to tools that sample on an interval
    if [ ${OPT_USE_INTERVALS} -eq 1 ]; then
        echo " -i <interval_seconds> : sample interval (seconds) : default: ${DEFAULT_INTERVAL_SEC}"
    fi
    echo " -c <cpulist> : cpu list where tool runs (e.g., 0-1,8) : default: none"
    echo
    # Example text mirrors whether interval sampling is supported
    if [ ${OPT_USE_INTERVALS} -eq 1 ]; then
        echo "Example: collect 5 minute period, sample every 30 seconds interval"
        echo " ${TOOLNAME} -p 5 -i 30"
    else
        echo "Example: collect 5 minute period"
        echo " ${TOOLNAME} -p 5"
    fi
}
|
||||
|
||||
# tools_parse_options() -- parse common options for tools scripts
|
||||
# tools_parse_options() -- parse common options for tools scripts.
# Options: -f (collect forever), -p <period_mins>, -i <interval_seconds>,
#          -c <cpulist>, -h (help).
# Sets globals: OPT_FOREVER, PERIOD_MIN, OPT_USE_INTERVALS, INTERVAL_SEC,
#               CPULIST, PERIOD_MAX, INTERVAL_MAX, error.
# Exits (via tools_cleanup) on no arguments, -h, unknown option, or
# out-of-range -p/-i values.
function tools_parse_options() {
    # check for no arguments, print usage
    if [ $# -eq "0" ]; then
        tools_usage
        tools_cleanup 0
        exit 0
    fi

    # parse the input arguments
    while getopts "fp:i:c:h" Option
    do
        case $Option in
            f)
                # "forever": caller loops on PERIOD_MIN-sized chunks of 60 min
                OPT_FOREVER=1
                PERIOD_MIN=60
                ;;
            p) PERIOD_MIN=$OPTARG ;;
            i)
                OPT_USE_INTERVALS=1
                INTERVAL_SEC=$OPTARG
                ;;
            c) CPULIST=$OPTARG ;;
            h)
                tools_print_help
                tools_cleanup 0
                exit 0
                ;;
            *)
                tools_usage
                tools_cleanup 0
                exit 0
                ;;
        esac
    done

    # validate input arguments
    # NOTE: POSIX $(( )) arithmetic replaces the deprecated $[ ] form
    PERIOD_MAX=$((4*24*60))   # 4 days, in minutes
    INTERVAL_MAX=$((60*60))   # 1 hour, in seconds

    error=0
    if [[ ${PERIOD_MIN} -lt 1 || ${PERIOD_MIN} -gt ${PERIOD_MAX} ]]; then
        # BUGFIX: message previously said "<period_mid>"
        echo "-p <period_min> must be > 0 and <= ${PERIOD_MAX}."
        error=1
    fi
    if [[ ${INTERVAL_SEC} -lt 1 || ${INTERVAL_SEC} -gt ${INTERVAL_MAX} ]]; then
        echo "-i <interval> must be > 0 and <= ${INTERVAL_MAX}."
        error=1
    fi
    if [ ${error} -eq 1 ]; then
        tools_cleanup 0
        exit 1
    fi
}
|
98
tools/engtools/hostdata-collectors/scripts/filestats.sh
Normal file
98
tools/engtools/hostdata-collectors/scripts/filestats.sh
Normal file
@ -0,0 +1,98 @@
|
||||
#!/bin/bash
# Usage: filestats.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)

# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
# BUGFIX: 'exit $?' after the echo exited with echo's status (always 0);
# capture the real tools_init return code before reporting it.
rc=$?
if [ ${rc} -ne 0 ]; then
    echo "FATAL, tools_init - could not setup environment"
    exit ${rc}
fi

PAGE_SIZE=$(getconf PAGE_SIZE)

# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
|
||||
|
||||
|
||||
# print_files() - emit one sample of open-file statistics:
#  1) counts of open fds (total / plain files / sockets / null pipes)
#     derived from 'ls -l /proc/*/fd',
#  2) a per-process lsof summary sorted by descending fd count,
#  3) the raw 'lsof -nP +L1' listing (open files with link count < 1,
#     i.e. open-but-deleted files).
# Uses helpers/vars from engtools_util.sh: print_separator, TOOL_HIRES_TIME,
# ${ECHO}.  Requires passwordless sudo for the /proc and lsof scans.
function print_files()
{
    print_separator
    TOOL_HIRES_TIME

    ${ECHO} "# ls -l /proc/*/fd"
    # a = sockets, b = /dev/null pipes, c = all fds ($11 is the symlink target)
    sudo ls -l /proc/*/fd 2>/dev/null | awk \
        '$11 ~ /socket/ {a += 1} ; \
        $11 ~ /null/ {b += 1} ; \
        {c += 1} \
        END {\
        {printf "%-10s %-10s %-10s %-10s\n", "TOTAL", "FILES", "SOCKETS", "NULL PIPES"} \
        {printf "%-10s %-10s %-10s %-10s\n", c, c-(a+b) , a, b}}'

    ${ECHO}

    ${ECHO} "# lsof"
    printf "%-7s %-7s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %s\n" "PID" "TOTAL" "FD" "U" "W" "R" "CWD" "RTD" "TXT" "MEM" "DEL" "TCP" "CMD"
    # Aggregate lsof rows per PID into counts keyed by fd kind.
    # NOTE(review): pids[$2]["..."] is gawk arrays-of-arrays syntax -- this
    # awk program presumably requires gawk 4+; verify on the target image.
    sudo lsof +c 15| awk '$3 !~ /^[0-9]+/{ {pids[$2]["COMMAND"]=$1}\
        {pids[$2]["PID"]=$2}\
        {pids[$2]["TOTAL"]+=1}\
        {pids[$2]["TCP"]+=($8=="TCP")? 1 : 0}\
        {($4 ~ /^[0-9][0-9]*[urw]/ )? \
        pids[$2][substr($4, length($4),1)]+=1 : pids[$2][$4]+=1} }
        END {
        { for (i in pids) \
        if(pids[i]["PID"]!="PID") {
        {printf "%-7s %-7s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %s\n", \
        pids[i]["PID"], \
        pids[i]["TOTAL"],\
        ((pids[i]["u"]!="")? pids[i]["u"] : 0) + ((pids[i]["w"]!="")? pids[i]["w"] : 0 )+ ((pids[i]["r"]!="")? pids[i]["r"] : 0),\
        (pids[i]["u"]!="")? pids[i]["u"] : 0,\
        (pids[i]["w"]!="")? pids[i]["w"] : 0,\
        (pids[i]["r"]!="")? pids[i]["r"] : 0,\
        (pids[i]["cwd"]!="")? pids[i]["cwd"] : 0,\
        (pids[i]["rtd"]!="")? pids[i]["rtd"] : 0,\
        (pids[i]["txt"]!="")? pids[i]["txt"] : 0,\
        (pids[i]["mem"]!="")? pids[i]["mem"] : 0,\
        (pids[i]["DEL"]!="")? pids[i]["DEL"] : 0,\
        (pids[i]["TCP"]!="")? pids[i]["TCP"] : 0,\
        pids[i]["COMMAND"]} }}}' | sort -n -r -k3

    ${ECHO}

    ${ECHO} "# lsof -nP +L1"
    sudo lsof -nP +L1
    ${ECHO}
}
|
||||
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"

# Set affinity of current script
# NOTE(review): CPULIST is cleared here, discarding any -c option parsed
# above, so set_affinity is effectively called with no cpulist -- confirm
# whether this is intentional.
CPULIST=""
set_affinity ${CPULIST}

LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."

# Print tools generic tools header
tools_header

# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))

# Sample REPEATS times, then take one final sample after the loop
for ((rep=1; rep <= REPEATS ; rep++))
do
    print_files
    sleep ${INTERVAL_SEC}
done
print_files
LOG "done"

# normal program exit
tools_cleanup 0
exit 0
|
@ -0,0 +1,120 @@
|
||||
#!/bin/bash
### BEGIN INIT INFO
# Provides: collect-engtools
# Required-Start: $local_fs $network $syslog postgresql
# Required-Stop: $local_fs $network $syslog postgresql
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: initscript to launch engineering tools data collection daemon
# Description: initscript to launch engineering tools data collection daemon
# Blah.
### END INIT INFO

PATH=/sbin:/usr/sbin:/bin:/usr/bin
DESC="collect engtools service"
NAME="collect-engtools.sh"
DAEMON=/usr/local/bin/${NAME}
DAEMON_ARGS="-f"
PIDFILE=/var/run/${NAME}.pid
SCRIPTNAME=/etc/init.d/${NAME}
DEFAULTFILE=/etc/default/${NAME}

# Exit if the package is not installed
[ -x "$DAEMON" ] || exit 0
. /etc/init.d/functions
# Read configuration variable file if it is present
[ -r $DEFAULTFILE ] && . $DEFAULTFILE

# Load the VERBOSE setting and other rcS variables
#. /lib/init/vars.sh

# Define lsb fallback versions of:
# log_daemon_msg(), log_end_msg()
log_daemon_msg() { echo -n "${1:-}: ${2:-}"; }
log_end_msg() { echo "."; }

# Use lsb functions to perform the operations.
if [ -f /lib/lsb/init-functions ]; then
    . /lib/lsb/init-functions
fi

# Check for sufficient priviledges
# [ JGAULD : possibly provide user = 'operator' option instead... ]
if [ $UID -ne 0 ]; then
    log_daemon_msg "Starting ${NAME} requires sudo/root access."
    exit 1
fi

case $1 in
    start)
        # Refuse to start a second instance if one is already running
        if [ -e ${PIDFILE} ]; then
            pid=$(pidof -x ${NAME})
            if test "${pid}" != ""
            then
                echo_success "${NAME} already running"
                exit
            fi
        fi

        log_daemon_msg "Starting ${NAME}"
        if start-stop-daemon --start --background --quiet --oknodo --pidfile ${PIDFILE} \
            --exec ${DAEMON} -- ${DAEMON_ARGS} ; then
            # BUGFIX: was './usr/local/bin/live_stream.py' -- a relative path
            # that only resolved when cwd was '/'.  Use the absolute path.
            /usr/local/bin/live_stream.py &
            log_end_msg 0
        else
            log_end_msg 1
        fi
        ;;

    stop)
        if [ -e ${PIDFILE} ]; then
            pids=$(pidof -x ${NAME})
            if [[ ! -z "${pids}" ]]
            then
                # BUGFIX: message previously referenced undefined $pid
                echo_success "Stopping ${NAME} [${pids}]"
                start-stop-daemon --stop --quiet --oknodo --pidfile ${PIDFILE} --retry=TERM/3/KILL/5
                # [ JGAULD: none of the following should be necessary ]
                /usr/local/bin/cleanup-engtools.sh
            else
                echo_failure "${NAME} is not running"
            fi
        else
            echo_failure "${PIDFILE} does not exist"
        fi
        ;;

    restart)
        $0 stop && sleep 2 && $0 start
        ;;

    status)
        if [ -e ${PIDFILE} ]; then
            pid=$(pidof -x ${NAME})
            if test "${pid}" != ""
            then
                echo_success "${NAME} is running"
            else
                echo_success "${NAME} is not running"
            fi
        else
            echo_success "${NAME} is not running"
        fi
        ;;

    reload)
        if [ -e ${PIDFILE} ]; then
            # SIGUSR1 asks the daemon to reload its configuration
            start-stop-daemon --stop --signal USR1 --quiet --pidfile ${PIDFILE} --name ${NAME}
            echo_success "${NAME} reloaded successfully"
        else
            echo_success "${PIDFILE} does not exist"
        fi
        ;;

    *)
        echo "Usage: $0 {start|stop|restart|reload|status}"
        exit 2
        ;;
esac

exit 0
|
49
tools/engtools/hostdata-collectors/scripts/iostat.sh
Normal file
49
tools/engtools/hostdata-collectors/scripts/iostat.sh
Normal file
@ -0,0 +1,49 @@
|
||||
#!/bin/bash
# Usage: iostat.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)

# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
# BUGFIX: 'exit $?' after the echo exited with echo's status (always 0);
# capture the real tools_init return code before reporting it.
rc=$?
if [ ${rc} -ne 0 ]; then
    echo "FATAL, tools_init - could not setup environment"
    exit ${rc}
fi

# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1

# Bail out gracefully if the iostat binary is not installed
IOSTAT=$( which iostat 2>/dev/null )
if [ $? -ne 0 ]; then
    print_separator
    WARNLOG "iostat not available"
    tools_cleanup 0
fi

# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"

# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}

LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."

# Print tools generic tools header
tools_header

# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
((REP = REPEATS + 1))

# Execute tool for specified duration: iostat itself loops REP times
CMD="${IOSTAT} -k -x -t ${INTERVAL_SEC} ${REP}"
#LOG "CMD: ${CMD}"
${CMD}
LOG "done"

# normal program exit
tools_cleanup 0
exit 0
|
547
tools/engtools/hostdata-collectors/scripts/linux_benchmark.sh
Normal file
547
tools/engtools/hostdata-collectors/scripts/linux_benchmark.sh
Normal file
@ -0,0 +1,547 @@
|
||||
#!/bin/bash
# linux_benchmark.sh -- configuration for the inter-node network benchmark
# (TCP/UDP throughput, parallel throughput with tc traffic classes, latency).

# Credentials used for ssh/scp/sudo between nodes.
# NOTE(review): hard-coded plaintext credentials; consider key-based ssh.
username="wrsroot"
password="Li69nux*"
test_duration="30"          # seconds per iperf3/ping run
wait_duration="5"           # iperf3 -O warm-up seconds for parallel runs
udp_find_0_frameloss="1"    # 1 => binary-search bandwidth for ~0% frame loss
udp_max_iter="20"           # max binary-search iterations (0 = unlimited)
udp_granularity="100000"    # stop searching when bw step < this (bits/sec)
result_dir="/home/${username}/benchmark_results"
summary_file="${result_dir}/benchmark_summary.xls"
host=""                     # filled in from HOSTNAME by the main program
remote=""                   # the peer controller
controllers=()
computes=()
nodes=()                    # flat list tested pairwise: (n[0],n[1]), (n[2],n[3]), ...
max_compute_node="10"       # highest compute-N index probed
interfaces=("")             # "" = mgmt; "infra" appended if provisioned
# udp header total length: Ethernet header ( 14 ) + CRC ( 4 ) + IPv4 header ( 20 ) + UDP header ( 8 )
udp_header_len="46"
# icmp header total length: ICMP header ( 8 ) + IPv4 header ( 20 )
# NOTE(review): icmp_header_len appears unused in this script -- confirm.
icmp_header_len="28"
frame_sizes=(64 128 256 512 1024 1280 1518)
ssh_opt="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -q"
# ports used for different kind of traffics except hiprio. these are chosen randomly since they are not used
# 8000 - storage; 8001 - migration; 8002 - default; 8003 - drbd
controller_ports=(8000 8001 8002 8003)
compute_ports=(8000 8001 8002)
traffic_types=(storage migration default drbd)
flow_ids=(1:20 1:30 1:40 1:50)   # tc class ids matching controller_ports
|
||||
|
||||
# exec_cmd() - run a shell command locally or over ssh and print its output.
#   $1 - target node name; if it contains ${host}, run locally
#   $2 - command string to execute
# Output: the command's stdout (echoed, so trailing newlines are collapsed).
function exec_cmd ()
{
    node="$1"
    cmd="$2"

    if [[ "${node}" == *"${host}"* ]]; then
        echo "$(bash -c "${cmd}")"
    else
        echo "$(ssh ${ssh_opt} ${username}@${node} "${cmd}")"
    fi
}
|
||||
|
||||
# iperf3_server_start() - launch an iperf3 server on a node in the background.
#   $1 - server node name
#   $2 - file that captures the server's output
#   $3 - optional listen port (iperf3 default when empty)
function iperf3_server_start ()
{
    local server="$1"
    local result="$2"
    local port="$3"
    local cmd="iperf3 -s"

    if [ "${port}" ]; then
        cmd="${cmd} -p ${port}"
    fi
    # nohup + & so the server survives after the (possibly remote) shell exits
    cmd="nohup ${cmd} > ${result} 2>&1 &"
    $(exec_cmd "${server}" "${cmd}")
}
|
||||
|
||||
# iperf3_client_tcp_start() - run a TCP iperf3 client against a server node.
#   $1 - client node name
#   $2 - server node name (resolved to an IP via get_ip_addr)
#   $3 - optional port; when given, results go to throughput_parallel_<port>
#        and the run uses -O ${wait_duration} to skip warm-up seconds.
# Without a port, results go to throughput_tcp[_infra] on the client.
function iperf3_client_tcp_start ()
{
    local result="${result_dir}/throughput"
    local cmd=""
    local client="$1"
    local server="$2"
    local port="$3"

    cmd="iperf3 -t ${test_duration} -c $(get_ip_addr "${server}")"
    if [ "${port}" ]; then
        cmd="${cmd} -p ${port} -O ${wait_duration}"
        result="${result}_parallel_${port}"
    else
        result="${result}_tcp"
        if [[ "${server}" == *"infra"* ]]; then
            result="${result}_infra"
        fi
    fi
    $(exec_cmd "${client}" "${cmd} > ${result} 2>&1")
}
|
||||
|
||||
# iperf3_client_udp_start() - run a UDP iperf3 client against a server node.
#   $1 - client node name
#   $2 - server node name
#   $3 - iperf3 payload length (-l); the result file is named after the
#        on-wire frame size, i.e. payload + udp_header_len
#   $4 - optional target bandwidth for -b (default "0" = unlimited)
# Appends (>>) to the result file so repeated binary-search runs accumulate.
function iperf3_client_udp_start ()
{
    local result="${result_dir}/throughput_udp"
    local cmd=""
    local client="$1"
    local server="$2"
    local frame_size="$3"
    local bw="0"

    if [ "${4}" ]; then
        bw="${4}"
    fi

    cmd="iperf3 -u -t ${test_duration} -c $(get_ip_addr ${server})"
    if [ ${frame_size} ]; then
        cmd="${cmd} -l ${frame_size}"
        result="${result}_$[${frame_size}+${udp_header_len}]"
    fi

    if [[ ${server} == *"infra"* ]]; then
        result="${result}_infra"
    fi

    $(exec_cmd "${client}" "${cmd} -b ${bw} >> ${result} 2>&1" )
}
|
||||
|
||||
# iperf3_stop() - terminate any iperf3 processes running on the given node.
#   $1 - node name (local or remote; dispatched through exec_cmd)
function iperf3_stop ()
{
    local target="$1"
    $(exec_cmd "${target}" "pkill iperf3")
}
|
||||
|
||||
# get_ip_addr() - resolve a hostname to an IP address using the ARP cache.
#   $1 - hostname as it appears in 'arp -a' output
# Prints the first IP found inside the parentheses following the name.
# NOTE(review): requires GNU grep (-P lookbehind) and a populated ARP entry.
function get_ip_addr ()
{
    arp -a | grep -oP "(?<=$1 \()[^)]*" | head -n 1
}
|
||||
|
||||
# throughput_tcp_test() - single-stream TCP throughput between each node pair
# (nodes[i], nodes[i+1]) over every provisioned interface (mgmt, infra).
# Writes per-pair results on the client node and prints the iperf3 "sender"
# summary line to stdout.
function throughput_tcp_test()
{
    for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
        for interface in "${interfaces[@]}"; do
            local interface_name="management"
            local interface_suffix=""
            local result_suffix=""
            if [ "${interface}" == "infra" ]; then
                interface_name="infrastructure"
                interface_suffix="-infra"
                result_suffix="_infra"
            fi
            local result_file="${result_dir}/throughput_tcp${result_suffix}"
            printf "Running TCP throughput test between ${nodes[${i}]} and ${nodes[$[${i}+1]]}'s ${interface_name} network..."
            # server on nodes[i+1], client on nodes[i]
            iperf3_server_start ${nodes[$[${i}+1]]}${interface_suffix} ${result_file}
            iperf3_client_tcp_start ${nodes[${i}]}${interface_suffix} ${nodes[$[${i}+1]]}${interface_suffix}
            iperf3_stop ${nodes[$[${i}+1]]}${interface_suffix}
            # pull the "sender" bandwidth figure from the client-side result
            result=$(exec_cmd "${nodes[${i}]}" "awk '/sender/ {print \$7 \" \" \$8}' ${result_file}")
            printf " Done (${result})\n"
        done
    done
}
|
||||
|
||||
# throughput_udp_test() - UDP throughput for each node pair, interface, and
# frame size.  When udp_find_0_frameloss=1, binary-searches the send
# bandwidth between min_bw and max_bw (bits/sec) looking for the highest
# rate with ~0% frame loss; stops when the step shrinks below
# udp_granularity or udp_max_iter iterations are reached.  Otherwise a
# single unlimited-rate run is done.
function throughput_udp_test ()
{
    for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
        for interface in "${interfaces[@]}"; do
            local interface_name="management"
            local interface_suffix=""
            local result_suffix=""
            if [ "${interface}" == "infra" ]; then
                interface_name="infrastructure"
                interface_suffix="-infra"
                result_suffix="_infra"
            fi
            echo "Running UDP throughput test between ${nodes[${i}]} and ${nodes[$[${i}+1]]}'s ${interface_name} network"
            for frame_size in "${frame_sizes[@]}"; do
                local max_bw="0"
                local min_bw="0"
                local cur_bw="0"        # 0 => iperf3 unlimited rate on first pass
                local old_bw="0"
                local result=""
                local result_unit=""
                local frame_loss=""
                local max_result=""     # first-pass (max-rate) figures, reported at the end
                local max_result_unit=""
                local max_frame_loss=""
                local result_file="${result_dir}/throughput_udp_${frame_size}${result_suffix}"
                local iter="0"
                local diff=""
                printf "\tFrame size = ${frame_size}..."
                while true; do
                    iperf3_server_start ${nodes[$[${i}+1]]}${interface_suffix} ${result_file}
                    # payload = frame size minus UDP/IP/Ethernet overhead
                    iperf3_client_udp_start ${nodes[${i}]}${interface_suffix} ${nodes[$[${i}+1]]}${interface_suffix} $[${frame_size}-${udp_header_len}] ${cur_bw}
                    iperf3_stop ${nodes[$[${i}+1]]}${interface_suffix}
                    # last summary line: $7=bandwidth, $8=unit, $12=(loss%)
                    result=$(exec_cmd "${nodes[${i}]}" "awk '/%/ {print \$7}' ${result_file} | tail -n1")
                    result_unit=$(exec_cmd "${nodes[${i}]}" "awk '/%/ {print \$8}' ${result_file} | tail -n1")
                    frame_loss=$(exec_cmd "${nodes[${i}]}" "awk '/%/ {print \$12}' ${result_file} | tail -n1 | tr -d '()%'")
                    if [ "${udp_find_0_frameloss}" == "1" ]; then
                        if [ "${iter}" -eq "0" ]; then
                            max_result="${result}"
                            max_result_unit="${result_unit}"
                            max_frame_loss="${frame_loss}"
                        fi
                        # bc cannot parse scientific notation; rewrite 1.2e-3 as 1.2*10^-3
                        if [ $(echo ${frame_loss} | grep e) ]; then
                            frame_loss="$(echo ${frame_loss} | sed 's/e/*10^/g;s/ /*/' )"
                        fi
                        if [ "$(echo "${frame_loss} > 0" | bc -l)" -eq "1" ]; then
                            # loss observed: current rate becomes the upper bound
                            max_bw="${result}"
                            if [ "${result_unit}" == "Kbits/sec" ]; then
                                max_bw="$(echo "(${max_bw} * 1000) / 1" | bc)"
                            elif [ "${result_unit}" == "Mbits/sec" ]; then
                                max_bw="$(echo "(${max_bw} * 1000000) / 1" | bc)"
                            elif [ "${result_unit}" == "Gbits/sec" ]; then
                                max_bw="$(echo "(${max_bw} * 1000000000) / 1" | bc)"
                            fi
                        else
                            if [ "${iter}" -eq "0" ]; then
                                # no loss even at unlimited rate: nothing to search
                                break
                            else
                                # no loss: current rate becomes the lower bound
                                min_bw="${result}"
                                if [ "${result_unit}" == "Kbits/sec" ]; then
                                    min_bw="$(echo "(${min_bw} * 1000) / 1" | bc)"
                                elif [ "${result_unit}" == "Mbits/sec" ]; then
                                    min_bw="$(echo "(${min_bw} * 1000000) / 1" | bc)"
                                elif [ "${result_unit}" == "Gbits/sec" ]; then
                                    min_bw="$(echo "(${min_bw} * 1000000000) / 1" | bc)"
                                fi
                            fi
                        fi
                        # bisect and measure how much the target rate moved
                        old_bw="${cur_bw}"
                        cur_bw="$[(${max_bw} + ${min_bw}) / 2]"
                        diff="$(echo "$[${cur_bw} - ${old_bw}]" | tr -d '-')"
                        #break
                        ((iter++))
                        if [ "${diff}" -lt "${udp_granularity}" ]; then
                            break
                        fi
                        if [ "${udp_max_iter}" -ne "0" ] && [ "${iter}" -ge "${udp_max_iter}" ]; then
                            break
                        fi
                    else
                        break
                    fi
                done
                if [ "${udp_find_0_frameloss}" == "1" ]; then
                    printf " Done (%s %s @ %s%% & %s %s @ %s%%)\n" "${max_result}" "${max_result_unit}" "${max_frame_loss}" "${result}" "${result_unit}" "${frame_loss}"
                else
                    printf " Done (%s %s @ %s%%)\n" "${result}" "${result_unit}" "${frame_loss}"
                fi
            done
        done
    done
}
|
||||
|
||||
# throughput_parallel_test() - run one concurrent TCP stream per traffic
# class (storage/migration/default/drbd) between each node pair, with tc
# u32 filters steering each port into its flow id, then report per-class
# bandwidth.  Uses the infra interface when provisioned, mgmt otherwise.
# Filters are installed before the runs and removed afterwards.
function throughput_parallel_test ()
{
    local dev=""
    local ip_addr=""
    local interface_name=""
    local interface_suffix=""
    local result_file="${result_dir}/throughput_parallel"
    # get device name of the interface (resolve own IP, then match ifconfig)
    if [ "${#interfaces[@]}" -gt "1" ]; then
        interface_name="infrastructure"
        interface_suffix="-infra"
        ip_addr=$(ping -c1 ${host}-infra | awk -F'[()]' '/PING/{print $2}')
    else
        interface_name="management"
        ip_addr=$(ping -c1 ${host} | awk -F'[()]' '/PING/{print $2}')
    fi
    dev=$(ifconfig | grep -B1 "inet ${ip_addr}" | awk '$1!="inet" && $1!="--" {print $1}')


    # set all the filters (skip "default", which needs no special class)
    for node in ${nodes[@]}; do
        local ports=("${controller_ports[@]}")
        if [[ "${node}" == *"compute"* ]]; then
            ports=("${compute_ports[@]}")
        fi
        for i in $(seq 0 $[${#ports[@]} - 1]); do
            if [ ${traffic_types[i]} != "default" ]; then
                tc_dport="tc filter add dev ${dev} protocol ip parent 1:0 prio 1 u32 match ip protocol 6 0xff match ip dport ${ports[i]} 0xffff flowid ${flow_ids[i]}"
                tc_sport="tc filter add dev ${dev} protocol ip parent 1:0 prio 1 u32 match ip protocol 6 0xff match ip sport ${ports[i]} 0xffff flowid ${flow_ids[i]}"
                $(exec_cmd "${node}" "echo ${password} | sudo -S bash -c '${tc_dport}; ${tc_sport}' > /dev/null 2>&1")
            fi
        done
    done

    # run the tests
    for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
        local ports=("${controller_ports[@]}")
        if [[ "${nodes[${i}]}" == *"compute"* ]]; then
            ports=("${compute_ports[@]}")
        fi
        printf "Running parallel throughput test between ${nodes[${i}]} and ${nodes[$[${i}+1]]}'s ${interface_name} network..."

        # start the servers (one per port)
        for port in "${ports[@]}"; do
            iperf3_server_start "${nodes[$[${i}+1]]}${interface_suffix}" "${result_file}_${port}" "${port}"
        done
        #start the clients, backgrounded so they run concurrently
        for port in "${controller_ports[@]}"; do
            iperf3_client_tcp_start ${nodes[${i}]}${interface_suffix} ${nodes[$[${i}+1]]}${interface_suffix} ${port} &
        done
        # wait for test duration + warm-up + slack, then tear down
        sleep $[${test_duration} + ${wait_duration} + 1]
        iperf3_stop ${nodes[$[${i}+1]]}${interface_suffix}
        printf " Done\n"

        # get results: one "sender" summary per port/traffic class
        for j in $(seq 0 $[${#ports[@]} - 1]); do
            result=$(exec_cmd "${nodes[${i}]}" "awk '/sender/ {print \$7 \" \" \$8}' ${result_file}_${ports[${j}]}")
            printf "\t${traffic_types[$j]} = ${result}\n"
        done
    done

    # remove all the filters added above (the last 2*(nports-1) handles)
    for node in ${nodes[@]}; do
        local handles=()
        local ports=("${controller_ports[@]}")
        if [[ "${node}" == *"compute"* ]]; then
            ports=("${compute_ports[@]}")
        fi
        handles=($(exec_cmd "${node}" "/usr/sbin/tc filter show dev ${dev} | awk '/filter/ {print \$10}' | tail -n $[(${#ports[@]} - 1) * 2 ]"))
        for handle in "${handles[@]}"; do
            $(exec_cmd "${node}" "echo ${password} | sudo -S /usr/sbin/tc filter delete dev ${dev} parent 1: handle ${handle} prio 1 u32 > /dev/null 2>&1")
        done
    done
}
|
||||
|
||||
# latency_test() - measure round-trip latency between each node pair and
# interface with ping, one run per frame size (ICMP payload = frame size
# minus 8 bytes of ICMP header).  Stores raw ping output per run and prints
# the rtt min/avg/max/mdev summary line.
function latency_test ()
{
    for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
        for interface in "${interfaces[@]}"; do
            local interface_name="management"
            local interface_suffix=""
            local result_suffix=""
            if [ "${interface}" == "infra" ]; then
                interface_name="infrastructure"
                interface_suffix="-infra"
                result_suffix="_infra"
            fi
            echo "Running latency test between ${nodes[${i}]} and ${nodes[$[${i}+1]]}'s ${interface_name} network"
            for frame_size in "${frame_sizes[@]}"; do
                local result_file="${result_dir}/latency_${frame_size}${result_suffix}"
                printf "\tFrame size = ${frame_size}..."
                # -w bounds the run to test_duration seconds, -i 0.2 = 5 pings/sec
                $(exec_cmd "${nodes[${i}]}" "ping -s $[${frame_size}-8] -w ${test_duration} -i 0.2 ${nodes[$[${i}+1]]}${interface_suffix} > ${result_file} 2>&1")
                result=$(exec_cmd "${nodes[${i}]}" "awk '/rtt/ {print \$2 \" = \" \$4 \" \" \$5}' ${result_file}")
                printf " Done (%s)\n" "${result}"
            done
        done
    done
}
|
||||
|
||||
# setup() - prepare every node for a fresh run: kill stray iperf3 servers
# and recreate an empty result directory.
function setup ()
{
    for node in ${nodes[@]}; do
        iperf3_stop "${node}"
        $(exec_cmd "${node}" "rm -rf ${result_dir}; mkdir -p ${result_dir}")
    done
}
|
||||
|
||||
# get_remote_results() - copy each remote node's result files into a
# per-node subdirectory under the local result_dir (the local host's files
# are already in place).
function get_remote_results ()
{
    for node in ${nodes[@]}; do
        if [ "${node}" != "${host}" ]; then
            mkdir ${result_dir}/${node}
            scp ${ssh_opt} ${username}@${node}:${result_dir}/* ${result_dir}/${node} > /dev/null 2>&1
        fi
    done
}
|
||||
|
||||
# get_interface_info() - append ethtool details for each tested interface
# (mgmt, and infra when provisioned) to the summary file.  The device name
# is recovered by resolving this host's IP and matching it in ifconfig.
function get_interface_info ()
{
    local dev=""
    local ip_addr=""
    printf "Network interfaces info\n" >> ${summary_file}
    for interface in "${interfaces[@]}"; do
        local interface_suffix=""
        local interface_name="management"
        if [ "${interface}" == "infra" ]; then
            interface_name="infrastructure"
            interface_suffix="-infra"
        fi
        ip_addr=$(ping -c1 ${host}${interface_suffix} | awk -F'[()]' '/PING/{print $2}')
        dev=$(ifconfig | grep -B1 "inet ${ip_addr}" | awk '$1!="inet" && $1!="--" {print $1}')
        printf "%s network interface\n" "${interface_name}" >> ${summary_file}
        # ethtool needs root; password is piped to sudo -S (see config header)
        echo ${password} | sudo -S ethtool ${dev} >> ${summary_file}
    done
}
|
||||
|
||||
# generate_summary() - build the tab/comma-separated summary spreadsheet
# (${summary_file}) from the collected result files: TCP throughput,
# UDP throughput (max and final binary-search figures), parallel per-class
# throughput, latency statistics plus a latency distribution histogram,
# and finally the interface details from get_interface_info().
# Compute-pair results are read from the per-node subdirectories created
# by get_remote_results().
function generate_summary ()
{
    local header=""
    local result=""
    local result_file=""

    printf "Summary\n\n" > ${summary_file}
    printf "Throughput TCP\n" >> ${summary_file}
    for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
        for interface in "${interfaces[@]}"; do
            local node_type="controller"
            local interface_type="mgmt"
            local result_suffix=""
            if [[ "${nodes[${i}]}" == *"compute"* ]]; then
                node_type="compute"
            fi
            if [ "${interface}" == "infra" ]; then
                interface_type="infra"
                result_suffix="_infra"
            fi
            header="${header},${node_type}'s ${interface_type}"
            result_file="${result_dir}"
            if [ ${node_type} == "compute" ]; then
                result_file="${result_file}/${nodes[${i}]}"
            fi
            result_file="${result_file}/throughput_tcp${result_suffix}"
            result="${result},$(awk '/sender/ {print $7 " " $8}' ${result_file})"
        done
    done
    printf "%s\n%s\n\n" "${header}" "${result}" >> ${summary_file}

    printf "Throughput UDP\n" >> ${summary_file}
    header=",frame,max throughput,max frameloss"
    if [ "${udp_find_0_frameloss}" == "1" ]; then
        header="${header},final throughput, final frameloss"
    fi
    for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
        for interface in "${interfaces[@]}"; do
            local node_type="controller"
            local interface_type="mgmt"
            local result_suffix=""
            if [[ "${nodes[${i}]}" == *"compute"* ]]; then
                node_type="compute"
            fi
            if [ "${interface}" == "infra" ]; then
                interface_type="infra"
                result_suffix="_infra"
            fi
            printf "%s's %s\n%s\n" "${node_type}" "${interface_type}" "${header}" >> ${summary_file}
            result_file=${result_dir}
            if [ ${node_type} == "compute" ]; then
                result_file="${result_file}/${nodes[${i}]}"
            fi
            # head -n1 = first (max-rate) run; tail -n1 = final search result
            for frame in ${frame_sizes[@]}; do
                result="${frame},$(awk '/%/ {print $7 " " $8}' ${result_file}/throughput_udp_${frame}${result_suffix} | head -n1),$(awk '/%/ {print $12}' ${result_file}/throughput_udp_${frame}${result_suffix} | head -n1 | tr -d '()')"
                if [ "${udp_find_0_frameloss}" == "1" ]; then
                    result="${result},$(awk '/%/ {print $7 " " $8}' ${result_file}/throughput_udp_${frame}${result_suffix} | tail -n1),$(awk '/%/ {print $12}' ${result_file}/throughput_udp_${frame}${result_suffix} | tail -n1 | tr -d '()')"
                fi
                printf ",%s\n" "${result}" >> ${summary_file}
            done
            printf "\n" >> ${summary_file}
        done
    done

    printf "Parallel throughput result\n" >> ${summary_file}
    header=",Node type"
    for traffic_type in "${traffic_types[@]}"; do
        header="${header},${traffic_type}"
    done
    printf "%s\n" "${header}" >> ${summary_file}
    for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
        local node_type="controller"
        local ports=("${controller_ports[@]}")
        if [[ "${nodes[${i}]}" == *"compute"* ]]; then
            node_type="compute"
        fi
        result_file=${result_dir}
        if [ ${node_type} == "compute" ]; then
            ports=("${compute_ports[@]}")
            result_file="${result_file}/${nodes[${i}]}"
        fi
        result=",${node_type}"
        for port in "${ports[@]}"; do
            result="${result},$(awk '/sender/ {print $7 " " $8}' ${result_file}/throughput_parallel_${port})"
        done
        printf "%s\n" "${result}" >> ${summary_file}
    done

    printf "\nLatency result in ms\n" >> ${summary_file}
    for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
        for interface in "${interfaces[@]}"; do
            local node_type="controller"
            local interface_type="mgmt"
            local result_suffix=""
            if [[ "${nodes[${i}]}" == *"compute"* ]]; then
                node_type="compute"
            fi
            if [ "${interface}" == "infra" ]; then
                interface_type="infra"
                result_suffix="_infra"
            fi
            printf "%s's %s network\n" "${node_type}" "${interface_type}" >> ${summary_file}
            result_file=${result_dir}
            if [ ${node_type} == "compute" ]; then
                result_file="${result_file}/${nodes[${i}]}"
            fi
            result_file="${result_file}/latency"
            # NOTE(review): ${frame_sizes} expands to the first element only;
            # this reads the column labels from the smallest-frame run.
            printf ",frame size,%s\n" "$(awk '/rtt/ {print $2}' ${result_file}_${frame_sizes}${result_suffix} | tr '/' ',' )" >> ${summary_file}
            for frame_size in "${frame_sizes[@]}"; do
                printf ",%s,%s\n" "${frame_size}" "$(awk '/rtt/ {print $4}' ${result_file}_${frame_size}${result_suffix} | tr '/' ',' )" >> ${summary_file}
            done

            # histogram: count of pings with rtt 0.01..0.19 ms (0.01 ms bins)
            printf "latency distribution\n" >> ${summary_file}
            printf ",frame size" >> ${summary_file}
            for (( j = 1; j < "20" ; j+=1 )); do
                printf ",%s" "$(echo "scale=3;${j}/100" | bc | awk '{printf "%.3f", $0}')" >> ${summary_file}
            done
            printf "\n" >> ${summary_file}
            for frame_size in "${frame_sizes[@]}"; do
                printf ",%s" "${frame_size}" >> ${summary_file}
                for (( j = 1; j < "20" ; j+=1 )); do
                    printf ",%s" "$(grep -c "time=$(echo "scale=2;${j}/100" | bc | awk '{printf "%.2f", $0}')" ${result_file}_${frame_size}${result_suffix})" >> ${summary_file}
                done
                printf "\n" >> ${summary_file}
            done
            printf "\n" >> ${summary_file}
        done
    done

    get_interface_info
}
|
||||
|
||||
echo "Starting linux interface benchmark test. ($(date))"

# find the nodes to test: this controller plus its peer controller
host=${HOSTNAME}
if [ "${host}" == "controller-1" ]; then
    remote="controller-0"
else
    remote="controller-1"
fi

# at least another controller needs to be reachable
ping -c1 ${remote} > /dev/null 2>&1
if [ $? -eq 0 ]; then
    controllers=(${host} ${remote})
    nodes+=("${controllers[@]}")
else
    echo "Stopping test as ${remote} is not reachable"
    exit 1
fi

# check if infrastructure interface is provisioned (adds "infra" to the
# interfaces list tested by every benchmark function)
ping -c1 "${remote}-infra" > /dev/null 2>&1
if [ $? -eq 0 ]; then
    echo "Infrastructure network is provisioned"
    interfaces+=("infra")
fi

# check if there are any compute nodes; take the first two reachable ones
# as an additional test pair
for i in $(seq 0 $[${max_compute_node} - 1]); do
    ping -c1 compute-${i} > /dev/null 2>&1
    if [ $? -eq 0 ]; then
        computes+=("compute-${i}")
        if [ ${#computes[@]} -ge "2" ]; then
            nodes+=("${computes[@]}")
            break
        fi
    fi
done

# run the full benchmark suite, then collect and summarize results
setup
throughput_tcp_test
throughput_udp_test
throughput_parallel_test
latency_test
get_remote_results
generate_summary
echo "Linux interface benchmark test finished. ($(date))"
|
||||
|
1578
tools/engtools/hostdata-collectors/scripts/live_stream.py
Normal file
1578
tools/engtools/hostdata-collectors/scripts/live_stream.py
Normal file
File diff suppressed because it is too large
Load Diff
112
tools/engtools/hostdata-collectors/scripts/memstats.sh
Normal file
112
tools/engtools/hostdata-collectors/scripts/memstats.sh
Normal file
@ -0,0 +1,112 @@
|
||||
#!/bin/bash
# Usage:  memstats.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
#
# Periodically samples memory statistics: /proc/meminfo, per-NUMA-node
# meminfo, buddyinfo, slabinfo (with a computed KiB column), per-process
# PSS total, rootfs/tmpfs disk usage, and a process list sorted by RSS.
TOOLBIN=$(dirname $0)

# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
rc=$?
if [ ${rc} -ne 0 ]; then
    echo "FATAL, tools_init - could not setup environment"
    # Exit with tools_init's status (capturing it first -- a bare 'exit $?'
    # here would return the status of the echo above, i.e. always 0).
    exit ${rc}
fi

PAGE_SIZE=$(getconf PAGE_SIZE)

# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1

# Print key memory statistics (one full sample per call)
function print_memory()
{
    # Sources of memory information
    MEMINFO=/proc/meminfo
    NODEINFO=/sys/devices/system/node/node?/meminfo
    BUDDYINFO=/proc/buddyinfo
    SLABINFO=/proc/slabinfo

    print_separator
    TOOL_HIRES_TIME

    ${ECHO} "# ${MEMINFO}"
    ${CAT} ${MEMINFO}
    ${ECHO}

    ${ECHO} "# ${NODEINFO}"
    ${CAT} ${NODEINFO}
    ${ECHO}

    ${ECHO} "# ${BUDDYINFO}"
    ${CAT} ${BUDDYINFO}
    ${ECHO}

    # Sum proportional-set-size over all processes (MiB)
    ${ECHO} "# PSS"
    cat /proc/*/smaps 2>/dev/null | \
        awk '/^Pss:/ {a += $2;} END {printf "%d MiB\n", a/1024.0;}'
    ${ECHO}

    # use old slabinfo format (i.e. slub not enabled in kernel)
    # NF==17 is the header row; NF==16 is a data row, for which a per-cache
    # KiB estimate is computed from objects/slab and pages/slab.
    ${ECHO} "# ${SLABINFO}"
    ${CAT} ${SLABINFO} | \
        awk -v page_size_B=${PAGE_SIZE} '
BEGIN {page_KiB = page_size_B/1024; TOT_KiB = 0;}
(NF == 17) {
    gsub(/[<>]/, "");
    printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8s\n",
        $2, $3, $4, $5, $6, $7, $8, $10, $11, $12, $13, $15, $16, $17, "KiB");
}
(NF == 16) {
    num_objs=$3; obj_per_slab=$5; pages_per_slab=$6;
    KiB = (obj_per_slab > 0) ? page_KiB*num_objs/obj_per_slab*pages_per_slab : 0;
    TOT_KiB += KiB;
    printf("%-22s %11d %8d %8d %10d %12d %1s %5d %10d %12d %1s %12d %9d %11d %8d\n",
        $1, $2, $3, $4, $5, $6, $7, $9, $10, $11, $12, $14, $15, $16, KiB);
}
END {
    printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8d\n",
        "TOTAL", "-", "-", "-", "-", "-", ":", "-", "-", "-", ":", "-", "-", "-", TOT_KiB);
}
' 2>/dev/null
    ${ECHO}

    ${ECHO} "# disk usage: rootfs, tmpfs"
    cmd='df -h -H -T --local -t rootfs -t tmpfs'
    ${ECHO} "Disk space usage rootfs,tmpfs (SI):"
    ${ECHO} "${cmd}"
    ${cmd}
    ${ECHO}

    # Per-process memory, largest RSS first
    CMD='ps -e -o ppid,pid,nlwp,rss:10,vsz:10,cmd --sort=-rss'
    ${ECHO} "# ${CMD}"
    ${CMD}
    ${ECHO}
}

#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"

# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}

LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."

# Print tools generic tools header
tools_header

# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))

for ((rep=1; rep <= REPEATS ; rep++)); do
    print_memory
    sleep ${INTERVAL_SEC}
done
# One final sample after the loop so the end of the period is captured.
print_memory
LOG "done"

# normal program exit
tools_cleanup 0
exit 0
|
66
tools/engtools/hostdata-collectors/scripts/netstats.sh
Normal file
66
tools/engtools/hostdata-collectors/scripts/netstats.sh
Normal file
@ -0,0 +1,66 @@
|
||||
#!/bin/bash
# Usage:  netstats.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
#
# Periodically samples networking statistics from /proc/net/dev and
# /proc/net/netstat.
TOOLBIN=$(dirname $0)

# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
rc=$?
if [ ${rc} -ne 0 ]; then
    echo "FATAL, tools_init - could not setup environment"
    # Exit with tools_init's status (capturing it first -- a bare 'exit $?'
    # here would return the status of the echo above, i.e. always 0).
    exit ${rc}
fi

# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1

# Print key networking device statistics
function print_netcmds()
{
    # Configuration for netcmds
    DEV=/proc/net/dev
    NETSTAT=/proc/net/netstat

    print_separator
    TOOL_HIRES_TIME

    # Dump each source file that exists on this kernel.
    for net in \
        ${DEV} ${NETSTAT}
    do
        if [ -e "${net}" ]
        then
            ${ECHO} "# ${net}"
            ${CAT} ${net}
            ${ECHO}
        fi
    done
}

#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"

# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}

LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."

# Print tools generic tools header
tools_header

# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))

for ((rep=1; rep <= REPEATS ; rep++)); do
    print_netcmds
    sleep ${INTERVAL_SEC}
done
# One final sample after the loop so the end of the period is captured.
print_netcmds
LOG "done"

# normal program exit
tools_cleanup 0
exit 0
|
141
tools/engtools/hostdata-collectors/scripts/postgres.sh
Normal file
141
tools/engtools/hostdata-collectors/scripts/postgres.sh
Normal file
@ -0,0 +1,141 @@
|
||||
#!/bin/bash
# Usage:  postgres.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
#
# Periodically samples postgres statistics: database sizes, per-table
# sizes/tuples, vacuum state, and connection counts.
TOOLBIN=$(dirname $0)

# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
rc=$?
if [ ${rc} -ne 0 ]; then
    echo "FATAL, tools_init - could not setup environment"
    # Exit with tools_init's status (capturing it first -- a bare 'exit $?'
    # here would return the status of the echo above, i.e. always 0).
    exit ${rc}
fi

# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1

# Print key postgres statistics (one full sample per call)
function print_postgres()
{
    print_separator
    TOOL_HIRES_TIME

    # postgresql command: set user, disable pagination, and be quiet
    PSQL="sudo -u postgres psql --pset pager=off -q"

    # List postgres databases (excluding templates)
    db_list=( $(${PSQL} -t -c "SELECT datname FROM pg_database WHERE datistemplate = false;") )
    ${ECHO} "# postgres databases"
    echo "db_list = ${db_list[@]}"
    ${ECHO}

    # List sizes of all postgres databases (similar to "\l+")
    ${ECHO} "# postgres database sizes"
    ${PSQL} -c "
SELECT
    pg_database.datname,
    pg_database_size(pg_database.datname),
    pg_size_pretty(pg_database_size(pg_database.datname))
FROM pg_database
ORDER BY pg_database_size DESC;
"

    # For each database, list tables and their sizes (similar to "\dt+")
    for db in "${db_list[@]}"
    do
        ${ECHO} "# postgres database: ${db}"
        ${PSQL} -d ${db} -c "
SELECT
    table_schema,
    table_name,
    pg_size_pretty(table_size) AS table_size,
    pg_size_pretty(indexes_size) AS indexes_size,
    pg_size_pretty(total_size) AS total_size,
    live_tuples,
    dead_tuples
FROM (
    SELECT
        table_schema,
        table_name,
        pg_table_size(table_name) AS table_size,
        pg_indexes_size(table_name) AS indexes_size,
        pg_total_relation_size(table_name) AS total_size,
        pg_stat_get_live_tuples(table_name::regclass) AS live_tuples,
        pg_stat_get_dead_tuples(table_name::regclass) AS dead_tuples
    FROM (
        SELECT
            table_schema,
            table_name
        FROM information_schema.tables
        WHERE table_schema='public'
        AND table_type='BASE TABLE'
    ) AS all_tables
    ORDER BY total_size DESC
) AS pretty_sizes;
"

        ${ECHO} "# postgres database vacuum: ${db}"
        ${PSQL} -d ${db} -c "
SELECT
    relname,
    n_live_tup,
    n_dead_tup,
    last_vacuum,
    last_autovacuum,
    last_analyze,
    last_autoanalyze
FROM pg_stat_user_tables;
"
    done

    # Specific table counts (This is very SLOW, look at "live tuples" instead)
    # Number of keystone tokens
    #${ECHO} "# keystone token count"

    # Number of postgres connections (from the process table)
    ${ECHO} "# postgres database connections"
    CONN=$(ps -C postgres -o cmd= | wc -l)
    CONN_T=$(ps -C postgres -o cmd= | awk '/postgres: / {print $3}' | awk '{for(i=1;i<=NF;i++) a[$i]++} END {for(k in a) print k, a[k]}' | sort -k 2 -nr )
    ${ECHO} "connections total = ${CONN}"
    ${ECHO}
    ${ECHO} "connections breakdown:"
    ${ECHO} "${CONN_T}"
    ${ECHO}

    ${ECHO} "connections breakdown (query):"
    ${PSQL} -c "SELECT datname,state,count(*) from pg_stat_activity group by datname,state;"
    ${ECHO}

    ${ECHO} "connections idle age:"
    ${PSQL} -c "SELECT datname,age(now(),state_change) from pg_stat_activity where state='idle';"
    ${ECHO}
}

#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"

# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}

LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."

# Print tools generic tools header
tools_header

# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))

for ((rep=1; rep <= REPEATS ; rep++)); do
    print_postgres
    sleep ${INTERVAL_SEC}
done
# One final sample after the loop so the end of the period is captured.
print_postgres
LOG "done"

# normal program exit
tools_cleanup 0
exit 0
|
85
tools/engtools/hostdata-collectors/scripts/rabbitmq.sh
Normal file
85
tools/engtools/hostdata-collectors/scripts/rabbitmq.sh
Normal file
@ -0,0 +1,85 @@
|
||||
#!/bin/bash
# Usage:  rabbitmq.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
#
# Periodically samples RabbitMQ statistics: broker memory, per-queue
# message/consumer/memory detail, and object counts.
TOOLBIN=$(dirname $0)

# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
rc=$?
if [ ${rc} -ne 0 ]; then
    echo "FATAL, tools_init - could not setup environment"
    # Exit with tools_init's status (capturing it first -- a bare 'exit $?'
    # here would return the status of the echo above, i.e. always 0).
    exit ${rc}
fi

# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
# Need this workaround: address the local broker by its node name explicitly.
MQOPT="-n rabbit@localhost"

# Print key rabbitmq statistics (one full sample per call)
function print_rabbitmq()
{
    print_separator
    TOOL_HIRES_TIME

    # IMPORTANT:
    # - Difficulty getting rabbitmqctl to work from init.d script;
    #   apparently it requires a pseudo-TTY, which is something you don't have
    #   until post-init.
    # - WORKAROUND: run command using 'sudo', even if you are 'root'

    # Dump various rabbitmq related stats
    MQ_STATUS="rabbitmqctl ${MQOPT} status"
    ${ECHO} "# ${MQ_STATUS}"
    sudo ${MQ_STATUS} | grep -e '{memory' -A30
    ${ECHO}

    # The following is useful in diagnosing rabbit memory leaks
    # when end-users do not drain their queues (eg, due to RPC timeout issues, etc)
    MQ_QUEUES="rabbitmqctl ${MQOPT} list_queues messages name pid messages_ready messages_unacknowledged memory consumers"
    ${ECHO} "# ${MQ_QUEUES}"
    sudo ${MQ_QUEUES}
    ${ECHO}

    # Object counts: each listing has a 2-line header/footer, hence the -=2.
    num_queues=$(sudo rabbitmqctl ${MQOPT} list_queues | wc -l); ((num_queues-=2))
    num_bindings=$(sudo rabbitmqctl ${MQOPT} list_bindings | wc -l); ((num_bindings-=2))
    num_exchanges=$(sudo rabbitmqctl ${MQOPT} list_exchanges | wc -l); ((num_exchanges-=2))
    num_connections=$(sudo rabbitmqctl ${MQOPT} list_connections | wc -l); ((num_connections-=2))
    num_channels=$(sudo rabbitmqctl ${MQOPT} list_channels | wc -l); ((num_channels-=2))
    # Totals across all queues: messages, consumers, memory.
    arr=($(sudo rabbitmqctl ${MQOPT} list_queues messages consumers memory | \
        awk '/^[0-9]/ {a+=$1; b+=$2; c+=$3} END {print a, b, c}'))
    messages=${arr[0]}; consumers=${arr[1]}; memory=${arr[2]}
    printf "%6s %8s %9s %11s %8s %8s %9s %10s\n" \
        "queues" "bindings" "exchanges" "connections" "channels" "messages" "consumers" "memory"
    printf "%6d %8d %9d %11d %8d %8d %9d %10d\n" \
        $num_queues $num_bindings $num_exchanges $num_connections $num_channels $messages $consumers $memory
    ${ECHO}
}

#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"

# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}

LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."

# Print tools generic tools header
tools_header

# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))

for ((rep=1; rep <= REPEATS ; rep++)); do
    print_rabbitmq
    sleep ${INTERVAL_SEC}
done
# One final sample after the loop so the end of the period is captured.
print_rabbitmq
LOG "done"

# normal program exit
tools_cleanup 0
exit 0
|
@ -0,0 +1,46 @@
|
||||
#!/bin/bash
# Purpose:
#  bzip2 compress engtools data on all nodes.
#
# Must be run as a regular (sudo-capable) user, not root; the per-node
# compression itself runs via 'sudo' over ssh.

# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
if [ $UID -eq 0 ]; then
    ERRLOG "Do not start $0 using sudo/root access."
    exit 1
fi

# environment for system commands
source /etc/nova/openrc

declare -a CONTROLLER
declare -a COMPUTE
declare -a STORAGE
# Host names come from the 4th column of 'system host-list' output,
# bucketed by personality keyword in the row.
CONTROLLER=( $(system host-list | awk '(/controller/) {print $4;}') )
COMPUTE=( $(system host-list | awk '(/compute/) {print $4;}') )
STORAGE=( $(system host-list | awk '(/storage/) {print $4;}') )

LOG "Remote bzip2 engtools data on all blades:"
# Controllers keep engtools data under /scratch/syseng_data/<host>.
for blade in ${CONTROLLER[@]}; do
    ping -c1 ${blade} 1>/dev/null 2>/dev/null
    if [ $? -eq 0 ]; then
        LOG "bzip2 on $blade:"
        ssh -q -t -o StrictHostKeyChecking=no \
            ${blade} sudo bzip2 /scratch/syseng_data/${blade}/*
    else
        WARNLOG "cannot ping: ${blade}"
    fi
done
# Storage and compute nodes keep engtools data under /tmp/syseng_data/<host>.
for blade in ${STORAGE[@]} ${COMPUTE[@]} ; do
    ping -c1 ${blade} 1>/dev/null 2>/dev/null
    if [ $? -eq 0 ]; then
        LOG "bzip2 on $blade:"
        ssh -q -t -o StrictHostKeyChecking=no \
            ${blade} sudo bzip2 /tmp/syseng_data/${blade}/*
    else
        WARNLOG "cannot ping: ${blade}"
    fi
done
LOG "done"

exit 0
|
@ -0,0 +1,37 @@
|
||||
#!/bin/bash
# Purpose:
#  Remote start engtools on all blades.
#
# Starts the collect-engtools service locally on this host and, via ssh,
# on every other reachable controller/compute/storage node.

# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
if [ $UID -eq 0 ]; then
    ERRLOG "Do not start $0 using sudo/root access."
    exit 1
fi

# environment for system commands
source /etc/nova/openrc

declare -a BLADES
BLADES=( $(system host-list | awk '(/compute|controller|storage/) {print $4;}') )

LOG "Remote start engtools on all blades:"
for node in "${BLADES[@]}"; do
    # Local host: start directly, no ssh needed.
    if [ "${node}" == "${HOSTNAME}" ]; then
        LOG "start on $node:"
        sudo service collect-engtools.sh start
        continue
    fi
    # Remote host: only attempt the start if the node answers a ping.
    if ping -c1 "${node}" 1>/dev/null 2>/dev/null; then
        LOG "start on $node:"
        ssh -q -t -o StrictHostKeyChecking=no \
            "${node}" sudo service collect-engtools.sh start
    else
        WARNLOG "cannot ping: ${node}"
    fi
done
LOG "done"

exit 0
|
@ -0,0 +1,37 @@
|
||||
#!/bin/bash
# Purpose:
#  Remote stop engtools on all blades.
#
# Stops the collect-engtools service locally on this host and, via ssh,
# on every other reachable controller/compute/storage node.

# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
if [ $UID -eq 0 ]; then
    ERRLOG "Do not start $0 using sudo/root access."
    exit 1
fi

# environment for system commands
source /etc/nova/openrc

declare -a BLADES
BLADES=( $(system host-list | awk '(/compute|controller|storage/) {print $4;}') )

LOG "Remote stop engtools on all blades:"
for node in "${BLADES[@]}"; do
    # Local host: stop directly, no ssh needed.
    if [ "${node}" == "${HOSTNAME}" ]; then
        LOG "stop on $node:"
        sudo service collect-engtools.sh stop
        continue
    fi
    # Remote host: only attempt the stop if the node answers a ping.
    if ping -c1 "${node}" 1>/dev/null 2>/dev/null; then
        LOG "stop on $node:"
        ssh -q -t -o StrictHostKeyChecking=no \
            "${node}" sudo service collect-engtools.sh stop
    else
        WARNLOG "cannot ping: ${node}"
    fi
done
LOG "done"

exit 0
|
@ -0,0 +1,70 @@
|
||||
#!/bin/bash
# Purpose:
#  rsync data from all nodes to backup location.
#
# Must be run on a controller as a regular (sudo-capable) user; it pulls
# engtools data from every reachable node into /opt/backups/syseng_data/.

# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
if [ $UID -eq 0 ]; then
    ERRLOG "Do not start $0 using sudo/root access."
    exit 1
fi

# environment for system commands
source /etc/nova/openrc

declare -a BLADES
declare -a CONTROLLER
declare -a STORAGE
declare -a COMPUTE
# Host names come from the 4th column of 'system host-list' output.
BLADES=( $(system host-list | awk '(/compute|controller|storage/) {print $4;}') )
CONTROLLER=( $(system host-list | awk '(/controller/) {print $4;}') )
COMPUTE=( $(system host-list | awk '(/compute/) {print $4;}') )
STORAGE=( $(system host-list | awk '(/storage/) {print $4;}') )

DEST=/opt/backups/syseng_data/
if [[ "${HOSTNAME}" =~ "controller-" ]]; then
    LOG "rsync DEST=${DEST}"
else
    LOG "*ERROR* only run this on controller"
    exit 1
fi
sudo mkdir -p ${DEST}

# rsync options
USER=wrsroot
RSYNC_OPT="-r -l --safe-links -h -P --stats --exclude=*.pyc"

# Rsync data from multiple locations
LOG "rsync engtools data from all blades:"

# controllers
# NOTE(review): SRC/DEST are re-assigned per section below; DEST is the same
# value each time, the source path is what differs between node types.
SRC=/scratch/syseng_data/
DEST=/opt/backups/syseng_data/
for HOST in ${CONTROLLER[@]}
do
    ping -c1 ${HOST} 1>/dev/null 2>/dev/null
    if [ $? -eq 0 ]; then
        LOG "rsync ${RSYNC_OPT} ${USER}@${HOST}:${SRC} ${DEST}"
        sudo rsync ${RSYNC_OPT} ${USER}@${HOST}:${SRC} ${DEST}
    else
        WARNLOG "cannot ping: ${HOST}"
    fi
done

# computes & storage
SRC=/tmp/syseng_data/
DEST=/opt/backups/syseng_data/
for HOST in ${STORAGE[@]} ${COMPUTE[@]}
do
    ping -c1 ${HOST} 1>/dev/null 2>/dev/null
    if [ $? -eq 0 ]; then
        LOG "rsync ${RSYNC_OPT} ${USER}@${HOST}:${SRC} ${DEST}"
        sudo rsync ${RSYNC_OPT} ${USER}@${HOST}:${SRC} ${DEST}
    else
        WARNLOG "cannot ping: ${HOST}"
    fi
done
LOG 'done'

exit 0
|
23
tools/engtools/hostdata-collectors/scripts/slab.sh
Normal file
23
tools/engtools/hostdata-collectors/scripts/slab.sh
Normal file
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
# Print /proc/slabinfo with a computed per-cache KiB column and a TOTAL row.
# Assumes the old (non-SLUB) slabinfo layout: NF==17 is the header row,
# NF==16 is a data row.
PAGE_SIZE=$(getconf PAGE_SIZE)
# Feed the file to awk directly (no useless 'cat | awk' pipeline).
awk -v page_size_B=${PAGE_SIZE} '
BEGIN {page_KiB = page_size_B/1024; TOT_KiB = 0;}
(NF == 17) {
    gsub(/[<>]/, "");
    printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8s\n",
        $2, $3, $4, $5, $6, $7, $8, $10, $11, $12, $13, $15, $16, $17, "KiB");
}
(NF == 16) {
    num_objs=$3; obj_per_slab=$5; pages_per_slab=$6;
    KiB = (obj_per_slab > 0) ? page_KiB*num_objs/obj_per_slab*pages_per_slab : 0;
    TOT_KiB += KiB;
    printf("%-22s %11d %8d %8d %10d %12d %1s %5d %10d %12d %1s %12d %9d %11d %8d\n",
        $1, $2, $3, $4, $5, $6, $7, $9, $10, $11, $12, $14, $15, $16, KiB);
}
END {
    printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8d\n",
        "TOTAL", "-", "-", "-", "-", "-", ":", "-", "-", "-", ":", "-", "-", "-", TOT_KiB);
}
' /proc/slabinfo 2>/dev/null

exit 0
|
50
tools/engtools/hostdata-collectors/scripts/ticker.sh
Normal file
50
tools/engtools/hostdata-collectors/scripts/ticker.sh
Normal file
@ -0,0 +1,50 @@
|
||||
#!/bin/bash
# Usage:  ticker.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
#
# Prints a "." heartbeat once per sample interval for the collection period,
# logging a fresh "collecting" line every 10 minutes.
TOOLBIN=$(dirname $0)

# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
rc=$?
if [ ${rc} -ne 0 ]; then
    echo "FATAL, tools_init - could not setup environment"
    # Exit with tools_init's status (capturing it first -- a bare 'exit $?'
    # here would return the status of the echo above, i.e. always 0).
    exit ${rc}
fi

# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1

#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"

# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}

# Calculate number of sample repeats based on overall interval and sampling interval;
# REP_LOG is how many ticks fit into a 10-minute log line.
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
((REP_LOG = 10 * 60 / INTERVAL_SEC))

LOG_NOCR "collecting "
t=0
for ((rep=1; rep <= REPEATS ; rep++)); do
    ((t++))
    sleep ${INTERVAL_SEC}
    if [ ${t} -ge ${REP_LOG} ]; then
        # Start a new log line every REP_LOG ticks.
        t=0
        echo "."
        LOG_NOCR "collecting "
    else
        echo -n "."
    fi
done
echo "."

LOG "done"

# normal program exit
tools_cleanup 0
exit 0
|
43
tools/engtools/hostdata-collectors/scripts/top.sh
Normal file
43
tools/engtools/hostdata-collectors/scripts/top.sh
Normal file
@ -0,0 +1,43 @@
|
||||
#!/bin/bash
# Usage:  top.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
#
# Runs 'top' in batch mode (with threads) for the collection period,
# one iteration per sample interval.
TOOLBIN=$(dirname $0)

# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
rc=$?
if [ ${rc} -ne 0 ]; then
    echo "FATAL, tools_init - could not setup environment"
    # Exit with tools_init's status (capturing it first -- a bare 'exit $?'
    # here would return the status of the echo above, i.e. always 0).
    exit ${rc}
fi

# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1

#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"

# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}

LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."

# Print tools generic tools header
tools_header

# Calculate number of sample repeats based on overall interval and sampling interval;
# +1 so an extra iteration captures the end of the period.
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
((REP = REPEATS + 1))

# Execute tool for specified duration
# -b batch, -c full command line, -H show threads
CMD="top -b -c -H -n ${REP} -d ${INTERVAL_SEC}"
#LOG "CMD: ${CMD}"
${CMD}
LOG "done"

# normal program exit
tools_cleanup 0
exit 0
|
68
tools/engtools/hostdata-collectors/scripts/vswitch.sh
Normal file
68
tools/engtools/hostdata-collectors/scripts/vswitch.sh
Normal file
@ -0,0 +1,68 @@
|
||||
#!/bin/bash
# Usage:  vswitch.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
#
# Periodically samples vswitch statistics via 'vshell' list/stats commands.
TOOLBIN=$(dirname $0)

# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
rc=$?
if [ ${rc} -ne 0 ]; then
    echo "FATAL, tools_init - could not setup environment"
    # Exit with tools_init's status (capturing it first -- a bare 'exit $?'
    # here would return the status of the echo above, i.e. always 0).
    exit ${rc}
fi

# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1

# Print key vswitch statistics (one full sample per call)
function print_vswitch()
{
    print_separator
    TOOL_HIRES_TIME

    # Dump each vshell listing, preceded by the command as a header.
    cmd='vshell engine-list'
    ${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
    cmd='vshell engine-stats-list'
    ${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
    cmd='vshell port-list'
    ${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
    cmd='vshell port-stats-list'
    ${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
    cmd='vshell network-list'
    ${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
    cmd='vshell network-stats-list'
    ${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
    cmd='vshell interface-list'
    ${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
    cmd='vshell interface-stats-list'
    ${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
}

#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"

# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}

LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."

# Print tools generic tools header
tools_header

# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))

for ((rep=1; rep <= REPEATS ; rep++)); do
    print_vswitch
    sleep ${INTERVAL_SEC}
done
# One final sample after the loop so the end of the period is captured.
print_vswitch
LOG "done"

# normal program exit
tools_cleanup 0
exit 0
|
Loading…
Reference in New Issue
Block a user