Further enhancement of API stats collection

Decompose gunicorn-related stats into individual service stats. Add new
and missing services to the config file.

Functional tests completed by Mathieu Godin.

Change-Id: Ifcb81aa82a57c2a414fb99d43afc856a07d3846d
Story: 2002895
Task: 22858
Signed-off-by: Tee Ngo <tee.ngo@windriver.com>
Mathieu Godin 2018-07-09 17:33:44 -04:00 committed by Tee Ngo
parent 8071ead500
commit 6bcaec220a
2 changed files with 39 additions and 25 deletions


@@ -93,6 +93,7 @@ EXCLUDE_LIST=python python2 bash perl sudo init
DB_PORT_NUMBER=5432
RABBIT_PORT_NUMBER=5672
# The api stats data structure has three fields: the name displayed in ps -ef, the name displayed in lsof -Pn -i tcp, and the specific API port of the service.
[ApiStatsServices]
API_STATS_STRUCTURE=gunicorn;gunicorn;5000|sysinv-conductor;sysinv-co ;|neutron-server;neutron-s;9696|nova-conductor;nova-cond ;|sysinv-agent;sysinv-ag;|sysinv-api;sysinv-ap;6385|nova-api;nova-api ;18774|cinder-api;cinder-a;8776|glance-api;glance-a;9292|ceilometer;ceilomete;8777|vim;nfv-vim;4545|heat-api;heat-a;8004|heat-engine;heat-e;8004
API_STATS_STRUCTURE=ironic-conductor;ironic-co;|ironic-api;ironic-ap;6485|radosgw-swift;radosgw;8|magnum-conductor;magnum-co;|magnum-api;magnum-ap;9511|murano-api;murano-ap;8082|murano-engine;murano-en;|keystone-public;gunicorn;5000|openstack_dashboard.wsgi;gunicorn;8080|gnocchi-api;gunicorn;8041|aodh-api;gunicorn;8042|panko-api;gunicorn;8977|sysinv-conductor;sysinv-co ;|neutron-server;neutron-s;9696|nova-conductor;nova-cond ;|sysinv-agent;sysinv-ag;|sysinv-api;sysinv-ap;6385|nova-api;nova-api ;18774|cinder-api;cinder-a;8776|glance-api;glance-a;9292|vim;nfv-vim;4545|heat-api;heat-a;8004|heat-engine;heat-e;8004
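
For reference, a minimal stand-alone sketch of how an API_STATS_STRUCTURE value decomposes: entries are split on '|', and each entry's three ';'-separated fields become the ps -ef name, the lsof name, and the optional API port, mirroring the SERVICES parsing in the __main__ hunk further down. The sample entries are taken from the line above; variable names here are illustrative only.

# Hypothetical parser for the API_STATS_STRUCTURE format documented above.
# An empty third field means the service has no dedicated API port.
sample = ("keystone-public;gunicorn;5000|"
          "gnocchi-api;gunicorn;8041|"
          "sysinv-conductor;sysinv-co ;|"
          "heat-api;heat-a;8004")

services = {}
for entry in sample.split('|'):
    ps_name, lsof_name, api_port = entry.split(';')
    services[ps_name] = {'name': lsof_name,
                         'api-port': api_port if api_port else None}

print(services['gnocchi-api'])       # lsof name 'gunicorn', api-port '8041'
print(services['sysinv-conductor'])  # lsof name 'sysinv-co ', no API port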


@@ -1116,14 +1116,7 @@ def collectCpuCount(influx_info, node, ci):
except Exception:
logging.error("cpu_count collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info()))
def countApiStatsServices(lsof_lines, service_port, service_name):
service_count = 0
for line in lsof_lines:
if service_port is not None and service_name is not None and service_port in line and service_name in line:
service_count += 1
return service_count
def collectApiStats(influx_info, node, ci, services):
def collectApiStats(influx_info, node, ci, services, db_port, rabbit_port):
logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
logging.info("api_request data starting collection with a collection interval of {}s".format(ci["cpu_count"]))
measurement = "api_requests"
@@ -1132,19 +1125,39 @@ def collectApiStats(influx_info, node, ci, services):
lsof_args = ['lsof', '-Pn', '-i', 'tcp']
while True:
try:
fields = {}
lsof_result = Popen(lsof_args, shell=False, stdout=PIPE)
lsof_lines = list()
while True:
line = lsof_result.stdout.readline().strip("\n")
if not line:
break
lsof_lines.append(line)
lsof_result.kill()
for name, service in services.iteritems():
api_count = countApiStatsServices(lsof_lines, service['api-port'], service['name'])
db_count = countApiStatsServices(lsof_lines, service['db-port'], service['name'])
rabbit_count = countApiStatsServices(lsof_lines, service['rabbit-port'], service['name'])
fields = {}
lsof_result = Popen(lsof_args, shell=False, stdout=PIPE)
lsof_lines = list()
while True:
line = lsof_result.stdout.readline().strip("\n")
if not line:
break
lsof_lines.append(line)
lsof_result.kill()
for name, service in services.iteritems():
pid_list = list()
check_pid = False
if name == "keystone-public":
check_pid = True
ps_result = Popen("pgrep -f --delimiter=' ' keystone-public", shell=True, stdout=PIPE)
pid_list = ps_result.stdout.readline().strip().split(' ')
ps_result.kill()
elif name == "gnocchi-api":
check_pid = True
ps_result = Popen("pgrep -f --delimiter=' ' gnocchi-api", shell=True, stdout=PIPE)
pid_list = ps_result.stdout.readline().strip().split(' ')
ps_result.kill()
api_count = 0
db_count = 0
rabbit_count = 0
for line in lsof_lines:
if service['name'] is not None and service['name'] in line and (not check_pid or any(pid in line for pid in pid_list)):
if service['api-port'] is not None and service['api-port'] in line:
api_count += 1
elif db_port is not None and db_port in line:
db_count += 1
elif rabbit_port is not None and rabbit_port in line:
rabbit_count += 1
fields[name] = {"api": api_count, "db": db_count, "rabbit": rabbit_count}
influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "service", name, "api", fields[name]["api"], "db", fields[name]["db"], "rabbit", fields[name]["rabbit"]) + "\n"
p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True)
@@ -1414,9 +1427,9 @@ if __name__ == "__main__":
for service_string in SERVICES_INFO:
service_tuple = tuple(service_string.split(';'))
if service_tuple[2] != "" and service_tuple[2] != None:
SERVICES[service_tuple[0]] = {'name': service_tuple[1], 'db-port': DB_PORT_NUMBER, 'rabbit-port': RABBIT_PORT_NUMBER, 'api-port': service_tuple[2]}
SERVICES[service_tuple[0]] = {'name': service_tuple[1], 'api-port': service_tuple[2]}
else:
SERVICES[service_tuple[0]] = {'name': service_tuple[1], 'db-port': DB_PORT_NUMBER, 'rabbit-port': RABBIT_PORT_NUMBER, 'api-port': None}
SERVICES[service_tuple[0]] = {'name': service_tuple[1], 'api-port': None}
except Exception:
print "An error has occurred when parsing the engtools.conf configuration file: {}".format(sys.exc_info())
sys.exit(0)
@@ -1539,7 +1552,7 @@ if __name__ == "__main__":
tasks.append(p)
p.start()
if collect_api_requests is True and node_type == "controller":
p = Process(target=collectApiStats, args=(influx_info, node, collection_intervals, SERVICES), name="api_requests")
p = Process(target=collectApiStats, args=(influx_info, node, collection_intervals, SERVICES, DB_PORT_NUMBER, RABBIT_PORT_NUMBER), name="api_requests")
tasks.append(p)
p.start()