Add basic tools for benchmarking
Adds a horribly written, just hacked together little tool to help provide sizing insight into an ironic deployment's state and underlying performance. Key data: * Queries the list of nodes from a pure python interface level with the database and reports timing for the list of nodes to be returned. This information helps convey how long a periodic hits the database just for the query. * Requests *all* nodes using the query pattern/structure of the nova resource tracker, and uses the marker to make any additional requests. The data is parsed, and collected, and counts identified vendors, if any. * Collects basic data on conductors in terms of running, conductor groups as well as currently loaded drivers in the deployment. All of this information provides operational insight into *what* conditions exist within the deployment allowing developers to try and identify solutions based on the unique circumstances of larger deployments. Also adds a utility to generate and semi-randomize data to allow us to create a benchmark job in CI. Change-Id: Iae660aea82db8f1c4567ee2982595ccfdf434fe3
This commit is contained in:
parent
97ceb7bd15
commit
ffff76a682
13
tools/benchmark/README
Normal file
13
tools/benchmark/README
Normal file
@ -0,0 +1,13 @@
|
||||
This folder contains two files:
|
||||
|
||||
* do_not_run_create_benchmark_data.py - This script will destroy your
|
||||
ironic database. DO NOT RUN IT. You have been warned!
|
||||
It is intended to generate a semi-random database of node data
|
||||
which can be used for benchmarks, instead of crafting a raw SQL file
|
||||
representing a test model
|
||||
|
||||
* generate-statistics.py - This is a utility to generate some statistics to both
|
||||
aid in basic benchmarking of ironic operations *and* provide developers
|
||||
with conceptual information regarding a deployment's size. It operates
|
||||
only by reading the data present and timing how long the results take to
|
||||
return as well as isolating some key details about the deployment.
|
99
tools/benchmark/do_not_run_create_benchmark_data.py
Normal file
99
tools/benchmark/do_not_run_create_benchmark_data.py
Normal file
@ -0,0 +1,99 @@
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import sys
|
||||
import time
|
||||
|
||||
from oslo_db.sqlalchemy import enginefacade
|
||||
from sqlalchemy import sql
|
||||
|
||||
from ironic.common import service
|
||||
from ironic.conf import CONF # noqa To Load Configuration
|
||||
from ironic.objects import node
|
||||
|
||||
|
||||
def _create_test_nodes():
|
||||
print("Starting creation of fake nodes.")
|
||||
start = time.time()
|
||||
node_count = 10000
|
||||
checkin = time.time()
|
||||
for i in range(0, node_count):
|
||||
|
||||
new_node = node.Node({
|
||||
'power_state': 'power off',
|
||||
'driver': 'ipmi',
|
||||
'driver_internal_info': {'test-meow': i},
|
||||
'name': 'BenchmarkTestNode-%s' % i,
|
||||
'driver_info': {
|
||||
'ipmi_username': 'admin',
|
||||
'ipmi_password': 'admin',
|
||||
'ipmi_address': 'testhost%s.env.top.level.domain' % i},
|
||||
'resource_class': 'CUSTOM_BAREMETAL',
|
||||
'properties': {
|
||||
'cpu': 4,
|
||||
'memory': 32,
|
||||
'cats': i,
|
||||
'meowing': True}})
|
||||
new_node.create()
|
||||
delta = time.time() - checkin
|
||||
if delta > 10:
|
||||
checkin = time.time()
|
||||
print('* At %s nodes, %0.02f seconds. Total elapsed: %s'
|
||||
% (i, delta, time.time() - start))
|
||||
created = time.time()
|
||||
elapse = created - start
|
||||
print('Created %s nodes in %s seconds.\n' % (node_count, elapse))
|
||||
|
||||
|
||||
def _mix_up_nodes_data():
    """Semi-randomize indexed node columns with raw SQL updates.

    Executes a fixed list of ``UPDATE nodes ...`` statements through the
    writer engine. Each statement is gated on ``RAND()`` so that it only
    touches a fraction of the rows. The elapsed time since the first
    statement started is printed after each statement completes.

    NOTE(review): ``RAND()`` and ``UUID()`` look specific to
    MySQL/MariaDB -- confirm the target database before running this
    against another backend.
    """
    engine = enginefacade.writer.get_engine()

    # A list of commands to mix up indexed field data a bit to emulate what
    # a production database may somewhat look like.
    commands = [
        "UPDATE nodes set maintenance = True where RAND() < 0.1",  # noqa Easier to read this way
        "UPDATE nodes set driver = 'redfish' where RAND() < 0.5",  # noqa Easier to read this way
        "UPDATE nodes set reservation = 'fake_conductor01' where RAND() < 0.02",  # noqa Easier to read this way
        "UPDATE nodes set reservation = 'fake_conductor02' where RAND() < 0.02",  # noqa Easier to read this way
        "UPDATE nodes set reservation = 'fake_conductor03' where RAND() < 0.02",  # noqa Easier to read this way
        "UPDATE nodes set reservation = 'fake_conductor04' where RAND() < 0.02",  # noqa Easier to read this way
        "UPDATE nodes set reservation = 'fake_conductor05' where RAND() < 0.02",  # noqa Easier to read this way
        "UPDATE nodes set reservation = 'fake_conductor06' where RAND() < 0.02",  # noqa Easier to read this way
        "UPDATE nodes set provision_state = 'active' where RAND() < 0.8",  # noqa Easier to read this way
        "UPDATE nodes set power_state = 'power on' where provision_state = 'active' and RAND() < 0.95",  # noqa Easier to read this way
        "UPDATE nodes set provision_state = 'available' where RAND() < 0.1",  # noqa Easier to read this way
        "UPDATE nodes set provision_state = 'manageable' where RAND() < 0.1",  # noqa Easier to read this way
        "UPDATE nodes set provision_state = 'clean wait' where RAND() < 0.05",  # noqa Easier to read this way
        "UPDATE nodes set provision_state = 'error' where RAND() < 0.05",  # noqa Easier to read this way
        "UPDATE nodes set owner = (select UUID()) where RAND() < 0.2",  # noqa Easier to read this way
        "UPDATE nodes set lessee = (select UUID()) where RAND() < 0.2",  # noqa Easier to read this way
        "UPDATE nodes set instance_uuid = (select UUID()) where RAND() < 0.95 and provision_state = 'active'",  # noqa Easier to read this way
        "UPDATE nodes set last_error = (select UUID()) where RAND() <0.05",  # noqa Easier to read this way
    ]
    start = time.time()
    # Use the connection as a context manager so it is closed (returned
    # to the pool) even if one of the statements raises.
    with engine.connect() as conn:
        for command in commands:
            print("Executing SQL command: \\" + command + ";\n")
            conn.execute(sql.text(command))
            print("* Completed command. %0.04f elapsed since start of "
                  "commands." % (time.time() - start))
|
||||
|
||||
|
||||
def main():
    """Populate the database with semi-random benchmark node data.

    Prepares the ironic service configuration, disables debug output,
    creates the fake nodes and then runs the SQL updates that mix up
    their indexed fields.

    WARNING: this modifies the ironic database it is pointed at.
    """
    service.prepare_service()
    CONF.set_override('debug', False)
    _create_test_nodes()
    # The mix-up step was defined but never invoked, leaving every node
    # with identical indexed field values.
    _mix_up_nodes_data()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run the script entry point and hand its return value to sys.exit().
    exit_status = main()
    sys.exit(exit_status)
|
195
tools/benchmark/generate-statistics.py
Normal file
195
tools/benchmark/generate-statistics.py
Normal file
@ -0,0 +1,195 @@
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import datetime
|
||||
import sys
|
||||
import time
|
||||
from unittest import mock
|
||||
|
||||
from ironic_lib import metrics_utils
|
||||
import oslo_policy
|
||||
from oslo_utils import timeutils
|
||||
|
||||
from ironic.api.controllers.v1 import node as node_api
|
||||
from ironic.api.controllers.v1 import utils as api_utils
|
||||
from ironic.common import context
|
||||
from ironic.common import service
|
||||
from ironic.conf import CONF # noqa To Load Configuration
|
||||
from ironic.db import api as db_api
|
||||
from ironic.objects import conductor
|
||||
from ironic.objects import node
|
||||
|
||||
|
||||
def _calculate_delta(start, finish):
|
||||
return finish - start
|
||||
|
||||
|
||||
def _add_a_line():
|
||||
print('------------------------------------------------------------')
|
||||
|
||||
|
||||
def _assess_db_performance():
    """Time obtaining the DB API and fetching the raw node list.

    Prints how long it took to obtain the DB API instance and how long
    ``get_node_list()`` took to return.

    :returns: The number of nodes returned by the database query.
    """
    t_begin = time.time()
    database = db_api.get_instance()
    print('Phase - Assess DB performance')
    _add_a_line()
    t_connected = time.time()
    total = len(database.get_node_list())
    t_queried = time.time()
    print('Obtained DB client in %s seconds.'
          % _calculate_delta(t_begin, t_connected))
    print('Returned %s nodes in python %s seconds from the DB.\n'
          % (total, _calculate_delta(t_connected, t_queried)))
    # Hand the count back to the caller for any later use.
    return total
|
||||
|
||||
|
||||
def _assess_db_and_object_performance():
    """Time listing nodes as objects and serializing each one.

    Prints how long ``node.Node().list()`` took, how long it took to walk
    every returned object and serialize its secure dict form to JSON, the
    accumulated JSON size, and a count of the vendors found in each
    node's ``driver_internal_info``.
    """
    # Imported here so the function carries the extra stdlib modules it
    # needs without relying on the module-level import block.
    import collections
    import json

    print('Phase - Assess DB & Object conversion Performance')
    _add_a_line()
    start = time.time()
    node_list = node.Node().list(context.get_admin_context())
    got_list = time.time()
    delta = _calculate_delta(start, got_list)
    print('Obtained list of node objects in %s seconds.' % delta)
    count = 0
    tbl_size = 0
    # In a sense, this helps provide a relative understanding if the
    # database is the bottleneck, or the objects post conversion.
    # Converting completely to json and then measuring the size helps
    # ensure that everything is "assessed" while not revealing too
    # much detail.
    for node_obj in node_list:
        # sys.getsizeof() on a dict is shallow and ignores the contained
        # values, so actually serialize the dict to measure it.
        # default=str is deliberate: only the size matters here, so any
        # value json cannot encode natively is measured via its str().
        serialized = json.dumps(node_obj.as_dict(secure=True), default=str)
        tbl_size += len(serialized)
        count += 1
    delta = _calculate_delta(got_list, time.time())
    print('Took %s seconds to iterate through %s node objects.' %
          (delta, count))
    print('Nodes table is roughly %s bytes of JSON.\n' % tbl_size)
    # Count how often each vendor appears; str() keeps an unexpected
    # unhashable vendor value from breaking the tally.
    observed_vendors = collections.Counter()
    for node_obj in node_list:
        vendor = node_obj.driver_internal_info.get('vendor')
        if vendor:
            observed_vendors[str(vendor)] += 1
    if observed_vendors:
        print('Observed vendors: %s\n' % dict(observed_vendors))
|
||||
|
||||
|
||||
def _get_node_page(controller, fields, marker):
    """Request one page of nodes from the API controller after marker."""
    return controller._get_nodes_collection(
        chassis_uuid=None,
        instance_uuid=None,
        associated=None,
        maintenance=None,
        retired=None,
        provision_state=None,
        marker=marker,
        limit=None,
        sort_key="id",
        sort_dir="asc",
        # Hand over a fresh list on every call, as the original did.
        fields=list(fields))['nodes']


@mock.patch('ironic.api.request')  # noqa patch needed for the object model
@mock.patch.object(metrics_utils, 'get_metrics_logger', lambda *_: mock.Mock)
@mock.patch.object(api_utils, 'check_list_policy', lambda *_: None)
@mock.patch.object(api_utils, 'check_allow_specify_fields', lambda *_: None)
@mock.patch.object(api_utils, 'check_allowed_fields', lambda *_: None)
@mock.patch.object(oslo_policy.policy, 'LOG', autospec=True)
def _assess_db_object_and_api_performance(mock_log, mock_request):
    """Time retrieving every node through the nodes API controller.

    Pages through ``NodesController._get_nodes_collection`` using the
    uuid of the last node of each page as the marker for the next
    request, until an empty page is returned. Prints progress after each
    page and a final summary with the total time and node count.

    :param mock_log: Mock injected in place of ``oslo_policy.policy.LOG``.
    :param mock_request: Mock injected in place of ``ironic.api.request``.
    """
    print('Phase - Assess DB, Object conversion & API Performance')
    _add_a_line()
    # Just mock it to silence it since getting the logger to update
    # config seems like not a thing once started. :\
    mock_log.debug = mock.Mock()
    # Internal logic requires major/minor versions and a context to
    # proceed. This is just to make the NodesController respond properly.
    mock_request.context = context.get_admin_context()
    mock_request.version.major = 1
    mock_request.version.minor = 71

    start = time.time()
    node_api_controller = node_api.NodesController()
    node_api_controller.context = context.get_admin_context()
    fields = ("uuid,power_state,target_power_state,provision_state,"
              "target_provision_state,last_error,maintenance,properties,"
              "instance_uuid,traits,resource_class").split(',')

    page = _get_node_page(node_api_controller, fields, None)
    total_nodes = len(page)
    # Keep paging until an empty page comes back. Testing the page itself
    # (rather than "length != 1") avoids indexing into an empty first
    # page and avoids stopping early when a page holds a single node.
    while page:
        print(" ** Getting nodes ** %s Elapsed: %s seconds." %
              (total_nodes, _calculate_delta(start, time.time())))
        page = _get_node_page(node_api_controller, fields,
                              page[-1]['uuid'])
        total_nodes += len(page)

    delta = _calculate_delta(start, time.time())
    print('Took %s seconds to return all %s nodes via '
          'nodes API call pattern.\n' % (delta, total_nodes))
|
||||
|
||||
|
||||
def _report_conductors():
    """Print a summary of the conductors known to the deployment.

    Reports the total number of conductors, how many were updated within
    the last 90 seconds (treated as online), how many have a conductor
    group set, the number of distinct conductor groups, and the distinct
    drivers listed by the online conductors.
    """
    print('Phase - identifying conductors/drivers')
    _add_a_line()
    admin_ctx = context.get_admin_context()
    all_conductors = conductor.Conductor().list(admin_ctx)
    seen_drivers = []
    seen_groups = []
    online = 0
    # A conductor counts as online when updated after this cutoff.
    cutoff = (timeutils.utcnow(with_timezone=True)
              - datetime.timedelta(seconds=90))
    for entry in all_conductors:
        if entry.conductor_group:
            seen_groups.append(entry.conductor_group)
        if not entry.updated_at > cutoff:
            continue
        online += 1
        for driver_name in entry.drivers:
            seen_drivers.append(driver_name)
    print('Conductor count: %s' % len(all_conductors))
    print('Online conductor count: %s' % online)
    print('Conductors with conductor_groups: %s' % len(seen_groups))
    print('Conductor group count: %s' % len(set(seen_groups)))
    print('Presently supported drivers: %s' % list(set(seen_drivers)))
|
||||
|
||||
|
||||
def main():
    """Run every statistics phase in order against the deployment.

    Prepares the ironic service configuration, disables debug output and
    then executes each reporting phase in sequence.
    """
    service.prepare_service()
    CONF.set_override('debug', False)
    phases = (
        _assess_db_performance,
        _assess_db_and_object_performance,
        _assess_db_object_and_api_performance,
        _report_conductors,
    )
    for run_phase in phases:
        run_phase()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run the script entry point and hand its return value to sys.exit().
    exit_status = main()
    sys.exit(exit_status)
|
Loading…
Reference in New Issue
Block a user