From 22d6ca07bdf5111806957b2523ab0d9ead0459aa Mon Sep 17 00:00:00 2001
From: Scott Hussey
Date: Fri, 30 Jun 2017 14:03:44 -0500
Subject: [PATCH] Add polling interval logic

Add option for defaults.poll_interval for subtask polling
Add option for maasdriver.poll_interval for MaaS status polling
Add option group for pyghmi_driver
Add option for pyghmi_driver.poll_interval for IPMI status polling
Update all tasks with subtask polling to respect poll interval
---
 drydock_provisioner/config.py                |  9 +++-
 .../drivers/node/maasdriver/driver.py        | 53 +++++++++++++------
 .../drivers/oob/pyghmi_driver/__init__.py    | 21 +++++---
 drydock_provisioner/orchestrator/__init__.py |  5 +-
 4 files changed, 63 insertions(+), 25 deletions(-)

diff --git a/drydock_provisioner/config.py b/drydock_provisioner/config.py
index 094ea06c..f9033889 100644
--- a/drydock_provisioner/config.py
+++ b/drydock_provisioner/config.py
@@ -41,6 +41,11 @@ class DrydockConfig(object):
         """ Initialize all the core options
         """
+        # Default options
+        options = [
+            cfg.IntOpt('poll_interval', default=10, help='Polling interval in seconds for checking subtask or downstream status'),
+        ]
+
         # Logging options
         logging_options = [
             cfg.StrOpt('log_level', default='INFO', help='Global log level for Drydock'),
@@ -86,6 +91,7 @@ class DrydockConfig(object):
         self.conf = cfg.ConfigOpts()
 
     def register_options(self):
+        self.conf.register_opts(DrydockConfig.options)
         self.conf.register_opts(DrydockConfig.logging_options, group='logging')
         self.conf.register_opts(DrydockConfig.auth_options, group='authentication')
         self.conf.register_opts(DrydockConfig.plugin_options, group='plugins')
@@ -97,7 +103,8 @@ conf = config_mgr.conf
 IGNORED_MODULES = ('drydock', 'config')
 
 def list_opts():
-    opts = {'logging': DrydockConfig.logging_options,
+    opts = {'DEFAULT': DrydockConfig.options,
+            'logging': DrydockConfig.logging_options,
             'authentication': DrydockConfig.auth_options,
             'plugins': DrydockConfig.plugin_options,
             'timeouts': DrydockConfig.timeout_options
diff --git a/drydock_provisioner/drivers/node/maasdriver/driver.py b/drydock_provisioner/drivers/node/maasdriver/driver.py
index e38bfe68..69669801 100644
--- a/drydock_provisioner/drivers/node/maasdriver/driver.py
+++ b/drydock_provisioner/drivers/node/maasdriver/driver.py
@@ -38,6 +38,7 @@ class MaasNodeDriver(NodeDriver):
     maasdriver_options = [
         cfg.StrOpt('maas_api_key', help='The API key for accessing MaaS', secret=True),
         cfg.StrOpt('maas_api_url', help='The URL for accessing MaaS API'),
+        cfg.IntOpt('poll_interval', default=10, help='Polling interval for querying MaaS status in seconds'),
     ]
 
     driver_name = 'maasdriver'
@@ -185,9 +186,12 @@ class MaasNodeDriver(NodeDriver):
 
         running_subtasks = len(subtasks)
         attempts = 0
+        max_attempts = config.conf.timeouts.identify_node * (60 // config.conf.poll_interval)
         worked = failed = False
 
-        while running_subtasks > 0 and attempts < config.conf.timeouts.identify_node:
+        self.logger.debug("Polling for subtask completion every %d seconds, a max of %d polls." %
+                          (config.conf.poll_interval, max_attempts))
+        while running_subtasks > 0 and attempts < max_attempts:
             for t in subtasks:
                 subtask = self.state_manager.get_task(t)
@@ -205,7 +209,7 @@ class MaasNodeDriver(NodeDriver):
                 elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                     worked = failed = True
 
-            time.sleep(1 * 60)
+            time.sleep(config.conf.poll_interval)
             attempts = attempts + 1
 
         if running_subtasks > 0:
@@ -254,10 +258,13 @@ class MaasNodeDriver(NodeDriver):
 
         running_subtasks = len(subtasks)
         attempts = 0
+        max_attempts = config.conf.timeouts.configure_hardware * (60 // config.conf.poll_interval)
         worked = failed = False
 
+        self.logger.debug("Polling for subtask completion every %d seconds, a max of %d polls." %
+                          (config.conf.poll_interval, max_attempts))
         #TODO Add timeout to config
-        while running_subtasks > 0 and attempts < config.conf.timeouts.configure_hardware:
+        while running_subtasks > 0 and attempts < max_attempts:
             for t in subtasks:
                 subtask = self.state_manager.get_task(t)
@@ -275,7 +282,7 @@ class MaasNodeDriver(NodeDriver):
                 elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                     worked = failed = True
 
-            time.sleep(1 * 60)
+            time.sleep(config.conf.poll_interval)
             attempts = attempts + 1
 
         if running_subtasks > 0:
@@ -324,9 +331,12 @@ class MaasNodeDriver(NodeDriver):
 
         running_subtasks = len(subtasks)
         attempts = 0
+        max_attempts = config.conf.timeouts.apply_node_networking * (60 // config.conf.poll_interval)
         worked = failed = False
 
-        while running_subtasks > 0 and attempts < config.conf.timeouts.apply_node_networking:
+        self.logger.debug("Polling for subtask completion every %d seconds, a max of %d polls." %
+                          (config.conf.poll_interval, max_attempts))
+        while running_subtasks > 0 and attempts < max_attempts:
             for t in subtasks:
                 subtask = self.state_manager.get_task(t)
@@ -344,7 +354,7 @@ class MaasNodeDriver(NodeDriver):
                 elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                     worked = failed = True
 
-            time.sleep(1 * 60)
+            time.sleep(config.conf.poll_interval)
             attempts = attempts + 1
 
         if running_subtasks > 0:
@@ -393,9 +403,13 @@ class MaasNodeDriver(NodeDriver):
 
         running_subtasks = len(subtasks)
         attempts = 0
+        max_attempts = config.conf.timeouts.apply_node_platform * (60 // config.conf.poll_interval)
         worked = failed = False
 
-        while running_subtasks > 0 and attempts < config.conf.timeouts.apply_node_platform:
+        self.logger.debug("Polling for subtask completion every %d seconds, a max of %d polls." %
+                          (config.conf.poll_interval, max_attempts))
+
+        while running_subtasks > 0 and attempts < max_attempts:
             for t in subtasks:
                 subtask = self.state_manager.get_task(t)
@@ -413,7 +427,7 @@ class MaasNodeDriver(NodeDriver):
                 elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                     worked = failed = True
 
-            time.sleep(1 * 60)
+            time.sleep(config.conf.poll_interval)
             attempts = attempts + 1
 
         if running_subtasks > 0:
@@ -462,9 +476,13 @@ class MaasNodeDriver(NodeDriver):
 
         running_subtasks = len(subtasks)
         attempts = 0
+        max_attempts = config.conf.timeouts.deploy_node * (60 // config.conf.poll_interval)
         worked = failed = False
 
-        while running_subtasks > 0 and attempts < config.conf.timeouts.deploy_node:
+        self.logger.debug("Polling for subtask completion every %d seconds, a max of %d polls." %
+                          (config.conf.poll_interval, max_attempts))
+
+        while running_subtasks > 0 and attempts < max_attempts:
             for t in subtasks:
                 subtask = self.state_manager.get_task(t)
@@ -482,7 +500,7 @@ class MaasNodeDriver(NodeDriver):
                 elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                     worked = failed = True
 
-            time.sleep(1 * 60)
+            time.sleep(config.conf.poll_interval)
             attempts = attempts + 1
 
         if running_subtasks > 0:
@@ -817,13 +835,14 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
 
             # Poll machine status
            attempts = 0
+            max_attempts = config.conf.timeouts.configure_hardware * (60 // config.conf.maasdriver.poll_interval)
 
-            while attempts < config.conf.timeouts.configure_hardware and machine.status_name != 'Ready':
+            while attempts < max_attempts and machine.status_name != 'Ready':
                 attempts = attempts + 1
-                time.sleep(1 * 60)
+                time.sleep(config.conf.maasdriver.poll_interval)
                 try:
                     machine.refresh()
-                    self.logger.debug("Polling node %s status attempt %d: %s" % (n, attempts, machine.status_name))
+                    self.logger.debug("Polling node %s status attempt %d of %d: %s" % (n, attempts, max_attempts, machine.status_name))
                 except:
                     self.logger.warning("Error updating node %s status during commissioning, will re-attempt." % (n))
@@ -1184,12 +1203,14 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
                 continue
 
             attempts = 0
-            while attempts < config.conf.timeouts.deploy_node and not machine.status_name.startswith('Deployed'):
+            max_attempts = config.conf.timeouts.deploy_node * (60 // config.conf.maasdriver.poll_interval)
+
+            while attempts < max_attempts and not machine.status_name.startswith('Deployed'):
                 attempts = attempts + 1
-                time.sleep(1 * 60)
+                time.sleep(config.conf.maasdriver.poll_interval)
                 try:
                     machine.refresh()
-                    self.logger.debug("Polling node %s status attempt %d: %s" % (n, attempts, machine.status_name))
+                    self.logger.debug("Polling node %s status attempt %d of %d: %s" % (n, attempts, max_attempts, machine.status_name))
                 except:
                     self.logger.warning("Error updating node %s status during commissioning, will re-attempt."
                                         % (n))
diff --git a/drydock_provisioner/drivers/oob/pyghmi_driver/__init__.py b/drydock_provisioner/drivers/oob/pyghmi_driver/__init__.py
index d9975859..fed07f60 100644
--- a/drydock_provisioner/drivers/oob/pyghmi_driver/__init__.py
+++ b/drydock_provisioner/drivers/oob/pyghmi_driver/__init__.py
@@ -14,6 +14,8 @@
 import time
 import logging
 
+from oslo_config import cfg
+
 from pyghmi.ipmi.command import Command
 from pyghmi.exceptions import IpmiException
 
@@ -28,15 +30,20 @@ import drydock_provisioner.drivers as drivers
 
 class PyghmiDriver(oob.OobDriver):
+    pyghmi_driver_options = [
+        cfg.IntOpt('poll_interval', default=10, help='Polling interval in seconds for querying IPMI status'),
+    ]
 
     oob_types_supported = ['ipmi']
 
+    driver_name = "pyghmi_driver"
+    driver_key = "pyghmi_driver"
+    driver_desc = "Pyghmi OOB Driver"
+
     def __init__(self, **kwargs):
         super(PyghmiDriver, self).__init__(**kwargs)
 
-        self.driver_name = "pyghmi_driver"
-        self.driver_key = "pyghmi_driver"
-        self.driver_desc = "Pyghmi OOB Driver"
+        config.conf.register_opts(PyghmiDriver.pyghmi_driver_options, group=PyghmiDriver.driver_key)
 
         self.logger = logging.getLogger("%s.%s" %
                                         (config.conf.logging.oobdriver_logger_name, self.driver_key))
@@ -101,15 +108,15 @@ class PyghmiDriver(oob.OobDriver):
             runner.start()
 
         attempts = 0
-        while (len(incomplete_subtasks) > 0 and
-               attempts <= getattr(config.conf.timeouts, task.action, config.conf.timeouts.drydock_timeout)):
+        max_attempts = getattr(config.conf.timeouts, task.action, config.conf.timeouts.drydock_timeout) * (60 // config.conf.pyghmi_driver.poll_interval)
+        while (len(incomplete_subtasks) > 0 and attempts <= max_attempts):
             for n in incomplete_subtasks:
                 t = self.state_manager.get_task(n)
                 if t.get_status() in [hd_fields.TaskStatus.Terminated,
                                       hd_fields.TaskStatus.Complete,
                                       hd_fields.TaskStatus.Errored]:
                     incomplete_subtasks.remove(n)
-            time.sleep(1 * 60)
+            time.sleep(config.conf.pyghmi_driver.poll_interval)
             attempts = attempts + 1
 
         task = self.state_manager.get_task(task.get_id())
@@ -382,3 +389,5 @@ class PyghmiTaskRunner(drivers.DriverTaskRunner):
             time.sleep(15)
             attempts = attempts + 1
 
+def list_opts():
+    return {PyghmiDriver.driver_key: PyghmiDriver.pyghmi_driver_options}
diff --git a/drydock_provisioner/orchestrator/__init__.py b/drydock_provisioner/orchestrator/__init__.py
index cd354ae5..32871f0a 100644
--- a/drydock_provisioner/orchestrator/__init__.py
+++ b/drydock_provisioner/orchestrator/__init__.py
@@ -356,6 +356,7 @@ class Orchestrator(object):
 
         # Each attempt is a new task which might make the final task tree a bit confusing
         node_identify_attempts = 0
+        max_attempts = config.conf.timeouts.identify_node * (60 // config.conf.poll_interval)
 
         while True:
 
@@ -379,11 +380,11 @@ class Orchestrator(object):
                 elif node_identify_task.get_result() in [hd_fields.ActionResult.PartialSuccess,
                                                          hd_fields.ActionResult.Failure]:
                     # TODO This threshold should be a configurable default and tunable by task API
-                    if node_identify_attempts > 10:
+                    if node_identify_attempts > max_attempts:
                         failed = True
                         break
 
-                    time.sleep(1 * 60)
+                    time.sleep(config.conf.poll_interval)
 
         # We can only commission nodes that were successfully identified in the provisioner
         if len(node_identify_task.result_detail['successful_nodes']) > 0:
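
Reviewer note (not part of the patch): the sketch below shows how the three new poll_interval options line up and how the timeout-to-poll-count math behaves. The oslo.config calls mirror the registrations in the diff above; the timeout value is a hypothetical stand-in for config.conf.timeouts.identify_node.

    # Minimal sketch, assuming oslo.config is installed.
    from oslo_config import cfg

    conf = cfg.ConfigOpts()

    # [DEFAULT] poll_interval -- subtask polling (config.py)
    conf.register_opts([cfg.IntOpt('poll_interval', default=10)])
    # [maasdriver] poll_interval -- MaaS status polling (maasdriver/driver.py)
    conf.register_opts([cfg.IntOpt('poll_interval', default=10)], group='maasdriver')
    # [pyghmi_driver] poll_interval -- IPMI status polling (pyghmi_driver/__init__.py)
    conf.register_opts([cfg.IntOpt('poll_interval', default=10)], group='pyghmi_driver')

    conf([])  # parse an empty argv so the option values become readable

    # The patch converts a timeout expressed in minutes into a bounded poll count:
    timeout_minutes = 15  # hypothetical stand-in for config.conf.timeouts.identify_node
    max_attempts = timeout_minutes * (60 // conf.poll_interval)
    print(max_attempts)  # 15 * 6 = 90 polls at the default 10-second interval

Because the conversion uses integer division, a poll_interval larger than 60 seconds floors 60 // poll_interval to zero and yields max_attempts == 0, so the polling loops would exit immediately; keeping the interval at 60 seconds or below avoids that edge case.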