Add polling interval logic

Add option for defaults.poll_interval for subtask polling
Add option for maasdriver.poll_interval for MaaS status polling
Add option group for pyghmi_driver
Add option for pyghmi_driver.poll_interval for IPMI status polling
Update all tasks with subtask polling to respect poll interval
This commit is contained in:
Scott Hussey 2017-06-30 14:03:44 -05:00
parent 84f054610a
commit 22d6ca07bd
4 changed files with 63 additions and 25 deletions

View File

@ -41,6 +41,11 @@ class DrydockConfig(object):
"""
Initialize all the core options
"""
# Default options
options = [
cfg.IntOpt('poll_interval', default=10, help='Polling interval in seconds for checking subtask or downstream status'),
]
# Logging options
logging_options = [
cfg.StrOpt('log_level', default='INFO', help='Global log level for Drydock'),
@ -86,6 +91,7 @@ class DrydockConfig(object):
self.conf = cfg.ConfigOpts()
def register_options(self):
self.conf.register_opts(DrydockConfig.options)
self.conf.register_opts(DrydockConfig.logging_options, group='logging')
self.conf.register_opts(DrydockConfig.auth_options, group='authentication')
self.conf.register_opts(DrydockConfig.plugin_options, group='plugins')
@ -97,7 +103,8 @@ conf = config_mgr.conf
IGNORED_MODULES = ('drydock', 'config')
def list_opts():
opts = {'logging': DrydockConfig.logging_options,
opts = {'DEFAULT': DrydockConfig.options,
'logging': DrydockConfig.logging_options,
'authentication': DrydockConfig.auth_options,
'plugins': DrydockConfig.plugin_options,
'timeouts': DrydockConfig.timeout_options

View File

@ -38,6 +38,7 @@ class MaasNodeDriver(NodeDriver):
maasdriver_options = [
cfg.StrOpt('maas_api_key', help='The API key for accessing MaaS', secret=True),
cfg.StrOpt('maas_api_url', help='The URL for accessing MaaS API'),
cfg.IntOpt('poll_interval', default=10, help='Polling interval for querying MaaS status in seconds'),
]
driver_name = 'maasdriver'
@ -185,9 +186,12 @@ class MaasNodeDriver(NodeDriver):
running_subtasks = len(subtasks)
attempts = 0
max_attempts = config.conf.timeouts.identify_node * (60 // config.conf.poll_interval)
worked = failed = False
while running_subtasks > 0 and attempts < config.conf.timeouts.identify_node:
self.logger.debug("Polling for subtask completion every %d seconds, a max of %d polls." %
(config.conf.poll_interval, max_attempts))
while running_subtasks > 0 and attempts < max_attempts:
for t in subtasks:
subtask = self.state_manager.get_task(t)
@ -205,7 +209,7 @@ class MaasNodeDriver(NodeDriver):
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
time.sleep(1 * 60)
time.sleep(config.conf.maasdriver.poll_interval)
attempts = attempts + 1
if running_subtasks > 0:
@ -254,10 +258,13 @@ class MaasNodeDriver(NodeDriver):
running_subtasks = len(subtasks)
attempts = 0
max_attempts = config.conf.timeouts.configure_hardware * (60 // config.conf.poll_interval)
worked = failed = False
self.logger.debug("Polling for subtask completion every %d seconds, a max of %d polls." %
(config.conf.poll_interval, max_attempts))
#TODO Add timeout to config
while running_subtasks > 0 and attempts < config.conf.timeouts.configure_hardware:
while running_subtasks > 0 and attempts < max_attempts:
for t in subtasks:
subtask = self.state_manager.get_task(t)
@ -275,7 +282,7 @@ class MaasNodeDriver(NodeDriver):
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
time.sleep(1 * 60)
time.sleep(config.conf.maasdriver.poll_interval)
attempts = attempts + 1
if running_subtasks > 0:
@ -324,9 +331,12 @@ class MaasNodeDriver(NodeDriver):
running_subtasks = len(subtasks)
attempts = 0
max_attempts = config.conf.timeouts.apply_node_networking * (60 // config.conf.poll_interval)
worked = failed = False
while running_subtasks > 0 and attempts < config.conf.timeouts.apply_node_networking:
self.logger.debug("Polling for subtask completion every %d seconds, a max of %d polls." %
(config.conf.poll_interval, max_attempts))
while running_subtasks > 0 and attempts < max_attempts:
for t in subtasks:
subtask = self.state_manager.get_task(t)
@ -344,7 +354,7 @@ class MaasNodeDriver(NodeDriver):
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
time.sleep(1 * 60)
time.sleep(config.conf.poll_interval)
attempts = attempts + 1
if running_subtasks > 0:
@ -393,9 +403,13 @@ class MaasNodeDriver(NodeDriver):
running_subtasks = len(subtasks)
attempts = 0
max_attempts = config.conf.timeouts.apply_node_platform * (60 // config.conf.poll_interval)
worked = failed = False
while running_subtasks > 0 and attempts < config.conf.timeouts.apply_node_platform:
self.logger.debug("Polling for subtask completion every %d seconds, a max of %d polls." %
(config.conf.poll_interval, max_attempts))
while running_subtasks > 0 and attempts < max_attempts:
for t in subtasks:
subtask = self.state_manager.get_task(t)
@ -413,7 +427,7 @@ class MaasNodeDriver(NodeDriver):
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
time.sleep(1 * 60)
time.sleep(config.conf.poll_interval)
attempts = attempts + 1
if running_subtasks > 0:
@ -462,9 +476,13 @@ class MaasNodeDriver(NodeDriver):
running_subtasks = len(subtasks)
attempts = 0
max_attempts = config.conf.timeouts.deploy_node * (60 // config.conf.poll_interval)
worked = failed = False
while running_subtasks > 0 and attempts < config.conf.timeouts.deploy_node:
self.logger.debug("Polling for subtask completion every %d seconds, a max of %d polls." %
(config.conf.poll_interval, max_attempts))
while running_subtasks > 0 and attempts < max_attempts:
for t in subtasks:
subtask = self.state_manager.get_task(t)
@ -482,7 +500,7 @@ class MaasNodeDriver(NodeDriver):
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
time.sleep(1 * 60)
time.sleep(config.conf.maasdriver.poll_interval)
attempts = attempts + 1
if running_subtasks > 0:
@ -817,13 +835,14 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
# Poll machine status
attempts = 0
max_attempts = config.conf.timeouts.configure_hardware * (60 // config.conf.maasdriver.poll_interval)
while attempts < config.conf.timeouts.configure_hardware and machine.status_name != 'Ready':
while attempts < max_attempts and machine.status_name != 'Ready':
attempts = attempts + 1
time.sleep(1 * 60)
time.sleep(config.conf.maasdriver.poll_interval)
try:
machine.refresh()
self.logger.debug("Polling node %s status attempt %d: %s" % (n, attempts, machine.status_name))
self.logger.debug("Polling node %s status attempt %d of %d: %s" % (n, attempts, max_attempts, machine.status_name))
except:
self.logger.warning("Error updating node %s status during commissioning, will re-attempt." %
(n))
@ -1184,12 +1203,14 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
continue
attempts = 0
while attempts < config.conf.timeouts.deploy_node and not machine.status_name.startswith('Deployed'):
max_attempts = config.conf.timeouts.deploy_node * (60 // config.conf.maasdriver.poll_interval)
while attempts < max_attempts and not machine.status_name.startswith('Deployed'):
attempts = attempts + 1
time.sleep(1 * 60)
time.sleep(config.conf.maasdriver.poll_interval)
try:
machine.refresh()
self.logger.debug("Polling node %s status attempt %d: %s" % (n, attempts, machine.status_name))
self.logger.debug("Polling node %s status attempt %d of %d: %s" % (n, attempts, max_attempts, machine.status_name))
except:
self.logger.warning("Error updating node %s status during deployment, will re-attempt." %
(n))

View File

@ -14,6 +14,8 @@
import time
import logging
from oslo_config import cfg
from pyghmi.ipmi.command import Command
from pyghmi.exceptions import IpmiException
@ -28,15 +30,20 @@ import drydock_provisioner.drivers as drivers
class PyghmiDriver(oob.OobDriver):
pyghmi_driver_options = [
cfg.IntOpt('poll_interval', default=10, help='Polling interval in seconds for querying IPMI status'),
]
oob_types_supported = ['ipmi']
driver_name = "pyghmi_driver"
driver_key = "pyghmi_driver"
driver_desc = "Pyghmi OOB Driver"
def __init__(self, **kwargs):
super(PyghmiDriver, self).__init__(**kwargs)
self.driver_name = "pyghmi_driver"
self.driver_key = "pyghmi_driver"
self.driver_desc = "Pyghmi OOB Driver"
config.conf.register_opts(PyghmiDriver.pyghmi_driver_options, group=PyghmiDriver.driver_key)
self.logger = logging.getLogger("%s.%s" %
(config.conf.logging.oobdriver_logger_name, self.driver_key))
@ -101,15 +108,15 @@ class PyghmiDriver(oob.OobDriver):
runner.start()
attempts = 0
while (len(incomplete_subtasks) > 0 and
attempts <= getattr(config.conf.timeouts, task.action, config.conf.timeouts.drydock_timeout)):
max_attempts = getattr(config.conf.timeouts, task.action, config.conf.timeouts.drydock_timeout) * (60 // config.conf.pyghmi_driver.poll_interval)
while (len(incomplete_subtasks) > 0 and attempts <= max_attempts):
for n in incomplete_subtasks:
t = self.state_manager.get_task(n)
if t.get_status() in [hd_fields.TaskStatus.Terminated,
hd_fields.TaskStatus.Complete,
hd_fields.TaskStatus.Errored]:
incomplete_subtasks.remove(n)
time.sleep(1 * 60)
time.sleep(config.conf.pyghmi_driver.poll_interval)
attempts = attempts + 1
task = self.state_manager.get_task(task.get_id())
@ -382,3 +389,5 @@ class PyghmiTaskRunner(drivers.DriverTaskRunner):
time.sleep(15)
attempts = attempts + 1
def list_opts():
return {PyghmiDriver.driver_key: PyghmiDriver.pyghmi_driver_options}

View File

@ -356,6 +356,7 @@ class Orchestrator(object):
# Each attempt is a new task which might make the final task tree a bit confusing
node_identify_attempts = 0
max_attempts = config.conf.timeouts.identify_node * (60 // config.conf.poll_interval)
while True:
@ -379,11 +380,11 @@ class Orchestrator(object):
elif node_identify_task.get_result() in [hd_fields.ActionResult.PartialSuccess,
hd_fields.ActionResult.Failure]:
# TODO This threshold should be a configurable default and tunable by task API
if node_identify_attempts > 10:
if node_identify_attempts > max_attempts:
failed = True
break
time.sleep(1 * 60)
time.sleep(config.conf.poll_interval)
# We can only commission nodes that were successfully identified in the provisioner
if len(node_identify_task.result_detail['successful_nodes']) > 0: