Add polling interval logic
Add option for defaults.poll_interval for subtask polling
Add option for maasdriver.poll_interval for MaaS status polling
Add option group for pyghmi_driver
Add option for pyghmi_driver.poll_interval for IPMI status polling
Update all tasks with subtask polling to respect poll interval
parent 84f054610a
commit 22d6ca07bd
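The change standardizes one polling pattern across drivers: a timeout expressed in minutes is converted into a number of polls spaced poll_interval seconds apart, and the loop sleeps for poll_interval between checks. A minimal sketch of that pattern, assuming defaults from the diff below; the helper name wait_for and its arguments are hypothetical, only the max_attempts arithmetic and the sleep on poll_interval mirror the commit:

import time

def wait_for(check_done, timeout_minutes, poll_interval=10):
    # Convert a timeout in minutes into a number of polls spaced
    # poll_interval seconds apart, matching the commit's arithmetic.
    max_attempts = timeout_minutes * (60 // poll_interval)
    attempts = 0
    while attempts < max_attempts and not check_done():
        time.sleep(poll_interval)
        attempts = attempts + 1
    return check_done()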
@@ -41,6 +41,11 @@ class DrydockConfig(object):
     """
     Initialize all the core options
     """
+    # Default options
+    options = [
+        cfg.IntOpt('poll_interval', default=10, help='Polling interval in seconds for checking subtask or downstream status'),
+    ]
+
     # Logging options
     logging_options = [
         cfg.StrOpt('log_level', default='INFO', help='Global log level for Drydock'),
@@ -86,6 +91,7 @@ class DrydockConfig(object):
         self.conf = cfg.ConfigOpts()
 
     def register_options(self):
+        self.conf.register_opts(DrydockConfig.options)
         self.conf.register_opts(DrydockConfig.logging_options, group='logging')
         self.conf.register_opts(DrydockConfig.auth_options, group='authentication')
         self.conf.register_opts(DrydockConfig.plugin_options, group='plugins')
@@ -97,7 +103,8 @@ conf = config_mgr.conf
 IGNORED_MODULES = ('drydock', 'config')
 
 def list_opts():
-    opts = {'logging': DrydockConfig.logging_options,
+    opts = {'DEFAULT': DrydockConfig.options,
+            'logging': DrydockConfig.logging_options,
             'authentication': DrydockConfig.auth_options,
             'plugins': DrydockConfig.plugin_options,
             'timeouts': DrydockConfig.timeout_options
@@ -38,6 +38,7 @@ class MaasNodeDriver(NodeDriver):
     maasdriver_options = [
         cfg.StrOpt('maas_api_key', help='The API key for accessing MaaS', secret=True),
         cfg.StrOpt('maas_api_url', help='The URL for accessing MaaS API'),
+        cfg.IntOpt('poll_interval', default=10, help='Polling interval for querying MaaS status in seconds'),
     ]
 
     driver_name = 'maasdriver'
@@ -185,9 +186,12 @@ class MaasNodeDriver(NodeDriver):
 
             running_subtasks = len(subtasks)
             attempts = 0
+            max_attempts = config.conf.timeouts.identify_node * (60 // config.conf.poll_interval)
             worked = failed = False
 
-            while running_subtasks > 0 and attempts < config.conf.timeouts.identify_node:
+            self.logger.debug("Polling for subtask completetion every %d seconds, a max of %d polls." %
+                              (config.conf.poll_interval, max_attempts))
+            while running_subtasks > 0 and attempts < max_attempts:
                 for t in subtasks:
                     subtask = self.state_manager.get_task(t)
 
@@ -205,7 +209,7 @@ class MaasNodeDriver(NodeDriver):
                         elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                             worked = failed = True
 
-                time.sleep(1 * 60)
+                time.sleep(config.conf.maasdriver.poll_interval)
                 attempts = attempts + 1
 
             if running_subtasks > 0:
@@ -254,10 +258,13 @@ class MaasNodeDriver(NodeDriver):
 
             running_subtasks = len(subtasks)
             attempts = 0
+            max_attempts = config.conf.timeouts.configure_hardware * (60 // config.conf.poll_interval)
             worked = failed = False
 
-            #TODO Add timeout to config
-            while running_subtasks > 0 and attempts < config.conf.timeouts.configure_hardware:
+            self.logger.debug("Polling for subtask completetion every %d seconds, a max of %d polls." %
+                              (config.conf.poll_interval, max_attempts))
+
+            while running_subtasks > 0 and attempts < max_attempts:
                 for t in subtasks:
                     subtask = self.state_manager.get_task(t)
 
@@ -275,7 +282,7 @@ class MaasNodeDriver(NodeDriver):
                         elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                             worked = failed = True
 
-                time.sleep(1 * 60)
+                time.sleep(config.conf.maasdriver.poll_interval)
                 attempts = attempts + 1
 
             if running_subtasks > 0:
@@ -324,9 +331,12 @@ class MaasNodeDriver(NodeDriver):
 
             running_subtasks = len(subtasks)
             attempts = 0
+            max_attempts = config.conf.timeouts.apply_node_networking * (60 // config.conf.poll_interval)
             worked = failed = False
 
-            while running_subtasks > 0 and attempts < config.conf.timeouts.apply_node_networking:
+            self.logger.debug("Polling for subtask completetion every %d seconds, a max of %d polls." %
+                              (config.conf.poll_interval, max_attempts))
+            while running_subtasks > 0 and attempts < max_attempts:
                 for t in subtasks:
                     subtask = self.state_manager.get_task(t)
 
@@ -344,7 +354,7 @@ class MaasNodeDriver(NodeDriver):
                         elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                             worked = failed = True
 
-                time.sleep(1 * 60)
+                time.sleep(config.conf.poll_interval)
                 attempts = attempts + 1
 
             if running_subtasks > 0:
@@ -393,9 +403,13 @@ class MaasNodeDriver(NodeDriver):
 
             running_subtasks = len(subtasks)
             attempts = 0
+            max_attempts = config.conf.timeouts.apply_node_platform * (60 // config.conf.poll_interval)
             worked = failed = False
 
-            while running_subtasks > 0 and attempts < config.conf.timeouts.apply_node_platform:
+            self.logger.debug("Polling for subtask completetion every %d seconds, a max of %d polls." %
+                              (config.conf.poll_interval, max_attempts))
+
+            while running_subtasks > 0 and attempts < max_attempts:
                 for t in subtasks:
                     subtask = self.state_manager.get_task(t)
 
@@ -413,7 +427,7 @@ class MaasNodeDriver(NodeDriver):
                         elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                             worked = failed = True
 
-                time.sleep(1 * 60)
+                time.sleep(config.conf.poll_interval)
                 attempts = attempts + 1
 
             if running_subtasks > 0:
@@ -462,9 +476,13 @@ class MaasNodeDriver(NodeDriver):
 
             running_subtasks = len(subtasks)
             attempts = 0
+            max_attempts = config.conf.timeouts.deploy_node * (60 // config.conf.poll_interval)
             worked = failed = False
 
-            while running_subtasks > 0 and attempts < config.conf.timeouts.deploy_node:
+            self.logger.debug("Polling for subtask completetion every %d seconds, a max of %d polls." %
+                              (config.conf.poll_interval, max_attempts))
+
+            while running_subtasks > 0 and attempts < max_attempts:
                 for t in subtasks:
                     subtask = self.state_manager.get_task(t)
 
@@ -482,7 +500,7 @@ class MaasNodeDriver(NodeDriver):
                         elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                             worked = failed = True
 
-                time.sleep(1 * 60)
+                time.sleep(max_attempts)
                 attempts = attempts + 1
 
             if running_subtasks > 0:
@@ -817,13 +835,14 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
 
             # Poll machine status
             attempts = 0
+            max_attempts = config.conf.timeouts.configure_hardware * (60 // config.conf.maasdriver.poll_interval)
 
-            while attempts < config.conf.timeouts.configure_hardware and machine.status_name != 'Ready':
+            while attempts < max_attempts and machine.status_name != 'Ready':
                 attempts = attempts + 1
-                time.sleep(1 * 60)
+                time.sleep(config.conf.maasdriver.poll_interval)
                 try:
                     machine.refresh()
-                    self.logger.debug("Polling node %s status attempt %d: %s" % (n, attempts, machine.status_name))
+                    self.logger.debug("Polling node %s status attempt %d of %d: %s" % (n, attempts, max_attempts, machine.status_name))
                 except:
                     self.logger.warning("Error updating node %s status during commissioning, will re-attempt." %
                                         (n))
@@ -1184,12 +1203,14 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
                 continue
 
             attempts = 0
-            while attempts < config.conf.timeouts.deploy_node and not machine.status_name.startswith('Deployed'):
+            max_attempts = config.conf.timeouts.deploy_node * (60 // config.conf.maasdriver.poll_interval)
+
+            while attempts < max_attempts and not machine.status_name.startswith('Deployed'):
                 attempts = attempts + 1
-                time.sleep(1 * 60)
+                time.sleep(config.conf.maasdriver.poll_interval)
                 try:
                     machine.refresh()
-                    self.logger.debug("Polling node %s status attempt %d: %s" % (n, attempts, machine.status_name))
+                    self.logger.debug("Polling node %s status attempt %d of %d: %s" % (n, attempts, max_attempts, machine.status_name))
                 except:
                     self.logger.warning("Error updating node %s status during commissioning, will re-attempt." %
                                         (n))
@@ -14,6 +14,8 @@
 import time
 import logging
 
+from oslo_config import cfg
+
 from pyghmi.ipmi.command import Command
 from pyghmi.exceptions import IpmiException
 
@@ -28,15 +30,20 @@ import drydock_provisioner.drivers as drivers
 
 
 class PyghmiDriver(oob.OobDriver):
+    pyghmi_driver_options = [
+        cfg.IntOpt('poll_interval', default=10, help='Polling interval in seconds for querying IPMI status'),
+    ]
+
     oob_types_supported = ['ipmi']
 
     driver_name = "pyghmi_driver"
     driver_key = "pyghmi_driver"
     driver_desc = "Pyghmi OOB Driver"
 
     def __init__(self, **kwargs):
         super(PyghmiDriver, self).__init__(**kwargs)
 
         self.driver_name = "pyghmi_driver"
         self.driver_key = "pyghmi_driver"
         self.driver_desc = "Pyghmi OOB Driver"
+        config.conf.register_opts(PyghmiDriver.pyghmi_driver_options, group=PyghmiDriver.driver_key)
 
         self.logger = logging.getLogger("%s.%s" %
                                         (config.conf.logging.oobdriver_logger_name, self.driver_key))
@@ -101,15 +108,15 @@ class PyghmiDriver(oob.OobDriver):
             runner.start()
 
         attempts = 0
-        while (len(incomplete_subtasks) > 0 and
-               attempts <= getattr(config.conf.timeouts, task.action, config.conf.timeouts.drydock_timeout)):
+        max_attempts = getattr(config.conf.timeouts, task.action, config.conf.timeouts.drydock_timeout) * (60 / config.conf.pyghmi_driver.poll_interval)
+        while (len(incomplete_subtasks) > 0 and attempts <= max_attempts):
            for n in incomplete_subtasks:
                t = self.state_manager.get_task(n)
                if t.get_status() in [hd_fields.TaskStatus.Terminated,
                                      hd_fields.TaskStatus.Complete,
                                      hd_fields.TaskStatus.Errored]:
                    incomplete_subtasks.remove(n)
-           time.sleep(1 * 60)
+           time.sleep(config.conf.pyghmi_driver.poll_interval)
            attempts = attempts + 1
 
        task = self.state_manager.get_task(task.get_id())
@@ -382,3 +389,5 @@ class PyghmiTaskRunner(drivers.DriverTaskRunner):
                 time.sleep(15)
                 attempts = attempts + 1
 
+def list_opts():
+    return {PyghmiDriver.driver_key: PyghmiDriver.pyghmi_driver_options}
@@ -356,6 +356,7 @@ class Orchestrator(object):
             # Each attempt is a new task which might make the final task tree a bit confusing
 
             node_identify_attempts = 0
+            max_attempts = config.conf.timeouts.identify_node * (60 / config.conf.poll_interval)
 
             while True:
 
@@ -379,11 +380,11 @@ class Orchestrator(object):
                 elif node_identify_task.get_result() in [hd_fields.ActionResult.PartialSuccess,
                                                          hd_fields.ActionResult.Failure]:
                     # TODO This threshold should be a configurable default and tunable by task API
-                    if node_identify_attempts > 10:
+                    if node_identify_attempts > max_attempts:
                         failed = True
                         break
 
-                time.sleep(1 * 60)
+                time.sleep(config.conf.poll_interval)
 
             # We can only commission nodes that were successfully identified in the provisioner
             if len(node_identify_task.result_detail['successful_nodes']) > 0:
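For reference, the DEFAULT-level poll_interval and the per-driver option groups added above follow the usual oslo_config group pattern; a self-contained sketch using a standalone ConfigOpts instance rather than Drydock's own config_mgr wiring:

from oslo_config import cfg

conf = cfg.ConfigOpts()
# DEFAULT-scoped option, read back as conf.poll_interval
conf.register_opts([cfg.IntOpt('poll_interval', default=10)])
# Driver-scoped option, read back as conf.pyghmi_driver.poll_interval
conf.register_opts([cfg.IntOpt('poll_interval', default=10)],
                   group='pyghmi_driver')
conf(args=[])  # parse with no CLI args or config files in this sketch
print(conf.poll_interval, conf.pyghmi_driver.poll_interval)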