Add polling interval logic
Add option for defaults.poll_interval for subtask polling
Add option for maasdriver.poll_interval for MaaS status polling
Add option group for pyghmi_driver
Add option for pyghmi_driver.poll_interval for IPMI status polling
Update all tasks with subtask polling to respect poll interval
parent 84f054610a
commit 22d6ca07bd
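The recurring change below converts each timeout, which the arithmetic implies is expressed in minutes, into a bounded number of polls at the configured interval, and then sleeps that interval between checks instead of a hard-coded minute. A minimal sketch of the pattern, assuming a callable is_done and a whole-minute timeout (the names here are illustrative, not Drydock APIs):

    import time

    def wait_for(is_done, timeout_minutes, poll_interval=10):
        """Poll is_done() every poll_interval seconds for at most timeout_minutes minutes."""
        max_attempts = timeout_minutes * (60 // poll_interval)  # e.g. 5 min at 10 s -> 30 polls
        attempts = 0
        while not is_done() and attempts < max_attempts:
            time.sleep(poll_interval)
            attempts = attempts + 1
        return is_done()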
@@ -41,6 +41,11 @@ class DrydockConfig(object):
     """
     Initialize all the core options
     """
+    # Default options
+    options = [
+        cfg.IntOpt('poll_interval', default=10, help='Polling interval in seconds for checking subtask or downstream status'),
+    ]
+
     # Logging options
     logging_options = [
         cfg.StrOpt('log_level', default='INFO', help='Global log level for Drydock'),
@@ -86,6 +91,7 @@ class DrydockConfig(object):
         self.conf = cfg.ConfigOpts()

     def register_options(self):
+        self.conf.register_opts(DrydockConfig.options)
         self.conf.register_opts(DrydockConfig.logging_options, group='logging')
         self.conf.register_opts(DrydockConfig.auth_options, group='authentication')
         self.conf.register_opts(DrydockConfig.plugin_options, group='plugins')
@@ -97,7 +103,8 @@ conf = config_mgr.conf
 IGNORED_MODULES = ('drydock', 'config')

 def list_opts():
-    opts = {'logging': DrydockConfig.logging_options,
+    opts = {'DEFAULT': DrydockConfig.options,
+            'logging': DrydockConfig.logging_options,
             'authentication': DrydockConfig.auth_options,
             'plugins': DrydockConfig.plugin_options,
             'timeouts': DrydockConfig.timeout_options
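The hunks above add an ungrouped poll_interval option, register it, and expose it through list_opts(). As a point of reference, a standalone oslo.config sketch (not the Drydock module itself; the project name is an assumption) showing that an option registered without a group lands in [DEFAULT] and is read back as a plain attribute:

    from oslo_config import cfg

    opts = [cfg.IntOpt('poll_interval', default=10,
                       help='Polling interval in seconds for checking subtask or downstream status')]

    conf = cfg.ConfigOpts()
    conf.register_opts(opts)      # no group given, so the option lives in [DEFAULT]
    conf([], project='drydock')   # parse an empty argv; normally this would read drydock.conf
    print(conf.poll_interval)     # 10 unless overridden in the config file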
@@ -38,6 +38,7 @@ class MaasNodeDriver(NodeDriver):
     maasdriver_options = [
         cfg.StrOpt('maas_api_key', help='The API key for accessing MaaS', secret=True),
         cfg.StrOpt('maas_api_url', help='The URL for accessing MaaS API'),
+        cfg.IntOpt('poll_interval', default=10, help='Polling interval for querying MaaS status in seconds'),
     ]

     driver_name = 'maasdriver'
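The new option sits in the driver's maasdriver_options list, and the task-handling hunks further down read it as config.conf.maasdriver.poll_interval, i.e. as a grouped option. A small self-contained sketch of grouped registration with oslo.config, reusing the same group and option names:

    from oslo_config import cfg

    maasdriver_options = [
        cfg.IntOpt('poll_interval', default=10,
                   help='Polling interval for querying MaaS status in seconds'),
    ]

    conf = cfg.ConfigOpts()
    conf.register_opts(maasdriver_options, group='maasdriver')
    conf([])
    print(conf.maasdriver.poll_interval)   # grouped options read as conf.<group>.<name>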
@@ -185,9 +186,12 @@ class MaasNodeDriver(NodeDriver):

         running_subtasks = len(subtasks)
         attempts = 0
+        max_attempts = config.conf.timeouts.identify_node * (60 // config.conf.poll_interval)
         worked = failed = False

-        while running_subtasks > 0 and attempts < config.conf.timeouts.identify_node:
+        self.logger.debug("Polling for subtask completetion every %d seconds, a max of %d polls." %
+                          (config.conf.poll_interval, max_attempts))
+        while running_subtasks > 0 and attempts < max_attempts:
             for t in subtasks:
                 subtask = self.state_manager.get_task(t)
@@ -205,7 +209,7 @@ class MaasNodeDriver(NodeDriver):
                 elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                     worked = failed = True

-            time.sleep(1 * 60)
+            time.sleep(config.conf.maasdriver.poll_interval)
             attempts = attempts + 1

         if running_subtasks > 0:
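To make the max_attempts arithmetic concrete: with the default poll_interval of 10 seconds and a timeout of, say, 15 minutes (an illustrative value, not a Drydock default), the loop allows 90 polls. Note that 60 // poll_interval is integer division, so intervals that do not divide 60 evenly round the per-minute poll count down:

    poll_interval = 10                              # seconds, the new default
    timeout = 15                                    # minutes, illustrative only
    max_attempts = timeout * (60 // poll_interval)
    print(max_attempts)                             # 90 polls, one every 10 seconds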
@@ -254,10 +258,13 @@ class MaasNodeDriver(NodeDriver):

         running_subtasks = len(subtasks)
         attempts = 0
+        max_attempts = config.conf.timeouts.configure_hardware * (60 // config.conf.poll_interval)
         worked = failed = False

+        self.logger.debug("Polling for subtask completetion every %d seconds, a max of %d polls." %
+                          (config.conf.poll_interval, max_attempts))
         #TODO Add timeout to config
-        while running_subtasks > 0 and attempts < config.conf.timeouts.configure_hardware:
+        while running_subtasks > 0 and attempts < max_attempts:
             for t in subtasks:
                 subtask = self.state_manager.get_task(t)
@@ -275,7 +282,7 @@ class MaasNodeDriver(NodeDriver):
                 elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                     worked = failed = True

-            time.sleep(1 * 60)
+            time.sleep(config.conf.maasdriver.poll_interval)
             attempts = attempts + 1

         if running_subtasks > 0:
@@ -324,9 +331,12 @@ class MaasNodeDriver(NodeDriver):

         running_subtasks = len(subtasks)
         attempts = 0
+        max_attempts = config.conf.timeouts.apply_node_networking * (60 // config.conf.poll_interval)
         worked = failed = False

-        while running_subtasks > 0 and attempts < config.conf.timeouts.apply_node_networking:
+        self.logger.debug("Polling for subtask completetion every %d seconds, a max of %d polls." %
+                          (config.conf.poll_interval, max_attempts))
+        while running_subtasks > 0 and attempts < max_attempts:
             for t in subtasks:
                 subtask = self.state_manager.get_task(t)
@@ -344,7 +354,7 @@ class MaasNodeDriver(NodeDriver):
                 elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                     worked = failed = True

-            time.sleep(1 * 60)
+            time.sleep(config.conf.poll_interval)
             attempts = attempts + 1

         if running_subtasks > 0:
@@ -393,9 +403,13 @@ class MaasNodeDriver(NodeDriver):

         running_subtasks = len(subtasks)
         attempts = 0
+        max_attempts = config.conf.timeouts.apply_node_platform * (60 // config.conf.poll_interval)
         worked = failed = False

-        while running_subtasks > 0 and attempts < config.conf.timeouts.apply_node_platform:
+        self.logger.debug("Polling for subtask completetion every %d seconds, a max of %d polls." %
+                          (config.conf.poll_interval, max_attempts))
+
+        while running_subtasks > 0 and attempts < max_attempts:
             for t in subtasks:
                 subtask = self.state_manager.get_task(t)
@@ -413,7 +427,7 @@ class MaasNodeDriver(NodeDriver):
                 elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                     worked = failed = True

-            time.sleep(1 * 60)
+            time.sleep(config.conf.poll_interval)
             attempts = attempts + 1

         if running_subtasks > 0:
@@ -462,9 +476,13 @@ class MaasNodeDriver(NodeDriver):

         running_subtasks = len(subtasks)
         attempts = 0
+        max_attempts = config.conf.timeouts.deploy_node * (60 // config.conf.poll_interval)
         worked = failed = False

-        while running_subtasks > 0 and attempts < config.conf.timeouts.deploy_node:
+        self.logger.debug("Polling for subtask completetion every %d seconds, a max of %d polls." %
+                          (config.conf.poll_interval, max_attempts))
+
+        while running_subtasks > 0 and attempts < max_attempts:
             for t in subtasks:
                 subtask = self.state_manager.get_task(t)
@@ -482,7 +500,7 @@ class MaasNodeDriver(NodeDriver):
                 elif subtask.result == hd_fields.ActionResult.PartialSuccess:
                     worked = failed = True

-            time.sleep(1 * 60)
+            time.sleep(max_attempts)
             attempts = attempts + 1

         if running_subtasks > 0:
@@ -817,13 +835,14 @@ class MaasTaskRunner(drivers.DriverTaskRunner):

                 # Poll machine status
                 attempts = 0
+                max_attempts = config.conf.timeouts.configure_hardware * (60 // config.conf.maasdriver.poll_interval)

-                while attempts < config.conf.timeouts.configure_hardware and machine.status_name != 'Ready':
+                while attempts < max_attempts and machine.status_name != 'Ready':
                     attempts = attempts + 1
-                    time.sleep(1 * 60)
+                    time.sleep(config.conf.maasdriver.poll_interval)
                     try:
                         machine.refresh()
-                        self.logger.debug("Polling node %s status attempt %d: %s" % (n, attempts, machine.status_name))
+                        self.logger.debug("Polling node %s status attempt %d of %d: %s" % (n, attempts, max_attempts, machine.status_name))
                     except:
                         self.logger.warning("Error updating node %s status during commissioning, will re-attempt." %
                                             (n))
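The machine-status loops in MaasTaskRunner follow the same bound but wrap the refresh in a try/except, so a transient API error only costs one poll. A generic sketch of that shape (refresh and get_status are hypothetical callables, not MaaS client methods):

    import time

    def wait_for_status(refresh, get_status, target, timeout_minutes, poll_interval=10):
        """Poll until get_status() == target, tolerating transient refresh failures."""
        max_attempts = timeout_minutes * (60 // poll_interval)
        attempts = 0
        while attempts < max_attempts and get_status() != target:
            attempts = attempts + 1
            time.sleep(poll_interval)
            try:
                refresh()
            except Exception:
                pass  # the real code logs a warning and simply retries on the next poll
        return get_status() == target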
@@ -1184,12 +1203,14 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
                     continue

                 attempts = 0
-                while attempts < config.conf.timeouts.deploy_node and not machine.status_name.startswith('Deployed'):
+                max_attempts = config.conf.timeouts.deploy_node * (60 // config.conf.maasdriver.poll_interval)
+
+                while attempts < max_attempts and not machine.status_name.startswith('Deployed'):
                     attempts = attempts + 1
-                    time.sleep(1 * 60)
+                    time.sleep(config.conf.maasdriver.poll_interval)
                     try:
                         machine.refresh()
-                        self.logger.debug("Polling node %s status attempt %d: %s" % (n, attempts, machine.status_name))
+                        self.logger.debug("Polling node %s status attempt %d of %d: %s" % (n, attempts, max_attempts, machine.status_name))
                     except:
                         self.logger.warning("Error updating node %s status during commissioning, will re-attempt." %
                                             (n))
@@ -14,6 +14,8 @@
 import time
 import logging

+from oslo_config import cfg
+
 from pyghmi.ipmi.command import Command
 from pyghmi.exceptions import IpmiException
@@ -28,15 +30,20 @@ import drydock_provisioner.drivers as drivers


 class PyghmiDriver(oob.OobDriver):
+    pyghmi_driver_options = [
+        cfg.IntOpt('poll_interval', default=10, help='Polling interval in seconds for querying IPMI status'),
+    ]

     oob_types_supported = ['ipmi']

+    driver_name = "pyghmi_driver"
+    driver_key = "pyghmi_driver"
+    driver_desc = "Pyghmi OOB Driver"
+
     def __init__(self, **kwargs):
         super(PyghmiDriver, self).__init__(**kwargs)

-        self.driver_name = "pyghmi_driver"
-        self.driver_key = "pyghmi_driver"
-        self.driver_desc = "Pyghmi OOB Driver"
+        config.conf.register_opts(PyghmiDriver.pyghmi_driver_options, group=PyghmiDriver.driver_key)

         self.logger = logging.getLogger("%s.%s" %
                                         (config.conf.logging.oobdriver_logger_name, self.driver_key))
@@ -101,15 +108,15 @@ class PyghmiDriver(oob.OobDriver):
             runner.start()

         attempts = 0
-        while (len(incomplete_subtasks) > 0 and
-               attempts <= getattr(config.conf.timeouts, task.action, config.conf.timeouts.drydock_timeout)):
+        max_attempts = getattr(config.conf.timeouts, task.action, config.conf.timeouts.drydock_timeout) * (60 / config.conf.pyghmi_driver.poll_interval)
+        while (len(incomplete_subtasks) > 0 and attempts <= max_attempts):
             for n in incomplete_subtasks:
                 t = self.state_manager.get_task(n)
                 if t.get_status() in [hd_fields.TaskStatus.Terminated,
                                       hd_fields.TaskStatus.Complete,
                                       hd_fields.TaskStatus.Errored]:
                     incomplete_subtasks.remove(n)
-            time.sleep(1 * 60)
+            time.sleep(config.conf.pyghmi_driver.poll_interval)
             attempts = attempts + 1

         task = self.state_manager.get_task(task.get_id())
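The pyghmi loop above picks its timeout per task action, falling back to the global drydock_timeout when no per-action value exists, and uses true division (60 /), so max_attempts can be a float here. A sketch of that lookup with made-up timeout values (the attribute names mirror the [timeouts] options used elsewhere in the diff):

    poll_interval = 10  # seconds

    class Timeouts(object):
        drydock_timeout = 5    # minutes, global fallback (illustrative value)
        identify_node = 10     # minutes (illustrative value)

    timeouts = Timeouts()
    action = 'identify_node'   # in the driver this comes from task.action

    minutes = getattr(timeouts, action, timeouts.drydock_timeout)
    max_attempts = minutes * (60 / poll_interval)   # true division, as in the hunk above
    print(max_attempts)                             # 60.0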
@@ -382,3 +389,5 @@ class PyghmiTaskRunner(drivers.DriverTaskRunner):
             time.sleep(15)
             attempts = attempts + 1

+def list_opts():
+    return {PyghmiDriver.driver_key: PyghmiDriver.pyghmi_driver_options}
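The module-level list_opts() added here exposes the driver's options keyed by its group name, mirroring list_opts() in the config module. A hedged sketch (an assumption about intended use, not code from this commit) of feeding such a dict back into a ConfigOpts instance group by group:

    from oslo_config import cfg

    def register_all(conf, listed):
        # listed maps a group name (or 'DEFAULT') to a list of oslo.config options
        for group, opts in listed.items():
            if group == 'DEFAULT':
                conf.register_opts(opts)
            else:
                conf.register_opts(opts, group=group)

    conf = cfg.ConfigOpts()
    register_all(conf, {'pyghmi_driver': [cfg.IntOpt('poll_interval', default=10)]})
    conf([])
    print(conf.pyghmi_driver.poll_interval)   # 10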
@@ -356,6 +356,7 @@ class Orchestrator(object):
            # Each attempt is a new task which might make the final task tree a bit confusing

            node_identify_attempts = 0
+           max_attempts = config.conf.timeouts.identify_node * (60 / config.conf.poll_interval)

            while True:
@@ -379,11 +380,11 @@ class Orchestrator(object):
                elif node_identify_task.get_result() in [hd_fields.ActionResult.PartialSuccess,
                                                         hd_fields.ActionResult.Failure]:
                    # TODO This threshold should be a configurable default and tunable by task API
-                   if node_identify_attempts > 10:
+                   if node_identify_attempts > max_attempts:
                        failed = True
                        break

-               time.sleep(1 * 60)
+               time.sleep(config.conf.poll_interval)

            # We can only commission nodes that were successfully identified in the provisioner
            if len(node_identify_task.result_detail['successful_nodes']) > 0:
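With the orchestrator now sleeping config.conf.poll_interval between node-identification checks, the cadence can be tightened without touching the loop, for example in tests. A self-contained sketch using oslo.config's override mechanism (an assumption about usage, not part of this commit):

    from oslo_config import cfg

    conf = cfg.ConfigOpts()
    conf.register_opts([cfg.IntOpt('poll_interval', default=10)])
    conf([])

    conf.set_override('poll_interval', 1)    # e.g. in a test: poll every second
    assert conf.poll_interval == 1
    conf.clear_override('poll_interval')
    assert conf.poll_interval == 10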