Do not silently ignore exceptions when running next steps
Currently, if do_next_deploy_step or do_next_clean_step fails, the failure is swallowed by eventlet because these functions are run in a new thread. This 1) is incorrect and 2) leaves nodes stuck in DEPLOYING/CLEANING. Also update logging in agent_base to make similar problems easier to spot. Change-Id: I0282c9e06c54a173efc666cd8df25cf573afb394
This commit is contained in:
parent
ac19e6050d
commit
0c3f52ec9a
@ -126,6 +126,8 @@ def do_node_clean(task, clean_steps=None):
|
||||
do_next_clean_step(task, step_index)
|
||||
|
||||
|
||||
@utils.fail_on_error(utils.deploying_error_handler,
|
||||
_("Unexpected error when processing next clean step"))
|
||||
@task_manager.require_exclusive_lock
|
||||
def do_next_clean_step(task, step_index):
|
||||
"""Do cleaning, starting from the specified clean step.
|
||||
|
@ -204,6 +204,8 @@ def do_node_deploy(task, conductor_id=None, configdrive=None):
|
||||
do_next_deploy_step(task, 0, conductor_id)
|
||||
|
||||
|
||||
@utils.fail_on_error(utils.deploying_error_handler,
|
||||
_("Unexpected error when processing next deploy step"))
|
||||
@task_manager.require_exclusive_lock
|
||||
def do_next_deploy_step(task, step_index, conductor_id):
|
||||
"""Do deployment, starting from the specified deploy step.
|
||||
|
@ -16,6 +16,7 @@ import contextlib
|
||||
import crypt
|
||||
import datetime
|
||||
from distutils.version import StrictVersion
|
||||
import functools
|
||||
import os
|
||||
import secrets
|
||||
import time
|
||||
@ -563,6 +564,21 @@ def deploying_error_handler(task, logmsg, errmsg=None, traceback=False,
|
||||
task.process_event('fail')
|
||||
|
||||
|
||||
def fail_on_error(error_callback, msg, *error_args, **error_kwargs):
    """Build a decorator that reports an operation's failure via a callback.

    The decorated callable must take ``task`` as its first positional
    argument. Any ``Exception`` it raises is caught and not re-raised;
    ``error_callback`` is invoked instead and the wrapped call returns
    ``None``.

    :param error_callback: callable invoked as
        ``error_callback(task, errmsg, *error_args, **error_kwargs)`` when
        the decorated callable raises.
    :param msg: prefix of the error message handed to the callback; the
        exception class name and the exception text are appended to it.
    :param error_args: extra positional arguments for ``error_callback``.
    :param error_kwargs: extra keyword arguments for ``error_callback``.
    :returns: a decorator to apply to the operation.
    """
    def decorator(operation):
        @functools.wraps(operation)
        def guarded(task, *args, **kwargs):
            try:
                outcome = operation(task, *args, **kwargs)
            except Exception as failure:
                # Hand the failure to the callback rather than propagating
                # it; the caller then sees a plain None result.
                details = (msg, failure.__class__.__name__, failure)
                error_callback(task, "%s. %s: %s" % details,
                               *error_args, **error_kwargs)
                return None
            else:
                return outcome

        return guarded

    return decorator
|
||||
|
||||
|
||||
@task_manager.require_exclusive_lock
|
||||
def abort_on_conductor_take_over(task):
|
||||
"""Set node's state when a task was aborted due to conductor take over.
|
||||
|
5
releasenotes/notes/spawn-error-2249f94606388fbd.yaml
Normal file
5
releasenotes/notes/spawn-error-2249f94606388fbd.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
No longer silently ignores exceptions that happen when trying to run the
|
||||
next clean or deploy step.
|
Loading…
x
Reference in New Issue
Block a user