swift-init: New option kill-after-timeout
This option sends SIGKILL to the daemon after the kill_wait period. When a daemon hangs and doesn't respond to SIGTERM/SIGHUP, there is currently no way to stop it using swift-init. Classic init scripts on Linux kill a hung process after a grace period, and this patch adds the same behaviour. This is most useful when using "restart" on a hung daemon. Change-Id: I8c932b673a0f51e52132df87ea2f4396f4bba9d8
This commit is contained in:
parent
dafeea6322
commit
3b1591f235
@ -74,6 +74,11 @@ def main():
|
||||
help="Return zero status code even if some config is "
|
||||
"missing. Default mode if any server is a glob or "
|
||||
"one of aliases `all`, `main` or `rest`.")
|
||||
# SIGKILL daemon after kill_wait period
|
||||
parser.add_option('--kill-after-timeout', dest='kill_after_timeout',
|
||||
action='store_true',
|
||||
help="Kill daemon and all childs after kill-wait "
|
||||
"period.")
|
||||
|
||||
options, args = parser.parse_args()
|
||||
|
||||
|
@ -111,6 +111,7 @@ allows one to use the keywords such as "all", "main" and "rest" for the <server>
|
||||
.IP "-r RUN_DIR, --run-dir=RUN_DIR directory where the pids will be stored (default /var/run/swift)
|
||||
.IP "--strict return non-zero status code if some config is missing. Default mode if server is explicitly named."
|
||||
.IP "--non-strict return zero status code even if some config is missing. Default mode if server is one of aliases `all`, `main` or `rest`."
|
||||
.IP "--kill-after-timeout kill daemon and all children after kill-wait period."
|
||||
.PD
|
||||
.RE
|
||||
|
||||
|
@ -162,6 +162,16 @@ def safe_kill(pid, sig, name):
|
||||
os.kill(pid, sig)
|
||||
|
||||
|
||||
def kill_group(pid, sig):
    """Send a signal to a process group.

    :param pid: id of the process group to signal (passed to ``os.kill``
                negated); must be greater than 1
    :param sig: signal to send
    :raises ValueError: if ``pid`` is not greater than 1
    :raises OSError: if the underlying ``os.kill`` call fails
    """
    # os.kill() treats a negative pid as "signal process group -pid", but
    # negating a pid <= 1 does not address a group at all: 0 targets the
    # caller's own process group, 1 becomes -1 (every process the caller is
    # allowed to signal) and a negative value becomes a single positive pid.
    # Refuse those values instead of signalling the wrong target.
    if pid <= 1:
        raise ValueError('invalid process group id: %r' % (pid,))
    # Negative PID means process group
    os.kill(-pid, sig)
|
||||
|
||||
|
||||
class UnknownCommandError(Exception):
|
||||
pass
|
||||
|
||||
@ -285,11 +295,27 @@ class Manager(object):
|
||||
return 0
|
||||
|
||||
# reached interval n watch_pids w/o killing all servers
|
||||
kill_after_timeout = kwargs.get('kill_after_timeout', False)
|
||||
for server, pids in server_pids.items():
|
||||
if not killed_pids.issuperset(pids):
|
||||
# some pids of this server were not killed
|
||||
print(_('Waited %s seconds for %s to die; giving up') % (
|
||||
kill_wait, server))
|
||||
if kill_after_timeout:
|
||||
print(_('Waited %s seconds for %s to die; killing') % (
|
||||
kill_wait, server))
|
||||
# Send SIGKILL to all remaining pids
|
||||
for pid in set(pids.keys()) - killed_pids:
|
||||
print(_('Signal %s pid: %s signal: %s') % (
|
||||
server, pid, signal.SIGKILL))
|
||||
# Send SIGKILL to process group
|
||||
try:
|
||||
kill_group(pid, signal.SIGKILL)
|
||||
except OSError as e:
|
||||
# PID died before kill_group can take action?
|
||||
if e.errno != errno.ESRCH:
|
||||
raise e
|
||||
else:
|
||||
print(_('Waited %s seconds for %s to die; giving up') % (
|
||||
kill_wait, server))
|
||||
return 1
|
||||
|
||||
@command
|
||||
|
@ -1916,13 +1916,18 @@ class TestManager(unittest.TestCase):
|
||||
continue
|
||||
yield server, pid
|
||||
|
||||
def mock_kill_group(pid, sig):
|
||||
self.fail('kill_group should not be called')
|
||||
|
||||
_orig_server = manager.Server
|
||||
_orig_watch_server_pids = manager.watch_server_pids
|
||||
_orig_kill_group = manager.kill_group
|
||||
try:
|
||||
manager.watch_server_pids = mock_watch_server_pids
|
||||
manager.kill_group = mock_kill_group
|
||||
# test stop one server
|
||||
server_pids = {
|
||||
'test': [1]
|
||||
'test': {1: "dummy.pid"}
|
||||
}
|
||||
manager.Server = MockServerFactory(server_pids)
|
||||
m = manager.Manager(['test'])
|
||||
@ -1930,7 +1935,7 @@ class TestManager(unittest.TestCase):
|
||||
self.assertEqual(status, 0)
|
||||
# test not running
|
||||
server_pids = {
|
||||
'test': []
|
||||
'test': {}
|
||||
}
|
||||
manager.Server = MockServerFactory(server_pids)
|
||||
m = manager.Manager(['test'])
|
||||
@ -1938,7 +1943,7 @@ class TestManager(unittest.TestCase):
|
||||
self.assertEqual(status, 1)
|
||||
# test kill not running
|
||||
server_pids = {
|
||||
'test': []
|
||||
'test': {}
|
||||
}
|
||||
manager.Server = MockServerFactory(server_pids)
|
||||
m = manager.Manager(['test'])
|
||||
@ -1946,7 +1951,7 @@ class TestManager(unittest.TestCase):
|
||||
self.assertEqual(status, 0)
|
||||
# test won't die
|
||||
server_pids = {
|
||||
'test': [None]
|
||||
'test': {None: None}
|
||||
}
|
||||
manager.Server = MockServerFactory(server_pids)
|
||||
m = manager.Manager(['test'])
|
||||
@ -1956,6 +1961,83 @@ class TestManager(unittest.TestCase):
|
||||
finally:
|
||||
manager.Server = _orig_server
|
||||
manager.watch_server_pids = _orig_watch_server_pids
|
||||
manager.kill_group = _orig_kill_group
|
||||
|
||||
def test_stop_kill_after_timeout(self):
    """Exercise ``Manager.stop(kill_after_timeout=True)`` with a stuck pid.

    The stubbed server reports a single pid (``None``) which the stubbed
    ``watch_server_pids`` never yields. Three ``kill_group`` stand-ins are
    tried in turn: one that records its arguments, one that raises a bare
    ``OSError`` and one that raises ``OSError`` with ``errno.ESRCH``.
    """
    class StubServer(object):
        # Minimal stand-in for manager.Server: only stop() and status().
        def __init__(self, pids, run_dir=manager.RUN_DIR):
            self.pids = pids

        def stop(self, **kwargs):
            return self.pids

        def status(self, **kwargs):
            return not self.pids

    def stub_watch_server_pids(server_pids, **kwargs):
        # Yield every (server, pid) pair except those whose pid is None.
        for name, pids in server_pids.items():
            for pid in pids:
                if pid is not None:
                    yield name, pid

    recorded_calls = []

    def recording_kill_group(*args):
        recorded_calls.append(args)

    def failing_kill_group(*args):
        raise OSError()

    def esrch_kill_group(*args):
        raise OSError(errno.ESRCH, 'No such process')

    def manager_with(kill_group_stub):
        # Install the given kill_group stand-in together with a fresh
        # single-server factory and return a Manager built on top of them.
        pids_by_server = {
            'test': {None: None}
        }

        def server_factory(server, run_dir=manager.RUN_DIR):
            return StubServer(pids_by_server[server])

        manager.kill_group = kill_group_stub
        manager.Server = server_factory
        return manager.Manager(['test'])

    # Remember the real module attributes so they can be restored below.
    saved = (manager.Server, manager.watch_server_pids, manager.kill_group)
    try:
        manager.watch_server_pids = stub_watch_server_pids

        # test stop one server
        m = manager_with(recording_kill_group)
        status = m.stop(kill_after_timeout=True)
        self.assertEqual(status, 1)
        self.assertEqual(recorded_calls, [(None, 9)])

        # test stop one server - OSError
        m = manager_with(failing_kill_group)
        with self.assertRaises(OSError):
            m.stop(kill_after_timeout=True)

        # test stop one server - OSError: No such process
        m = manager_with(esrch_kill_group)
        status = m.stop(kill_after_timeout=True)
        self.assertEqual(status, 1)
    finally:
        (manager.Server,
         manager.watch_server_pids,
         manager.kill_group) = saved
|
||||
|
||||
# TODO(clayg): more tests
|
||||
def test_shutdown(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user