32fbf9eaae
There's an upstream eventlet bug that seems to cause process hangs during an atexit hook; unfortunately, that means that every time we call "once" in probe tests, we can hang indefinitely waiting for a process that won't terminate. See https://github.com/eventlet/eventlet/issues/989 for details. Now, wait with a timeout; if it expires, kill the offending process and hope for the best. Do this by patching out subprocess.Popen.wait, but only in probe tests -- this ensures that we won't impact any real systems, while also ensuring a broad coverage of probe tests (as opposed to, say, plumbing some new wait_timeout kwarg into all the Manager call sites). Closes-Bug: #2088027 Change-Id: I8983eafbb575d73d1654c354815a7de7ae141873
57 lines
2.1 KiB
Python
57 lines
2.1 KiB
Python
# Copyright (c) 2010-2017 OpenStack Foundation
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
|
|
import eventlet
|
|
eventlet.monkey_patch()
|
|
|
|
import subprocess
|
|
from test import get_config
|
|
from swift.common.utils import config_true_value
|
|
|
|
|
|
# Settings for the probe tests, looked up under the 'probe_test' name.
config = get_config('probe_test')

# Numeric settings are coerced with int(); the subprocess wait timeout
# falls back to the server-check timeout when it is not configured.
CHECK_SERVER_TIMEOUT = int(config.get('check_server_timeout', 30))
SUBPROCESS_WAIT_TIMEOUT = int(
    config.get('subprocess_wait_timeout', CHECK_SERVER_TIMEOUT))

VALIDATE_RSYNC = config_true_value(config.get('validate_rsync', False))

PROXY_BASE_URL = config.get('proxy_base_url')
if PROXY_BASE_URL is None:
    # TODO: find and load an "appropriate" proxy-server.conf(.d), piece
    # something together from bind_ip, bind_port, and cert_file
    PROXY_BASE_URL = 'http://127.0.0.1:8080'

# Keep a handle on the current Popen.wait before it gets replaced below,
# so the replacement can delegate to it.
orig_popen_wait = subprocess.Popen.wait
|
|
|
|
|
|
def wait_with_timeout(self, timeout=None, check_interval=0.01):
    """
    Replacement for ``subprocess.Popen.wait`` that always uses a timeout.

    Delegates to the saved original ``wait``. If that raises
    ``subprocess.TimeoutExpired``, a warning is printed, the child is
    killed and reaped, and a fixed exit status is returned instead of
    propagating the exception.

    :param timeout: seconds to wait for the child; ``None`` means use
                    ``SUBPROCESS_WAIT_TIMEOUT``
    :param check_interval: passed through unchanged to the original
                           ``wait``
    :returns: whatever the original ``wait`` returns, or 137 if the child
              had to be killed
    """
    # We want to always have a timeout; no probe test should need to wait
    # on even minute-long running processes.
    if timeout is None:
        timeout = SUBPROCESS_WAIT_TIMEOUT
    try:
        return orig_popen_wait(
            self, timeout=timeout, check_interval=check_interval)
    except subprocess.TimeoutExpired:
        # Assume we tripped https://github.com/eventlet/eventlet/issues/989
        # Kill the process (it should be mid-shutdown anyway) and log about it
        print('WARNING: killing long running daemon after %ss: %r'
              % (timeout, self.args))
        self.kill()
        # Reap the killed child so it does not linger as a zombie and so
        # self.returncode gets populated. Keep this bounded as well: if the
        # child still has not gone away after a short grace period, give up
        # rather than reintroduce the hang this wrapper exists to avoid.
        reap_grace = 5
        try:
            orig_popen_wait(
                self, timeout=reap_grace, check_interval=check_interval)
        except subprocess.TimeoutExpired:
            pass
        # return 128 + 9 = 137 which is same as if using a command line like
        # 'timeout -s KILL <timeout> <command>'
        return 137
|
|
|
|
|
|
# Install the wrapper on the Popen class itself, so every wait() call on a
# Popen object made after this module is imported goes through
# wait_with_timeout.
subprocess.Popen.wait = wait_with_timeout
|