# Copyright (c) 2022 NVIDIA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Safely reload WSGI servers while minimizing client downtime and errors by

  * validating that the process is a Swift WSGI server manager,
  * checking that the configuration file used is valid,
  * sending the "seamless reload" signal, and
  * waiting for the reload to complete.
"""

# Registered in setup.cfg under console_scripts as:
#     swift-reload = swift.cli.reload:main

import argparse
import errno
import os
import os.path
import signal
import subprocess
import sys
import time


EXIT_BAD_PID = 2  # similar to argparse exiting 2 on an unknown arg
EXIT_RELOAD_FAILED = 1
EXIT_RELOAD_TIMEOUT = 128 + errno.ETIMEDOUT

# Scripts for which main() will attempt a ``--test-config`` run before
# signalling; anything else is rejected by validate_manager_pid().
_CONFIG_CHECK_SERVERS = frozenset([
    "swift-proxy-server",
    "swift-account-server",
    "swift-container-server",
    "swift-object-server",
])


def _die(message, status):
    """
    Write ``message`` (plus a newline) to stderr and exit with ``status``.

    Uses ``sys.exit`` rather than the ``exit`` builtin: the latter is only
    injected by the ``site`` module and is missing under ``python -S`` or
    in embedded/frozen interpreters.

    :param message: text to report on stderr
    :param status: process exit status
    :raises SystemExit: always
    """
    sys.stderr.write(message + "\n")
    sys.exit(status)


# This helper lives in swift/common/manager.py in the original change (where
# Server also uses it to find children to signal); reload.py imports it from
# there. It is reproduced here so the functions below are self-contained.
def get_child_pids(pid):
    """
    Get the current set of all child PIDs for a PID.

    :param pid: process id
    :returns: set of integer PIDs whose parent is ``pid``
    :raises subprocess.CalledProcessError: if ``ps`` exits non-zero
    """
    output = subprocess.check_output(
        ["ps", "--ppid", str(pid), "--no-headers", "-o", "pid"])
    return {int(child) for child in output.split()}


def validate_manager_pid(pid):
    """
    Check that ``pid`` looks like a Swift WSGI server manager process.

    The command line is read from ``/proc/<pid>/cmdline`` and the session id
    from ``os.getsid``. On any validation failure a message is written to
    stderr and the process exits with ``EXIT_BAD_PID``.

    :param pid: process id to inspect
    :returns: tuple of (command line as a list of arguments, script name)
    :raises SystemExit: if the process cannot be inspected, is not a single
                        ``swift-*`` script, is not one of the servers that
                        get a config check, or is not its own session leader
    """
    try:
        with open('/proc/%d/cmdline' % pid, 'r') as fp:
            # cmdline is NUL-separated with a trailing NUL
            cmd = fp.read().strip('\x00').split('\x00')
        sid = os.getsid(pid)
    except (IOError, OSError):
        _die("Failed to get process information for %s" % pid, EXIT_BAD_PID)

    # Pick out arguments that look like an installed script, skipping the
    # interpreter itself.
    scripts = [os.path.basename(c) for c in cmd
               if '/bin/' in c and '/bin/python' not in c]

    if len(scripts) != 1 or not scripts[0].startswith("swift-"):
        _die("Non-swift process: %r" % ' '.join(cmd), EXIT_BAD_PID)
    script = scripts[0]

    if script not in _CONFIG_CHECK_SERVERS:
        _die("Process does not support config checks: %s" % script,
             EXIT_BAD_PID)

    if sid != pid:
        # Not its own session leader: treated as a worker, and the session
        # id is offered as the likely manager PID.
        _die("Process appears to be a %s worker, not a manager. "
             "Did you mean %s?" % (script, sid), EXIT_BAD_PID)

    return cmd, script


def _send_reload_signal(pid, verbose):
    """
    Send SIGUSR1 to ``pid``, exiting cleanly if the signal cannot be sent.

    :param pid: process id to signal
    :param verbose: if true, announce the signal on stdout first
    :raises SystemExit: with ``EXIT_RELOAD_FAILED`` if ``os.kill`` fails
                        (e.g. the process is gone or not ours to signal)
    """
    if verbose:
        print("Sending USR1 signal")
    try:
        os.kill(pid, signal.SIGUSR1)
    except OSError as err:
        _die("Failed to signal %s: %s" % (pid, err), EXIT_RELOAD_FAILED)


def _reload_and_wait(pid, timeout, verbose):
    """
    Signal ``pid`` to reload, then poll its children until one that
    appeared after the signal has gone away again.

    :param pid: manager process id
    :param timeout: maximum number of seconds to keep polling
    :param verbose: if true, report progress on stdout
    :returns: True if a post-signal child was seen to exit, False on timeout
    :raises subprocess.CalledProcessError: if listing children fails
    """
    original_children = get_child_pids(pid)
    children_since_reload = set()

    _send_reload_signal(pid, verbose)

    start = time.time()
    while time.time() - start < timeout:
        children = get_child_pids(pid)
        new_children = (children - original_children
                        - children_since_reload)
        if new_children:
            if verbose:
                print("Found new children: %s" % ", ".join(
                    str(child) for child in sorted(new_children)))
            children_since_reload |= new_children
        if children_since_reload - children:
            # At least one new child exited; presumably, it was
            # the temporary child waiting to shutdown sockets
            return True
        # We want this to be fairly low, since the temporary child
        # may not hang around very long
        time.sleep(0.1)
    return False


def main(args=None):
    """
    Entry point for ``swift-reload``.

    Validates the target PID, runs the server's own command line with
    ``--test-config`` appended, sends SIGUSR1, and (unless ``--no-wait`` is
    given) waits up to ``--timeout`` seconds for the reload to finish.

    :param args: argument list to parse; ``None`` means ``sys.argv[1:]``
    :returns: None on success
    :raises SystemExit: ``EXIT_BAD_PID`` for a bad/unsuitable PID or bad
                        arguments, ``EXIT_RELOAD_FAILED`` if the config
                        check or signalling fails or the process
                        disappears, ``EXIT_RELOAD_TIMEOUT`` on timeout
    """
    # NOTE(review): __doc__ is passed positionally, which argparse takes as
    # ``prog`` rather than ``description``. The unit tests pin the resulting
    # usage text, so this is deliberately left unchanged here.
    parser = argparse.ArgumentParser(__doc__)
    parser.add_argument("pid", type=int,
                        help="server PID which should be reloaded")
    wait_group = parser.add_mutually_exclusive_group()
    wait_group.add_argument("-t", "--timeout", type=float, default=300.0,
                            help="max time to wait for reload to complete")
    wait_group.add_argument("-w", "--no-wait",
                            action="store_false", dest="wait",
                            help="skip waiting for reload to complete")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="display more information as the process reloads")
    args = parser.parse_args(args)

    cmd, script = validate_manager_pid(args.pid)

    if args.verbose:
        print("Checking config for %s" % script)
    try:
        subprocess.check_call(cmd + ["--test-config"])
    except subprocess.CalledProcessError:
        _die("Failed to validate config", EXIT_RELOAD_FAILED)

    if not args.wait:  # --no-wait
        _send_reload_signal(args.pid, args.verbose)
    else:
        try:
            reloaded = _reload_and_wait(
                args.pid, args.timeout, args.verbose)
        except subprocess.CalledProcessError:
            # This could pop during any of the calls to get_child_pids
            _die("Process seems to have died!", EXIT_RELOAD_FAILED)
        if not reloaded:
            _die("Timed out reloading %s" % script, EXIT_RELOAD_TIMEOUT)

    print("Reloaded %s" % script)


if __name__ == "__main__":
    main()
# Copyright (c) 2022 NVIDIA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import mock
import signal
import six
import subprocess
import unittest

from six.moves import StringIO
from swift.cli import reload


@mock.patch('sys.stderr', new_callable=StringIO)
class TestValidateManagerPid(unittest.TestCase):
    """Tests for reload.validate_manager_pid with /proc and getsid faked."""

    def test_good(self, mock_stderr):
        cmd_args = [
            '/usr/local/bin/python3.9',
            '/usr/local/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
            'some',
            'extra',
            'args',
        ]
        with mock.patch.object(reload, 'open', mock.mock_open(
            read_data='\x00'.join(cmd_args) + '\x00'
        )) as mock_open, mock.patch('os.getsid', return_value=123):
            self.assertEqual(reload.validate_manager_pid(123), (
                cmd_args,
                'swift-proxy-server',
            ))
        self.assertEqual(mock_open.mock_calls[0],
                         mock.call('/proc/123/cmdline', 'r'))
        self.assertEqual(mock_stderr.getvalue(), '')

    def test_open_error(self, mock_stderr):
        with mock.patch.object(reload, 'open', side_effect=OSError), \
                self.assertRaises(SystemExit) as caught:
            reload.validate_manager_pid(123)
        self.assertEqual(caught.exception.args, (reload.EXIT_BAD_PID,))
        self.assertEqual(mock_stderr.getvalue(),
                         'Failed to get process information for 123\n')

    def test_getsid_error(self, mock_stderr):
        # cmdline is readable but the session lookup fails
        with mock.patch.object(reload, 'open', mock.mock_open(
            read_data='/usr/bin/swift-proxy-server\x00'
        )), mock.patch('os.getsid', side_effect=OSError), \
                self.assertRaises(SystemExit) as caught:
            reload.validate_manager_pid(123)
        self.assertEqual(caught.exception.args, (reload.EXIT_BAD_PID,))
        self.assertEqual(mock_stderr.getvalue(),
                         'Failed to get process information for 123\n')

    def test_non_python(self, mock_stderr):
        with mock.patch.object(reload, 'open', mock.mock_open(
            read_data='/usr/bin/rsync\x00'
        )), mock.patch('os.getsid', return_value=56), \
                self.assertRaises(SystemExit) as caught:
            reload.validate_manager_pid(56)
        self.assertEqual(caught.exception.args, (reload.EXIT_BAD_PID,))
        self.assertEqual(mock_stderr.getvalue(),
                         "Non-swift process: '/usr/bin/rsync'\n")

    def test_non_swift(self, mock_stderr):
        with mock.patch.object(reload, 'open', mock.mock_open(
            read_data='/usr/bin/python\x00some-script\x00'
        )), mock.patch('os.getsid', return_value=123), \
                self.assertRaises(SystemExit) as caught:
            reload.validate_manager_pid(123)
        self.assertEqual(caught.exception.args, (reload.EXIT_BAD_PID,))
        self.assertEqual(mock_stderr.getvalue(),
                         "Non-swift process: '/usr/bin/python some-script'\n")

    def test_worker(self, mock_stderr):
        cmd_args = [
            '/usr/bin/python3.9',
            '/usr/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
        ]
        # getsid reports 123 for pid 56, i.e. 56 is not the session leader
        with mock.patch.object(reload, 'open', mock.mock_open(
            read_data='\x00'.join(cmd_args) + '\x00'
        )) as mock_open, mock.patch('os.getsid', return_value=123), \
                self.assertRaises(SystemExit) as caught:
            reload.validate_manager_pid(56)
        self.assertEqual(caught.exception.args, (reload.EXIT_BAD_PID,))
        self.assertEqual(mock_stderr.getvalue(),
                         'Process appears to be a swift-proxy-server worker, '
                         'not a manager. Did you mean 123?\n')
        self.assertEqual(mock_open.mock_calls[0],
                         mock.call('/proc/56/cmdline', 'r'))

    def test_non_server(self, mock_stderr):
        cmd_args = [
            '/usr/bin/swift-ring-builder',
            '/etc/swift/object.builder',
            'rebalance',
        ]
        with mock.patch.object(reload, 'open', mock.mock_open(
            read_data='\x00'.join(cmd_args) + '\x00'
        )) as mock_open, mock.patch('os.getsid', return_value=123), \
                self.assertRaises(SystemExit) as caught:
            reload.validate_manager_pid(123)
        self.assertEqual(caught.exception.args, (reload.EXIT_BAD_PID,))
        self.assertEqual(mock_stderr.getvalue(),
                         'Process does not support config checks: '
                         'swift-ring-builder\n')
        self.assertEqual(mock_open.mock_calls[0],
                         mock.call('/proc/123/cmdline', 'r'))


class TestMain(unittest.TestCase):
    """Tests for reload.main with all process interaction mocked out."""

    def _start(self, patcher):
        """Start ``patcher``, register its cleanup, and return the mock."""
        mocked = patcher.start()
        self.addCleanup(patcher.stop)
        return mocked

    def setUp(self):
        self.mock_stderr = self._start(
            mock.patch('sys.stderr', new_callable=StringIO))
        # Capture stdout so the success message can be asserted and does
        # not leak into the test runner's output.
        self.mock_stdout = self._start(
            mock.patch('sys.stdout', new_callable=StringIO))
        self.mock_check_call = self._start(
            mock.patch('subprocess.check_call'))
        self.mock_validate = self._start(
            mock.patch.object(reload, 'validate_manager_pid'))
        self.mock_get_child_pids = self._start(
            mock.patch.object(reload, 'get_child_pids'))
        self.mock_kill = self._start(mock.patch('os.kill'))
        # The polling loop sleeps between checks; don't really wait.
        self.mock_sleep = self._start(mock.patch('time.sleep'))

    def test_good(self):
        self.mock_validate.return_value = (
            [
                '/usr/bin/swift-proxy-server',
                '/etc/swift/proxy-server.conf'
            ],
            'swift-proxy-server',
        )
        self.mock_get_child_pids.side_effect = [
            {'worker1', 'worker2'},
            {'worker1', 'worker2', 'foster parent'},
            {'worker1', 'worker2', 'foster parent', 'new worker'},
            {'worker1', 'worker2', 'new worker'},
        ]
        self.assertIsNone(reload.main(['123']))
        self.assertEqual(self.mock_check_call.mock_calls, [mock.call([
            '/usr/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
            '--test-config',
        ])])
        self.assertEqual(self.mock_kill.mock_calls, [
            mock.call(123, signal.SIGUSR1),
        ])
        self.assertEqual(self.mock_stdout.getvalue(),
                         'Reloaded swift-proxy-server\n')
        self.assertEqual(self.mock_stderr.getvalue(), '')

    def test_no_wait(self):
        self.mock_validate.return_value = (
            [
                '/usr/bin/swift-proxy-server',
                '/etc/swift/proxy-server.conf'
            ],
            'swift-proxy-server',
        )
        self.assertIsNone(reload.main(['--no-wait', '123']))
        self.assertEqual(self.mock_check_call.mock_calls, [mock.call([
            '/usr/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
            '--test-config',
        ])])
        # With --no-wait the children are never inspected
        self.assertEqual(self.mock_get_child_pids.mock_calls, [])
        self.assertEqual(self.mock_kill.mock_calls, [
            mock.call(123, signal.SIGUSR1),
        ])
        self.assertEqual(self.mock_stdout.getvalue(),
                         'Reloaded swift-proxy-server\n')

    @mock.patch('time.time', side_effect=[1, 10, 100, 400])
    def test_timeout(self, mock_time):
        self.mock_validate.return_value = (
            [
                '/usr/bin/python3',
                '/usr/bin/swift-proxy-server',
                '/etc/swift/proxy-server.conf'
            ],
            'swift-proxy-server',
        )
        self.mock_get_child_pids.side_effect = [
            {'worker1', 'worker2'},
            {'worker1', 'worker2', 'foster parent'},
            {'worker1', 'worker2', 'foster parent', 'new worker'},
        ]
        with self.assertRaises(SystemExit) as caught:
            reload.main(['123'])
        self.assertEqual(caught.exception.args, (reload.EXIT_RELOAD_TIMEOUT,))
        self.assertEqual(self.mock_check_call.mock_calls, [mock.call([
            '/usr/bin/python3',
            '/usr/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
            '--test-config',
        ])])
        self.assertEqual(self.mock_kill.mock_calls, [
            mock.call(123, signal.SIGUSR1),
        ])
        self.assertEqual(self.mock_stderr.getvalue(),
                         'Timed out reloading swift-proxy-server\n')

    def test_check_failed(self):
        self.mock_validate.return_value = (
            [
                '/usr/bin/python3',
                '/usr/bin/swift-object-server',
                '/etc/swift/object-server/1.conf'
            ],
            'swift-object-server',
        )
        self.mock_check_call.side_effect = subprocess.CalledProcessError(
            2, 'swift-object-server')
        with self.assertRaises(SystemExit) as caught:
            reload.main(['123'])
        self.assertEqual(caught.exception.args, (reload.EXIT_RELOAD_FAILED,))
        self.assertEqual(self.mock_check_call.mock_calls, [mock.call([
            '/usr/bin/python3',
            '/usr/bin/swift-object-server',
            '/etc/swift/object-server/1.conf',
            '--test-config',
        ])])
        self.assertEqual(self.mock_kill.mock_calls, [])
        self.assertEqual(self.mock_stderr.getvalue(),
                         'Failed to validate config\n')

    def test_died_before_signal(self):
        self.mock_validate.return_value = (
            [
                '/usr/bin/swift-proxy-server',
                '/etc/swift/proxy-server.conf'
            ],
            'swift-proxy-server',
        )
        # Listing children fails before the signal is ever sent
        self.mock_get_child_pids.side_effect = subprocess.CalledProcessError(
            1, 'ps')
        with self.assertRaises(SystemExit) as caught:
            reload.main(['123'])
        self.assertEqual(caught.exception.args, (reload.EXIT_RELOAD_FAILED,))
        self.assertEqual(self.mock_kill.mock_calls, [])
        self.assertEqual(self.mock_stderr.getvalue(),
                         'Process seems to have died!\n')

    def test_died_after_signal(self):
        self.mock_validate.return_value = (
            [
                '/usr/bin/swift-proxy-server',
                '/etc/swift/proxy-server.conf'
            ],
            'swift-proxy-server',
        )
        # First listing works; the next one (after the signal) fails
        self.mock_get_child_pids.side_effect = [
            {'worker1', 'worker2'},
            subprocess.CalledProcessError(1, 'ps'),
        ]
        with self.assertRaises(SystemExit) as caught:
            reload.main(['123'])
        self.assertEqual(caught.exception.args, (reload.EXIT_RELOAD_FAILED,))
        self.assertEqual(self.mock_kill.mock_calls, [
            mock.call(123, signal.SIGUSR1),
        ])
        self.assertEqual(self.mock_stderr.getvalue(),
                         'Process seems to have died!\n')

    def test_needs_pid(self):
        with self.assertRaises(SystemExit) as caught:
            reload.main([])
        self.assertEqual(caught.exception.args, (reload.EXIT_BAD_PID,))
        # NOTE(review): these expectations depend on main() handing the
        # module docstring to argparse as ``prog``, which is why the usage
        # line starts with the docstring text.
        msg = 'usage: \nSafely reload WSGI servers'
        self.assertEqual(self.mock_stderr.getvalue()[:len(msg)], msg)
        if six.PY2:
            msg = '\n: error: too few arguments\n'
        else:
            msg = '\n: error: the following arguments are required: pid\n'
        self.assertEqual(self.mock_stderr.getvalue()[-len(msg):], msg)