Add ssh_retry connection plugin

The default ssh connection plugin causes a task to fail if a
connection cannot be established on the first attempt. These failures
have been found to cause a number of builds to fail.

This patch adds a new connection plugin called ssh_retry and sets it as
the default one to use.

The plugin can be enabled by setting the following options in
ansible.cfg:

    [defaults]
    connection_plugins = plugins/connection_plugins
    transport = ssh_retry

    [ssh_retry]
    retries = 3

Note: the default number of retries is 3.

Change-Id: Ic187fb154cfa7b6fa95b19bee4757ec976f3f368
Co-Authored-By: Hugh Saunders <hugh@wherenow.org>
Closes-Bug: #1404343
This commit is contained in:
git-harry 2015-02-04 14:41:35 +00:00
parent 81c4ab04f7
commit e9f7a0dec1
2 changed files with 78 additions and 0 deletions

View File

@ -11,5 +11,12 @@ forks = 15
# SSH timeout
timeout = 120
# ssh_retry connection plugin
connection_plugins = plugins/connection_plugins
transport = ssh_retry
# [ssh_retry]
# retries = 3
[ssh_connection]
pipelining = True

View File

@ -0,0 +1,71 @@
# (c) 2012, Michael DeHaan <michael.dehaan@gmail.com>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
#
import time
import ansible.constants as C
from ansible.callbacks import vvv, display
from ansible.runner.connection_plugins import ssh as base_ssh
class Connection(base_ssh.Connection):
    '''SSH connections with retries on failure'''

    def exec_command(self, *args, **kwargs):
        """ Wrapper around the parent exec_command that retries when the
        ssh transport itself fails.

        All positional and keyword arguments are passed through to the
        parent plugin unchanged.

        An attempt is retried if:
            * the parent exec_command raises an exception
            * the first element of the returned tuple is 255

        The number of retries is read from the ``retries`` option of the
        ``ssh_retry`` config section (ANSIBLE_SSH_RETRY_RETRIES), with a
        default of 3, so up to ``retries + 1`` attempts are made. A
        negative setting is treated as 0 (a single attempt, no retries).

        Before each attempt the call sleeps ``2 ** attempt - 1`` seconds,
        capped at 30, which means there is no delay before the first
        attempt.

        Returns the tuple produced by the parent exec_command. If the
        final attempt still reports 255, that tuple is returned as is.

        Raises whatever the parent exec_command raised on the final
        attempt, with its original traceback.
        """
        retries = C.get_config(
            C.p, 'ssh_retry', 'retries',
            'ANSIBLE_SSH_RETRY_RETRIES', 3, integer=True)
        # Always make the initial attempt, even if retries is configured
        # as a negative number; otherwise the loop body would never run
        # and there would be nothing to return.
        total_attempts = max(retries, 0) + 1
        last_attempt = total_attempts - 1

        # The command is normally the first positional argument; fall back
        # to a keyword lookup so that building the log summary can never
        # fail before the command has even been tried.
        # NOTE(review): the keyword name 'cmd' is assumed from the base
        # plugin's signature - confirm.
        command = args[0] if args else kwargs.get('cmd')
        cmd_summary = "%s %s..." % (command, str(kwargs)[:200])

        for attempt in xrange(total_attempts):
            # Exponential backoff: 0, 1, 3, 7, 15, then 30 seconds
            time.sleep(min(2 ** attempt - 1, 30))

            try:
                return_tuple = super(Connection, self).exec_command(
                    *args, **kwargs)
            except Exception as e:
                msg = ("ssh_retry: attempt: %d, caught exception(%s) from cmd "
                       "(%s).") % (attempt, e, cmd_summary)
                display(msg, color='blue')
                if attempt == last_attempt:
                    # Bare raise keeps the original traceback, so the
                    # failure points at the ssh plugin, not this wrapper
                    raise
                continue

            # 0 = success
            # 1-254 = remote command return code
            # 255 = failure from the ssh command itself
            if return_tuple[0] != 255:
                return return_tuple

            msg = ('ssh_retry: attempt: %d, ssh return code is 255. cmd '
                   '(%s).') % (attempt, cmd_summary)
            display(msg, color='blue')

        # Every attempt was used up and the last one returned 255; hand
        # that result back so the caller sees the ssh failure.
        return return_tuple