Backoff when minion fails to connect

NOTE: This is a potential change --- I haven't actually gotten a
test environment to try this out.

This adds a configuration option to the minion,
`acceptance_wait_time_max`. If it is set, the acceptance wait loop
adds `acceptance_wait_time` seconds to the wait after each iteration,
until the wait reaches this maximum value.

POTENTIAL FIX: Issue #5813
This commit is contained in:
Thomas L. Kula 2013-07-27 16:51:38 -04:00
parent 60728c72f7
commit 54a0746b4e
6 changed files with 53 additions and 1 deletions

View file

@ -98,6 +98,11 @@
# seconds, between those reconnection attempts.
#acceptance_wait_time: 10
# If this is set, the time between reconnection attempts will increase by
# acceptance_wait_time seconds per iteration, up to this maximum. If this
# is not set, the time between reconnection attempts will stay constant.
#acceptance_wait_time_max: None
# When the master-key changes, the minion will try to re-auth itself to
# receive the new master key. In larger environments this can cause a
# syn-flood on the master because all minions try to re-auth immediately.

View file

@ -47436,6 +47436,11 @@ def get_file(path, dest, env=\(aqbase\(aq):
# seconds, between those reconnection attempts.
#acceptance_wait_time: 10
# If this is set, the time between reconnection attempts will increase by
# acceptance_wait_time seconds per iteration, up to this maximum. If this
# is not set, the time between reconnection attempts will stay constant.
#acceptance_wait_time_max: None
# The loop_interval sets how long in seconds the minion will wait between
# evaluating the scheduler and running cleanup tasks. This defaults to a
# sane 60 seconds, but if the minion scheduler needs to be evaluated more
@ -48620,6 +48625,19 @@ master.
acceptance_wait_time: 10
.ft P
.fi
.SS \fBacceptance_wait_time_max\fP
.sp
Default: \fBNone\fP
.sp
The maximum number of seconds to wait between attempts to re\-authenticate
with the master. If set, the wait will increase by \fBacceptance_wait_time\fP
seconds each iteration, up to this maximum.
.sp
.nf
.ft C
acceptance_wait_time_max: None
.ft P
.fi
.SS \fBdns_check\fP
.sp
Default: \fBTrue\fP

View file

@ -212,6 +212,21 @@ master.
acceptance_wait_time: 10
.. conf_minion:: acceptance_wait_time_max
``acceptance_wait_time_max``
----------------------------
Default: ``None``
The maximum number of seconds to wait between attempts to re-authenticate
with the master. If set, the wait will increase by ``acceptance_wait_time``
seconds each iteration, up to this maximum.
.. code-block:: yaml
acceptance_wait_time_max: None
.. conf_minion:: dns_check
``dns_check``

View file

@ -91,6 +91,11 @@ pki_dir: /conf/pki/minion
# seconds, between those reconnection attempts.
#acceptance_wait_time: 10
# If this is set, the time between reconnection attempts will increase by
# acceptance_wait_time seconds per iteration, up to this maximum. If this
# is not set, the time between reconnection attempts will stay constant.
#acceptance_wait_time_max: None
# When healing, a dns_check is run. This is to make sure that the originally
# resolved dns has not changed. If this is something that does not happen in
# your environment, set this value to False.

View file

@ -91,6 +91,7 @@ VALID_OPTS = {
'state_verbose': bool,
'state_output': str,
'acceptance_wait_time': float,
'acceptance_wait_time_max': float,
'loop_interval': float,
'dns_check': bool,
'verify_env': bool,
@ -214,6 +215,7 @@ DEFAULT_MINION_OPTS = {
'state_output': 'full',
'state_auto_order': False,
'acceptance_wait_time': 10,
'acceptance_wait_time_max': None,
'loop_interval': 1,
'dns_check': True,
'verify_env': True,

View file

@ -839,13 +839,20 @@ class Minion(object):
)
)
auth = salt.crypt.Auth(self.opts)
acceptance_wait_time = self.opts['acceptance_wait_time']
acceptance_wait_time_max = self.opts['acceptance_wait_time_max']
if acceptance_wait_time_max is None:
acceptance_wait_time_max = acceptance_wait_time
while True:
creds = auth.sign_in(timeout, safe)
if creds != 'retry':
log.info('Authentication with master successful!')
break
log.info('Waiting for minion key to be accepted by the master.')
time.sleep(self.opts['acceptance_wait_time'])
time.sleep(acceptance_wait_time)
if acceptance_wait_time < acceptance_wait_time_max:
acceptance_wait_time += acceptance_wait_time
log.debug('Authentication wait time is {0}'.format( acceptance_wait_time ))
self.aes = creds['aes']
self.publish_port = creds['publish_port']
self.crypticle = salt.crypt.Crypticle(self.opts, self.aes)