Merge pull request #49764 from garethgreenaway/49520_multimaster_dns_issue_fix

[2018.3] Adding retry_dns_count to minion
This commit is contained in:
Nicole Thomas 2018-10-01 10:20:02 -04:00 committed by GitHub
commit ec978061aa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 68 additions and 4 deletions

View file

@ -66,6 +66,11 @@
# Set to zero if the minion should shutdown and not retry.
# retry_dns: 30
# Set the number of times to attempt to resolve
# the master hostname if name resolution fails. Defaults to None,
# which will attempt the resolution indefinitely.
# retry_dns_count: 3
# Set the port used by the master reply and authentication server.
#master_port: 4506

View file

@ -307,6 +307,23 @@ Set to zero if the minion should shutdown and not retry.
retry_dns: 30
.. conf_minion:: retry_dns_count
``retry_dns_count``
-------------------
.. versionadded:: 2018.3.4
Default: ``None``
Set the number of attempts to perform when resolving
the master hostname if name resolution fails.
By default the minion will retry indefinitely.
.. code-block:: yaml
retry_dns_count: 3
.. conf_minion:: master_port
``master_port``

View file

@ -518,6 +518,7 @@ VALID_OPTS = {
# The number of seconds to sleep between retrying an attempt to resolve the hostname of a
# salt master
'retry_dns': float,
'retry_dns_count': (type(None), int),
# In the case when the resolve of the salt master hostname fails, fall back to localhost
'resolve_dns_fallback': bool,
@ -1397,6 +1398,7 @@ DEFAULT_MINION_OPTS = {
'update_url': False,
'update_restart_services': [],
'retry_dns': 30,
'retry_dns_count': None,
'resolve_dns_fallback': True,
'recon_max': 10000,
'recon_default': 1000,

View file

@ -96,6 +96,12 @@ class SaltSyndicMasterError(SaltException):
'''
class SaltMasterUnresolvableError(SaltException):
    '''
    Raised when the hostname of the Salt master cannot be resolved
    '''
class MasterExit(SystemExit):
'''
Raised when the master exits

View file

@ -115,6 +115,7 @@ from salt.exceptions import (
SaltSystemExit,
SaltDaemonNotRunning,
SaltException,
SaltMasterUnresolvableError
)
@ -154,8 +155,13 @@ def resolve_dns(opts, fallback=True):
True,
opts['ipv6'])
except SaltClientError:
retry_dns_count = opts.get('retry_dns_count', None)
if opts['retry_dns']:
while True:
if retry_dns_count is not None:
if retry_dns_count == 0:
raise SaltMasterUnresolvableError
retry_dns_count -= 1
import salt.log
msg = ('Master hostname: \'{0}\' not found or not responsive. '
'Retrying in {1} seconds').format(opts['master'], opts['retry_dns'])
@ -962,7 +968,17 @@ class MinionManager(MinionBase):
loaded_base_name=loaded_base_name,
jid_queue=jid_queue)
def _spawn_minions(self):
def _check_minions(self):
    '''
    Verify that self.minions holds at least one entry.

    When the list is empty, log an error and raise SaltSystemExit with
    code=42; otherwise return without doing anything.
    '''
    if self.minions:
        return
    message = 'Minion unable to successfully connect to a Salt Master. Exiting.'
    log.error(message)
    raise SaltSystemExit(code=42, msg=message)
def _spawn_minions(self, timeout=60):
'''
Spawn all the coroutines which will sign in to masters
'''
@ -981,8 +997,9 @@ class MinionManager(MinionBase):
loaded_base_name='salt.loader.{0}'.format(s_opts['master']),
jid_queue=self.jid_queue,
)
self.minions.append(minion)
self.io_loop.spawn_callback(self._connect_minion, minion)
self._connect_minion(minion)
self.io_loop.call_later(timeout, self._check_minions)
@tornado.gen.coroutine
def _connect_minion(self, minion):
@ -1000,6 +1017,7 @@ class MinionManager(MinionBase):
minion.setup_scheduler(before_connect=True)
yield minion.connect_master(failed=failed)
minion.tune_in(start=False)
self.minions.append(minion)
break
except SaltClientError as exc:
failed = True
@ -1011,6 +1029,11 @@ class MinionManager(MinionBase):
if auth_wait < self.max_auth_wait:
auth_wait += self.auth_wait
yield tornado.gen.sleep(auth_wait) # TODO: log?
except SaltMasterUnresolvableError:
err = 'Master address: \'{0}\' could not be resolved. Invalid or unresolveable address. ' \
'Set \'master\' value in minion config.'.format(minion.opts['master'])
log.error(err)
break
except Exception as e:
failed = True
log.critical(

View file

@ -16,7 +16,7 @@ from tests.support.helpers import skip_if_not_root
# Import salt libs
import salt.minion
import salt.utils.event as event
from salt.exceptions import SaltSystemExit
from salt.exceptions import SaltSystemExit, SaltMasterUnresolvableError
import salt.syspaths
import tornado
import tornado.testing
@ -282,6 +282,17 @@ class MinionTestCase(TestCase, AdaptedConfigurationTestCaseMixin):
finally:
minion.destroy()
def test_minion_retry_dns_count(self):
    '''
    Verify that resolve_dns raises SaltMasterUnresolvableError when it is
    called with retry_dns_count set to 3 and the master name 'dummy'.
    '''
    overrides = {
        'ipv6': False,
        'master': 'dummy',
        'master_port': '4555',
        'retry_dns': 1,
        'retry_dns_count': 3,
    }
    with patch.dict(__opts__, overrides):
        with self.assertRaises(SaltMasterUnresolvableError):
            salt.minion.resolve_dns(__opts__)
@skipIf(NO_MOCK, NO_MOCK_REASON)
class MinionAsyncTestCase(TestCase, AdaptedConfigurationTestCaseMixin, tornado.testing.AsyncTestCase):