Fix master failover and add documentation for same. Factor in syndics. Syndics will not failover (yet).

This commit is contained in:
C. R. Oldham 2015-06-23 10:56:18 -06:00
parent 1650233be9
commit b9a756ff5f
2 changed files with 63 additions and 51 deletions

View file

@ -296,7 +296,7 @@ VALID_OPTS = {
DEFAULT_MINION_OPTS = {
'interface': '0.0.0.0',
'master': 'salt',
'master_type': 'str',
'master_type': 'standard',
'master_port': '4506',
'master_finger': '',
'master_shuffle': False,

View file

@ -773,7 +773,7 @@ class Minion(MinionBase):
(possibly failed) master will then be removed from the list of masters.
'''
# check if master_type was altered from its default
if opts['master_type'] != 'str':
if opts['master_type'] != 'standard' and opts['__role'] != 'syndic':
# check for a valid keyword
if opts['master_type'] == 'func':
# split module and function and try loading the module
@ -791,31 +791,30 @@ class Minion(MinionBase):
sys.exit(salt.defaults.exitcodes.EX_GENERIC)
log.info('Evaluated master from module: {0}'.format(master_mod))
# if failover is set, master has to be of type list
# if failover is set, the first time through, opts['master'] is a list.
elif opts['master_type'] == 'failover':
if isinstance(opts['master'], list):
if failed:
log.info('Removing possibly failed master {0} from list of'
' masters'.format(opts['master']))
# create new list of master with the possibly failed one removed
opts['master_active_list'] = [x for x in opts['master_active_list'] if opts['master'] != x]
elif isinstance(opts['master'], list):
log.info('Got list of available master addresses:'
' {0}'.format(opts['master']))
if opts['master_shuffle']:
shuffle(opts['master'])
# if opts['master'] is a str and we have never created opts['master_list']
elif isinstance(opts['master'], str) and ('master_list' not in opts):
opts['master_list'] = opts['master']
opts['master_active_list'] = opts['master']
if opts.get('master_shuffle'):
shuffle(opts['master_list'])
elif isinstance(opts['master'], str):
# We have a string, but a list was what was intended. Convert.
# See issue 23611 for details
opts['master'] = [opts['master']]
elif opts['__role'] == 'syndic':
log.info('Syndic setting master_syndic to \'{0}\''.format(opts['master']))
opts['master_list'] = opts['master']
opts['master_active_list'] = opts['master']
# if failed=True, the minion was previously connected
# we're probably called from the minions main-event-loop
# because a master connection loss was detected. remove
# the possibly failed master from the list of masters.
elif failed:
log.info('Removing possibly failed master {0} from list of'
' masters'.format(opts['master']))
# create new list of master with the possibly failed one removed
opts['master'] = [x for x in opts['master_list'] if opts['master'] != x]
else:
msg = ('master_type set to \'failover\' but \'master\' '
'is not of type list but of type '
@ -828,44 +827,56 @@ class Minion(MinionBase):
log.error(msg)
sys.exit(salt.defaults.exitcodes.EX_GENERIC)
# if we have a list of masters, loop through them and be
# happy with the first one that allows us to connect
if isinstance(opts['master'], list):
conn = False
# shuffle the masters and then loop through them
local_masters = copy.copy(opts['master'])
# if the master_active_list is empty, reset with the original list
# if we have a list of masters, loop through them and be
# happy with the first one that allows us to connect
# If there is no 'master_list' then this is a single-master scenario, so fall through this
# conditional without warning.
if not opts.get('master_active_list'):
if opts.get('master_list'):
log.info('List of active masters is empty, try all masters from the top.')
opts['master_active_list'] = opts['master_list']
for master in local_masters:
opts['master'] = master
opts.update(resolve_dns(opts))
super(Minion, self).__init__(opts)
if isinstance(opts['master_active_list'], list):
conn = False
# shuffle the masters and then loop through them
local_masters = opts['master_active_list']
# on first run, update self.opts with the whole master list
# to enable a minion to re-use old masters if they get fixed
if 'master_list' not in self.opts:
self.opts['master_list'] = local_masters
for master in local_masters:
opts['master'] = master
opts.update(resolve_dns(opts))
super(Minion, self).__init__(opts)
try:
if self.authenticate(timeout, safe) != 'full':
conn = True
break
except SaltClientError:
msg = ('Master {0} could not be reached, trying '
'next master (if any)'.format(opts['master']))
log.info(msg)
continue
# on first run, update self.opts with the whole master list
# to enable a minion to re-use old masters if they get fixed
# if 'master_list' not in self.opts:
# self.opts['master_list'] = local_masters
# cro: I don't think we need this because we are getting master_list
# from above.
if not conn:
self.connected = False
msg = ('No master could be reached or all masters denied '
'the minions connection attempt.')
log.error(msg)
else:
self.connected = True
return opts['master']
try:
if self.authenticate(timeout, safe) != 'full':
conn = True
break
except SaltClientError:
msg = ('Master {0} could not be reached, trying '
'next master (if any)'.format(opts['master']))
log.info(msg)
continue
# single master sign in
if not conn:
self.connected = False
msg = ('No master could be reached or all masters denied '
'the minions connection attempt.')
log.error(msg)
else:
self.connected = True
return opts['master']
# single master sign in or syndic
else:
if opts['__role'] == 'syndic':
log.info('Syndic setting master_syndic to \'{0}\''.format(opts['master']))
opts.update(resolve_dns(opts))
super(Minion, self).__init__(opts)
if self.authenticate(timeout, safe) == 'full':
@ -1694,9 +1705,10 @@ class Minion(MinionBase):
self._fire_master(data['data'], data['tag'], data['events'], data['pretag'])
elif package.startswith('__master_disconnected'):
tag, data = salt.utils.event.MinionEvent.unpack(package)
# if the master disconnect event is for a different master, raise an exception
# if the master disconnect event is for a different master, ignore
if data['master'] != self.opts['master']:
raise Exception()
return self.opts['master']
# raise Exception()
if self.connected:
# we are not connected anymore
self.connected = False