Mirror of https://github.com/saltstack/salt.git (synced 2025-04-17 10:10:20 +00:00)
Merge pull request #24902 from cro/fixfo2
Fix minion failover, document same
Commit 2dd24ece71
7 changed files with 167 additions and 65 deletions
12  conf/minion
@@ -22,6 +22,18 @@
# If only one master is listed, this setting is ignored and a warning will be logged.
#random_master: False

# Minions can connect to multiple masters simultaneously (all masters
# are "hot"), or can be configured to failover if a master becomes
# unavailable. Multiple hot masters are configured by setting this
# value to "standard". Failover masters can be requested by setting
# to "failover". MAKE SURE TO SET master_alive_interval if you are
# using failover.
# master_type: standard

# Poll interval in seconds for checking if the master is still there. Only
# respected if master_type above is "failover".
# master_alive_interval: 30

# Set whether the minion should connect to the master via IPv6:
#ipv6: False
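
As an aside, putting the commented options above together, a minimal failover
setup in the minion config might look like the sketch below (the master
hostnames are placeholders; ``master`` must be a list when ``master_type`` is
set to ``failover``):

    master:
      - master1.example.com
      - master2.example.com
    master_type: failover
    master_alive_interval: 30
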
@@ -9,6 +9,7 @@ Salt Table of Contents
    topics/jobs/index
    topics/event/index
    topics/topology/index
    topics/highavailability/index
    topics/windows/index
    topics/cloud/index
    topics/netapi/index
@@ -21,6 +21,7 @@ Salt Table of Contents
    topics/event/index
    topics/beacons/index
    topics/ext_processes/index
    topics/highavailability/index
    topics/topology/index
    topics/transports/raet/index
    topics/windows/index
@@ -82,10 +82,24 @@ The option can also be set to a list of masters, enabling

.. versionadded:: 2014.7.0

Default: ``str``
Default: ``standard``

The type of the :conf_minion:`master` variable. Can be either ``func`` or
``failover``.
The type of the :conf_minion:`master` variable. Can be ``standard``, ``failover`` or
``func``.

.. code-block:: yaml

    master_type: failover

If this option is set to ``failover``, :conf_minion:`master` must be a list of
master addresses. The minion will then try each master in the order specified
in the list until it successfully connects. :conf_minion:`master_alive_interval`
must also be set; this determines how often the minion will verify the presence
of the master.

.. code-block:: yaml

    master_type: func

If the master needs to be dynamically assigned by executing a function instead
of reading in the static master value, set this to ``func``. This can be used
@@ -93,19 +107,16 @@ to manage the minion's master setting from an execution module. By simply
changing the algorithm in the module to return a new master ip/fqdn, restart
the minion and it will connect to the new master.
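
As a sketch of the ``func`` case (the module and function names here are
hypothetical): the ``master`` value is treated as a ``module.function``
reference, which is loaded from the minion modules and executed to return the
master's ip/fqdn.

.. code-block:: yaml

    master_type: func
    # hypothetical execution module and function returning the master's ip/fqdn
    master: master_picker.get_master
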
``master_alive_interval``
-------------------------

.. code-block:: yaml

    master_type: func
    master_alive_interval: 30

If this option is set to ``failover``, :conf_minion:`master` must be a list of
master addresses. The minion will then try each master in the order specified
in the list until it successfully connects.

.. code-block:: yaml

    master_type: failover
Configures how often, in seconds, the minion will verify that the current
master is alive and responding. The minion will try to establish a connection
to the next master in the list if it finds the existing one is dead.

``master_shuffle``
------------------
65  doc/topics/highavailability/index.rst  Normal file
@@ -0,0 +1,65 @@
.. _highavailability:

==================================
High Availability Features in Salt
==================================

Salt supports several features for high availability and fault tolerance.
Brief documentation for these features is listed alongside their configuration
parameters in :ref:`Configuration file examples <configuration/examples>`.

Multimaster
===========

Salt minions can connect to multiple masters at one time by configuring the
`master` configuration parameter as a YAML list of all the available masters. By
default, all masters are "hot", meaning that any master can direct commands to
the Salt infrastructure.

In a multimaster configuration, each master must have the same cryptographic
keys, and minion keys must be accepted on all masters separately. The contents
of file_roots and pillar_roots need to be kept in sync with processes external
to Salt as well.
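
For illustration, a hot multimaster minion configuration is simply a list of
masters (the hostnames below are placeholders); ``master_type`` keeps its
default value of ``standard``:

.. code-block:: yaml

    master:
      - master1.example.com
      - master2.example.com
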
A tutorial on setting up multimaster with "hot" masters is here:

:doc:`Multimaster Tutorial </topics/tutorials/multimaster>`

Multimaster with Failover
=========================

Changing the ``master_type`` parameter from ``standard`` to ``failover`` will
cause minions to connect to the first responding master in the list of masters.
Every ``master_alive_interval`` seconds the minions will check to make sure
the current master is still responding. If the master does not respond,
the minion will attempt to connect to the next master in the list. If the
minion runs out of masters, the list will be recycled in case dead masters
have been restored. Note that ``master_alive_interval`` must be present in the
minion configuration, or else the recurring job to check master status
will not get scheduled.

Failover can be combined with PKI-style encrypted keys, but PKI is NOT
REQUIRED to use failover.

Multimaster with PKI and Failover is discussed in
:doc:`this tutorial </topics/tutorials/multimaster_pki>`

``master_type: failover`` can be combined with ``master_shuffle: True``
to spread minion connections across all masters (one master per
minion, not each minion connecting to all masters). Adding Salt Syndics
into the mix makes it possible to create a load-balanced Salt infrastructure.
If a master fails, minions will notice and select another master from the
available list.
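
A rough sketch of that load-spreading setup (hostnames are placeholders); with
``master_shuffle`` enabled, each minion shuffles the list before settling on
the first master that responds:

.. code-block:: yaml

    master:
      - master1.example.com
      - master2.example.com
      - master3.example.com
    master_type: failover
    master_shuffle: True
    master_alive_interval: 30
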
Syndic
======

Salt's Syndic feature is a way to create differing infrastructure
topologies. It is not strictly an HA feature, but can be treated as such.

With the syndic, a Salt infrastructure can be partitioned in such a way that
certain masters control certain segments of the infrastructure, and "Master
of Masters" nodes can control multiple segments underneath them.

Syndics are covered in depth in :doc:`Salt Syndic </topics/topology/syndic>`.
@@ -296,7 +296,7 @@ VALID_OPTS = {
DEFAULT_MINION_OPTS = {
    'interface': '0.0.0.0',
    'master': 'salt',
    'master_type': 'str',
    'master_type': 'standard',
    'master_port': '4506',
    'master_finger': '',
    'master_shuffle': False,
116  salt/minion.py
@@ -773,7 +773,7 @@ class Minion(MinionBase):
(possibly failed) master will then be removed from the list of masters.
'''
# check if master_type was altered from its default
if opts['master_type'] != 'str':
if opts['master_type'] != 'standard' and opts['__role'] != 'syndic':
    # check for a valid keyword
    if opts['master_type'] == 'func':
        # split module and function and try loading the module
@@ -791,31 +791,30 @@
            sys.exit(salt.defaults.exitcodes.EX_GENERIC)
        log.info('Evaluated master from module: {0}'.format(master_mod))

    # if failover is set, master has to be of type list
    # if failover is set, the first time through, opts['master'] is a list.
    elif opts['master_type'] == 'failover':
        if isinstance(opts['master'], list):
            log.info('Got list of available master addresses:'
                     ' {0}'.format(opts['master']))
            if opts['master_shuffle']:
                shuffle(opts['master'])
        # if opts['master'] is a str and we have never created opts['master_list']
        elif isinstance(opts['master'], str) and ('master_list' not in opts):
            # We have a string, but a list was what was intended. Convert.
            # See issue 23611 for details
            opts['master'] = [opts['master']]
        elif opts['__role'] == 'syndic':
            log.info('Syndic setting master_syndic to \'{0}\''.format(opts['master']))

        # if failed=True, the minion was previously connected
        # we're probably called from the minions main-event-loop
        # because a master connection loss was detected. remove
        # the possibly failed master from the list of masters.
        elif failed:
        if failed:
            log.info('Removing possibly failed master {0} from list of'
                     ' masters'.format(opts['master']))
            # create new list of master with the possibly failed one removed
            opts['master'] = [x for x in opts['master_list'] if opts['master'] != x]

            opts['master_active_list'] = [x for x in opts['master_active_list'] if opts['master'] != x]
        elif isinstance(opts['master'], list):
            log.info('Got list of available master addresses:'
                     ' {0}'.format(opts['master']))
            opts['master_list'] = opts['master']
            opts['master_active_list'] = opts['master']
            if opts.get('master_shuffle'):
                shuffle(opts['master_list'])
        elif isinstance(opts['master'], str):
            # We have a string, but a list was what was intended. Convert.
            # See issue 23611 for details
            opts['master'] = [opts['master']]
            opts['master_list'] = opts['master']
            opts['master_active_list'] = opts['master']
        else:
            msg = ('master_type set to \'failover\' but \'master\' '
                   'is not of type list but of type '
@@ -828,44 +827,56 @@
            log.error(msg)
            sys.exit(salt.defaults.exitcodes.EX_GENERIC)

    # if we have a list of masters, loop through them and be
    # happy with the first one that allows us to connect
    if isinstance(opts['master'], list):
        conn = False
        # shuffle the masters and then loop through them
        local_masters = copy.copy(opts['master'])
    # if the master_active_list is empty, reset with the original list
    # if we have a list of masters, loop through them and be
    # happy with the first one that allows us to connect
    # If there is no 'master_list' then this is a single-master scenario, so fall through this
    # conditional without warning.
    if not opts.get('master_active_list'):
        if opts.get('master_list'):
            log.info('List of active masters is empty, try all masters from the top.')
            opts['master_active_list'] = opts['master_list']

        for master in local_masters:
            opts['master'] = master
            opts.update(resolve_dns(opts))
            super(Minion, self).__init__(opts)
    if isinstance(opts['master_active_list'], list):
        conn = False
        # shuffle the masters and then loop through them
        local_masters = opts['master_active_list']

        # on first run, update self.opts with the whole master list
        # to enable a minion to re-use old masters if they get fixed
        if 'master_list' not in self.opts:
            self.opts['master_list'] = local_masters
        for master in local_masters:
            opts['master'] = master
            opts.update(resolve_dns(opts))
            super(Minion, self).__init__(opts)

            try:
                if self.authenticate(timeout, safe) != 'full':
                    conn = True
                    break
            except SaltClientError:
                msg = ('Master {0} could not be reached, trying '
                       'next master (if any)'.format(opts['master']))
                log.info(msg)
                continue
            # on first run, update self.opts with the whole master list
            # to enable a minion to re-use old masters if they get fixed
            # if 'master_list' not in self.opts:
            #     self.opts['master_list'] = local_masters
            # cro: I don't think we need this because we are getting master_list
            # from above.

        if not conn:
            self.connected = False
            msg = ('No master could be reached or all masters denied '
                   'the minions connection attempt.')
            log.error(msg)
        else:
            self.connected = True
            return opts['master']
            try:
                if self.authenticate(timeout, safe) != 'full':
                    conn = True
                    break
            except SaltClientError:
                msg = ('Master {0} could not be reached, trying '
                       'next master (if any)'.format(opts['master']))
                log.info(msg)
                continue

        # single master sign in
        if not conn:
            self.connected = False
            msg = ('No master could be reached or all masters denied '
                   'the minions connection attempt.')
            log.error(msg)
        else:
            self.connected = True
            return opts['master']

    # single master sign in or syndic
    else:
        if opts['__role'] == 'syndic':
            log.info('Syndic setting master_syndic to \'{0}\''.format(opts['master']))
        opts.update(resolve_dns(opts))
        super(Minion, self).__init__(opts)
        if self.authenticate(timeout, safe) == 'full':
@@ -1694,9 +1705,10 @@ class Minion(MinionBase):
    self._fire_master(data['data'], data['tag'], data['events'], data['pretag'])
elif package.startswith('__master_disconnected'):
    tag, data = salt.utils.event.MinionEvent.unpack(package)
    # if the master disconnect event is for a different master, raise an exception
    # if the master disconnect event is for a different master, ignore
    if data['master'] != self.opts['master']:
        raise Exception()
        return self.opts['master']
        # raise Exception()
    if self.connected:
        # we are not connected anymore
        self.connected = False