mirror of
https://github.com/saltstack/salt.git
synced 2025-04-17 10:10:20 +00:00
Merge pull request #36126 from edlane/develop
Add keepalives to salt-master created connections for better presence
This commit is contained in:
commit
8f609a5799
2 changed files with 66 additions and 26 deletions
32
conf/master
32
conf/master
|
@ -1024,3 +1024,35 @@
|
|||
#event_match_type: startswith
|
||||
# Save runner returns to the job cache
|
||||
#runner_returns: True
|
||||
|
||||
###### Keepalive settings ######
|
||||
############################################
|
||||
# Warning: Failure to set TCP keepalives on the salt-master can result in
|
||||
# not detecting the loss of a minion when the connection is lost or when
|
||||
# its host has been terminated without first closing the socket.
|
||||
# Salt's Presence System depends on this connection status to know if a minion
|
||||
# is "present".
|
||||
# ZeroMQ now includes support for configuring SO_KEEPALIVE if supported by
|
||||
# the OS. If connections between the minion and the master pass through
|
||||
# a state tracking device such as a firewall or VPN gateway, there is
|
||||
# the risk that it could tear down the connection between the master and minion
|
||||
# without informing either party that their connection has been taken away.
|
||||
# Enabling TCP Keepalives prevents this from happening.
|
||||
|
||||
# Overall state of TCP Keepalives, enable (1 or True), disable (0 or False)
|
||||
# or leave to the OS defaults (-1); on Linux, keepalives are typically disabled. Default True, enabled.
|
||||
#tcp_keepalive: True
|
||||
|
||||
# How long before the first keepalive should be sent in seconds. Default 300
|
||||
# to send the first keepalive after 5 minutes, OS default (-1) is typically 7200 seconds
|
||||
# on Linux see /proc/sys/net/ipv4/tcp_keepalive_time.
|
||||
#tcp_keepalive_idle: 300
|
||||
|
||||
# How many lost probes are needed to consider the connection lost. Default -1
|
||||
# to use OS defaults, typically 9 on Linux, see /proc/sys/net/ipv4/tcp_keepalive_probes.
|
||||
#tcp_keepalive_cnt: -1
|
||||
|
||||
# How often, in seconds, to send keepalives after the first one. Default -1 to
|
||||
# use OS defaults, typically 75 seconds on Linux, see
|
||||
# /proc/sys/net/ipv4/tcp_keepalive_intvl.
|
||||
#tcp_keepalive_intvl: -1
|
||||
|
|
|
@ -649,6 +649,38 @@ class ZeroMQReqServerChannel(salt.transport.mixins.auth.AESReqServerMixin, salt.
|
|||
sys.exit(salt.defaults.exitcodes.EX_OK)
|
||||
|
||||
|
||||
def _set_tcp_keepalive(zmq_socket, opts):
    '''
    Apply the TCP keepalive settings found in "opts" to a ZeroMQ socket.

    Each of the ``tcp_keepalive``, ``tcp_keepalive_idle``,
    ``tcp_keepalive_cnt`` and ``tcp_keepalive_intvl`` keys that is present
    in ``opts`` is passed to ``zmq_socket.setsockopt`` with the matching
    ``zmq.TCP_KEEPALIVE*`` option. Keys that are absent are left untouched.

    Warning: Failure to set TCP keepalives on the salt-master can result in
    not detecting the loss of a minion when the connection is lost or when
    its host has been terminated without first closing the socket.
    Salt's Presence System depends on this connection status to know if a
    minion is "present".

    Warning: Failure to set TCP keepalives on minions can result in frequent
    or unexpected disconnects!

    :param zmq_socket: the ZeroMQ socket to configure
    :param opts: the options dictionary holding the keepalive settings
    '''
    # Nothing to do when this zmq build does not expose TCP_KEEPALIVE or
    # when no options were supplied.
    if not hasattr(zmq, 'TCP_KEEPALIVE') or not opts:
        return

    # (opts key, name of the zmq socket option), applied in this order.
    keepalive_settings = (
        ('tcp_keepalive', 'TCP_KEEPALIVE'),
        ('tcp_keepalive_idle', 'TCP_KEEPALIVE_IDLE'),
        ('tcp_keepalive_cnt', 'TCP_KEEPALIVE_CNT'),
        ('tcp_keepalive_intvl', 'TCP_KEEPALIVE_INTVL'),
    )
    for opt_key, zmq_option in keepalive_settings:
        if opt_key in opts:
            # The zmq constant is only looked up for keys that are
            # actually configured.
            zmq_socket.setsockopt(getattr(zmq, zmq_option), opts[opt_key])
|
||||
|
||||
|
||||
class ZeroMQPubServerChannel(salt.transport.server.PubServerChannel):
|
||||
'''
|
||||
Encapsulate synchronous operations for a publisher channel
|
||||
|
@ -670,6 +702,7 @@ class ZeroMQPubServerChannel(salt.transport.server.PubServerChannel):
|
|||
context = zmq.Context(1)
|
||||
# Prepare minion publish socket
|
||||
pub_sock = context.socket(zmq.PUB)
|
||||
_set_tcp_keepalive(pub_sock, self.opts)
|
||||
# if 2.1 >= zmq < 3.0, we only have one HWM setting
|
||||
try:
|
||||
pub_sock.setsockopt(zmq.HWM, self.opts.get('pub_hwm', 1000))
|
||||
|
@ -882,7 +915,7 @@ class AsyncReqMessageClient(object):
|
|||
zmq.RECONNECT_IVL_MAX, 5000
|
||||
)
|
||||
|
||||
self._set_tcp_keepalive()
|
||||
_set_tcp_keepalive(self.socket, self.opts)
|
||||
if self.addr.startswith('tcp://['):
|
||||
# Hint PF type if bracket enclosed IPv6 address
|
||||
if hasattr(zmq, 'IPV6'):
|
||||
|
@ -893,31 +926,6 @@ class AsyncReqMessageClient(object):
|
|||
self.socket.connect(self.addr)
|
||||
self.stream = zmq.eventloop.zmqstream.ZMQStream(self.socket, io_loop=self.io_loop)
|
||||
|
||||
def _set_tcp_keepalive(self):
    '''
    Apply the TCP keepalive settings from ``self.opts`` to ``self.socket``.

    Each of the ``tcp_keepalive``, ``tcp_keepalive_idle``,
    ``tcp_keepalive_cnt`` and ``tcp_keepalive_intvl`` keys that is present
    in ``self.opts`` is passed to ``self.socket.setsockopt`` with the
    matching ``zmq.TCP_KEEPALIVE*`` option.

    Warning: Failure to set TCP keepalives can result in frequent or
    unexpected disconnects!
    '''
    # Nothing to do when this zmq build does not expose TCP_KEEPALIVE or
    # when no options are configured.
    if not hasattr(zmq, 'TCP_KEEPALIVE') or not self.opts:
        return

    # (opts key, name of the zmq socket option), applied in this order.
    keepalive_settings = (
        ('tcp_keepalive', 'TCP_KEEPALIVE'),
        ('tcp_keepalive_idle', 'TCP_KEEPALIVE_IDLE'),
        ('tcp_keepalive_cnt', 'TCP_KEEPALIVE_CNT'),
        ('tcp_keepalive_intvl', 'TCP_KEEPALIVE_INTVL'),
    )
    for opt_key, zmq_option in keepalive_settings:
        if opt_key in self.opts:
            self.socket.setsockopt(
                getattr(zmq, zmq_option), self.opts[opt_key]
            )
|
||||
|
||||
@tornado.gen.coroutine
|
||||
def _internal_send_recv(self):
|
||||
while len(self.send_queue) > 0:
|
||||
|
|
Loading…
Add table
Reference in a new issue