Mirror of https://github.com/saltstack/salt.git (synced 2025-04-17 10:10:20 +00:00)
Minions check DNS when re-connecting to a master
Check for a changing DNS record any time a minion gets disconnected from its master. See GitHub issues #63654 and #61482.
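In the diff below, the reconnect logic (tearing down the pub/req channels and calling self.eval_master()) now runs whenever the minion loses its master, not only when master_type is "failover", which is what forces a fresh DNS lookup of the configured master on every reconnect. As a rough standalone sketch of that idea only, not the Salt implementation, the following re-resolves the master name before each connection attempt; resolve_master, reconnect, and the master_ip/master_port keys are hypothetical names used for illustration.

# Standalone sketch of "re-resolve DNS before reconnecting"; not Salt's code.
# resolve_master(), reconnect(), and the master_ip/master_port keys are
# illustrative assumptions, not real Salt APIs.
import logging
import socket

log = logging.getLogger(__name__)


def resolve_master(hostname):
    """Ask the resolver for the master's address on every call (no caching)."""
    return socket.gethostbyname(hostname)


def reconnect(opts, connect):
    """Re-resolve the configured master and hand the fresh address to connect()."""
    addr = resolve_master(opts["master"])
    if addr != opts.get("master_ip"):
        log.info("Master %s now resolves to %s", opts["master"], addr)
        opts["master_ip"] = addr
    return connect(addr, opts.get("master_port", 4506))

A caller would invoke reconnect(opts, some_connect_callable) each time the connection drops, so a master whose DNS record has changed is picked up automatically instead of retrying a stale IP.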
This commit is contained in:
parent b1754eeb63
commit d2c59f45b6
1 changed file with 75 additions and 105 deletions
salt/minion.py (180 changed lines)
@@ -2826,9 +2826,60 @@ class Minion(MinionBase):
                 # we are not connected anymore
                 self.connected = False
                 log.info("Connection to master %s lost", self.opts["master"])
+                if self.opts["transport"] != "tcp":
+                    self.schedule.delete_job(name=master_event(type="alive"))

-                if self.opts["master_type"] != "failover":
-                    # modify the scheduled job to fire on reconnect
+                log.info("Trying to tune in to next master from master-list")
+
+                if hasattr(self, "pub_channel"):
+                    self.pub_channel.on_recv(None)
+                    if hasattr(self.pub_channel, "auth"):
+                        self.pub_channel.auth.invalidate()
+                    if hasattr(self.pub_channel, "close"):
+                        self.pub_channel.close()
+                if hasattr(self, "req_channel") and self.req_channel:
+                    self.req_channel.close()
+                    self.req_channel = None
+
+                # if eval_master finds a new master for us, self.connected
+                # will be True again on successful master authentication
+                try:
+                    master, self.pub_channel = yield self.eval_master(
+                        opts=self.opts,
+                        failed=True,
+                        failback=tag.startswith(master_event(type="failback")),
+                    )
+                except SaltClientError:
+                    pass
+
+                if self.connected:
+                    self.opts["master"] = master
+
+                    # re-init the subsystems to work with the new master
+                    log.info(
+                        "Re-initialising subsystems for new master %s",
+                        self.opts["master"],
+                    )
+
+                    self.req_channel = salt.channel.client.AsyncReqChannel.factory(
+                        self.opts, io_loop=self.io_loop
+                    )
+
+                    # put the current schedule into the new loaders
+                    self.opts["schedule"] = self.schedule.option("schedule")
+                    (
+                        self.functions,
+                        self.returners,
+                        self.function_errors,
+                        self.executors,
+                    ) = self._load_modules()
+                    # make the schedule to use the new 'functions' loader
+                    self.schedule.functions = self.functions
+                    self.pub_channel.on_recv(self._handle_payload)
+                    self._fire_master_minion_start()
+                    log.info("Minion is ready to receive requests!")
+
+                    # update scheduled job to run with the new master addr
                     if self.opts["transport"] != "tcp":
                         schedule = {
                             "function": "status.master",
@@ -2838,116 +2889,35 @@ class Minion(MinionBase):
                             "return_job": False,
                             "kwargs": {
                                 "master": self.opts["master"],
-                                "connected": False,
+                                "connected": True,
                             },
                         }
                         self.schedule.modify_job(
                             name=master_event(type="alive", master=self.opts["master"]),
                             schedule=schedule,
                         )
+
+                        if self.opts["master_failback"] and "master_list" in self.opts:
+                            if self.opts["master"] != self.opts["master_list"][0]:
+                                schedule = {
+                                    "function": "status.ping_master",
+                                    "seconds": self.opts["master_failback_interval"],
+                                    "jid_include": True,
+                                    "maxrunning": 1,
+                                    "return_job": False,
+                                    "kwargs": {"master": self.opts["master_list"][0]},
+                                }
+                                self.schedule.modify_job(
+                                    name=master_event(type="failback"),
+                                    schedule=schedule,
+                                )
+                            else:
+                                self.schedule.delete_job(
+                                    name=master_event(type="failback"), persist=True
+                                )
                 else:
-                    # delete the scheduled job to don't interfere with the failover process
-                    if self.opts["transport"] != "tcp":
-                        self.schedule.delete_job(name=master_event(type="alive"))
-
-                    log.info("Trying to tune in to next master from master-list")
-
-                    if hasattr(self, "pub_channel"):
-                        self.pub_channel.on_recv(None)
-                        if hasattr(self.pub_channel, "auth"):
-                            self.pub_channel.auth.invalidate()
-                        if hasattr(self.pub_channel, "close"):
-                            self.pub_channel.close()
-                    if hasattr(self, "req_channel") and self.req_channel:
-                        self.req_channel.close()
-                        self.req_channel = None
-
-                    # if eval_master finds a new master for us, self.connected
-                    # will be True again on successful master authentication
-                    try:
-                        master, self.pub_channel = yield self.eval_master(
-                            opts=self.opts,
-                            failed=True,
-                            failback=tag.startswith(master_event(type="failback")),
-                        )
-                    except SaltClientError:
-                        pass
-
-                    if self.connected:
-                        self.opts["master"] = master
-
-                        # re-init the subsystems to work with the new master
-                        log.info(
-                            "Re-initialising subsystems for new master %s",
-                            self.opts["master"],
-                        )
-
-                        self.req_channel = salt.channel.client.AsyncReqChannel.factory(
-                            self.opts, io_loop=self.io_loop
-                        )
-
-                        # put the current schedule into the new loaders
-                        self.opts["schedule"] = self.schedule.option("schedule")
-                        (
-                            self.functions,
-                            self.returners,
-                            self.function_errors,
-                            self.executors,
-                        ) = self._load_modules()
-                        # make the schedule to use the new 'functions' loader
-                        self.schedule.functions = self.functions
-                        self.pub_channel.on_recv(self._handle_payload)
-                        self._fire_master_minion_start()
-                        log.info("Minion is ready to receive requests!")
-
-                        # update scheduled job to run with the new master addr
-                        if self.opts["transport"] != "tcp":
-                            schedule = {
-                                "function": "status.master",
-                                "seconds": self.opts["master_alive_interval"],
-                                "jid_include": True,
-                                "maxrunning": 1,
-                                "return_job": False,
-                                "kwargs": {
-                                    "master": self.opts["master"],
-                                    "connected": True,
-                                },
-                            }
-                            self.schedule.modify_job(
-                                name=master_event(
-                                    type="alive", master=self.opts["master"]
-                                ),
-                                schedule=schedule,
-                            )
-
-                            if (
-                                self.opts["master_failback"]
-                                and "master_list" in self.opts
-                            ):
-                                if self.opts["master"] != self.opts["master_list"][0]:
-                                    schedule = {
-                                        "function": "status.ping_master",
-                                        "seconds": self.opts[
-                                            "master_failback_interval"
-                                        ],
-                                        "jid_include": True,
-                                        "maxrunning": 1,
-                                        "return_job": False,
-                                        "kwargs": {
-                                            "master": self.opts["master_list"][0]
-                                        },
-                                    }
-                                    self.schedule.modify_job(
-                                        name=master_event(type="failback"),
-                                        schedule=schedule,
-                                    )
-                                else:
-                                    self.schedule.delete_job(
-                                        name=master_event(type="failback"), persist=True
-                                    )
-                    else:
-                        self.restart = True
-                        self.io_loop.stop()
+                    self.restart = True
+                    self.io_loop.stop()

         elif tag.startswith(master_event(type="connected")):
             # handle this event only once. otherwise it will pollute the log