From be97ceb118298780ec403205b8b511e5c4e3667d Mon Sep 17 00:00:00 2001 From: Max Arnold Date: Tue, 6 Feb 2024 09:46:41 -0600 Subject: [PATCH] Add runner proc files --- salt/client/mixins.py | 12 ++++++++++++ salt/master.py | 1 + salt/utils/master.py | 36 +++++++++++++++++++++++++++++++++++- salt/utils/minion.py | 3 +-- 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/salt/client/mixins.py b/salt/client/mixins.py index e8e22728a28..c8f46c99e54 100644 --- a/salt/client/mixins.py +++ b/salt/client/mixins.py @@ -378,6 +378,12 @@ class SyncClientMixin(ClientStateMixin): data["fun_args"] = list(args) + ([kwargs] if kwargs else []) func_globals["__jid_event__"].fire_event(data, "new") + sdata = copy.deepcopy(data) + proc_fn = os.path.join(self.opts["cachedir"], "proc", jid) + sdata["pid"] = os.getpid() + with salt.utils.files.fopen(proc_fn, "w+b") as fp_: + fp_.write(salt.payload.dumps(sdata)) + func = self.functions[fun] try: data["return"] = func(*args, **kwargs) @@ -408,6 +414,12 @@ class SyncClientMixin(ClientStateMixin): ) data["success"] = False data["retcode"] = 1 + finally: + # Job has finished or issue found, so let's clean up after ourselves + try: + os.remove(proc_fn) + except OSError as err: + log.debug("Error attempting to remove master job tracker: %s", err) if self.store_job: try: diff --git a/salt/master.py b/salt/master.py index 03e0123bd3d..6d1cb01b7cb 100644 --- a/salt/master.py +++ b/salt/master.py @@ -305,6 +305,7 @@ class Maintenance(salt.utils.process.SignalHandlingProcess): salt.daemons.masterapi.clean_old_jobs(self.opts) salt.daemons.masterapi.clean_expired_tokens(self.opts) salt.daemons.masterapi.clean_pub_auth(self.opts) + salt.utils.master.clean_proc_dir(self.opts) if not last or (now - last_git_pillar_update) >= git_pillar_update_interval: last_git_pillar_update = now self.handle_git_pillar() diff --git a/salt/utils/master.py b/salt/utils/master.py index 6aa661c0607..2c69117f3c2 100644 --- a/salt/utils/master.py +++ b/salt/utils/master.py @@ -32,7 +32,7 @@ log = logging.getLogger(__name__) def get_running_jobs(opts): """ - Return the running jobs on this minion + Return the running jobs on this master """ ret = [] @@ -53,6 +53,40 @@ def get_running_jobs(opts): return ret +def clean_proc_dir(opts): + """ + Clean out old tracked jobs running on the master + Generally, anything tracking a job should remove the job + once the job has finished. However, this will remove any + jobs that for some reason were not properly removed + when finished or errored. + """ + proc_dir = os.path.join(opts["cachedir"], "proc") + for fn_ in os.listdir(proc_dir): + proc_file = os.path.join(proc_dir, fn_) + data = _read_proc_file(proc_file, opts) + if not data: + try: + log.warning( + "Found proc file %s without proper data. Removing from tracked proc files.", + proc_file, + ) + os.remove(proc_file) + except OSError as err: + log.error("Unable to remove proc file: %s.", err) + continue + if not _check_cmdline(data): + try: + log.warning( + "PID %s not owned by salt or no longer running. Removing tracked proc file %s", + data["pid"], + proc_file, + ) + os.remove(proc_file) + except OSError as err: + log.error("Unable to remove proc file: %s.", err) + + def _read_proc_file(path, opts): """ Return a dict of JID metadata, or None diff --git a/salt/utils/minion.py b/salt/utils/minion.py index 39bae1a2302..f4d8829d7b6 100644 --- a/salt/utils/minion.py +++ b/salt/utils/minion.py @@ -136,7 +136,6 @@ def _check_cmdline(data): return False try: with salt.utils.files.fopen(path, "rb") as fp_: - if b"salt" in fp_.read(): - return True + return b"salt" in fp_.read() except OSError: return False