Refactor after review

This commit is contained in:
David Murphy 2024-05-16 16:35:38 -06:00 committed by Daniel Wozniak
parent af5a485793
commit 81e39a3042
2 changed files with 229 additions and 104 deletions

View file

@ -13,6 +13,7 @@ import io
import logging import logging
import multiprocessing import multiprocessing
import os import os
import pathlib
import shlex import shlex
import shutil import shutil
import stat import stat
@ -82,6 +83,14 @@ _INVALID_REPO = (
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
HAS_PSUTIL = False
try:
import psutil
HAS_PSUTIL = True
except ImportError:
pass
# pylint: disable=import-error # pylint: disable=import-error
try: try:
if ( if (
@ -516,6 +525,18 @@ class GitProvider:
os.makedirs(self._salt_working_dir) os.makedirs(self._salt_working_dir)
self.fetch_request_check() self.fetch_request_check()
if HAS_PSUTIL:
cur_pid = os.getpid()
process = psutil.Process(cur_pid)
if isinstance(process, salt.utils.process.Process):
cache_dir = self.opts.get("cachedir", None)
gitfs_active = self.opts.get("gitfs_remotes", None)
if cache_dir and gitfs_active:
log.warning(
"DGM class GitProvider registering gitfs_zombie_cleanup"
)
process.register_finalize_method(gitfs_zombie_cleanup, cache_dir)
def get_cache_basehash(self): def get_cache_basehash(self):
return self._cache_basehash return self._cache_basehash
@ -3631,3 +3652,104 @@ class WinRepo(GitBase):
cachedir = self.do_checkout(repo, fetch_on_fail=fetch_on_fail) cachedir = self.do_checkout(repo, fetch_on_fail=fetch_on_fail)
if cachedir is not None: if cachedir is not None:
self.winrepo_dirs[repo.id] = cachedir self.winrepo_dirs[repo.id] = cachedir
def gitfs_zombie_cleanup(cache_dir):
    """
    Remove stale gitfs lock files owned by the exiting process.

    Registered as a process finalize method so that a process which used
    gitfs gets a last chance, on exit, to clean up lock files that a
    missed unlock left behind, for example::

        /var/cache/salt/master/gitfs/work/NlJQs6Pss_07AugikCrmqfmqEFrfPbCDBqGLBiCd3oU=/_/update.lk

    Only locks whose recorded pid matches the current process are removed;
    a lock carrying a different machine identifier is assumed to belong to
    another master sharing the gitfs cache and is left alone.

    :param str cache_dir: the Salt ``cachedir`` option; lock files are
        searched for under ``<cache_dir>/gitfs/work``.
    """
    cur_pid = os.getpid()
    mach_id = _get_machine_identifier().get("machine_id", "no_machine_id_available")
    log.debug("exiting for process id %s and machine identifier %s", cur_pid, mach_id)

    work_dir = pathlib.Path(cache_dir) / "gitfs" / "work"
    # NOTE: exists() must be *called* -- the bare bound method is always truthy.
    if work_dir.exists() and work_dir.is_dir():
        file_list = list(work_dir.glob("**/*.lk"))
        file_del_list = []
        file_pid = 0
        file_mach_id = 0
        try:
            for file_name in file_list:
                with salt.utils.files.fopen(file_name, "r") as fd_:
                    try:
                        # First line of a lock file is the owning pid.
                        file_pid = int(
                            salt.utils.stringutils.to_unicode(fd_.readline()).rstrip()
                        )
                    except ValueError:
                        # Lock file is empty, set pid to 0 so it evaluates as False.
                        file_pid = 0
                    try:
                        # Second line (when present) is the owning machine id.
                        file_mach_id = salt.utils.stringutils.to_unicode(
                            fd_.readline()
                        ).rstrip()
                    except ValueError:
                        # Lock file is empty, set mach_id to 0 so it evaluates False.
                        file_mach_id = 0
                if cur_pid == file_pid:
                    if mach_id != file_mach_id:
                        if not file_mach_id:
                            # Lock written by an older Salt that recorded no
                            # machine id -- delete it, but warn about the
                            # multi-master shared-cache ambiguity.
                            msg = (
                                f"gitfs lock file for pid '{file_pid}' does not "
                                "contain a machine id, deleting lock file which may "
                                "affect if using multi-master with shared gitfs cache, "
                                "the lock may have been obtained by another master "
                                "recommend updating Salt version on other masters to a "
                                "version which inserts machine identification in a lock file."
                            )
                            log.debug(msg)
                            file_del_list.append((file_name, file_pid, file_mach_id))
                    else:
                        file_del_list.append((file_name, file_pid, file_mach_id))
        except FileNotFoundError:
            # Lock file vanished between glob() and open(); nothing to do.
            log.debug("gitfs lock file: %s not found", file_name)
        for file_name, file_pid, file_mach_id in file_del_list:
            try:
                os.remove(file_name)
            except OSError as exc:
                if exc.errno == errno.ENOENT:
                    # No lock file present
                    msg = (
                        "SIGTERM clean up of resources attempted to remove lock "
                        f"file {file_name}, pid '{file_pid}', machine identifier "
                        f"'{mach_id}' but it did not exist, exception : {exc} "
                    )
                    log.debug(msg)
                elif exc.errno == errno.EISDIR:
                    # Somehow this path is a directory. Should never happen
                    # unless some wiseguy manually creates a directory at this
                    # path, but just in case, handle it.
                    try:
                        shutil.rmtree(file_name)
                    except OSError as rm_exc:  # distinct name: don't shadow exc
                        msg = (
                            f"SIGTERM clean up of resources, lock file '{file_name}'"
                            f", pid '{file_pid}', machine identifier '{file_mach_id}' "
                            f"was a directory, removed directory, exception : '{rm_exc}'"
                        )
                        log.debug(msg)
                else:
                    msg = (
                        "SIGTERM clean up of resources, unable to remove lock file "
                        f"'{file_name}', pid '{file_pid}', machine identifier "
                        f"'{file_mach_id}', exception : '{exc}'"
                    )
                    log.debug(msg)
            else:
                msg = (
                    "SIGTERM clean up of resources, removed lock file "
                    f"'{file_name}', pid '{file_pid}', machine identifier "
                    f"'{file_mach_id}'"
                )
                log.debug(msg)

View file

@ -13,9 +13,7 @@ import logging
import multiprocessing import multiprocessing
import multiprocessing.util import multiprocessing.util
import os import os
import pathlib
import queue import queue
import shutil
import signal import signal
import socket import socket
import subprocess import subprocess
@ -32,6 +30,10 @@ import salt.utils.versions
from salt.ext.tornado import gen from salt.ext.tornado import gen
from salt.utils.platform import get_machine_identifier as _get_machine_identifier from salt.utils.platform import get_machine_identifier as _get_machine_identifier
## DGM import pathlib
## DGM import shutil
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
HAS_PSUTIL = False HAS_PSUTIL = False
@ -210,7 +212,7 @@ def get_process_info(pid=None):
# pid_exists can have false positives # pid_exists can have false positives
# for example Windows reserves PID 5 in a hack way # for example Windows reserves PID 5 in a hack way
# another reasons is the the process requires kernel permissions # another reasons is the process requires kernel permissions
try: try:
raw_process_info.status() raw_process_info.status()
except psutil.NoSuchProcess: except psutil.NoSuchProcess:
@ -1076,15 +1078,16 @@ class SignalHandlingProcess(Process):
msg += ". Exiting" msg += ". Exiting"
log.debug(msg) log.debug(msg)
mach_id = _get_machine_identifier().get("machine_id", "no_machine_id_available") ## DGM mach_id = _get_machine_identifier().get("machine_id", "no_machine_id_available")
log.debug( ## DGM log.debug(
"exiting for process id %s and machine identifer %s", os.getpid(), mach_id ## DGM "exiting for process id %s and machine identifer %s", os.getpid(), mach_id
) ## DGM )
## DGM
## DGM cur_pid = os.getpid()
cur_pid = os.getpid()
if HAS_PSUTIL: if HAS_PSUTIL:
try: try:
process = psutil.Process(cur_pid) process = psutil.Process(os.getpid())
if hasattr(process, "children"): if hasattr(process, "children"):
for child in process.children(recursive=True): for child in process.children(recursive=True):
try: try:
@ -1098,105 +1101,105 @@ class SignalHandlingProcess(Process):
os.getpid(), os.getpid(),
) )
# need to clean up any resources left around like lock files if using gitfs ## DGM # need to clean up any resources left around like lock files if using gitfs
# example: lockfile i ## DGM # example: lockfile i
# /var/cache/salt/master/gitfs/work/NlJQs6Pss_07AugikCrmqfmqEFrfPbCDBqGLBiCd3oU=/_/update.lk ## DGM # /var/cache/salt/master/gitfs/work/NlJQs6Pss_07AugikCrmqfmqEFrfPbCDBqGLBiCd3oU=/_/update.lk
cache_dir = self.opts.get("cachedir", None) ## DGM cache_dir = self.opts.get("cachedir", None)
gitfs_active = self.opts.get("gitfs_remotes", None) ## DGM gitfs_active = self.opts.get("gitfs_remotes", None)
if cache_dir and gitfs_active: ## DGM if cache_dir and gitfs_active:
# check for gitfs file locks to ensure no resource leaks ## DGM # check for gitfs file locks to ensure no resource leaks
# last chance to clean up any missed unlock droppings ## DGM # last chance to clean up any missed unlock droppings
cache_dir = pathlib.Path(cache_dir + "/gitfs/work") ## DGM cache_dir = pathlib.Path(cache_dir + "/gitfs/work")
if cache_dir.exists and cache_dir.is_dir(): ## DGM if cache_dir.exists and cache_dir.is_dir():
file_list = list(cache_dir.glob("**/*.lk")) ## DGM file_list = list(cache_dir.glob("**/*.lk"))
file_del_list = [] ## DGM file_del_list = []
file_pid = 0 ## DGM file_pid = 0
file_mach_id = 0 ## DGM file_mach_id = 0
try: ## DGM try:
for file_name in file_list: ## DGM for file_name in file_list:
with salt.utils.files.fopen(file_name, "r") as fd_: ## DGM with salt.utils.files.fopen(file_name, "r") as fd_:
try: ## DGM try:
file_pid = int( ## DGM file_pid = int(
salt.utils.stringutils.to_unicode( ## DGM salt.utils.stringutils.to_unicode(
fd_.readline() ## DGM fd_.readline()
).rstrip() ## DGM ).rstrip()
) ## DGM )
except ValueError: ## DGM except ValueError:
# Lock file is empty, set pid to 0 so it evaluates as False. ## DGM # Lock file is empty, set pid to 0 so it evaluates as False.
file_pid = 0 ## DGM file_pid = 0
try: ## DGM try:
file_mach_id = ( ## DGM file_mach_id = (
salt.utils.stringutils.to_unicode( ## DGM salt.utils.stringutils.to_unicode(
fd_.readline() ## DGM fd_.readline()
).rstrip() ## DGM ).rstrip()
) ## DGM )
except ValueError: ## DGM except ValueError:
# Lock file is empty, set mach_id to 0 so it evaluates False. ## DGM # Lock file is empty, set mach_id to 0 so it evaluates False.
file_mach_id = 0 ## DGM file_mach_id = 0
if cur_pid == file_pid: ## DGM if cur_pid == file_pid:
if mach_id != file_mach_id: ## DGM if mach_id != file_mach_id:
if not file_mach_id: ## DGM if not file_mach_id:
msg = ( ## DGM msg = (
f"gitfs lock file for pid '{file_pid}' does not " ## DGM f"gitfs lock file for pid '{file_pid}' does not "
"contain a machine id, deleting lock file which may " ## DGM "contain a machine id, deleting lock file which may "
"affect if using multi-master with shared gitfs cache, " ## DGM "affect if using multi-master with shared gitfs cache, "
"the lock may have been obtained by another master " ## DGM "the lock may have been obtained by another master "
"recommend updating Salt version on other masters to a " ## DGM "recommend updating Salt version on other masters to a "
"version which insert machine identification in lock a file." ## DGM "version which insert machine identification in lock a file."
) ## DGM )
log.debug(msg) ## DGM log.debug(msg)
file_del_list.append( ## DGM file_del_list.append(
(file_name, file_pid, file_mach_id) ## DGM (file_name, file_pid, file_mach_id)
) ## DGM )
else: ## DGM else:
file_del_list.append( ## DGM file_del_list.append(
(file_name, file_pid, file_mach_id) ## DGM (file_name, file_pid, file_mach_id)
) ## DGM )
except FileNotFoundError: ## DGM except FileNotFoundError:
log.debug("gitfs lock file: %s not found", file_name) ## DGM log.debug("gitfs lock file: %s not found", file_name)
for file_name, file_pid, file_mach_id in file_del_list: ## DGM for file_name, file_pid, file_mach_id in file_del_list:
try: ## DGM try:
os.remove(file_name) ## DGM os.remove(file_name)
except OSError as exc: ## DGM except OSError as exc:
if exc.errno == errno.ENOENT: ## DGM if exc.errno == errno.ENOENT:
# No lock file present ## DGM # No lock file present
msg = ( ## DGM msg = (
"SIGTERM clean up of resources attempted to remove lock " ## DGM "SIGTERM clean up of resources attempted to remove lock "
f"file {file_name}, pid '{file_pid}', machine identifier " ## DGM f"file {file_name}, pid '{file_pid}', machine identifier "
f"'{mach_id}' but it did not exist, exception : {exc} " ## DGM f"'{mach_id}' but it did not exist, exception : {exc} "
) ## DGM )
log.debug(msg) ## DGM log.debug(msg)
elif exc.errno == errno.EISDIR: ## DGM elif exc.errno == errno.EISDIR:
# Somehow this path is a directory. Should never happen ## DGM # Somehow this path is a directory. Should never happen
# unless some wiseguy manually creates a directory at this ## DGM # unless some wiseguy manually creates a directory at this
# path, but just in case, handle it. ## DGM # path, but just in case, handle it.
try: ## DGM try:
shutil.rmtree(file_name) ## DGM shutil.rmtree(file_name)
except OSError as exc: ## DGM except OSError as exc:
msg = ( ## DGM msg = (
f"SIGTERM clean up of resources, lock file '{file_name}'" ## DGM f"SIGTERM clean up of resources, lock file '{file_name}'"
f", pid '{file_pid}', machine identifier '{file_mach_id}'" ## DGM f", pid '{file_pid}', machine identifier '{file_mach_id}'"
f"was a directory, removed directory, exception : '{exc}'" ## DGM f"was a directory, removed directory, exception : '{exc}'"
) ## DGM )
log.debug(msg) ## DGM log.debug(msg)
else: ## DGM else:
msg = ( ## DGM msg = (
"SIGTERM clean up of resources, unable to remove lock file " ## DGM "SIGTERM clean up of resources, unable to remove lock file "
f"'{file_name}', pid '{file_pid}', machine identifier " ## DGM f"'{file_name}', pid '{file_pid}', machine identifier "
f"'{file_mach_id}', exception : '{exc}'" ## DGM f"'{file_mach_id}', exception : '{exc}'"
) ## DGM )
log.debug(msg) ## DGM log.debug(msg)
else: ## DGM else:
msg = ( ## DGM msg = (
"SIGTERM clean up of resources, removed lock file " ## DGM "SIGTERM clean up of resources, removed lock file "
f"'{file_name}', pid '{file_pid}', machine identifier " ## DGM f"'{file_name}', pid '{file_pid}', machine identifier "
f"'{file_mach_id}'" ## DGM f"'{file_mach_id}'"
) ## DGM )
log.debug(msg) ## DGM log.debug(msg)
except psutil.NoSuchProcess: except psutil.NoSuchProcess:
log.warning( log.warning(