mirror of
https://github.com/saltstack/salt.git
synced 2025-04-16 01:30:20 +00:00
Fix for GitFS failure to unlock lock file, and resource cleanup for process SIGTERM, tests to be done
This commit is contained in:
parent
1290f93d06
commit
d58a6897e6
3 changed files with 89 additions and 12 deletions
1
changelog/65816.fixed.md
Normal file
1
changelog/65816.fixed.md
Normal file
|
@ -0,0 +1 @@
|
|||
Fix for GitFS failure to unlock lock file, and resource cleanup for process SIGTERM
|
|
@ -247,9 +247,11 @@ class GitProvider:
|
|||
|
||||
def _val_cb(x, y):
|
||||
return str(y)
|
||||
|
||||
# DGM try getting machine_identifier
|
||||
# get machine_identifier
|
||||
self.mach_id = salt.utils.platform.get_machine_identifier()
|
||||
log.debug(f"DGM getting machine_id for lock file, machine_id '{self.mach_id}'")
|
||||
log.debug(f"machine_id for lock file, machine_id '{self.mach_id}'")
|
||||
|
||||
self.global_saltenv = salt.utils.data.repack_dictlist(
|
||||
self.opts.get(f"{self.role}_saltenv", []),
|
||||
|
@ -754,11 +756,8 @@ class GitProvider:
|
|||
except OSError as exc:
|
||||
if exc.errno == errno.ENOENT:
|
||||
# No lock file present
|
||||
msg = "Attempt to remove lock {} for file ({}) which was not found to exist : {} ".format(
|
||||
self.url, lock_file, exc
|
||||
)
|
||||
msg = f"Attempt to remove lock {self.url} for file ({lock_file}) which was not found to exist, exception : {exc} "
|
||||
log.debug(msg)
|
||||
# DGM pass
|
||||
|
||||
elif exc.errno == errno.EISDIR:
|
||||
# Somehow this path is a directory. Should never happen
|
||||
|
@ -1101,20 +1100,21 @@ class GitProvider:
|
|||
if poll_interval > timeout:
|
||||
poll_interval = timeout
|
||||
|
||||
lock_set = False
|
||||
lock_set1 = False
|
||||
lock_set2 = False
|
||||
try:
|
||||
time_start = time.time()
|
||||
while True:
|
||||
try:
|
||||
self._lock(lock_type=lock_type, failhard=True)
|
||||
lock_set = True
|
||||
lock_set1 = True
|
||||
# docs state need to yield a single value, lock_set will do
|
||||
yield lock_set
|
||||
yield lock_set1
|
||||
|
||||
# Break out of his loop once we've yielded the lock, to
|
||||
# avoid continued attempts to iterate and establish lock
|
||||
# just ensuring lock_set is true (belts and braces)
|
||||
lock_set = True
|
||||
lock_set2 = True
|
||||
break
|
||||
|
||||
except (OSError, GitLockError) as exc:
|
||||
|
@ -1132,7 +1132,9 @@ class GitProvider:
|
|||
time.sleep(poll_interval)
|
||||
continue
|
||||
finally:
|
||||
if lock_set:
|
||||
if lock_set1 or lock_set2:
|
||||
msg = f"Attempting to remove '{lock_type}' lock for '{self.role}' remote '{self.id}' due to lock_set1 '{lock_set1}' or lock_set2 '{lock_set2}'"
|
||||
log.debug(msg)
|
||||
self.clear_lock(lock_type=lock_type)
|
||||
|
||||
def init_remote(self):
|
||||
|
|
|
@ -14,12 +14,14 @@ import multiprocessing
|
|||
import multiprocessing.util
|
||||
import os
|
||||
import queue
|
||||
import shutil
|
||||
import signal
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import salt._logging
|
||||
import salt.defaults.exitcodes
|
||||
|
@ -1071,7 +1073,9 @@ class SignalHandlingProcess(Process):
|
|||
log.debug(msg)
|
||||
|
||||
mach_id = salt.utils.platform.get_machine_identifier()
|
||||
log.debug(f"DGM exiting for machine identifer '{mach_id}'")
|
||||
log.debug(
|
||||
f"exiting for process id '{os.getpid()}' and machine identifer '{mach_id}'"
|
||||
)
|
||||
|
||||
if HAS_PSUTIL:
|
||||
try:
|
||||
|
@ -1090,8 +1094,78 @@ class SignalHandlingProcess(Process):
|
|||
os.getpid(),
|
||||
)
|
||||
|
||||
# DGM need to go through and clean up any resources left around like lock files
|
||||
# need to go through and clean up any resources left around like lock files if using gitfs
|
||||
# example lockfile /var/cache/salt/master/gitfs/work/NlJQs6Pss_07AugikCrmqfmqEFrfPbCDBqGLBiCd3oU=/_/update.lk
|
||||
gitfs_active = self.opts.get("gitfs_remotes", None)
|
||||
if gitfs_active:
|
||||
# check for gitfs file locks to ensure no resource leaks
|
||||
# last chance to clean up any missed unlock droppings
|
||||
cache_dir = Path("/var/cache/salt/master/gitfs/work")
|
||||
if cache_dir.exists and cache_dir.is_dir():
|
||||
file_list = list(cache_dir.glob("**/*.lk"))
|
||||
file_del_list = []
|
||||
|
||||
try:
|
||||
for file_name in file_list:
|
||||
with salt.utils.files.fopen(file_name, "r") as fd_:
|
||||
try:
|
||||
file_pid = int(
|
||||
salt.utils.stringutils.to_unicode(
|
||||
fd_.readline()
|
||||
).rstrip()
|
||||
)
|
||||
except ValueError:
|
||||
# Lock file is empty, set pid to 0 so it evaluates as False.
|
||||
file_pid = 0
|
||||
try:
|
||||
file_mach_id = int(
|
||||
salt.utils.stringutils.to_unicode(
|
||||
fd_.readline()
|
||||
).rstrip()
|
||||
)
|
||||
except ValueError:
|
||||
# Lock file is empty, set mach_id to 0 so it evaluates as False.
|
||||
file_mach_id = 0
|
||||
if cur_pid == file_pid:
|
||||
if mach_id != file_mach_id:
|
||||
if not file_mach_id:
|
||||
msg = f"gitfs lock file for pid '{file_pid}' does not contain a machine id, deleting lock file which may affect if using multi-master with shared gitfs cache, the lock may have been obtained by another master recommend updating Salt version on other masters to a version which insert machine identification in lock a file."
|
||||
log.debug(msg)
|
||||
file_del_list.append(
|
||||
(file_name, file_pid, file_mach_id)
|
||||
)
|
||||
else:
|
||||
file_del_list.append(
|
||||
(file_name, file_pid, file_mach_id)
|
||||
)
|
||||
|
||||
except FileNotFoundError:
|
||||
log.debug("gitfs lock file: %s not found", file_name)
|
||||
|
||||
for (file_name, file_pid, file_mach_id) in file_del_list:
|
||||
try:
|
||||
os.remove(file_name)
|
||||
except OSError as exc:
|
||||
if exc.errno == errno.ENOENT:
|
||||
# No lock file present
|
||||
msg = f"SIGTERM clean up of resources attempted to remove lock file {file_name}, pid '{file_pid}', machine identifier '{mach_id}' but it did not exist, exception : {exc} "
|
||||
log.debug(msg)
|
||||
|
||||
elif exc.errno == errno.EISDIR:
|
||||
# Somehow this path is a directory. Should never happen
|
||||
# unless some wiseguy manually creates a directory at this
|
||||
# path, but just in case, handle it.
|
||||
try:
|
||||
shutil.rmtree(file_name)
|
||||
except OSError as exc:
|
||||
msg = f"SIGTERM clean up of resources, lock file '{file_name}', pid '{file_pid}', machine identifier '{file_mach_id}' was a directory, removed directory, exception : '{exc}'"
|
||||
log.debug(msg)
|
||||
else:
|
||||
msg = f"SIGTERM clean up of resources, unable to remove lock file '{file_name}', pid '{file_pid}', machine identifier '{file_mach_id}', exception : '{exc}'"
|
||||
log.debug(msg)
|
||||
else:
|
||||
msg = f"SIGTERM clean up of resources, removed lock file '{file_name}', pid '{file_pid}', machine identifier '{file_mach_id}'"
|
||||
log.debug(msg)
|
||||
|
||||
except psutil.NoSuchProcess:
|
||||
log.warning(
|
||||
|
|
Loading…
Add table
Reference in a new issue