mirror of
https://github.com/saltstack/salt.git
synced 2025-04-17 10:10:20 +00:00
Merge pull request #61356 from cmcmarrow/fix_job_hard_cache_corruption
avoids crash on cache corruption
This commit is contained in:
commit
810945d5be
3 changed files with 36 additions and 6 deletions
1
changelog/60170.fixed
Normal file
1
changelog/60170.fixed
Normal file
|
@ -0,0 +1 @@
|
|||
Fixed cache directory corruption startup error
|
|
@ -392,6 +392,20 @@ def get_jids_filter(count, filter_find_job=True):
|
|||
return ret
|
||||
|
||||
|
||||
def _remove_job_dir(job_path):
    """
    Try to remove a job directory.

    In rare cases the removal fails, for example with ``NotADirectoryError``
    when the cache node is corrupted. Such failures are logged and reported
    through the return value instead of being raised.

    :param job_path: Path to job
    :return: ``True`` if the directory was removed, ``False`` if removal failed
    """
    try:
        shutil.rmtree(job_path)
    except OSError as err:
        # NotADirectoryError is a subclass of OSError, so catching OSError
        # alone also covers the corruption case described above.
        log.error("Unable to remove %s: %s", job_path, err)
        return False
    return True
|
||||
|
||||
|
||||
def clean_old_jobs():
|
||||
"""
|
||||
Clean out the old jobs from the job cache
|
||||
|
@ -423,7 +437,7 @@ def clean_old_jobs():
|
|||
if not os.path.isfile(jid_file) and os.path.exists(f_path):
|
||||
# No jid file means corrupted cache entry, scrub it
|
||||
# by removing the entire f_path directory
|
||||
shutil.rmtree(f_path)
|
||||
_remove_job_dir(f_path)
|
||||
elif os.path.isfile(jid_file):
|
||||
jid_ctime = os.stat(jid_file).st_ctime
|
||||
hours_difference = (time.time() - jid_ctime) / 3600.0
|
||||
|
@ -431,10 +445,7 @@ def clean_old_jobs():
|
|||
t_path
|
||||
):
|
||||
# Remove the entire f_path from the original JID dir
|
||||
try:
|
||||
shutil.rmtree(f_path)
|
||||
except OSError as err:
|
||||
log.error("Unable to remove %s: %s", f_path, err)
|
||||
_remove_job_dir(f_path)
|
||||
|
||||
# Remove empty JID dirs from job cache, if they're old enough.
|
||||
# JID dirs may be empty either from a previous cache-clean with the bug
|
||||
|
@ -447,7 +458,7 @@ def clean_old_jobs():
|
|||
t_path_ctime = os.stat(t_path).st_ctime
|
||||
hours_difference = (time.time() - t_path_ctime) / 3600.0
|
||||
if hours_difference > __opts__["keep_jobs"]:
|
||||
shutil.rmtree(t_path)
|
||||
_remove_job_dir(t_path)
|
||||
|
||||
|
||||
def update_endtime(jid, time):
|
||||
|
|
18
tests/pytests/unit/returners/test_local_cache.py
Normal file
18
tests/pytests/unit/returners/test_local_cache.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
import pytest
|
||||
|
||||
from salt.returners.local_cache import _remove_job_dir
|
||||
from tests.support.mock import patch
|
||||
|
||||
|
||||
@pytest.mark.parametrize("e", (NotADirectoryError, OSError))
def test_remove_job_dir(e):
    """
    Check which errors from ``shutil.rmtree`` are handled by ``_remove_job_dir``.

    :param e: Exception class raised by the patched ``shutil.rmtree``
    """
    # Test that _remove_job_dir will catch OSError and its subclasses
    # and report the failure by returning a falsy value
    with patch("shutil.rmtree", side_effect=e("Node Corruption!")):
        assert not _remove_job_dir("cache")

    # Test that _remove_job_dir will not catch other errors.
    # The exception used here must not derive from OSError: FileExistsError,
    # for instance, is an OSError subclass and would be caught.
    with patch("shutil.rmtree", side_effect=ValueError("Unexpected error")):
        with pytest.raises(ValueError):
            _remove_job_dir("cache")
|
Loading…
Add table
Reference in a new issue