Don't unnecessarily download remote sources to cache

(cherry picked from commit 109dd48389)
This commit is contained in:
jeanluc 2024-04-10 17:56:24 +02:00 committed by Pedro Algarvio
parent fe29d3c167
commit e56ad175e8
3 changed files with 61 additions and 3 deletions

1
changelog/66342.fixed.md Normal file
View file

@ -0,0 +1 @@
Made `file.managed` skip download of a remote source if the managed file already exists with the correct hash

View file

@ -2480,8 +2480,12 @@ def managed(
Set to ``False`` to discard the cached copy of the source file once the
state completes. This can be useful for larger files to keep them from
taking up space in minion cache. However, keep in mind that discarding
the source file will result in the state needing to re-download the
source file if the state is run again.
the source file might result in the state needing to re-download the
source file if the state is run again. If the source is not a local or
``salt://`` one, the source hash is known, ``skip_verify`` is not true
and the managed file exists with the correct hash and is not templated,
this is not the case (i.e. remote downloads are avoided if the local hash
matches the expected one).
.. versionadded:: 2017.7.3
@ -3123,6 +3127,59 @@ def managed(
if defaults and not isinstance(defaults, dict):
return _error(ret, "Defaults must be formed as a dict")
# If we're pulling from a remote source untemplated and we have a source hash,
# check early if the local file exists with the correct hash and skip
# managing contents if so. This avoids a lot of overhead.
if (
contents is None
and not template
and source
and not skip_verify
and os.path.exists(name)
and replace
):
try:
# If the source is a list, find the first existing file.
# We're doing this after basic checks to not slow down
# runs where it does not matter.
source, source_hash = __salt__["file.source_list"](
source, source_hash, __env__
)
source_sum = None
if (
source
and source_hash
and urllib.parse.urlparse(source).scheme
not in (
"salt",
"file",
)
and not os.path.isabs(source)
):
source_sum = __salt__["file.get_source_sum"](
name,
source,
source_hash,
source_hash_name,
__env__,
verify_ssl=verify_ssl,
source_hash_sig=source_hash_sig,
signed_by_any=signed_by_any,
signed_by_all=signed_by_all,
keyring=keyring,
gnupghome=gnupghome,
)
hsum = __salt__["file.get_hash"](name, source_sum["hash_type"])
except (CommandExecutionError, OSError) as err:
log.error(
"Failed checking existing file's hash against specified source_hash: %s",
err,
exc_info_on_loglevel=logging.DEBUG,
)
else:
if source_sum and source_sum["hsum"] == hsum:
replace = False
if not replace and os.path.exists(name):
ret_perms = {}
# Check and set the permissions if necessary

View file

@ -830,7 +830,7 @@ def test_file_managed_remote_source_does_not_refetch_existing_file_with_correct_
Issue #64373
"""
name = tmp_path / "scene33"
name.write_text(grail_scene33_file.read_text())
name.write_bytes(grail_scene33_file.read_bytes())
ret = file.managed(
str(name),
source="http://127.0.0.1:1337/does/not/exist",