fix incorrect hash type result in s3fs

The s3fs backend is currently computing the incorrect hash type. The
default hashing algorithm was changed from md5 to sha256, but the s3
backend calls the hashing function without specifying the hash algorithm
and then hardcodes the result's hash type to md5. After the change to
sha256, it was therefore computing a sha256 sum while labeling it as md5.
In turn, the hash of the existing target file would be computed with one
algorithm and compared against the cached file's sum from the other, so
the two digests would obviously never match.

This causes downstream effects, such as test mode always reporting a
change, even when there isn't one.
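
To make the failure mode concrete, here is a minimal sketch of the mismatch. It uses hashlib stand-ins rather than Salt's actual helpers: get_hash below is a simplified stand-in for salt.utils.hashutils.get_hash, and the content value is made up.

import hashlib

DEFAULT_ALGO = "sha256"  # the default hash type changed from "md5" to this

def get_hash(data: bytes, algo: str = DEFAULT_ALGO) -> str:
    # Simplified stand-in for salt.utils.hashutils.get_hash.
    return hashlib.new(algo, data).hexdigest()

content = b"identical file contents on both sides"

# The buggy pattern: hash with the (new) default algorithm, but report md5.
ret = {"hsum": get_hash(content), "hash_type": "md5"}

# The caller then hashes the other copy with the reported type and compares,
# so even identical contents never match: a 32-char md5 vs a 64-char sha256.
other_sum = get_hash(content, ret["hash_type"])
print(other_sum == ret["hsum"])  # False -> test mode always reports a change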

Fixes #65589
Author: Skyler Hawthorne, 2023-11-20 19:00:50 -05:00 (committed by Pedro Algarvio)
Parent: f25cef6a6c
Commit: 3703fc5291
3 changed files with 121 additions and 8 deletions

changelog/65589.fixed.md (new file)

@@ -0,0 +1 @@
+Fixes the s3fs backend computing the local cache's files with the wrong hash type

salt/fileserver/s3fs.py

@@ -104,6 +104,8 @@ import salt.utils.versions
 log = logging.getLogger(__name__)

+S3_HASH_TYPE = "md5"
+

 def envs():
     """
@@ -189,7 +191,7 @@ def find_file(path, saltenv="base", **kwargs):
 def file_hash(load, fnd):
     """
-    Return an MD5 file hash
+    Return the hash of an object's cached copy
     """
     if "env" in load:
         # "env" is not supported; Use "saltenv".
@@ -208,8 +210,8 @@ def file_hash(load, fnd):
     )
     if os.path.isfile(cached_file_path):
-        ret["hsum"] = salt.utils.hashutils.get_hash(cached_file_path)
-        ret["hash_type"] = "md5"
+        ret["hash_type"] = S3_HASH_TYPE
+        ret["hsum"] = salt.utils.hashutils.get_hash(cached_file_path, S3_HASH_TYPE)

     return ret
@@ -716,7 +718,9 @@ def _get_file_from_s3(metadata, saltenv, bucket_name, path, cached_file_path):
             if file_etag.find("-") == -1:
                 file_md5 = file_etag
-                cached_md5 = salt.utils.hashutils.get_hash(cached_file_path, "md5")
+                cached_md5 = salt.utils.hashutils.get_hash(
+                    cached_file_path, S3_HASH_TYPE
+                )

                 # hashes match we have a cache hit
                 if cached_md5 == file_md5:
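
For context on why the hash type stays md5 here: S3 reports an object's ETag as the MD5 digest of its body when the object was uploaded in a single part (multipart ETags contain a "-"), which is presumably why S3_HASH_TYPE remains pinned to md5 rather than following the new sha256 default. A minimal illustration of that cache-hit check, using hashlib rather than Salt's helpers (the body value is made up):

import hashlib

# For single-part uploads (no "-" in the ETag), S3's ETag is the MD5 of the
# object body, so it can be compared directly to an md5 of the cached copy.
body = b"server {}"
s3_etag = hashlib.md5(body).hexdigest()     # what S3 would report (minus quotes)
cached_md5 = hashlib.md5(body).hexdigest()  # hash of the local cached file
print(s3_etag == cached_md5)                # True -> cache hit, skip the download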

tests/pytests/unit/fileserver/test_s3fs.py

@@ -1,19 +1,127 @@
+import os
+
+import boto3
 import pytest
+import yaml
+
+# moto must be imported before boto3
+from moto import mock_s3

 import salt.fileserver.s3fs as s3fs
+import salt.utils.s3


 @pytest.fixture
-def configure_loader_modules(tmp_path):
+def bucket():
+    with mock_s3():
+        yield "mybucket"
+
+
+@pytest.fixture(scope="module")
+def aws_creds():
+    return {
+        "aws_access_key_id": "testing",
+        "aws_secret_access_key": "testing",
+        "aws_session_token": "testing",
+        "region_name": "us-east-1",
+    }
+
+
+@pytest.fixture(scope="function")
+def configure_loader_modules(tmp_path, bucket):
     opts = {
         "cachedir": tmp_path,
+        "s3.buckets": {"base": [bucket]},
+        "s3.location": "us-east-1",
+        "s3.s3_cache_expire": -1,
     }
-    return {s3fs: {"__opts__": opts}}
+    utils = {"s3.query": salt.utils.s3.query}
+
+    yield {s3fs: {"__opts__": opts, "__utils__": utils}}


-def test_cache_round_trip():
+@pytest.fixture(scope="function")
+def s3(bucket, aws_creds):
+    conn = boto3.client("s3", **aws_creds)
+    conn.create_bucket(Bucket=bucket)
+    return conn
+
+
+def make_keys(bucket, conn, keys):
+    for key, data in keys.items():
+        conn.put_object(
+            Bucket=bucket,
+            Key=key,
+            Body=data["content"],
+        )
+
+
+def verify_cache(bucket, expected):
+    for key, data in expected.items():
+        correct_content = data["content"]
+        cache_file = s3fs._get_cached_file_name(bucket, "base", key)
+        assert os.path.exists(cache_file)
+        if correct_content is None:
+            continue
+
+        with salt.utils.files.fopen(cache_file) as f:
+            content = f.read()
+            assert correct_content == content
+
+
+def test_update(bucket, s3):
+    """Tests that files get downloaded from s3 to the local cache."""
+    keys = {
+        "top.sls": {"content": yaml.dump({"base": {"*": ["foo"]}})},
+        "foo.sls": {"content": yaml.dump({"nginx": {"pkg.installed": []}})},
+        "files/nginx.conf": {"content": "server {}"},
+    }
+
+    make_keys(bucket, s3, keys)
+    s3fs.update()
+    verify_cache(bucket, keys)
+
+    # make a modification and update again - verify the change is retrieved
+    keys["top.sls"]["content"] = yaml.dump({"base": {"*": ["foo", "bar"]}})
+    make_keys(bucket, s3, keys)
+    s3fs.update()
+    verify_cache(bucket, keys)
+
+
+def test_s3_hash(bucket, s3):
+    """Verifies that s3fs hashes files correctly."""
+    keys = {
+        "top.sls": {"content": yaml.dump({"base": {"*": ["foo"]}})},
+        "foo.sls": {"content": yaml.dump({"nginx": {"pkg.installed": []}})},
+        "files/nginx.conf": {"content": "server {}"},
+    }
+
+    make_keys(bucket, s3, keys)
+    s3fs.update()
+
+    for key, item in keys.items():
+        cached_file_path = s3fs._get_cached_file_name(bucket, "base", key)
+        item["hash"] = salt.utils.hashutils.get_hash(
+            cached_file_path, s3fs.S3_HASH_TYPE
+        )
+        item["cached_file_path"] = cached_file_path
+
+    load = {"saltenv": "base"}
+    fnd = {"bucket": bucket}
+
+    for key, item in keys.items():
+        fnd["path"] = item["cached_file_path"]
+        actual_hash = s3fs.file_hash(load, fnd)
+        assert s3fs.S3_HASH_TYPE == actual_hash["hash_type"]
+        assert item["hash"] == actual_hash["hsum"]
+
+
+def test_cache_round_trip(bucket):
     metadata = {"foo": "bar"}
-    cache_file = s3fs._get_cached_file_name("base", "fake_bucket", "some_file")
+    cache_file = s3fs._get_cached_file_name(bucket, "base", "somefile")

     s3fs._write_buckets_cache_file(metadata, cache_file)
     assert s3fs._read_buckets_cache_file(cache_file) == metadata