From 3703fc5291909fdbaf2eed232bc2e5fd17a71578 Mon Sep 17 00:00:00 2001
From: Skyler Hawthorne
Date: Mon, 20 Nov 2023 19:00:50 -0500
Subject: [PATCH] fix incorrect hash type result in s3fs

The s3fs backend currently computes file hashes with the wrong hash
type. The default hashing algorithm was changed from md5 to sha256, but
the s3fs backend still calls the hashing function without specifying an
algorithm, and then hardcodes the result's hash type to md5. After the
change to sha256, this means it was computing a sha256 sum but reporting
it as md5. In turn, the hash of the existing target file would be
computed with one algorithm and compared against a sum produced by the
other, so the two could never match. This causes downstream effects,
such as test mode always reporting a change even when there isn't one.

Fixes #65589
---
 changelog/65589.fixed.md                   |   1 +
 salt/fileserver/s3fs.py                    |  12 ++-
 tests/pytests/unit/fileserver/test_s3fs.py | 118 ++++++++++++++++++++-
 3 files changed, 123 insertions(+), 8 deletions(-)
 create mode 100644 changelog/65589.fixed.md

diff --git a/changelog/65589.fixed.md b/changelog/65589.fixed.md
new file mode 100644
index 00000000000..e6f8f40e341
--- /dev/null
+++ b/changelog/65589.fixed.md
@@ -0,0 +1 @@
+Fixes the s3fs backend hashing the local cache's files with the wrong hash type
diff --git a/salt/fileserver/s3fs.py b/salt/fileserver/s3fs.py
index aa03c33bbcd..d013ea3b193 100644
--- a/salt/fileserver/s3fs.py
+++ b/salt/fileserver/s3fs.py
@@ -104,6 +104,8 @@ import salt.utils.versions
 
 log = logging.getLogger(__name__)
 
+S3_HASH_TYPE = "md5"
+
 
 def envs():
     """
@@ -189,7 +191,7 @@ def find_file(path, saltenv="base", **kwargs):
 
 def file_hash(load, fnd):
     """
-    Return an MD5 file hash
+    Return the hash of an object's cached copy
     """
     if "env" in load:
         # "env" is not supported; Use "saltenv".
@@ -208,8 +210,8 @@ def file_hash(load, fnd):
     )
 
     if os.path.isfile(cached_file_path):
-        ret["hsum"] = salt.utils.hashutils.get_hash(cached_file_path)
-        ret["hash_type"] = "md5"
+        ret["hash_type"] = S3_HASH_TYPE
+        ret["hsum"] = salt.utils.hashutils.get_hash(cached_file_path, S3_HASH_TYPE)
 
     return ret
 
@@ -716,7 +718,9 @@ def _get_file_from_s3(metadata, saltenv, bucket_name, path, cached_file_path):
 
         if file_etag.find("-") == -1:
             file_md5 = file_etag
-            cached_md5 = salt.utils.hashutils.get_hash(cached_file_path, "md5")
+            cached_md5 = salt.utils.hashutils.get_hash(
+                cached_file_path, S3_HASH_TYPE
+            )
 
             # hashes match we have a cache hit
             if cached_md5 == file_md5:
diff --git a/tests/pytests/unit/fileserver/test_s3fs.py b/tests/pytests/unit/fileserver/test_s3fs.py
index b50424a1d1a..7a89543e27f 100644
--- a/tests/pytests/unit/fileserver/test_s3fs.py
+++ b/tests/pytests/unit/fileserver/test_s3fs.py
@@ -1,19 +1,129 @@
+import os
+
+import boto3
 import pytest
+import yaml
+
+# moto must be imported before boto3
+from moto import mock_s3
 
 import salt.fileserver.s3fs as s3fs
+import salt.utils.files
+import salt.utils.hashutils
+import salt.utils.s3
 
 
 @pytest.fixture
-def configure_loader_modules(tmp_path):
+def bucket():
+    with mock_s3():
+        yield "mybucket"
+
+
+@pytest.fixture(scope="module")
+def aws_creds():
+    return {
+        "aws_access_key_id": "testing",
+        "aws_secret_access_key": "testing",
+        "aws_session_token": "testing",
+        "region_name": "us-east-1",
+    }
+
+
+@pytest.fixture(scope="function")
+def configure_loader_modules(tmp_path, bucket):
     opts = {
         "cachedir": tmp_path,
+        "s3.buckets": {"base": [bucket]},
+        "s3.location": "us-east-1",
+        "s3.s3_cache_expire": -1,
     }
-    return {s3fs: {"__opts__": opts}}
+    utils = {"s3.query": salt.utils.s3.query}
+
+    yield {s3fs: {"__opts__": opts, "__utils__": utils}}
 
 
-def test_cache_round_trip():
+@pytest.fixture(scope="function")
+def s3(bucket, aws_creds):
+    conn = boto3.client("s3", **aws_creds)
+    conn.create_bucket(Bucket=bucket)
+    return conn
+
+
+def make_keys(bucket, conn, keys):
+    for key, data in keys.items():
+        conn.put_object(
+            Bucket=bucket,
+            Key=key,
+            Body=data["content"],
+        )
+
+
+def verify_cache(bucket, expected):
+    for key, data in expected.items():
+        correct_content = data["content"]
+        cache_file = s3fs._get_cached_file_name(bucket, "base", key)
+        assert os.path.exists(cache_file)
+
+        if correct_content is None:
+            continue
+
+        with salt.utils.files.fopen(cache_file) as f:
+            content = f.read()
+            assert correct_content == content
+
+
+def test_update(bucket, s3):
+    """Tests that files get downloaded from s3 to the local cache."""
+
+    keys = {
+        "top.sls": {"content": yaml.dump({"base": {"*": ["foo"]}})},
+        "foo.sls": {"content": yaml.dump({"nginx": {"pkg.installed": []}})},
+        "files/nginx.conf": {"content": "server {}"},
+    }
+
+    make_keys(bucket, s3, keys)
+    s3fs.update()
+    verify_cache(bucket, keys)
+
+    # make a modification and update again - verify the change is retrieved
+    keys["top.sls"]["content"] = yaml.dump({"base": {"*": ["foo", "bar"]}})
+    make_keys(bucket, s3, keys)
+    s3fs.update()
+    verify_cache(bucket, keys)
+
+
+def test_s3_hash(bucket, s3):
+    """Verifies that s3fs hashes files correctly."""
+
+    keys = {
+        "top.sls": {"content": yaml.dump({"base": {"*": ["foo"]}})},
+        "foo.sls": {"content": yaml.dump({"nginx": {"pkg.installed": []}})},
+        "files/nginx.conf": {"content": "server {}"},
+    }
+
+    make_keys(bucket, s3, keys)
+    s3fs.update()
+
+    for key, item in keys.items():
+        cached_file_path = s3fs._get_cached_file_name(bucket, "base", key)
+        item["hash"] = salt.utils.hashutils.get_hash(
+            cached_file_path, s3fs.S3_HASH_TYPE
+        )
+        item["cached_file_path"] = cached_file_path
+
+    load = {"saltenv": "base"}
+    fnd = {"bucket": bucket}
+
+    for key, item in keys.items():
+        fnd["path"] = item["cached_file_path"]
+        actual_hash = s3fs.file_hash(load, fnd)
+        assert s3fs.S3_HASH_TYPE == actual_hash["hash_type"]
+        assert item["hash"] == actual_hash["hsum"]
+
+
+def test_cache_round_trip(bucket):
     metadata = {"foo": "bar"}
-    cache_file = s3fs._get_cached_file_name("base", "fake_bucket", "some_file")
+    cache_file = s3fs._get_cached_file_name(bucket, "base", "somefile")
 
     s3fs._write_buckets_cache_file(metadata, cache_file)
     assert s3fs._read_buckets_cache_file(cache_file) == metadata
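For readers who want to see the failure mode the commit message describes in
isolation, the following is a minimal standalone sketch using only hashlib.
It is illustrative, not Salt's actual API: get_hash here is a simplified
stand-in for salt.utils.hashutils.get_hash, and file_hash_before_fix,
file_hash_after_fix, and sees_change are hypothetical names invented for
this example.

import hashlib

DEFAULT_HASH_TYPE = "sha256"  # the project-wide default, changed from md5
S3_HASH_TYPE = "md5"          # what s3fs hardcoded into the returned hash_type


def get_hash(data: bytes, algo: str = DEFAULT_HASH_TYPE) -> str:
    # Simplified stand-in for salt.utils.hashutils.get_hash: hashes with
    # the default algorithm unless one is passed explicitly.
    return hashlib.new(algo, data).hexdigest()


def file_hash_before_fix(cached: bytes) -> dict:
    # Pre-fix behavior: hsum is computed with the default (now sha256),
    # but hash_type still claims md5.
    return {"hash_type": "md5", "hsum": get_hash(cached)}


def file_hash_after_fix(cached: bytes) -> dict:
    # Post-fix behavior: hash_type and hsum agree on S3_HASH_TYPE.
    return {"hash_type": S3_HASH_TYPE, "hsum": get_hash(cached, S3_HASH_TYPE)}


def sees_change(target: bytes, reported: dict) -> bool:
    # A consumer hashes the existing target file with the *reported*
    # hash_type and compares digests; a digest of one type can never
    # equal a digest of another.
    return get_hash(target, reported["hash_type"]) != reported["hsum"]


content = b"server {}\n"
assert sees_change(content, file_hash_before_fix(content))      # spurious change
assert not sees_change(content, file_hash_after_fix(content))   # no change, as expected

This is why test mode always reported a change before the fix: a digest
recomputed with the advertised algorithm could never equal the mislabeled
sum the fileserver returned.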