Fix 3 bugs in subdict matching

1. Non-ASCII characters were causing a UnicodeDecodeError when matching. 2. Matching a dict value after a wildcard (i.e. when the key is the wildcard) did not work, because the pattern was being matched against the key instead of against the value. 3. It was not possible to use a wildcard for the first level of matching.
2025-04-17 10:10:20 +00:00 · 2018-09-26 12:44:20 -05:00 · 2018-09-26 12:44:20 -05:00 · ac0f8000d8
commit ac0f8000d8
parent 413868329c
2 changed files with 75 additions and 20 deletions
--- a/salt/utils/data.py
+++ b/salt/utils/data.py
@ -455,13 +455,14 @@ def traverse_dict(data, key, default=None, delimiter=DEFAULT_TARGET_DELIM):
    data['foo']['bar']['baz'] if this value exists, and will otherwise return
    the dict in the default argument.
    '''
+    ptr = data
    try:
        for each in key.split(delimiter):
-            data = data[each]
+            ptr = ptr[each]
    except (KeyError, IndexError, TypeError):
        # Encountered a non-indexable value in the middle of traversing
        return default
-    return data
+    return ptr


@jinja_filter('traverse')
@ -476,16 +477,17 @@ def traverse_dict_and_list(data, key, default=None, delimiter=DEFAULT_TARGET_DEL
    {'foo':{'bar':['baz']}} , if data like {'foo':{'bar':{'0':'baz'}}}
    then return data['foo']['bar']['0']
    '''
+    ptr = data
    for each in key.split(delimiter):
-        if isinstance(data, list):
+        if isinstance(ptr, list):
            try:
                idx = int(each)
            except ValueError:
                embed_match = False
                # Index was not numeric, lets look at any embedded dicts
-                for embedded in (x for x in data if isinstance(x, dict)):
+                for embedded in (x for x in ptr if isinstance(x, dict)):
                    try:
-                        data = embedded[each]
+                        ptr = embedded[each]
                        embed_match = True
                        break
                    except KeyError:
@ -495,15 +497,15 @@ def traverse_dict_and_list(data, key, default=None, delimiter=DEFAULT_TARGET_DEL
                    return default
            else:
                try:
-                    data = data[idx]
+                    ptr = ptr[idx]
                except IndexError:
                    return default
        else:
            try:
-                data = data[each]
+                ptr = ptr[each]
            except (KeyError, TypeError):
                return default
-    return data
+    return ptr


 def subdict_match(data,
@ -519,16 +521,28 @@ def subdict_match(data,
    latter.
    '''
    def _match(target, pattern, regex_match=False, exact_match=False):
+        try:
+            target = six.text_type(target).lower()
+        except UnicodeDecodeError:
+            # We're on PY2 and target is a str type with non-ascii chars. Coax
+            # it into a unicode type.
+            target = salt.utils.stringutils.to_unicode(target).lower()
+        try:
+            pattern = six.text_type(pattern).lower()
+        except UnicodeDecodeError:
+            # We're on PY2 and pattern is a str type with non-ascii chars. Coax
+            # it into a unicode type.
+            pattern = salt.utils.stringutils.to_unicode(pattern).lower()
+
        if regex_match:
            try:
-                return re.match(pattern.lower(), six.text_type(target).lower())
+                return re.match(pattern, target)
            except Exception:
                log.error('Invalid regex \'%s\' in match', pattern)
                return False
-        elif exact_match:
-            return six.text_type(target).lower() == pattern.lower()
        else:
-            return fnmatch.fnmatch(six.text_type(target).lower(), pattern.lower())
+            return target == pattern if exact_match \
+                else fnmatch.fnmatch(target, pattern)

    def _dict_match(target, pattern, regex_match=False, exact_match=False):
        wildcard = pattern.startswith('*:')
@ -548,11 +562,6 @@ def subdict_match(data,
            return True
        if wildcard:
            for key in target:
-                if _match(key,
-                          pattern,
-                          regex_match=regex_match,
-                          exact_match=exact_match):
-                    return True
                if isinstance(target[key], dict):
                    if _dict_match(target[key],
                                   pattern,
@ -566,15 +575,31 @@ def subdict_match(data,
                                  regex_match=regex_match,
                                  exact_match=exact_match):
                            return True
+                elif _match(target[key],
+                            pattern,
+                            regex_match=regex_match,
+                            exact_match=exact_match):
+                    return True
+        return False
+
+    splits = expr.split(delimiter)
+    num_splits = len(splits)
+    if num_splits == 1:
+        # Delimiter not present, this can't possibly be a match
        return False

    for idx in range(1, expr.count(delimiter) + 1):
-        splits = expr.split(delimiter)
        key = delimiter.join(splits[:idx])
-        matchstr = delimiter.join(splits[idx:])
+        if key == '*':
+            # We are matching on everything under the top level, so we need to
+            # treat the match as the entire data being passed in
+            matchstr = expr
+            match = data
+        else:
+            matchstr = delimiter.join(splits[idx:])
+            match = traverse_dict_and_list(data, key, {}, delimiter=delimiter)
        log.debug("Attempting to match '%s' in '%s' using delimiter '%s'",
                  matchstr, key, delimiter)
-        match = traverse_dict_and_list(data, key, {}, delimiter=delimiter)
        if match == {}:
            continue
        if isinstance(match, dict):
--- a/tests/unit/utils/test_data.py
+++ b/tests/unit/utils/test_data.py
@ -144,6 +144,36 @@ class DataTestCase(TestCase):
            )
        )

+    def test_subdict_match_with_wildcards(self):
+        '''
+        Tests subdict matching when wildcards are used in the expression
+        '''
+        data = {
+            'a': {
+                'b': {
+                    'ç': 'd',
+                    'é': ['eff', 'gee', '8ch'],
+                    'ĩ': {'j': 'k'}
+                }
+            }
+        }
+        assert salt.utils.data.subdict_match(data, '*:*:*:*')
+        assert salt.utils.data.subdict_match(data, 'a:*:*:*')
+        assert salt.utils.data.subdict_match(data, 'a:b:*:*')
+        assert salt.utils.data.subdict_match(data, 'a:b:ç:*')
+        assert salt.utils.data.subdict_match(data, 'a:b:*:d')
+        assert salt.utils.data.subdict_match(data, 'a:*:ç:d')
+        assert salt.utils.data.subdict_match(data, '*:b:ç:d')
+        assert salt.utils.data.subdict_match(data, '*:*:ç:d')
+        assert salt.utils.data.subdict_match(data, '*:*:*:d')
+        assert salt.utils.data.subdict_match(data, 'a:*:*:d')
+        assert salt.utils.data.subdict_match(data, 'a:b:*:ef*')
+        assert salt.utils.data.subdict_match(data, 'a:b:*:g*')
+        assert salt.utils.data.subdict_match(data, 'a:b:*:j:*')
+        assert salt.utils.data.subdict_match(data, 'a:b:*:j:k')
+        assert salt.utils.data.subdict_match(data, 'a:b:*:*:k')
+        assert salt.utils.data.subdict_match(data, 'a:b:*:*:*')
+
    def test_traverse_dict(self):
        test_two_level_dict = {'foo': {'bar': 'baz'}}