Fix #1879. Searching/matching is now done on multiple lines.

* `salt.modules.file.contains()`, `salt.modules.file.contains_regex()`, `salt.modules.file.contains_glob()` and `salt.utils.find` now search/match against chunks of file data (32KB chunks by default) instead of searching/matching line by line.
* Based on the above changes, `salt.states.file.append()`, when checking whether the text to append is already present, now uses `salt.modules.file.contains_regex()` so that a match can span multiple lines, ignoring the addition/deletion of white-space or new lines, except inside quotes.
* The regex used for the above item is built at runtime using `salt.utils.build_whitepace_splited_regex()`: just feed the text to it and you'll get back the proper regex to match/search with, for example via `salt.modules.file.contains_regex()`.
* Added tests for all this code.
2025-04-17 10:10:20 +00:00 · 2012-08-28 10:03:11 +01:00 · 2012-08-28 10:03:11 +01:00 · 8b645b339b
commit 8b645b339b
parent 797defc320
11 changed files with 357 additions and 43 deletions
--- a/salt/modules/file.py
+++ b/salt/modules/file.py
@ -10,7 +10,6 @@ data
 import os
 import re
 import time
-import hashlib
 import shutil
 import stat
 import sys
@ -24,6 +23,7 @@ except ImportError:

 # Import salt libs
 import salt.utils.find
+from salt.utils.filebuffer import BufferedReader
 from salt.exceptions import CommandExecutionError, SaltInvocationError

 def __virtual__():
@ -515,9 +515,9 @@ def contains(path, text):
        return False

    try:
-        with open(path, 'r') as fp_:
-            for line in fp_:
-                if text.strip() == line.strip():
+        with BufferedReader(path) as br:
+            for chunk in br:
+                if text.strip() == chunk.strip():
                    return True
        return False
    except (IOError, OSError):
@ -537,9 +537,11 @@ def contains_regex(path, regex, lchar=''):
        return False

    try:
-        with open(path, 'r') as fp_:
-            for line in  fp_:
-                if re.search(regex, line.lstrip(lchar)):
+        with BufferedReader(path) as br:
+            for chunk in br:
+                if lchar:
+                    chunk = chunk.lstrip(lchar)
+                if re.search(regex, chunk):
                    return True
            return False
    except (IOError, OSError):
@ -558,12 +560,11 @@ def contains_glob(path, glob):
        return False

    try:
-        with open(path, 'r') as fp_:
-            data = fp_.read()
-            if fnmatch.fnmatch(data, glob):
-                return True
-            else:
-                return False
+        with BufferedReader(path) as br:
+            for chunk in br:
+                if fnmatch.fnmatch(chunk, glob):
+                    return True
+            return False
    except (IOError, OSError):
        return False

--- a/salt/states/file.py
+++ b/salt/states/file.py
@ -456,7 +456,7 @@ def _check_perms(name, ret, user, group, mode):
                        )
            except OSError, e:
                ret['result'] = False
-                
+
    if user:
        if user != __salt__['file.get_user'](name):
            ret['result'] = False
@ -1433,7 +1433,7 @@ def recurse(name,
                include_empty)
        return ret

-    def update_changes_by_perms(path, mode, changetype='updated'): 
+    def update_changes_by_perms(path, mode, changetype='updated'):
        _ret = {'name': name,
                'changes': {},
                'result': True,
@ -1444,7 +1444,7 @@ def recurse(name,
        if _ret['comment']:
            comments = ret['comment'].setdefault(path, [])
            comments.extend(_ret['comment'])
-        if _ret['changes']: 
+        if _ret['changes']:
            ret['changes'][path] = changetype

    vdir = set()
@ -1710,6 +1710,11 @@ def append(name, text):
        text = (text,)

    for chunk in text:
+
+        if __salt__['file.contains_regex'](
+                        name, salt.utils.build_whitepace_splited_regex(chunk)):
+            continue
+
        try:
            lines = chunk.split('\n')
        except AttributeError:
@ -1718,17 +1723,13 @@ def append(name, text):
            return _error(ret, 'Given text is not a string')

        for line in lines:
-            if __salt__['file.contains'](name, line):
-                continue
-            else:
-                if __opts__['test']:
-                    ret['comment'] = 'File {0} is set to be updated'.format(
-                            name)
-                    ret['result'] = None
-                    return ret
-                __salt__['file.append'](name, line)
-                cgs = ret['changes'].setdefault('new', [])
-                cgs.append(line)
+            if __opts__['test']:
+                ret['comment'] = 'File {0} is set to be updated'.format(name)
+                ret['result'] = None
+                return ret
+            __salt__['file.append'](name, line)
+            cgs = ret['changes'].setdefault('new', [])
+            cgs.append(line)

    count = len(ret['changes'].get('new', []))

--- a/salt/utils/init.py
+++ b/salt/utils/init.py
@ -5,6 +5,7 @@ from __future__ import absolute_import

 # Import Python libs
 import os
+import re
 import imp
 import random
 import sys
@ -13,6 +14,7 @@ import logging
 import hashlib
 import datetime
 import tempfile
+import shlex
 import shutil
 import time
 import platform
@ -518,3 +520,49 @@ def pem_finger(path, sum_type='md5'):
        else:
            finger += pre[ind]
    return finger.rstrip(':')
+
+
+def build_whitepace_splited_regex(text):
+    '''
+    Create a regular expression at runtime which should match ignoring the
+    addition or deletion of white space or line breaks, unless between commas
+
+    Example::
+
+    >>> import re
+    >>> from salt.utils import *
+    >>> regex = build_whitepace_splited_regex(
+    ...     """if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then"""
+    ... )
+
+    >>> regex
+    '(?:[\\s]+)?if(?:[\\s]+)?\\[(?:[\\s]+)?\\-z(?:[\\s]+)?\\"\\$debian'
+    '\\_chroot\\"(?:[\\s]+)?\\](?:[\\s]+)?\\&\\&(?:[\\s]+)?\\[(?:[\\s]+)?'
+    '\\-r(?:[\\s]+)?\\/etc\\/debian\\_chroot(?:[\\s]+)?\\]\\;(?:[\\s]+)?'
+    'then(?:[\\s]+)?'
+    >>> re.search(
+    ...     regex,
+    ...     """if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then"""
+    ... )
+
+    <_sre.SRE_Match object at 0xb70639c0>
+    >>>
+
+    '''
+
+    def __build_parts(text):
+        lexer = shlex.shlex(text)
+        lexer.whitespace_split = True
+        lexer.commenters = ''
+        if '"' in text:
+            lexer.quotes = '"'
+        elif '\'' in text:
+            lexer.quotes = '\''
+        return list(lexer)
+
+
+    regex = r''
+    for line in text.splitlines():
+        parts = [re.escape(s) for s in __build_parts(line)]
+        regex += r'(?:[\s]+)?{0}(?:[\s]+)?'.format(r'(?:[\s]+)?'.join(parts))
+    return regex
--- a/salt/utils/filebuffer.py
+++ b/salt/utils/filebuffer.py
@ -0,0 +1,126 @@
+# -*- coding: utf-8 -*-
+'''
+    salt.utils.filebuffer
+    ~~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: © 2012 UfSoft.org - :email:`Pedro Algarvio (pedro@algarvio.me)`
+    :license: Apache 2.0, see LICENSE for more details.
+'''
+
+from salt.exceptions import SaltException
+
+
+class InvalidFileMode(SaltException):
+    '''
+    An invalid file mode was used to open the file passed to the buffer
+    '''
+
+
+class BufferedReader(object):
+    '''
+    This object allows iterating through the contents of a file keeping
+    X configurable bytes in memory which can be used to, for example,
+    do regex search/matching on more than a single line.
+
+    :type  path: str
+    :param path: The file path to be read
+
+    :type  max_in_mem: int
+    :param max_in_mem: The maximum bytes kept in memory while iterating through
+                       the file. Default 256KB.
+
+    :type  chunk_size: int
+    :param chunk_size: The size of each consequent read chunk. Default 32KB.
+
+    :type  mode: str
+    :param mode: The mode the file should be opened. **Only read modes**.
+
+    '''
+    def __init__(self, path, max_in_mem=256*1024, chunk_size=32*1024,
+                 mode='r'):
+        if 'a' in mode or 'w' in mode:
+            raise InvalidFileMode("Cannot open file in write or append mode")
+        self.__path = path
+        self.__file = open(self.__path, mode)
+        self.__max_in_mem = max_in_mem
+        self.__chunk_size = chunk_size
+        self.__buffered = None
+
+    # Public attributes
+    @property
+    def buffered(self):
+        return self.__buffered
+
+    # Support iteration
+    def __iter__(self):
+        return self
+
+    def next(self):
+        if self.__buffered is None:
+            multiplier = self.__max_in_mem / self.__chunk_size
+            self.__buffered = ""
+        else:
+            multiplier = 1
+            self.__buffered = self.__buffered[self.__chunk_size:]
+
+        data = self.__file.read(self.__chunk_size*multiplier)
+
+        if not data:
+            self.__file.close()
+            raise StopIteration
+
+        self.__buffered += data
+        return self.__buffered
+
+
+    # Support with statements
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, tb):
+        pass
+
+
+if __name__ == '__main__':
+    def timeit_string(fpath, max_size, chunk_size):
+
+        sf = BufferedReader(fpath, max_size, chunk_size)
+        for chunk in sf:
+            chunk
+        return
+
+    def sizeof_fmt(num):
+        for x in ['bytes','KB','MB','GB']:
+            if num < 1024.0:
+                return "%3.1f%s" % (num, x)
+            num /= 1024.0
+        return "%3.1f%s" % (num, 'TB')
+
+    import os, timeit
+    fpath = os.path.normpath(os.path.join(
+        os.path.dirname(__file__),
+        "../../doc/topics/tutorials/starting_states.rst"
+    ))
+
+    tpath = "/tmp/starting_states.rst"
+
+    for fmultiplier in (1, 10, 50, 100, 800, 3200):
+        ffile = open(tpath, "w")
+        while fmultiplier > 0:
+            ffile.write(open(fpath).read())
+            fmultiplier -= 1
+
+        ffile.close()
+
+        TNUMBER = 1000
+
+        print "Running tests against a file with the size of %s" % sizeof_fmt(os.stat(tpath).st_size)
+
+        for idx, multiplier in enumerate([4, 8, 16, 32, 64, 128, 256]):
+            chunk_size = multiplier * 1024
+            max_size = chunk_size * 5
+            t = timeit.Timer("timeit_string('%s', %d, %d)" % (tpath, max_size, chunk_size), "from __main__ import timeit_string")
+            print "timeit_string ({0: >7} chunks; max: {1: >7}):".format(sizeof_fmt(chunk_size), sizeof_fmt(max_size)),
+            print u"{0: >6} \u00B5sec/pass".format(u"%.2f" % (TNUMBER * t.timeit(number=TNUMBER)/TNUMBER))
+
+        print
--- a/salt/utils/find.py
+++ b/salt/utils/find.py
@ -94,6 +94,7 @@ except ImportError:


 from salt._compat import MAX_SIZE
+from salt.utils.filebuffer import BufferedReader

 # Set up logger
 log = logging.getLogger(__name__)
@ -415,9 +416,9 @@ class GrepOption(Option):
    def match(self, dirname, filename, fstat):
        if not stat.S_ISREG(fstat[stat.ST_MODE]):
            return None
-        with open(os.path.join(dirname, filename), 'rb') as f:
-            for line in f:
-                if self.re.search(line):
+        with BufferedReader(os.path.join(dirname, filename), mode='rb') as br:
+            for chunk in br:
+                if self.re.search(chunk):
                    return os.path.join(dirname, filename)
        return None

--- a/tests/integration/files/file/base/issue-1879/init.sls
+++ b/tests/integration/files/file/base/issue-1879/init.sls
@ -1,3 +1,3 @@
 /tmp/salttest/issue-1879:
  file:
-    - touch
+    - touch
--- a/tests/integration/files/file/base/issue-1879/step-1.sls
+++ b/tests/integration/files/file/base/issue-1879/step-1.sls
@ -5,4 +5,5 @@
        # set variable identifying the chroot you work in (used in the prompt below)
        if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
            debian_chroot=$(cat /etc/debian_chroot)
-        fi
+        fi
+        
--- a/tests/integration/files/file/base/issue-1879/step-2.sls
+++ b/tests/integration/files/file/base/issue-1879/step-2.sls
@ -5,4 +5,5 @@
        # enable bash completion in interactive shells
        if [ -f /etc/bash_completion ] && ! shopt -oq posix; then
            . /etc/bash_completion
-        fi
+        fi
+        
--- a/tests/integration/modules/state.py
+++ b/tests/integration/modules/state.py
@ -65,7 +65,21 @@ class StateModuleTest(integration.ModuleCase):
            'multiple state decs of the same type', sls
        )

+    maxDiff = None
+
    def test_issue_1879_too_simple_contains_check(self):
+        contents = """\
+# set variable identifying the chroot you work in (used in the prompt below)
+if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
+    debian_chroot=$(cat /etc/debian_chroot)
+fi
+
+# enable bash completion in interactive shells
+if [ -f /etc/bash_completion ] && ! shopt -oq posix; then
+    . /etc/bash_completion
+fi
+
+"""
        # Create the file
        self.run_function('state.sls', mods='issue-1879')
        # The first append
@ -73,16 +87,22 @@ class StateModuleTest(integration.ModuleCase):
        # The seccond append
        self.run_function('state.sls', mods='issue-1879.step-2')
        # Does it match?
-        self.assertMultiLineEqual("""\
-# set variable identifying the chroot you work in (used in the prompt below)
-if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
-    debian_chroot=$(cat /etc/debian_chroot)
-fi
-# enable bash completion in interactive shells
-if [ -f /etc/bash_completion ] && ! shopt -oq posix; then
-    . /etc/bash_completion
-fi""", open("/tmp/salttest/issue-1879", "r").read())
-        os.unlink('/tmp/salttest/issue-1879')
+        try:
+            self.assertMultiLineEqual(
+                contents, open("/tmp/salttest/issue-1879", "r").read()
+            )
+            # Make sure we don't re-append existing text
+            self.run_function('state.sls', mods='issue-1879.step-1')
+            self.run_function('state.sls', mods='issue-1879.step-2')
+            self.assertMultiLineEqual(
+                contents, open("/tmp/salttest/issue-1879", "r").read()
+            )
+        except Exception:
+            import shutil
+            shutil.copy('/tmp/salttest/issue-1879', '/tmp/salttest/issue-1879.bak')
+            raise
+        finally:
+            os.unlink('/tmp/salttest/issue-1879')



--- a/tests/unit/utils/filebuffer_test.py
+++ b/tests/unit/utils/filebuffer_test.py
@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+"""
+    tests.unit.utils.filebuffer_test
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: © 2012 UfSoft.org - :email:`Pedro Algarvio (pedro@algarvio.me)`
+    :license: Apache 2.0, see LICENSE for more details.
+"""
+
+
+from saltunittest import TestCase, TestLoader, TextTestRunner, skipIf
+
+from salt.utils.filebuffer import BufferedReader, InvalidFileMode
+
+class TestFileBuffer(TestCase):
+    def test_read_only_mode(self):
+        with self.assertRaises(InvalidFileMode):
+            BufferedReader('/tmp/foo', mode='a')
+
+        with self.assertRaises(InvalidFileMode):
+            BufferedReader('/tmp/foo', mode='ab')
+
+        with self.assertRaises(InvalidFileMode):
+            BufferedReader('/tmp/foo', mode='w')
+
+        with self.assertRaises(InvalidFileMode):
+            BufferedReader('/tmp/foo', mode='wb')
+
+if __name__ == "__main__":
+    loader = TestLoader()
+    tests = loader.loadTestsFromTestCase(TestFileBuffer)
+    TextTestRunner(verbosity=1).run(tests)
--- a/tests/unit/utils/runtime_whitespace_regex_test.py
+++ b/tests/unit/utils/runtime_whitespace_regex_test.py
@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+"""
+    tests.unit.utils.runtime_whitespace_regex
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: © 2012 UfSoft.org - :email:`Pedro Algarvio (pedro@algarvio.me)`
+    :license: Apache 2.0, see LICENSE for more details.
+"""
+
+import re
+from saltunittest import TestCase, TestLoader, TextTestRunner, skipIf
+
+from salt.utils import build_whitepace_splited_regex
+
+DOUBLE_TXT = """\
+# set variable identifying the chroot you work in (used in the prompt below)
+if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
+    debian_chroot=$(cat /etc/debian_chroot)
+fi
+"""
+
+SINGLE_TXT = """\
+# set variable identifying the chroot you work in (used in the prompt below)
+if [ -z '$debian_chroot' ] && [ -r /etc/debian_chroot ]; then
+    debian_chroot=$(cat /etc/debian_chroot)
+fi
+"""
+
+SINGLE_DOUBLE_TXT = """\
+# set variable identifying the chroot you work in (used in the prompt below)
+if [ -z '$debian_chroot' ] && [ -r /etc/debian_chroot ]; then
+    debian_chroot=$(cat /etc/debian_chroot)
+fi
+
+# set variable identifying the chroot you work in (used in the prompt below)
+if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
+    debian_chroot=$(cat /etc/debian_chroot)
+fi
+"""
+
+MATCH = """\
+# set variable identifying the chroot you work in (used in the prompt below)
+if [ -z '$debian_chroot' ] && [ -r /etc/debian_chroot ]; then
+    debian_chroot=$(cat /etc/debian_chroot)
+fi
+
+
+# set variable identifying the chroot you work in (used in the prompt below)
+if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
+    debian_chroot=$(cat /etc/debian_chroot)
+fi
+
+
+# set variable identifying the chroot you work in (used in the prompt below)
+if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
+    debian_chroot=$(cat /etc/debian_chroot)
+fi
+
+
+# set variable identifying the chroot you work in (used in the prompt below)
+if [ -z '$debian_chroot' ] && [ -r /etc/debian_chroot ]; then
+    debian_chroot=$(cat /etc/debian_chroot)
+fi
+"""
+
+class TestRuntimeWhitespaceRegex(TestCase):
+
+    def test_single_quotes(self):
+        regex = build_whitepace_splited_regex(SINGLE_TXT)
+        self.assertTrue(re.search(regex, MATCH))
+
+    def test_double_quotes(self):
+        regex = build_whitepace_splited_regex(DOUBLE_TXT)
+        self.assertTrue(re.search(regex, MATCH))
+
+    def test_single_and_double_quotes(self):
+        regex = build_whitepace_splited_regex(SINGLE_DOUBLE_TXT)
+        self.assertTrue(re.search(regex, MATCH))
+
+if __name__ == "__main__":
+    loader = TestLoader()
+    tests = loader.loadTestsFromTestCase(TestRuntimeWhitespaceRegex)
+    TextTestRunner(verbosity=1).run(tests)