Fix #1879. Searching/matching is now done on multiple lines.

* `salt.modules.file.contains()`, `salt.modules.file.contains_regex()`, `salt.modules.file.contains_glob()` and `salt.utils.find` now search/match against chunks of file data (32KB chunks by default) instead of searching/matching line by line.
* Based on the above changes, `salt.states.file.append()` now uses `salt.modules.file.contains_regex()` when checking whether the text to append is already present, so that a match can span multiple lines and ignores the addition/deletion of white space or new lines, except inside quotes.
* The regex used in the above item is built at runtime by `salt.utils.build_whitepace_splited_regex()`: feed it the text and you get back the regex to match/search with, for example through `salt.modules.file.contains_regex()`.
* Added tests for all this code.
This commit is contained in:
Pedro Algarvio 2012-08-28 10:03:11 +01:00
parent 797defc320
commit 8b645b339b
11 changed files with 357 additions and 43 deletions

View file

@ -10,7 +10,6 @@ data
import os
import re
import time
import hashlib
import shutil
import stat
import sys
@ -24,6 +23,7 @@ except ImportError:
# Import salt libs
import salt.utils.find
from salt.utils.filebuffer import BufferedReader
from salt.exceptions import CommandExecutionError, SaltInvocationError
def __virtual__():
@ -515,9 +515,9 @@ def contains(path, text):
return False
try:
with open(path, 'r') as fp_:
for line in fp_:
if text.strip() == line.strip():
with BufferedReader(path) as br:
for chunk in br:
if text.strip() == chunk.strip():
return True
return False
except (IOError, OSError):
@ -537,9 +537,11 @@ def contains_regex(path, regex, lchar=''):
return False
try:
with open(path, 'r') as fp_:
for line in fp_:
if re.search(regex, line.lstrip(lchar)):
with BufferedReader(path) as br:
for chunk in br:
if lchar:
chunk = chunk.lstrip(lchar)
if re.search(regex, chunk):
return True
return False
except (IOError, OSError):
@ -558,12 +560,11 @@ def contains_glob(path, glob):
return False
try:
with open(path, 'r') as fp_:
data = fp_.read()
if fnmatch.fnmatch(data, glob):
return True
else:
return False
with BufferedReader(path) as br:
for chunk in br:
if fnmatch.fnmatch(chunk, glob):
return True
return False
except (IOError, OSError):
return False

View file

@ -456,7 +456,7 @@ def _check_perms(name, ret, user, group, mode):
)
except OSError, e:
ret['result'] = False
if user:
if user != __salt__['file.get_user'](name):
ret['result'] = False
@ -1433,7 +1433,7 @@ def recurse(name,
include_empty)
return ret
def update_changes_by_perms(path, mode, changetype='updated'):
def update_changes_by_perms(path, mode, changetype='updated'):
_ret = {'name': name,
'changes': {},
'result': True,
@ -1444,7 +1444,7 @@ def recurse(name,
if _ret['comment']:
comments = ret['comment'].setdefault(path, [])
comments.extend(_ret['comment'])
if _ret['changes']:
if _ret['changes']:
ret['changes'][path] = changetype
vdir = set()
@ -1710,6 +1710,11 @@ def append(name, text):
text = (text,)
for chunk in text:
if __salt__['file.contains_regex'](
name, salt.utils.build_whitepace_splited_regex(chunk)):
continue
try:
lines = chunk.split('\n')
except AttributeError:
@ -1718,17 +1723,13 @@ def append(name, text):
return _error(ret, 'Given text is not a string')
for line in lines:
if __salt__['file.contains'](name, line):
continue
else:
if __opts__['test']:
ret['comment'] = 'File {0} is set to be updated'.format(
name)
ret['result'] = None
return ret
__salt__['file.append'](name, line)
cgs = ret['changes'].setdefault('new', [])
cgs.append(line)
if __opts__['test']:
ret['comment'] = 'File {0} is set to be updated'.format(name)
ret['result'] = None
return ret
__salt__['file.append'](name, line)
cgs = ret['changes'].setdefault('new', [])
cgs.append(line)
count = len(ret['changes'].get('new', []))

View file

@ -5,6 +5,7 @@ from __future__ import absolute_import
# Import Python libs
import os
import re
import imp
import random
import sys
@ -13,6 +14,7 @@ import logging
import hashlib
import datetime
import tempfile
import shlex
import shutil
import time
import platform
@ -518,3 +520,49 @@ def pem_finger(path, sum_type='md5'):
else:
finger += pre[ind]
return finger.rstrip(':')
def build_whitepace_splited_regex(text):
    '''
    Create a regular expression at runtime which should match ignoring the
    addition or deletion of white space or line breaks, unless between quotes

    Each line of ``text`` is split into tokens with ``shlex`` (quoted strings
    stay a single token), every token is escaped with ``re.escape`` and the
    tokens are joined with an optional-whitespace group.  A line holding an
    unbalanced quote (for example a quoted string spanning several lines)
    cannot be tokenized by ``shlex``; such a line is split on plain
    whitespace instead of raising ``ValueError``.

    :param text: The (possibly multi-line) text to build the regex from
    :return: The regular expression, as a string

    Example (the exact escaping depends on the ``re.escape`` implementation
    of the running Python version)::

        >>> import re
        >>> from salt.utils import build_whitepace_splited_regex
        >>> regex = build_whitepace_splited_regex(
        ...     """if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then"""
        ... )

        >>> regex
        '(?:[\\s]+)?if(?:[\\s]+)?\\[(?:[\\s]+)?\\-z(?:[\\s]+)?\\"\\$debian'
        '\\_chroot\\"(?:[\\s]+)?\\](?:[\\s]+)?\\&\\&(?:[\\s]+)?\\[(?:[\\s]+)?'
        '\\-r(?:[\\s]+)?\\/etc\\/debian\\_chroot(?:[\\s]+)?\\]\\;(?:[\\s]+)?'
        'then(?:[\\s]+)?'
        >>> re.search(
        ...     regex,
        ...     """if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then"""
        ... )

        <_sre.SRE_Match object at 0xb70639c0>
        >>>
    '''
    # Matches any run of white space (including line breaks), or nothing.
    optional_ws = r'(?:[\s]+)?'

    def __build_parts(line):
        lexer = shlex.shlex(line)
        lexer.whitespace_split = True
        # '#' must be kept as regular text, not treated as a comment start
        lexer.commenters = ''
        if '"' in line:
            lexer.quotes = '"'
        elif '\'' in line:
            lexer.quotes = '\''
        try:
            return list(lexer)
        except ValueError:
            # shlex found an opening quote without its closing one on this
            # line; fall back to a plain whitespace split so the text can
            # still be matched instead of crashing the caller.
            return line.split()

    pieces = []
    for line in text.splitlines():
        parts = [re.escape(s) for s in __build_parts(line)]
        pieces.append(optional_ws + optional_ws.join(parts) + optional_ws)
    return ''.join(pieces)

126
salt/utils/filebuffer.py Normal file
View file

@ -0,0 +1,126 @@
# -*- coding: utf-8 -*-
'''
salt.utils.filebuffer
~~~~~~~~~~~~~~~~~~~~~
:copyright: © 2012 UfSoft.org - :email:`Pedro Algarvio (pedro@algarvio.me)`
:license: Apache 2.0, see LICENSE for more details.
'''
from salt.exceptions import SaltException
class InvalidFileMode(SaltException):
    '''
    An invalid file mode was used to open the file passed to the buffer.

    Raised by ``BufferedReader`` when the requested mode contains ``a`` or
    ``w``, since the buffer only supports reading.
    '''
class BufferedReader(object):
    '''
    This object allows iterating through the contents of a file keeping
    X configurable bytes in memory which can be used to, for example,
    do regex search/matching on more than a single line.

    The first iteration reads up to ``max_in_mem`` bytes (rounded down to a
    multiple of ``chunk_size``, but never less than one chunk).  Every
    following iteration drops the oldest ``chunk_size`` bytes from the
    buffer, appends the next ``chunk_size`` bytes read from the file and
    returns the whole buffer, so consecutive results overlap.

    The instance is its own iterator and can be used as a context manager;
    the underlying file is closed when the file is exhausted or when the
    ``with`` block is left, whichever happens first.

    :type  path: str
    :param path: The file path to be read

    :type  max_in_mem: int
    :param max_in_mem: The maximum bytes kept in memory while iterating through
                       the file. Default 256KB.

    :type  chunk_size: int
    :param chunk_size: The size of each consequent read chunk. Default 32KB.

    :type  mode: str
    :param mode: The mode the file should be opened. **Only read modes**.

    :raises InvalidFileMode: if ``mode`` contains ``a`` or ``w``.
    '''
    def __init__(self, path, max_in_mem=256*1024, chunk_size=32*1024,
                 mode='r'):
        if 'a' in mode or 'w' in mode:
            raise InvalidFileMode("Cannot open file in write or append mode")
        self.__path = path
        self.__file = open(self.__path, mode)
        self.__max_in_mem = max_in_mem
        self.__chunk_size = chunk_size
        # ``None`` means that nothing has been read from the file yet
        self.__buffered = None

    # Public attributes
    @property
    def buffered(self):
        '''
        The data currently held in memory (``None`` before the first read).
        '''
        return self.__buffered

    # Support iteration
    def __iter__(self):
        return self

    def next(self):
        '''
        Return the current buffer contents after sliding the window forward.

        :raises StopIteration: when there is no more data to read; the file
                               is closed at that point.
        '''
        first_read = self.__buffered is None
        if first_read:
            # Fill the whole window at once. Integer division keeps the read
            # size an int, and at least one chunk is always requested so a
            # ``max_in_mem`` smaller than ``chunk_size`` still reads data.
            multiplier = max(1, self.__max_in_mem // self.__chunk_size)
        else:
            multiplier = 1
            # Drop the oldest chunk to make room for the next one
            self.__buffered = self.__buffered[self.__chunk_size:]

        data = self.__file.read(self.__chunk_size * multiplier)
        if first_read:
            # Empty value of the same type the file yields (text or bytes)
            self.__buffered = data[:0]

        if not data:
            self.__file.close()
            raise StopIteration

        self.__buffered += data
        return self.__buffered

    # Iterator protocol name used by newer Python versions
    __next__ = next

    # Support with statements
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, tb):
        # Callers frequently leave the ``with`` block before the file is
        # exhausted (e.g. returning on the first match); close the file here
        # so the handle is not leaked. Closing twice is harmless.
        self.__file.close()
# Ad-hoc benchmark: times full iterations of BufferedReader over temporary
# files of growing size, for several chunk sizes.
if __name__ == '__main__':
    def timeit_string(fpath, max_size, chunk_size):
        # Iterate through the whole file; the chunks themselves are discarded

        sf = BufferedReader(fpath, max_size, chunk_size)
        for chunk in sf:
            chunk
        return

    def sizeof_fmt(num):
        # Format a byte count using the largest unit that keeps it below 1024
        for x in ['bytes','KB','MB','GB']:
            if num < 1024.0:
                return "%3.1f%s" % (num, x)
            num /= 1024.0
        return "%3.1f%s" % (num, 'TB')

    import os, timeit
    # Documentation file used as the seed content of the benchmark file
    fpath = os.path.normpath(os.path.join(
        os.path.dirname(__file__),
        "../../doc/topics/tutorials/starting_states.rst"
    ))

    tpath = "/tmp/starting_states.rst"

    for fmultiplier in (1, 10, 50, 100, 800, 3200):
        # Build the benchmark file by writing the seed file fmultiplier times
        ffile = open(tpath, "w")
        while fmultiplier > 0:
            ffile.write(open(fpath).read())
            fmultiplier -= 1

        ffile.close()

        # Number of passes executed by each timer
        TNUMBER = 1000

        print "Running tests against a file with the size of %s" % sizeof_fmt(os.stat(tpath).st_size)

        for idx, multiplier in enumerate([4, 8, 16, 32, 64, 128, 256]):
            chunk_size = multiplier * 1024
            # Keep five chunks in memory at a time
            max_size = chunk_size * 5
            t = timeit.Timer("timeit_string('%s', %d, %d)" % (tpath, max_size, chunk_size), "from __main__ import timeit_string")
            print "timeit_string ({0: >7} chunks; max: {1: >7}):".format(sizeof_fmt(chunk_size), sizeof_fmt(max_size)),
            # NOTE(review): TNUMBER * total / TNUMBER is just the total time
            # returned by timeit(), while the label says microseconds per
            # pass -- confirm the intended scaling factor.
            print u"{0: >6} \u00B5sec/pass".format(u"%.2f" % (TNUMBER * t.timeit(number=TNUMBER)/TNUMBER))
        print

View file

@ -94,6 +94,7 @@ except ImportError:
from salt._compat import MAX_SIZE
from salt.utils.filebuffer import BufferedReader
# Set up logger
log = logging.getLogger(__name__)
@ -415,9 +416,9 @@ class GrepOption(Option):
def match(self, dirname, filename, fstat):
if not stat.S_ISREG(fstat[stat.ST_MODE]):
return None
with open(os.path.join(dirname, filename), 'rb') as f:
for line in f:
if self.re.search(line):
with BufferedReader(os.path.join(dirname, filename), mode='rb') as br:
for chunk in br:
if self.re.search(chunk):
return os.path.join(dirname, filename)
return None

View file

@ -1,3 +1,3 @@
/tmp/salttest/issue-1879:
file:
- touch
- touch

View file

@ -5,4 +5,5 @@
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
fi

View file

@ -5,4 +5,5 @@
# enable bash completion in interactive shells
if [ -f /etc/bash_completion ] && ! shopt -oq posix; then
. /etc/bash_completion
fi
fi

View file

@ -65,7 +65,21 @@ class StateModuleTest(integration.ModuleCase):
'multiple state decs of the same type', sls
)
maxDiff = None
def test_issue_1879_too_simple_contains_check(self):
contents = """\
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
# enable bash completion in interactive shells
if [ -f /etc/bash_completion ] && ! shopt -oq posix; then
. /etc/bash_completion
fi
"""
# Create the file
self.run_function('state.sls', mods='issue-1879')
# The first append
@ -73,16 +87,22 @@ class StateModuleTest(integration.ModuleCase):
# The seccond append
self.run_function('state.sls', mods='issue-1879.step-2')
# Does it match?
self.assertMultiLineEqual("""\
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
# enable bash completion in interactive shells
if [ -f /etc/bash_completion ] && ! shopt -oq posix; then
. /etc/bash_completion
fi""", open("/tmp/salttest/issue-1879", "r").read())
os.unlink('/tmp/salttest/issue-1879')
try:
self.assertMultiLineEqual(
contents, open("/tmp/salttest/issue-1879", "r").read()
)
# Make sure we don't re-append existing text
self.run_function('state.sls', mods='issue-1879.step-1')
self.run_function('state.sls', mods='issue-1879.step-2')
self.assertMultiLineEqual(
contents, open("/tmp/salttest/issue-1879", "r").read()
)
except Exception:
import shutil
shutil.copy('/tmp/salttest/issue-1879', '/tmp/salttest/issue-1879.bak')
raise
finally:
os.unlink('/tmp/salttest/issue-1879')

View file

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
"""
tests.unit.utils.filebuffer_test
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
:copyright: © 2012 UfSoft.org - :email:`Pedro Algarvio (pedro@algarvio.me)`
:license: Apache 2.0, see LICENSE for more details.
"""
from saltunittest import TestCase, TestLoader, TextTestRunner, skipIf
from salt.utils.filebuffer import BufferedReader, InvalidFileMode
class TestFileBuffer(TestCase):
    """
    Unit tests for ``salt.utils.filebuffer.BufferedReader``.
    """
    def test_read_only_mode(self):
        """
        Opening the buffer in any append or write mode must be refused.
        """
        for forbidden_mode in ('a', 'ab', 'w', 'wb'):
            with self.assertRaises(InvalidFileMode):
                BufferedReader('/tmp/foo', mode=forbidden_mode)
# Run only this module's test case when executed directly
if __name__ == "__main__":
    suite = TestLoader().loadTestsFromTestCase(TestFileBuffer)
    runner = TextTestRunner(verbosity=1)
    runner.run(suite)

View file

@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
"""
tests.unit.utils.runtime_whitespace_regex
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
:copyright: © 2012 UfSoft.org - :email:`Pedro Algarvio (pedro@algarvio.me)`
:license: Apache 2.0, see LICENSE for more details.
"""
import re
from saltunittest import TestCase, TestLoader, TextTestRunner, skipIf
from salt.utils import build_whitepace_splited_regex
# Shell snippet whose test expression quotes the variable with double quotes
DOUBLE_TXT = """\
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
"""
# The same snippet, quoting the variable with single quotes
SINGLE_TXT = """\
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z '$debian_chroot' ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
"""
# The single quoted snippet followed by the double quoted one
SINGLE_DOUBLE_TXT = """\
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z '$debian_chroot' ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
"""
# Text searched by every test: the snippet repeated four times, quoted as
# single, double, double, single
MATCH = """\
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z '$debian_chroot' ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z '$debian_chroot' ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
"""
class TestRuntimeWhitespaceRegex(TestCase):
    """
    Check that regexes built by ``build_whitepace_splited_regex`` are found
    inside the ``MATCH`` text.
    """
    def test_single_quotes(self):
        pattern = build_whitepace_splited_regex(SINGLE_TXT)
        found = re.search(pattern, MATCH)
        self.assertTrue(found)

    def test_double_quotes(self):
        pattern = build_whitepace_splited_regex(DOUBLE_TXT)
        found = re.search(pattern, MATCH)
        self.assertTrue(found)

    def test_single_and_double_quotes(self):
        pattern = build_whitepace_splited_regex(SINGLE_DOUBLE_TXT)
        found = re.search(pattern, MATCH)
        self.assertTrue(found)
# Run only this module's test case when executed directly
if __name__ == "__main__":
    suite = TestLoader().loadTestsFromTestCase(TestRuntimeWhitespaceRegex)
    runner = TextTestRunner(verbosity=1)
    runner.run(suite)