Merge pull request #48934 from terminalmage/issue48777

Properly handle latin-1 encoding in file diffs
This commit is contained in:
Nicole Thomas 2018-08-07 17:02:23 -04:00 committed by GitHub
commit ab1a713bc3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 169 additions and 69 deletions

View file

@ -12,7 +12,6 @@ from __future__ import absolute_import, print_function, unicode_literals
# Import python libs
import datetime
import difflib
import errno
import fnmatch
import io
@ -1567,7 +1566,7 @@ def comment_line(path,
check_perms(path, None, pre_user, pre_group, pre_mode)
# Return a diff using the two dictionaries
return ''.join(difflib.unified_diff(orig_file, new_file))
return __utils__['stringutils.get_diff'](orig_file, new_file)
def _get_flags(flags):
@ -2038,9 +2037,7 @@ def line(path, content=None, match=None, mode=None, location=None,
if show_changes:
with salt.utils.files.fopen(path, 'r') as fp_:
path_content = salt.utils.data.decode_list(fp_.read().splitlines(True))
changes_diff = ''.join(difflib.unified_diff(
path_content, body
))
changes_diff = __utils__['stringutils.get_diff'](path_content, body)
if __opts__['test'] is False:
fh_ = None
try:
@ -2426,18 +2423,15 @@ def replace(path,
if not dry_run and not salt.utils.platform.is_windows():
check_perms(path, None, pre_user, pre_group, pre_mode)
def get_changes():
orig_file_as_str = [salt.utils.stringutils.to_unicode(x) for x in orig_file]
new_file_as_str = [salt.utils.stringutils.to_unicode(x) for x in new_file]
return ''.join(difflib.unified_diff(orig_file_as_str, new_file_as_str))
differences = __utils__['stringutils.get_diff'](orig_file, new_file)
if show_changes:
return get_changes()
return differences
# We may have found a regex line match but don't need to change the line
# (for situations where the pattern also matches the repl). Revert the
# has_changes flag to False if the final result is unchanged.
if not get_changes():
if not differences:
has_changes = False
return has_changes
@ -2688,7 +2682,7 @@ def blockreplace(path,
)
if block_found:
diff = ''.join(difflib.unified_diff(orig_file, new_file))
diff = __utils__['stringutils.get_diff'](orig_file, new_file)
has_changes = diff is not ''
if has_changes and not dry_run:
# changes detected
@ -5018,11 +5012,7 @@ def get_diff(file1,
else:
if show_filenames:
args.extend(files)
ret = ''.join(
difflib.unified_diff(
*salt.utils.data.decode(args)
)
)
ret = __utils__['stringutils.get_diff'](*args)
return ret
return ''

View file

@ -6,6 +6,7 @@ Functions for manipulating or otherwise processing strings
# Import Python libs
from __future__ import absolute_import, print_function, unicode_literals
import base64
import difflib
import errno
import fnmatch
import logging
@ -31,21 +32,32 @@ def to_bytes(s, encoding=None, errors='strict'):
Given bytes, bytearray, str, or unicode (python 2), return bytes (str for
python 2)
'''
if encoding is None:
# Try utf-8 first, and fall back to detected encoding
encoding = ('utf-8', __salt_system_encoding__)
if not isinstance(encoding, (tuple, list)):
encoding = (encoding,)
if not encoding:
raise ValueError('encoding cannot be empty')
exc = None
if six.PY3:
if isinstance(s, bytes):
return s
if isinstance(s, bytearray):
return bytes(s)
if isinstance(s, six.string_types):
if encoding:
return s.encode(encoding, errors)
else:
for enc in encoding:
try:
# Try UTF-8 first
return s.encode('utf-8', errors)
except UnicodeEncodeError:
# Fall back to detected encoding
return s.encode(__salt_system_encoding__, errors)
return s.encode(enc, errors)
except UnicodeEncodeError as err:
exc = err
continue
# The only way we get this far is if a UnicodeEncodeError was
# raised, otherwise we would have already returned (or raised some
# other exception).
raise exc # pylint: disable=raising-bad-type
raise TypeError('expected bytes, bytearray, or str')
else:
return to_str(s, encoding, errors)
@ -61,35 +73,48 @@ def to_str(s, encoding=None, errors='strict', normalize=False):
except TypeError:
return s
if encoding is None:
# Try utf-8 first, and fall back to detected encoding
encoding = ('utf-8', __salt_system_encoding__)
if not isinstance(encoding, (tuple, list)):
encoding = (encoding,)
if not encoding:
raise ValueError('encoding cannot be empty')
# This shouldn't be six.string_types because if we're on PY2 and we already
# have a string, we should just return it.
if isinstance(s, str):
return _normalize(s)
exc = None
if six.PY3:
if isinstance(s, (bytes, bytearray)):
if encoding:
return _normalize(s.decode(encoding, errors))
else:
for enc in encoding:
try:
# Try UTF-8 first
return _normalize(s.decode('utf-8', errors))
except UnicodeDecodeError:
# Fall back to detected encoding
return _normalize(s.decode(__salt_system_encoding__, errors))
return _normalize(s.decode(enc, errors))
except UnicodeDecodeError as err:
exc = err
continue
# The only way we get this far is if a UnicodeDecodeError was
# raised, otherwise we would have already returned (or raised some
# other exception).
raise exc # pylint: disable=raising-bad-type
raise TypeError('expected str, bytes, or bytearray not {}'.format(type(s)))
else:
if isinstance(s, bytearray):
return str(s) # future lint: disable=blacklisted-function
if isinstance(s, unicode): # pylint: disable=incompatible-py3-code,undefined-variable
if encoding:
return _normalize(s).encode(encoding, errors)
else:
for enc in encoding:
try:
# Try UTF-8 first
return _normalize(s).encode('utf-8', errors)
except UnicodeEncodeError:
# Fall back to detected encoding
return _normalize(s).encode(__salt_system_encoding__, errors)
return _normalize(s).encode(enc, errors)
except UnicodeEncodeError as err:
exc = err
continue
# The only way we get this far is if a UnicodeDecodeError was
# raised, otherwise we would have already returned (or raised some
# other exception).
raise exc # pylint: disable=raising-bad-type
raise TypeError('expected str, bytearray, or unicode')
@ -100,6 +125,16 @@ def to_unicode(s, encoding=None, errors='strict', normalize=False):
def _normalize(s):
return unicodedata.normalize('NFC', s) if normalize else s
if encoding is None:
# Try utf-8 first, and fall back to detected encoding
encoding = ('utf-8', __salt_system_encoding__)
if not isinstance(encoding, (tuple, list)):
encoding = (encoding,)
if not encoding:
raise ValueError('encoding cannot be empty')
exc = None
if six.PY3:
if isinstance(s, str):
return _normalize(s)
@ -113,15 +148,16 @@ def to_unicode(s, encoding=None, errors='strict', normalize=False):
if isinstance(s, unicode): # pylint: disable=incompatible-py3-code
return _normalize(s)
elif isinstance(s, (str, bytearray)):
if encoding:
return _normalize(s.decode(encoding, errors))
else:
for enc in encoding:
try:
# Try UTF-8 first
return _normalize(s.decode('utf-8', errors))
except UnicodeDecodeError:
# Fall back to detected encoding
return _normalize(s.decode(__salt_system_encoding__, errors))
return _normalize(s.decode(enc, errors))
except UnicodeDecodeError as err:
exc = err
continue
# The only way we get this far is if a UnicodeDecodeError was
# raised, otherwise we would have already returned (or raised some
# other exception).
raise exc # pylint: disable=raising-bad-type
raise TypeError('expected str or bytearray')
@ -513,3 +549,21 @@ def get_context(template, line, num_lines=5, marker=None):
buf[error_line_in_context] += marker
return '---\n{0}\n---'.format('\n'.join(buf))
def get_diff(a, b, *args, **kwargs):
'''
Perform diff on two iterables containing lines from two files, and return
the diff as as string. Lines are normalized to str types to avoid issues
with unicode on PY2.
'''
encoding = ('utf-8', 'latin-1', __salt_system_encoding__)
# Late import to avoid circular import
import salt.utils.data
return ''.join(
difflib.unified_diff(
salt.utils.data.decode_list(a, encoding=encoding),
salt.utils.data.decode_list(b, encoding=encoding),
*args, **kwargs
)
)

View file

@ -0,0 +1,5 @@
<html>
<body>
r臾sm<EFBFBD>g蚶
</body>
</html>

View file

@ -0,0 +1,4 @@
<html>
<body>
</body>
</html>

View file

@ -22,7 +22,7 @@ log = logging.getLogger(__name__)
# Import Salt Testing libs
from tests.support.case import ModuleCase
from tests.support.unit import skipIf
from tests.support.paths import FILES, TMP, TMP_STATE_TREE
from tests.support.paths import BASE_FILES, FILES, TMP, TMP_STATE_TREE
from tests.support.helpers import (
destructiveTest,
skip_if_not_root,
@ -61,7 +61,6 @@ IS_WINDOWS = salt.utils.platform.is_windows()
BINARY_FILE = b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x05\x04\x04\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00\x00\x02\x02D\x01\x00;'
STATE_DIR = os.path.join(FILES, 'file', 'base')
if IS_WINDOWS:
FILEPILLAR = 'C:\\Windows\\Temp\\filepillar-python'
FILEPILLARDEF = 'C:\\Windows\\Temp\\filepillar-defaultvalue'
@ -77,7 +76,7 @@ def _test_managed_file_mode_keep_helper(testcase, local=False):
DRY helper function to run the same test with a local or remote path
'''
name = os.path.join(TMP, 'scene33')
grail_fs_path = os.path.join(FILES, 'file', 'base', 'grail', 'scene33')
grail_fs_path = os.path.join(BASE_FILES, 'grail', 'scene33')
grail = 'salt://grail/scene33' if not local else grail_fs_path
# Get the current mode so that we can put the file back the way we
@ -248,9 +247,7 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
ret = self.run_state(
'file.managed', name=name, source='salt://grail/scene33'
)
src = os.path.join(
FILES, 'file', 'base', 'grail', 'scene33'
)
src = os.path.join(BASE_FILES, 'grail', 'scene33')
with salt.utils.files.fopen(src, 'r') as fp_:
master_data = fp_.read()
with salt.utils.files.fopen(name, 'r') as fp_:
@ -464,11 +461,11 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
funny_file = salt.utils.files.mkstemp(prefix='?f!le? n@=3&', suffix='.file type')
funny_file_name = os.path.split(funny_file)[1]
funny_url = 'salt://|' + funny_file_name
funny_url_path = os.path.join(STATE_DIR, funny_file_name)
funny_url_path = os.path.join(BASE_FILES, funny_file_name)
state_name = 'funny_file'
state_file_name = state_name + '.sls'
state_file = os.path.join(STATE_DIR, state_file_name)
state_file = os.path.join(BASE_FILES, state_file_name)
state_key = 'file_|-{0}_|-{0}_|-managed'.format(funny_file)
self.addCleanup(os.remove, state_file)
@ -495,7 +492,7 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
'''
state_name = 'file-FileTest-test_managed_contents'
state_filename = state_name + '.sls'
state_file = os.path.join(STATE_DIR, state_filename)
state_file = os.path.join(BASE_FILES, state_filename)
managed_files = {}
state_keys = {}
@ -589,7 +586,7 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
Make sure that we enforce the source_hash even with local files
'''
name = os.path.join(TMP, 'local_source_with_source_hash')
local_path = os.path.join(FILES, 'file', 'base', 'grail', 'scene33')
local_path = os.path.join(BASE_FILES, 'grail', 'scene33')
actual_hash = '567fd840bf1548edc35c48eb66cdd78bfdfcccff'
# Reverse the actual hash
bad_hash = actual_hash[::-1]
@ -645,7 +642,7 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
Make sure that we exit gracefully when a local source doesn't exist
'''
name = os.path.join(TMP, 'local_source_does_not_exist')
local_path = os.path.join(FILES, 'file', 'base', 'grail', 'scene99')
local_path = os.path.join(BASE_FILES, 'grail', 'scene99')
for proto in ('file://', ''):
source = proto + local_path
@ -671,7 +668,7 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
name = os.path.join(TMP, 'source_hash_indifferent_case')
state_name = 'file_|-{0}_|' \
'-{0}_|-managed'.format(name)
local_path = os.path.join(FILES, 'file', 'base', 'hello_world.txt')
local_path = os.path.join(BASE_FILES, 'hello_world.txt')
actual_hash = 'c98c24b677eff44860afea6f493bbaec5bb1c4cbb209c6fc2bbb47f66ff2ad31'
uppercase_hash = actual_hash.upper()
@ -713,6 +710,29 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
if os.path.exists(name):
os.remove(name)
@with_tempfile(create=False)
def test_managed_latin1_diff(self, name):
'''
Tests that latin-1 file contents are represented properly in the diff
'''
# Lay down the initial file
ret = self.run_state(
'file.managed',
name=name,
source='salt://issue-48777/old.html')
ret = ret[next(iter(ret))]
assert ret['result'] is True, ret
# Replace it with the new file and check the diff
ret = self.run_state(
'file.managed',
name=name,
source='salt://issue-48777/new.html')
ret = ret[next(iter(ret))]
assert ret['result'] is True, ret
diff_lines = ret['changes']['diff'].split('\n')
assert '+räksmörgås' in diff_lines, diff_lines
def test_directory(self):
'''
file.directory
@ -921,7 +941,7 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
'''
state_name = 'file-FileTest-test_directory_clean_require_in'
state_filename = state_name + '.sls'
state_file = os.path.join(STATE_DIR, state_filename)
state_file = os.path.join(BASE_FILES, state_filename)
directory = tempfile.mkdtemp()
self.addCleanup(lambda: shutil.rmtree(directory))
@ -956,7 +976,7 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
'''
state_name = 'file-FileTest-test_directory_clean_require_in_with_id'
state_filename = state_name + '.sls'
state_file = os.path.join(STATE_DIR, state_filename)
state_file = os.path.join(BASE_FILES, state_filename)
directory = tempfile.mkdtemp()
self.addCleanup(lambda: shutil.rmtree(directory))
@ -992,7 +1012,7 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
'''
state_name = 'file-FileTest-test_directory_clean_require_in_with_id'
state_filename = state_name + '.sls'
state_file = os.path.join(STATE_DIR, state_filename)
state_file = os.path.join(BASE_FILES, state_filename)
directory = tempfile.mkdtemp()
self.addCleanup(lambda: shutil.rmtree(directory))

View file

@ -63,7 +63,10 @@ class FileReplaceTestCase(TestCase, LoaderModuleMockMixin):
'grains': {},
},
'__grains__': {'kernel': 'Linux'},
'__utils__': {'files.is_text': MagicMock(return_value=True)},
'__utils__': {
'files.is_text': MagicMock(return_value=True),
'stringutils.get_diff': salt.utils.stringutils.get_diff,
},
}
}
@ -243,7 +246,8 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin):
'__grains__': {'kernel': 'Linux'},
'__utils__': {
'files.is_binary': MagicMock(return_value=False),
'files.get_encoding': MagicMock(return_value='utf-8')
'files.get_encoding': MagicMock(return_value='utf-8'),
'stringutils.get_diff': salt.utils.stringutils.get_diff,
},
}
}
@ -540,7 +544,10 @@ class FileGrepTestCase(TestCase, LoaderModuleMockMixin):
'grains': {},
},
'__grains__': {'kernel': 'Linux'},
'__utils__': {'files.is_text': MagicMock(return_value=True)},
'__utils__': {
'files.is_text': MagicMock(return_value=True),
'stringutils.get_diff': salt.utils.stringutils.get_diff,
},
}
}
@ -641,7 +648,10 @@ class FileModuleTestCase(TestCase, LoaderModuleMockMixin):
'cachedir': 'tmp',
'grains': {},
},
'__grains__': {'kernel': 'Linux'}
'__grains__': {'kernel': 'Linux'},
'__utils__': {
'stringutils.get_diff': salt.utils.stringutils.get_diff,
},
}
}
@ -1020,7 +1030,10 @@ class FilemodLineTests(TestCase, LoaderModuleMockMixin):
'cachedir': 'tmp',
'grains': {},
},
'__grains__': {'kernel': 'Linux'}
'__grains__': {'kernel': 'Linux'},
'__utils__': {
'stringutils.get_diff': salt.utils.stringutils.get_diff,
},
}
}

View file

@ -18,6 +18,9 @@ STR = BYTES = UNICODE.encode('utf-8')
# code points. Do not modify it.
EGGS = '\u044f\u0438\u0306\u0446\u0430'
LATIN1_UNICODE = 'räksmörgås'
LATIN1_BYTES = LATIN1_UNICODE.encode('latin-1')
class StringutilsTestCase(TestCase):
def test_contains_whitespace(self):
@ -134,6 +137,13 @@ class StringutilsTestCase(TestCase):
'яйца'
)
self.assertEqual(
salt.utils.stringutils.to_unicode(
LATIN1_BYTES, encoding='latin-1'
),
LATIN1_UNICODE
)
if six.PY3:
self.assertEqual(salt.utils.stringutils.to_unicode('plugh'), 'plugh')
self.assertEqual(salt.utils.stringutils.to_unicode('áéíóúý'), 'áéíóúý')
@ -150,6 +160,10 @@ class StringutilsTestCase(TestCase):
with patch.object(builtins, '__salt_system_encoding__', 'CP1252'):
self.assertEqual(salt.utils.stringutils.to_unicode('Ψ'.encode('utf-8')), 'Ψ')
def test_to_unicode_multi_encoding(self):
result = salt.utils.stringutils.to_unicode(LATIN1_BYTES, encoding=('utf-8', 'latin1'))
assert result == LATIN1_UNICODE
def test_build_whitespace_split_regex(self):
expected_regex = '(?m)^(?:[\\s]+)?Lorem(?:[\\s]+)?ipsum(?:[\\s]+)?dolor(?:[\\s]+)?sit(?:[\\s]+)?amet\\,' \
'(?:[\\s]+)?$'