Remove to_encoding, create get_encoding

Use io.open so you can pass an encoding
twangboy 2018-05-16 12:21:33 -06:00
parent 68be0f9ed2
commit 9f369d3f22
3 changed files with 112 additions and 48 deletions
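
The core of the change: read with io.open and an explicit encoding instead of fileinput in binary mode, and reuse that encoding when writing back. A minimal sketch of the pattern, assuming a made-up path and encoding value rather than anything taken from the diff below:

import io

# Illustrative values only; in blockreplace the path is an argument and the
# encoding comes from __utils__['files.get_encoding'].
path = 'example.conf'
file_encoding = 'utf-16-le'

# Text mode with an explicit encoding decodes each line to unicode on read...
with io.open(path, mode='r', encoding=file_encoding, newline='') as fh_:
    lines = fh_.readlines()

# ...and the same encoding is applied when the lines are written back out.
with io.open(path, mode='w', encoding=file_encoding, newline='') as fh_:
    fh_.writelines(lines)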

salt/modules/file.py

@@ -14,8 +14,8 @@ from __future__ import absolute_import, print_function, unicode_literals
import datetime
import difflib
import errno
import fileinput
import fnmatch
import io
import itertools
import logging
import operator
@@ -2535,17 +2535,17 @@ def blockreplace(path,
if not os.path.exists(path):
raise SaltInvocationError('File not found: {0}'.format(path))
try:
file_encoding = __utils__['files.get_encoding'](path)
except CommandExecutionError:
file_encoding = None
if __utils__['files.is_binary'](path):
# it may be a utf-8 or utf-16 encoded file
for encoding in ['utf-16-le', 'utf-8', 'utf-16']:
if __utils__['files.is_encoding'](path, encoding):
log.debug('Found "{0}" encoding'.format(encoding))
break
else:
if not file_encoding:
raise SaltInvocationError(
'Cannot perform string replacements on a binary file: {0}'
.format(path)
)
)
if append_newline is None and not content.endswith((os.linesep, '\n')):
append_newline = True
@@ -2602,23 +2602,12 @@ def blockreplace(path,
#
# We could also use salt.utils.filebuffer.BufferedReader
try:
fi_file = fileinput.input(
path,
inplace=False,
backup=False,
bufsize=1,
mode='rb')
fi_file = io.open(path, mode='r', encoding=file_encoding)
for line in fi_file:
line = salt.utils.stringutils.to_unicode(line)
write_line_to_new_file = True
if linesep is None:
# Auto-detect line separator
# utf-16 encodings have \x00 between each character
if line.endswith('\r\x00\n'):
linesep = '\r\n'
elif line.endswith('\r\n'):
if line.endswith('\r\n'):
linesep = '\r\n'
elif line.endswith('\n'):
linesep = '\n'
@@ -2718,7 +2707,7 @@ def blockreplace(path,
try:
fh_ = salt.utils.atomicfile.atomic_open(path, 'wb')
for line in new_file:
fh_.write(salt.utils.stringutils.to_bytes(line))
fh_.write(salt.utils.stringutils.to_bytes(line, encoding=file_encoding))
finally:
fh_.close()
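
Taken together, the hunks above read the file as text in its detected encoding, auto-detect the line separator from the decoded lines, and encode each line back to bytes in that same encoding on write. A standalone sketch of that flow using only the standard library (the helper name is invented; Salt's version goes through salt.utils.stringutils.to_bytes and atomic_open):

import io

def rewrite_with_detected_linesep(path, file_encoding):
    # newline='' keeps '\r\n' endings intact so they can be detected rather
    # than being normalized to '\n' by universal-newline translation.
    linesep = None
    new_file = []
    with io.open(path, mode='r', encoding=file_encoding, newline='') as fi_file:
        for line in fi_file:
            if linesep is None:
                if line.endswith('\r\n'):
                    linesep = '\r\n'
                elif line.endswith('\n'):
                    linesep = '\n'
            new_file.append(line)
    # Write bytes back, encoding each line with the detected file encoding.
    with io.open(path, mode='wb') as fh_:
        for line in new_file:
            fh_.write(line.encode(file_encoding))
    return linesep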

salt/modules/win_file.py

@@ -30,7 +30,7 @@ import shutil # do not remove, used in imported file.py functions
import re # do not remove, used in imported file.py functions
import string # do not remove, used in imported file.py functions
import sys # do not remove, used in imported file.py functions
import fileinput # do not remove, used in imported file.py functions
import io # do not remove, used in imported file.py functions
import fnmatch # do not remove, used in imported file.py functions
import mmap # do not remove, used in imported file.py functions
import glob # do not remove, used in imported file.py functions

salt/utils/files.py

@@ -6,6 +6,7 @@ Functions for working with files
from __future__ import absolute_import, unicode_literals, print_function
# Import Python libs
import codecs
import contextlib
import errno
import logging
@@ -616,31 +617,6 @@ def safe_filepath(file_path_name, dir_sep=None):
return path
def is_encoding(path, encoding="utf-8"):
'''
Detect if the file can be successfully decoded by the passed encoding
Args:
path (str): The path to the file to check
encoding (str): The encoding to test
Return:
bool: True if successful, otherwise False
'''
if not os.path.isfile(path):
return False
try:
with fopen(path, 'rb') as fp_:
try:
data = fp_.read(2048)
data.decode(encoding)
return True
except UnicodeDecodeError:
return False
except os.error:
return False
@jinja_filter('is_text_file')
def is_text(fp_, blocksize=512):
'''
@@ -802,3 +778,102 @@ def backup_minion(path, bkroot):
if not salt.utils.platform.is_windows():
os.chown(bkpath, fstat.st_uid, fstat.st_gid)
os.chmod(bkpath, fstat.st_mode)
def get_encoding(path):
'''
Detect a file's encoding using the following:
- Check for ascii
- Check for Byte Order Marks (BOM)
- Check for UTF-8 Markers
- Check System Encoding
Args:
path (str): The path to the file to check
Returns:
str: The encoding of the file
Raises:
CommandExecutionError: If the encoding cannot be detected
'''
def check_ascii(_data):
# If all characters can be decoded to ASCII, then it's ASCII
try:
_data.decode('ASCII')
log.debug('Found ASCII')
except UnicodeDecodeError:
return False
else:
return True
def check_bom(_data):
# Supported Python Codecs
# https://docs.python.org/2/library/codecs.html
# https://docs.python.org/3/library/codecs.html
boms = [
('UTF-32-BE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF32_BE)),
('UTF-32-LE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF32_LE)),
('UTF-16-BE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF16_BE)),
('UTF-16-LE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF16_LE)),
('UTF-8', salt.utils.stringutils.to_bytes(codecs.BOM_UTF8)),
('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x38\x2D')),
('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x38')),
('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x39')),
('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x2b')),
('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x2f')),
]
for _encoding, bom in boms:
if _data.startswith(bom):
log.debug('Found BOM for {0}'.format(_encoding))
return _encoding
return False
def check_utf8_markers(_data):
try:
decoded = _data.decode('UTF-8')
except UnicodeDecodeError:
return False
else:
# Reject surrogate characters in Py2 (Py3 behavior)
if six.PY2:
for char in decoded:
if 0xD800 <= ord(char) <= 0xDFFF:
return False
return True
def check_system_encoding(_data):
try:
_data.decode(__salt_system_encoding__)
except UnicodeDecodeError:
return False
else:
return True
if not os.path.isfile(path):
raise CommandExecutionError('Not a file')
try:
with fopen(path, 'rb') as fp_:
data = fp_.read(2048)
except os.error:
raise CommandExecutionError('Failed to open file')
# Check for ASCII first
if check_ascii(data):
return 'ASCII'
# Check for Unicode BOM
encoding = check_bom(data)
if encoding:
return encoding
# Check for UTF-8 markers
if check_utf8_markers(data):
return 'UTF-8'
# Check system encoding
if check_system_encoding(data):
return __salt_system_encoding__
raise CommandExecutionError('Could not detect file encoding')
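
A small standalone check of the BOM branch above; the temporary file and sample text are invented for the example:

import codecs
import os
import tempfile

# Write a UTF-16-LE file with a BOM and peek at its first bytes the way
# check_bom() does. UTF-32 BOMs are listed before UTF-16 in the table above
# because the UTF-16-LE BOM is a prefix of the UTF-32-LE BOM, so the longer
# marker has to be tested first.
text = u'example data'
fd, tmp_path = tempfile.mkstemp()
os.close(fd)
try:
    with open(tmp_path, 'wb') as fh_:
        fh_.write(codecs.BOM_UTF16_LE + text.encode('utf-16-le'))
    with open(tmp_path, 'rb') as fh_:
        data = fh_.read(2048)
    assert not data.startswith(codecs.BOM_UTF32_LE)  # longer BOM does not match
    assert data.startswith(codecs.BOM_UTF16_LE)      # so UTF-16-LE is reported
finally:
    os.remove(tmp_path)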