Fix decoding of broken strings from remote sources

Fixes #38070
Suggest replacing bytes that cannot be decoded with the special replacement character (U+FFFD).
This would simplify interaction with broken data in the real world.
https://docs.python.org/3/howto/unicode.html#the-unicode-type
This commit is contained in:
Alexey Aksenov 2016-12-05 18:24:32 +03:00
parent 0af343e71f
commit 70c8db5489
2 changed files with 7 additions and 1 deletions

View file

@ -2977,7 +2977,9 @@ def to_str(s, encoding=None):
return s
if six.PY3:
if isinstance(s, (bytes, bytearray)):
return s.decode(encoding or __salt_system_encoding__)
# https://docs.python.org/3/howto/unicode.html#the-unicode-type
# replace error with U+FFFD, REPLACEMENT CHARACTER
return s.decode(encoding or __salt_system_encoding__, "replace")
raise TypeError('expected str, bytes, or bytearray')
else:
if isinstance(s, bytearray):

View file

@ -768,6 +768,10 @@ class UtilsTestCase(TestCase):
ut = bytes((0xe4, 0xb8, 0xad, 0xe5, 0x9b, 0xbd, 0xe8, 0xaa, 0x9e, 0x20, 0x28, 0xe7, 0xb9, 0x81, 0xe4, 0xbd, 0x93, 0x29))
self.assertEqual(utils.to_str(ut, 'utf-8'), un)
self.assertEqual(utils.to_str(bytearray(ut), 'utf-8'), un)
# Test the case where a minion returns an invalid UTF-8 string (this can happen for any number of reasons)
ut2 = b'\x9c'
self.assertEqual(utils.to_str(ut2, 'utf-8'), u'\ufffd')
self.assertEqual(utils.to_str(bytearray(ut2), 'utf-8'), u'\ufffd')
else:
self.assertEqual(utils.to_str('plugh'), 'plugh')
self.assertEqual(utils.to_str(u'áéíóúý', 'utf-8'), 'áéíóúý')