Better unicode handling in gitfs (#35802)

* Revert "Don't use six.text_type() in salt.utils.gitfs"

This reverts commit d76659a63a.

* salt.utils.path_join: handle mixed unicode/binary-unicode paths

* Add test for mixed unicode and binary-unicode paths

* salt.utils.gitfs: replace os.path.join with salt.utils.path_join
This commit is contained in:
Erik Johnson 2016-08-26 11:44:22 -05:00 committed by Nicole Thomas
parent 0ee237a9cb
commit eb4d2f299b
3 changed files with 104 additions and 71 deletions

View file

@ -855,18 +855,39 @@ def path_join(*parts):
# Normalize path converting any os.sep as needed
parts = [os.path.normpath(p) for p in parts]
root = parts.pop(0)
try:
root = parts.pop(0)
except IndexError:
# No args passed to func
return ''
if not parts:
return root
ret = root
else:
if is_windows():
if len(root) == 1:
root += ':'
root = root.rstrip(os.sep) + os.sep
if is_windows():
if len(root) == 1:
root += ':'
root = root.rstrip(os.sep) + os.sep
return os.path.normpath(os.path.join(
root, *[p.lstrip(os.sep) for p in parts]
))
stripped = [p.lstrip(os.sep) for p in parts]
try:
ret = os.path.join(root, *stripped)
except UnicodeDecodeError:
# This is probably Python 2 and one of the parts contains unicode
# characters in a bytestring. First try to decode to the system
# encoding.
try:
enc = __salt_system_encoding__
except NameError:
enc = sys.stdin.encoding or sys.getdefaultencoding()
try:
ret = os.path.join(root.decode(enc),
*[x.decode(enc) for x in stripped])
except UnicodeDecodeError:
# Last resort, try UTF-8
ret = os.path.join(root.decode('UTF-8'),
*[x.decode('UTF-8') for x in stripped])
return os.path.normpath(ret)
def pem_finger(path=None, key=None, sum_type='sha256'):

View file

@ -153,7 +153,7 @@ class GitProvider(object):
self.id = next(iter(remote))
self.get_url()
per_remote_conf = dict(
[(key, str(val)) for key, val in
[(key, six.text_type(val)) for key, val in
six.iteritems(salt.utils.repack_dictlist(remote[self.id]))]
)
if not per_remote_conf:
@ -261,7 +261,7 @@ class GitProvider(object):
hash_type = getattr(hashlib, self.opts.get('hash_type', 'md5'))
self.hash = hash_type(self.id).hexdigest()
self.cachedir_basename = getattr(self, 'name', self.hash)
self.cachedir = os.path.join(cache_root, self.cachedir_basename)
self.cachedir = salt.utils.path_join(cache_root, self.cachedir_basename)
if not os.path.isdir(self.cachedir):
os.makedirs(self.cachedir)
@ -305,7 +305,7 @@ class GitProvider(object):
return ret
def _get_lock_file(self, lock_type='update'):
return os.path.join(self.gitdir, lock_type + '.lk')
return salt.utils.path_join(self.gitdir, lock_type + '.lk')
def check_root(self):
'''
@ -313,7 +313,7 @@ class GitProvider(object):
remote. Return the full path to that relative root if it does exist,
otherwise return None.
'''
root_dir = os.path.join(self.cachedir, self.root).rstrip(os.sep)
root_dir = salt.utils.path_join(self.cachedir, self.root).rstrip(os.sep)
if os.path.isdir(root_dir):
return root_dir
log.error(
@ -718,7 +718,7 @@ class GitPython(GitProvider):
log.error(_INVALID_REPO.format(self.cachedir, self.url, self.role))
return new
self.gitdir = os.path.join(self.repo.working_dir, '.git')
self.gitdir = salt.utils.path_join(self.repo.working_dir, '.git')
if not self.repo.remotes:
try:
@ -753,7 +753,7 @@ class GitPython(GitProvider):
relpath = lambda path: os.path.relpath(path, self.root)
else:
relpath = lambda path: path
add_mountpoint = lambda path: os.path.join(self.mountpoint, path)
add_mountpoint = lambda path: salt.utils.path_join(self.mountpoint, path)
for blob in tree.traverse():
if isinstance(blob, git.Tree):
ret.add(add_mountpoint(relpath(blob.path)))
@ -829,7 +829,7 @@ class GitPython(GitProvider):
relpath = lambda path: os.path.relpath(path, self.root)
else:
relpath = lambda path: path
add_mountpoint = lambda path: os.path.join(self.mountpoint, path)
add_mountpoint = lambda path: salt.utils.path_join(self.mountpoint, path)
for file_blob in tree.traverse():
if not isinstance(file_blob, git.Blob):
continue
@ -871,9 +871,7 @@ class GitPython(GitProvider):
stream.seek(0)
link_tgt = stream.read()
stream.close()
path = os.path.normpath(
os.path.join(os.path.dirname(path), link_tgt)
)
path = salt.utils.path_join(os.path.dirname(path), link_tgt)
else:
blob = file_blob
break
@ -1201,7 +1199,7 @@ class Pygit2(GitProvider):
log.error(_INVALID_REPO.format(self.cachedir, self.url, self.role))
return new
self.gitdir = os.path.join(self.repo.workdir, '.git')
self.gitdir = salt.utils.path_join(self.repo.workdir, '.git')
if not self.repo.remotes:
try:
@ -1242,9 +1240,9 @@ class Pygit2(GitProvider):
blob = self.repo[entry.oid]
if not isinstance(blob, pygit2.Tree):
continue
blobs.append(os.path.join(prefix, entry.name))
blobs.append(salt.utils.path_join(prefix, entry.name))
if len(blob):
_traverse(blob, blobs, os.path.join(prefix, entry.name))
_traverse(blob, blobs, salt.utils.path_join(prefix, entry.name))
ret = set()
tree = self.get_tree(tgt_env)
@ -1264,7 +1262,7 @@ class Pygit2(GitProvider):
blobs = []
if len(tree):
_traverse(tree, blobs, self.root)
add_mountpoint = lambda path: os.path.join(self.mountpoint, path)
add_mountpoint = lambda path: salt.utils.path_join(self.mountpoint, path)
for blob in blobs:
ret.add(add_mountpoint(relpath(blob)))
if self.mountpoint:
@ -1352,13 +1350,13 @@ class Pygit2(GitProvider):
continue
obj = self.repo[entry.oid]
if isinstance(obj, pygit2.Blob):
repo_path = os.path.join(prefix, entry.name)
repo_path = salt.utils.path_join(prefix, entry.name)
blobs.setdefault('files', []).append(repo_path)
if stat.S_ISLNK(tree[entry.name].filemode):
link_tgt = self.repo[tree[entry.name].oid].data
blobs.setdefault('symlinks', {})[repo_path] = link_tgt
elif isinstance(obj, pygit2.Tree):
_traverse(obj, blobs, os.path.join(prefix, entry.name))
_traverse(obj, blobs, salt.utils.path_join(prefix, entry.name))
files = set()
symlinks = {}
@ -1382,7 +1380,7 @@ class Pygit2(GitProvider):
blobs = {}
if len(tree):
_traverse(tree, blobs, self.root)
add_mountpoint = lambda path: os.path.join(self.mountpoint, path)
add_mountpoint = lambda path: salt.utils.path_join(self.mountpoint, path)
for repo_path in blobs.get('files', []):
files.add(add_mountpoint(relpath(repo_path)))
for repo_path, link_tgt in six.iteritems(blobs.get('symlinks', {})):
@ -1411,9 +1409,7 @@ class Pygit2(GitProvider):
# the symlink and set path to the location indicated
# in the blob data.
link_tgt = self.repo[tree[path].oid].data
path = os.path.normpath(
os.path.join(os.path.dirname(path), link_tgt)
)
path = salt.utils.path_join(os.path.dirname(path), link_tgt)
else:
oid = tree[path].oid
blob = self.repo[oid]
@ -1596,9 +1592,9 @@ class Dulwich(GitProvider): # pylint: disable=abstract-method
continue
if not isinstance(obj, dulwich.objects.Tree):
continue
blobs.append(os.path.join(prefix, item.path))
blobs.append(salt.utils.path_join(prefix, item.path))
if len(self.repo.get_object(item.sha)):
_traverse(obj, blobs, os.path.join(prefix, item.path))
_traverse(obj, blobs, salt.utils.path_join(prefix, item.path))
ret = set()
tree = self.get_tree(tgt_env)
@ -1612,7 +1608,7 @@ class Dulwich(GitProvider): # pylint: disable=abstract-method
relpath = lambda path: os.path.relpath(path, self.root)
else:
relpath = lambda path: path
add_mountpoint = lambda path: os.path.join(self.mountpoint, path)
add_mountpoint = lambda path: salt.utils.path_join(self.mountpoint, path)
for blob in blobs:
ret.add(add_mountpoint(relpath(blob)))
if self.mountpoint:
@ -1713,14 +1709,14 @@ class Dulwich(GitProvider): # pylint: disable=abstract-method
# Entry is a submodule, skip it
continue
if isinstance(obj, dulwich.objects.Blob):
repo_path = os.path.join(prefix, item.path)
repo_path = salt.utils.path_join(prefix, item.path)
blobs.setdefault('files', []).append(repo_path)
mode, oid = tree[item.path]
if stat.S_ISLNK(mode):
link_tgt = self.repo.get_object(oid).as_raw_string()
blobs.setdefault('symlinks', {})[repo_path] = link_tgt
elif isinstance(obj, dulwich.objects.Tree):
_traverse(obj, blobs, os.path.join(prefix, item.path))
_traverse(obj, blobs, salt.utils.path_join(prefix, item.path))
files = set()
symlinks = {}
@ -1735,7 +1731,7 @@ class Dulwich(GitProvider): # pylint: disable=abstract-method
relpath = lambda path: os.path.relpath(path, self.root)
else:
relpath = lambda path: path
add_mountpoint = lambda path: os.path.join(self.mountpoint, path)
add_mountpoint = lambda path: salt.utils.path_join(self.mountpoint, path)
for repo_path in blobs.get('files', []):
files.add(add_mountpoint(relpath(repo_path)))
for repo_path, link_tgt in six.iteritems(blobs.get('symlinks', {})):
@ -1770,9 +1766,7 @@ class Dulwich(GitProvider): # pylint: disable=abstract-method
# symlink. Follow the symlink and set path to the
# location indicated in the blob data.
link_tgt = self.repo.get_object(oid).as_raw_string()
path = os.path.normpath(
os.path.join(os.path.dirname(path), link_tgt)
)
path = salt.utils.path_join(os.path.dirname(path), link_tgt)
else:
blob = self.repo.get_object(oid)
break
@ -1786,7 +1780,7 @@ class Dulwich(GitProvider): # pylint: disable=abstract-method
Returns a dulwich.config.ConfigFile object for the specified repo
'''
return dulwich.config.ConfigFile().from_path(
os.path.join(self.repo.controldir(), 'config')
salt.utils.path_join(self.repo.controldir(), 'config')
)
def get_remote_url(self, repo):
@ -1903,7 +1897,7 @@ class Dulwich(GitProvider): # pylint: disable=abstract-method
log.error(_INVALID_REPO.format(self.cachedir, self.url, self.role))
return new
self.gitdir = os.path.join(self.repo.path, '.git')
self.gitdir = salt.utils.path_join(self.repo.path, '.git')
# Read in config file and look for the remote
try:
@ -1968,11 +1962,11 @@ class GitBase(object):
if cache_root is not None:
self.cache_root = cache_root
else:
self.cache_root = os.path.join(self.opts['cachedir'], self.role)
self.env_cache = os.path.join(self.cache_root, 'envs.p')
self.hash_cachedir = os.path.join(
self.cache_root = salt.utils.path_join(self.opts['cachedir'], self.role)
self.env_cache = salt.utils.path_join(self.cache_root, 'envs.p')
self.hash_cachedir = salt.utils.path_join(
self.cache_root, 'hash')
self.file_list_cachedir = os.path.join(
self.file_list_cachedir = salt.utils.path_join(
self.opts['cachedir'], 'file_lists', self.role)
def init_remotes(self, remotes, per_remote_overrides):
@ -2016,7 +2010,7 @@ class GitBase(object):
'a bug, please report it.'.format(key)
)
failhard(self.role)
per_remote_defaults[param] = str(self.opts[key])
per_remote_defaults[param] = six.text_type(self.opts[key])
self.remotes = []
for remote in remotes:
@ -2077,7 +2071,7 @@ class GitBase(object):
for item in cachedir_ls:
if item in ('hash', 'refs'):
continue
path = os.path.join(self.cache_root, item)
path = salt.utils.path_join(self.cache_root, item)
if os.path.isdir(path):
to_remove.append(path)
failed = []
@ -2132,7 +2126,7 @@ class GitBase(object):
continue
except TypeError:
# remote was non-string, try again
if not fnmatch.fnmatch(repo.url, str(remote)):
if not fnmatch.fnmatch(repo.url, six.text_type(remote)):
continue
success, failed = repo.clear_lock(lock_type=lock_type)
cleared.extend(success)
@ -2177,7 +2171,7 @@ class GitBase(object):
continue
except TypeError:
# remote was non-string, try again
if not fnmatch.fnmatch(repo.url, str(remote)):
if not fnmatch.fnmatch(repo.url, six.text_type(remote)):
continue
success, failed = repo.lock()
locked.extend(success)
@ -2444,7 +2438,7 @@ class GitBase(object):
'''
Write the remote_map.txt
'''
remote_map = os.path.join(self.cache_root, 'remote_map.txt')
remote_map = salt.utils.path_join(self.cache_root, 'remote_map.txt')
try:
with salt.utils.fopen(remote_map, 'w+') as fp_:
timestamp = \
@ -2546,16 +2540,16 @@ class GitFS(GitBase):
(not salt.utils.is_hex(tgt_env) and tgt_env not in self.envs()):
return fnd
dest = os.path.join(self.cache_root, 'refs', tgt_env, path)
hashes_glob = os.path.join(self.hash_cachedir,
tgt_env,
'{0}.hash.*'.format(path))
blobshadest = os.path.join(self.hash_cachedir,
tgt_env,
'{0}.hash.blob_sha1'.format(path))
lk_fn = os.path.join(self.hash_cachedir,
tgt_env,
'{0}.lk'.format(path))
dest = salt.utils.path_join(self.cache_root, 'refs', tgt_env, path)
hashes_glob = salt.utils.path_join(self.hash_cachedir,
tgt_env,
'{0}.hash.*'.format(path))
blobshadest = salt.utils.path_join(self.hash_cachedir,
tgt_env,
'{0}.hash.blob_sha1'.format(path))
lk_fn = salt.utils.path_join(self.hash_cachedir,
tgt_env,
'{0}.lk'.format(path))
destdir = os.path.dirname(dest)
hashdir = os.path.dirname(blobshadest)
if not os.path.isdir(destdir):
@ -2579,7 +2573,7 @@ class GitFS(GitBase):
continue
repo_path = path[len(repo.mountpoint):].lstrip(os.path.sep)
if repo.root:
repo_path = os.path.join(repo.root, repo_path)
repo_path = salt.utils.path_join(repo.root, repo_path)
blob, blob_hexsha = repo.find_file(repo_path, tgt_env)
if blob is None:
@ -2669,10 +2663,10 @@ class GitFS(GitBase):
ret = {'hash_type': self.opts['hash_type']}
relpath = fnd['rel']
path = fnd['path']
hashdest = os.path.join(self.hash_cachedir,
load['saltenv'],
'{0}.hash.{1}'.format(relpath,
self.opts['hash_type']))
hashdest = salt.utils.path_join(self.hash_cachedir,
load['saltenv'],
'{0}.hash.{1}'.format(relpath,
self.opts['hash_type']))
if not os.path.isfile(hashdest):
if not os.path.exists(os.path.dirname(hashdest)):
os.makedirs(os.path.dirname(hashdest))
@ -2707,11 +2701,11 @@ class GitFS(GitBase):
)
)
return []
list_cache = os.path.join(
list_cache = salt.utils.path_join(
self.file_list_cachedir,
'{0}.p'.format(load['saltenv'].replace(os.path.sep, '_|-'))
)
w_lock = os.path.join(
w_lock = salt.utils.path_join(
self.file_list_cachedir,
'.{0}.w'.format(load['saltenv'].replace(os.path.sep, '_|-'))
)

View file

@ -17,7 +17,8 @@ import platform
import tempfile
# Import Salt Testing libs
from salttesting import TestCase
import salt.utils
from salttesting import TestCase, skipIf
from salttesting.helpers import ensure_in_syspath
ensure_in_syspath('../../')
@ -30,9 +31,6 @@ import salt.ext.six as six
class PathJoinTestCase(TestCase):
def setUp(self):
self.skipTest('Skipped until properly mocked')
PLATFORM_FUNC = platform.system
BUILTIN_MODULES = sys.builtin_module_names
@ -53,6 +51,7 @@ class PathJoinTestCase(TestCase):
(('c', r'\temp', r'\foo\bar'), 'c:\\temp\\foo\\bar')
)
@skipIf(True, 'Skipped until properly mocked')
def test_nix_paths(self):
if platform.system().lower() == "windows":
self.skipTest(
@ -65,6 +64,7 @@ class PathJoinTestCase(TestCase):
'{0}: {1}'.format(idx, expected)
)
@skipIf(True, 'Skipped until properly mocked')
def test_windows_paths(self):
if platform.system().lower() != "windows":
self.skipTest(
@ -79,6 +79,7 @@ class PathJoinTestCase(TestCase):
'{0}: {1}'.format(idx, expected)
)
@skipIf(True, 'Skipped until properly mocked')
def test_windows_paths_patched_path_module(self):
if platform.system().lower() == "windows":
self.skipTest(
@ -97,6 +98,23 @@ class PathJoinTestCase(TestCase):
self.__unpatch_path()
@skipIf(salt.utils.is_windows(), '*nix-only test')
def test_mixed_unicode_and_binary(self):
'''
This tests joining paths that contain a mix of components with unicode
strings and non-unicode strings with the unicode characters as binary.
This is no longer something we need to concern ourselves with in
Python 3, but the test should nonetheless pass on Python 3. Really what
we're testing here is that we don't get a UnicodeDecodeError when
running on Python 2.
'''
a = u'/foo/bar'
b = 'Д'
expected = u'/foo/bar/\u0414'
actual = path_join(a, b)
self.assertEqual(actual, expected)
def __patch_path(self):
import imp
modules = list(self.BUILTIN_MODULES[:])