Merge pull request #38832 from terminalmage/issue38711

archive.extracted: Identify symlinks when checking for incorrect types
2025-04-17 10:10:20 +00:00 · 2017-01-20 11:36:14 -07:00 · 2017-01-20 11:36:14 -07:00 · efe1bf10e8
commit efe1bf10e8
parent 10a3d8b8dd d10c068e25
4 changed files with 190 additions and 107 deletions
--- a/salt/modules/archive.py
+++ b/salt/modules/archive.py
@@ -11,13 +11,21 @@ import logging
 import os
 import re
 import shlex
+import stat
 import tarfile
+import tempfile
 import zipfile
 try:
    from shlex import quote as _quote  # pylint: disable=E0611
 except ImportError:
    from pipes import quote as _quote

+try:
+    import rarfile
+    HAS_RARFILE = True
+except ImportError:
+    HAS_RARFILE = False
+
 # Import salt libs
 from salt.exceptions import SaltInvocationError, CommandExecutionError
 from salt.ext.six import string_types, integer_types
@@ -45,6 +53,13 @@ def list_(name,
          saltenv='base'):
    '''
    .. versionadded:: 2016.11.0
+    .. versionchanged:: 2016.11.2
+        The rarfile_ Python module is now supported for listing the contents of
+        rar archives. This is necessary on minions with older releases of the
+        ``rar`` CLI tool, which do not support listing the contents in a
+        parsable format.
+
+    .. _rarfile: https://pypi.python.org/pypi/rarfile

    List the files and directories in an tar, zip, or rar archive.

@@ -115,6 +130,10 @@ def list_(name,
        paths into separate keys containing the directory names, file names,
        and also directories/files present in the top level of the archive.

+        .. versionchanged:: 2016.11.2
+            This option now includes symlinks in their own list. Before, they
+            were included with files.
+
    saltenv : base
        Specifies the fileserver environment from which to retrieve
        ``archive``. This is only applicable when ``archive`` is a file from
@@ -133,45 +152,64 @@ def list_(name,
            salt '*' archive.list https://domain.tld/myfile.zip
            salt '*' archive.list ftp://10.1.2.3/foo.rar
    '''
-    def _list_tar(name, cached, decompress_cmd):
+    def _list_tar(name, cached, decompress_cmd, failhard=False):
+        dirs = []
+        files = []
+        links = []
        try:
            with contextlib.closing(tarfile.open(cached)) as tar_archive:
-                return [
-                    x.name + '/' if x.isdir() else x.name
-                    for x in tar_archive.getmembers()
-                ]
-        except tarfile.ReadError:
-            if not salt.utils.which('tar'):
-                raise CommandExecutionError('\'tar\' command not available')
-            if decompress_cmd is not None:
-                # Guard against shell injection
-                try:
-                    decompress_cmd = ' '.join(
-                        [_quote(x) for x in shlex.split(decompress_cmd)]
-                    )
-                except AttributeError:
-                    raise CommandExecutionError('Invalid CLI options')
-            else:
-                if salt.utils.which('xz') \
-                        and __salt__['cmd.retcode'](['xz', '-l', cached],
-                                                    python_shell=False,
-                                                    ignore_retcode=True) == 0:
-                    decompress_cmd = 'xz --decompress --stdout'
+                for member in tar_archive.getmembers():
+                    if member.issym():
+                        links.append(member.name)
+                    elif member.isdir():
+                        dirs.append(member.name + '/')
+                    else:
+                        files.append(member.name)
+            return dirs, files, links

-            if decompress_cmd:
-                cmd = '{0} {1} | tar tf -'.format(decompress_cmd, _quote(cached))
-                result = __salt__['cmd.run_all'](cmd, python_shell=True)
-                if result['retcode'] != 0:
-                    raise CommandExecutionError(
-                        'Failed to decompress {0}'.format(name),
-                        info={'error': result['stderr']}
-                    )
-                ret = []
-                for line in salt.utils.itertools.split(result['stdout'], '\n'):
-                    line = line.strip()
-                    if line:
-                        ret.append(line)
-                return ret
+        except tarfile.ReadError:
+            if not failhard:
+                if not salt.utils.which('tar'):
+                    raise CommandExecutionError('\'tar\' command not available')
+                if decompress_cmd is not None:
+                    # Guard against shell injection
+                    try:
+                        decompress_cmd = ' '.join(
+                            [_quote(x) for x in shlex.split(decompress_cmd)]
+                        )
+                    except AttributeError:
+                        raise CommandExecutionError('Invalid CLI options')
+                else:
+                    if salt.utils.which('xz') \
+                            and __salt__['cmd.retcode'](['xz', '-l', cached],
+                                                        python_shell=False,
+                                                        ignore_retcode=True) == 0:
+                        decompress_cmd = 'xz --decompress --stdout'
+
+                if decompress_cmd:
+                    fd, decompressed = tempfile.mkstemp()
+                    os.close(fd)
+                    try:
+                        cmd = '{0} {1} > {2}'.format(decompress_cmd,
+                                                     _quote(cached),
+                                                     _quote(decompressed))
+                        result = __salt__['cmd.run_all'](cmd, python_shell=True)
+                        if result['retcode'] != 0:
+                            raise CommandExecutionError(
+                                'Failed to decompress {0}'.format(name),
+                                info={'error': result['stderr']}
+                            )
+                        return _list_tar(name, decompressed, None, True)
+                    finally:
+                        try:
+                            os.remove(decompressed)
+                        except OSError as exc:
+                            if exc.errno != errno.ENOENT:
+                                log.warning(
+                                    'Failed to remove intermediate '
+                                    'decompressed archive %s: %s',
+                                    decompressed, exc.__str__()
+                                )

        raise CommandExecutionError(
            'Unable to list contents of {0}. If this is an XZ-compressed tar '
@@ -182,31 +220,64 @@ def list_(name,
        )

    def _list_zip(name, cached):
-        # Password-protected ZIP archives can still be listed by zipfile, so
-        # there is no reason to invoke the unzip command.
+        '''
+        Password-protected ZIP archives can still be listed by zipfile, so
+        there is no reason to invoke the unzip command.
+        '''
+        dirs = []
+        files = []
+        links = []
        try:
            with contextlib.closing(zipfile.ZipFile(cached)) as zip_archive:
-                return zip_archive.namelist()
+                for member in zip_archive.infolist():
+                    mode = member.external_attr >> 16
+                    path = member.filename
+                    if stat.S_ISLNK(mode):
+                        links.append(path)
+                    elif stat.S_ISDIR(mode):
+                        dirs.append(path)
+                    else:
+                        files.append(path)
+            return dirs, files, links
        except zipfile.BadZipfile:
            raise CommandExecutionError('{0} is not a ZIP file'.format(name))

    def _list_rar(name, cached):
-        if not salt.utils.which('rar'):
-            raise CommandExecutionError(
-                'rar command not available, is it installed?'
-            )
-        output = __salt__['cmd.run'](
-            ['rar', 'lt', path],
-            python_shell=False,
-            ignore_retcode=False)
-        matches = re.findall(r'Name:\s*([^\n]+)\s*Type:\s*([^\n]+)', output)
-        ret = [x + '/' if y == 'Directory' else x for x, y in matches]
-        if not ret:
-            raise CommandExecutionError(
-                'Failed to list {0}, is it a rar file?'.format(name),
-                info={'error': output}
-            )
-        return ret
+        dirs = []
+        files = []
+        if HAS_RARFILE:
+            with rarfile.RarFile(cached) as rf:
+                for member in rf.infolist():
+                    path = member.filename.replace('\\', '/')
+                    if member.isdir():
+                        dirs.append(path + '/')
+                    else:
+                        files.append(path)
+        else:
+            if not salt.utils.which('rar'):
+                raise CommandExecutionError(
+                    'rar command not available, is it installed?'
+                )
+            output = __salt__['cmd.run'](
+                ['rar', 'lt', name],
+                python_shell=False,
+                ignore_retcode=False)
+            matches = re.findall(r'Name:\s*([^\n]+)\s*Type:\s*([^\n]+)', output)
+            for path, type_ in matches:
+                if type_ == 'Directory':
+                    dirs.append(path + '/')
+                else:
+                    files.append(path)
+            if not dirs and not files:
+                raise CommandExecutionError(
+                    'Failed to list {0}, is it a rar file? If so, the '
+                    'installed version of rar may be too old to list data in '
+                    'a parsable format. Installing the rarfile Python module '
+                    'may be an easier workaround if newer rar is not readily '
+                    'available.'.format(name),
+                    info={'error': output}
+                )
+        return dirs, files, []

    cached = __salt__['cp.cache_file'](name, saltenv)
    if not cached:
@@ -249,7 +320,7 @@ def list_(name,

        args = (options,) if archive_format == 'tar' else ()
        try:
-            ret = func(name, cached, *args)
+            dirs, files, links = func(name, cached, *args)
        except (IOError, OSError) as exc:
            raise CommandExecutionError(
                'Failed to list contents of {0}: {1}'.format(
@@ -276,38 +347,33 @@ def list_(name,
                    )

        if strip_components:
-            stripped_ret = []
-            for item in ret:
-                try:
-                    # Strip off the specified number of directory boundaries,
-                    # and grab what comes after the last stripped path
-                    # separator.
-                    stripped_item = item.split(
+            for item in (dirs, files, links):
+                for index, path in enumerate(item):
+                    try:
+                        # Strip off the specified number of directory
+                        # boundaries, and grab what comes after the last
+                        # stripped path separator.
+                        item[index] = item[index].split(
                            os.sep, strip_components)[strip_components]
-                    if stripped_item:
-                        stripped_ret.append(stripped_item)
-                except IndexError:
-                    # Path is excluded by strip_components because it is not
-                    # deep enough.
-                    pass
-            ret = stripped_ret
+                    except IndexError:
+                        # Path is excluded by strip_components because it is not
+                        # deep enough. Set this to an empty string so it can
+                        # be removed in the generator expression below.
+                        item[index] = ''
+
+                # Remove all paths which were excluded
+                item[:] = (x for x in item if x)
+                item.sort()

        if verbose:
-            verbose_ret = {'dirs': [],
-                           'files': [],
-                           'top_level_dirs': [],
-                           'top_level_files': []}
-            for item in ret:
-                if item.endswith('/'):
-                    verbose_ret['dirs'].append(item)
-                    if item.count('/') == 1:
-                        verbose_ret['top_level_dirs'].append(item)
-                else:
-                    verbose_ret['files'].append(item)
-                    if item.count('/') == 0:
-                        verbose_ret['top_level_files'].append(item)
-            ret = verbose_ret
+            ret = {'dirs': dirs, 'files': files, 'links': links}
+            ret['top_level_dirs'] = [x for x in dirs if x.count('/') == 1]
+            ret['top_level_files'] = [x for x in files if x.count('/') == 0]
+            ret['top_level_links'] = [x for x in links if x.count('/') == 0]
+        else:
+            ret = sorted(dirs + files + links)
        return ret
+
    except CommandExecutionError as exc:
        # Reraise with cache path in the error so that the user can examine the
        # cached archive for troubleshooting purposes.
@@ -923,7 +989,7 @@ def unzip(zip_file,
                    if salt.utils.is_windows() is False:
                        info = zfile.getinfo(target)
                        # Check if zipped file is a symbolic link
-                        if info.external_attr == 2716663808:
+                        if stat.S_ISLNK(info.external_attr >> 16):
                            source = zfile.read(target)
                            os.symlink(source, os.path.join(dest, target))
                            continue
--- a/salt/states/archive.py
+++ b/salt/states/archive.py
@@ -1009,7 +1009,7 @@ def extracted(name,
                          ))
        return ret

-    # Check to see if we need to extract the archive. Using os.stat() in a
+    # Check to see if we need to extract the archive. Using os.lstat() in a
    # try/except is considerably faster than using os.path.exists(), and we
    # already need to catch an OSError to cover edge cases where the minion is
    # running as a non-privileged user and is trying to check for the existence
@@ -1024,7 +1024,7 @@ def extracted(name,
    if not if_missing_path_exists:
        if contents is None:
            try:
-                os.stat(if_missing)
+                os.lstat(if_missing)
                extraction_needed = False
            except OSError as exc:
                if exc.errno == errno.ENOENT:
@@ -1037,12 +1037,15 @@ def extracted(name,
                    return ret
        else:
            incorrect_type = []
-            for path_list, func in ((contents['dirs'], stat.S_ISDIR),
-                                    (contents['files'], stat.S_ISREG)):
+            for path_list, func in \
+                    ((contents['dirs'], stat.S_ISDIR),
+                     (contents['files'], lambda x: not stat.S_ISLNK(x)
+                                         and not stat.S_ISDIR(x)),
+                     (contents['links'], stat.S_ISLNK)):
                for path in path_list:
                    full_path = os.path.join(name, path)
                    try:
-                        path_mode = os.stat(full_path.rstrip(os.sep)).st_mode
+                        path_mode = os.lstat(full_path.rstrip(os.sep)).st_mode
                        if not func(path_mode):
                            incorrect_type.append(path)
                    except OSError as exc:
@@ -1050,7 +1053,7 @@ def extracted(name,
                            extraction_needed = True
                        elif exc.errno != errno.ENOTDIR:
                            # In cases where a directory path was occupied by a
-                            # file instead, all os.stat() calls to files within
+                            # file instead, all os.lstat() calls to files within
                            # that dir will raise an ENOTDIR OSError. So we
                            # expect these and will only abort here if the
                            # error code is something else.
@@ -1063,8 +1066,8 @@ def extracted(name,
                )
                ret['comment'] = (
                    'The below paths (relative to {0}) exist, but are the '
-                    'incorrect type (i.e. file instead of directory or '
-                    'vice-versa).'.format(name)
+                    'incorrect type (file instead of directory, symlink '
+                    'instead of file, etc.).'.format(name)
                )
                if __opts__['test'] and clean and contents is not None:
                    ret['result'] = None
@@ -1081,7 +1084,7 @@ def extracted(name,
                if not (clean and contents is not None):
                    if not force:
                        ret['comment'] += (
-                            'To proceed with extraction, set \'force\' to '
+                            ' To proceed with extraction, set \'force\' to '
                            'True. Note that this will remove these paths '
                            'before extracting.{0}'.format(incorrect_paths)
                        )
@@ -1094,6 +1097,7 @@ def extracted(name,
                                salt.utils.rm_rf(full_path.rstrip(os.sep))
                                ret['changes'].setdefault(
                                    'removed', []).append(full_path)
+                                extraction_needed = True
                            except OSError as exc:
                                if exc.errno != errno.ENOENT:
                                    errors.append(exc.__str__())
@@ -1301,12 +1305,19 @@ def extracted(name,
    enforce_failed = []
    if user or group:
        if enforce_ownership_on:
-            enforce_dirs = [enforce_ownership_on]
-            enforce_files = []
+            if os.path.isdir(enforce_ownership_on):
+                enforce_dirs = [enforce_ownership_on]
+                enforce_files = []
+                enforce_links = []
+            else:
+                enforce_dirs = []
+                enforce_files = [enforce_ownership_on]
+                enforce_links = []
        else:
            if contents is not None:
                enforce_dirs = contents['top_level_dirs']
                enforce_files = contents['top_level_files']
+                enforce_links = contents['top_level_links']

        recurse = []
        if user:
@@ -1359,14 +1370,14 @@ def extracted(name,
                            dir_result, dirname
                        )

-        for filename in enforce_files:
+        for filename in enforce_files + enforce_links:
            full_path = os.path.join(name, filename)
            try:
-                # Using os.stat instead of calling out to
+                # Using os.lstat instead of calling out to
                # __salt__['file.stats'], since we may be doing this for a lot
-                # of files, and simply calling os.stat directly will speed
+                # of files, and simply calling os.lstat directly will speed
                # things up a bit.
-                file_stat = os.stat(full_path)
+                file_stat = os.lstat(full_path)
            except OSError as exc:
                if not __opts__['test']:
                    if exc.errno == errno.ENOENT:
@@ -1385,7 +1396,7 @@ def extracted(name,
                        ret['changes']['updated ownership'] = True
                    else:
                        try:
-                            os.chown(full_path, uid, gid)
+                            os.lchown(full_path, uid, gid)
                            ret['changes']['updated ownership'] = True
                        except OSError:
                            enforce_failed.append(filename)
--- a/salt/utils/__init__.py
+++ b/salt/utils/__init__.py
@@ -1961,10 +1961,10 @@ def rm_rf(path):
            func(path)
        else:
            raise  # pylint: disable=E0704
-    if os.path.isdir(path):
-        shutil.rmtree(path, onerror=_onerror)
-    else:
+    if os.path.islink(path) or not os.path.isdir(path):
        os.remove(path)
+    else:
+        shutil.rmtree(path, onerror=_onerror)


 def option(value, default='', opts=None, pillar=None):
--- a/tests/unit/states/archive_test.py
+++ b/tests/unit/states/archive_test.py
@@ -81,15 +81,17 @@ class ArchiveTestCase(TestCase):

        mock_true = MagicMock(return_value=True)
        mock_false = MagicMock(return_value=False)
-        ret = {'stdout': ['saltines', 'cheese'], 'stderr': 'biscuits', 'retcode': '31337', 'pid': '1337'}
+        ret = {'stdout': ['cheese', 'ham', 'saltines'], 'stderr': 'biscuits', 'retcode': '31337', 'pid': '1337'}
        mock_run = MagicMock(return_value=ret)
        mock_source_list = MagicMock(return_value=(source, None))
        state_single_mock = MagicMock(return_value={'local': {'result': True}})
        list_mock = MagicMock(return_value={
            'dirs': [],
-            'files': ['saltines', 'cheese'],
+            'files': ['cheese', 'saltines'],
+            'links': ['ham'],
            'top_level_dirs': [],
-            'top_level_files': ['saltines', 'cheese'],
+            'top_level_files': ['cheese', 'saltines'],
+            'top_level_links': ['ham'],
        })
        isfile_mock = MagicMock(side_effect=_isfile_side_effect)

@@ -127,8 +129,10 @@ class ArchiveTestCase(TestCase):
        list_mock = MagicMock(return_value={
            'dirs': [],
            'files': ['stdout'],
+            'links': [],
            'top_level_dirs': [],
            'top_level_files': ['stdout'],
+            'top_level_links': [],
        })
        isfile_mock = MagicMock(side_effect=_isfile_side_effect)

@@ -162,8 +166,10 @@ class ArchiveTestCase(TestCase):
        list_mock = MagicMock(return_value={
            'dirs': [],
            'files': ['stderr'],
+            'links': [],
            'top_level_dirs': [],
            'top_level_files': ['stderr'],
+            'top_level_links': [],
        })
        isfile_mock = MagicMock(side_effect=_isfile_side_effect)