Rewrite file.extract_hash

This rewrite makes it possible for file.extract_hash to match on more
than just the filename being managed, also matching on the source URL as
well as a new user-specified name.

It also makes this function more efficient, as only a single pass is
performed when the hash_type is unspecified. Before, it would read the
entire file for each hash type.

Finally, it adds the ``source_hash_name`` argument to
``file.get_managed``, so that it can be passed to this function by
``file.managed`` states.
This commit is contained in:
Erik Johnson 2016-11-04 09:48:10 -05:00
parent 580eca709b
commit 52fe72d402

View file

@ -24,6 +24,7 @@ import os
import re
import shutil
import stat
import string
import sys
import tempfile
import time
@ -61,14 +62,14 @@ __func_alias__ = {
'makedirs_': 'makedirs'
}
HASHES = [
['sha512', 128],
['sha384', 96],
['sha256', 64],
['sha224', 56],
['sha1', 40],
['md5', 32],
]
HASHES = {
'sha512': 128,
'sha384': 96,
'sha256': 64,
'sha224': 56,
'sha1': 40,
'md5': 32,
}
def __virtual__():
@ -3467,13 +3468,14 @@ def get_managed(
template,
source,
source_hash,
source_hash_name,
user,
group,
mode,
saltenv,
context,
defaults,
skip_verify,
skip_verify=False,
**kwargs):
'''
Return the managed file data for file.managed
@ -3490,20 +3492,26 @@ def get_managed(
source_hash
hash of the source file
source_hash_name
When ``source_hash`` refers to a remote file, this specifies the
filename to look for in that file.
.. versionadded:: 2016.3.5
user
user owner
Owner of file
group
group owner
Group owner of file
mode
file mode
Permissions of file
context
variables to add to the environment
Variables to add to the template context
defaults
default values of for context_dict
Default values for context_dict
skip_verify
If ``True``, hash verification of remote file sources (``http://``,
@ -3516,7 +3524,7 @@ def get_managed(
.. code-block:: bash
salt '*' file.get_managed /etc/httpd/conf.d/httpd.conf jinja salt://http/httpd.conf '{hash_type: 'md5', 'hsum': <md5sum>}' root root '755' base None None
salt '*' file.get_managed /etc/httpd/conf.d/httpd.conf jinja salt://http/httpd.conf '{hash_type: 'md5', 'hsum': <md5sum>}' None root root '755' base None None
'''
# Copy the file to the minion and templatize it
sfn = ''
@ -3530,7 +3538,6 @@ def get_managed(
'''
return {'hsum': get_hash(path, form='sha256'), 'hash_type': 'sha256'}
source_hash_name = kwargs.pop('source_hash_name', None)
# If we have a source defined, let's figure out what the hash is
if source:
urlparsed_source = _urlparse(source)
@ -3572,7 +3579,11 @@ def get_managed(
return '', {}, ('Source hash file {0} not found'
.format(source_hash))
source_sum = extract_hash(
hash_fn, '', source_hash_name or name)
hash_fn,
'',
name,
source,
source_hash_name)
if source_sum is None:
return _invalid_source_hash_format()
@ -3654,8 +3665,27 @@ def get_managed(
return sfn, source_sum, ''
def extract_hash(hash_fn, hash_type='sha256', file_name=''):
def extract_hash(hash_fn,
hash_type='sha256',
file_name='',
source='',
source_hash_name=None):
'''
.. versionchanged:: 2016.3.5
Prior to this version, only the ``file_name`` argument was considered
for filename matches in the hash file. This would be problematic for
cases in which the user was relying on a remote checksum file that they
do not control, and they wished to use a different name for that file
on the minion from the filename on the remote server (and in the
checksum file). For example, managing ``/tmp/myfile.tar.gz`` when the
remote file was at ``https://mydomain.tld/different_name.tar.gz``. The
:py:func:`file.managed <salt.states.file.managed>` state now also
passes this function the source URI as well as the ``source_hash_name``
(if specified). In cases where ``source_hash_name`` is specified, it
takes precedence over both the ``file_name`` and ``source``. When it is
not specified, ``file_name`` takes precedence over ``source``. This
makes hash matching more flexible.
This routine is called from the :mod:`file.managed
<salt.states.file.managed>` state to pull a hash from a remote file.
Regular expressions are used line by line on the ``source_hash`` file, to
@ -3677,49 +3707,183 @@ def extract_hash(hash_fn, hash_type='sha256', file_name=''):
.. code-block:: bash
salt '*' file.extract_hash /etc/foo sha512 /path/to/hash/file
salt '*' file.extract_hash /path/to/hash/file sha512 /etc/foo
'''
source_sum = None
partial_id = False
name_sought = os.path.basename(file_name)
log.debug('modules.file.py - extract_hash(): Extracting hash for file '
'named: {0}'.format(name_sought))
with salt.utils.fopen(hash_fn, 'r') as hash_fn_fopen:
for hash_variant in HASHES:
if hash_type == '' or hash_type == hash_variant[0]:
log.debug('modules.file.py - extract_hash(): Will use regex to get'
' a purely hexadecimal number of length ({0}), presumably hash'
' type : {1}'.format(hash_variant[1], hash_variant[0]))
hash_fn_fopen.seek(0)
for line in hash_fn_fopen.read().splitlines():
hash_array = re.findall(r'(?i)(?<![a-z0-9])[a-f0-9]{' + str(hash_variant[1]) + '}(?![a-z0-9])', line)
log.debug('modules.file.py - extract_hash(): From "line": {0} '
'got : {1}'.format(line, hash_array))
if hash_array:
if not partial_id:
source_sum = {'hsum': hash_array[0], 'hash_type': hash_variant[0]}
partial_id = True
log.debug('modules.file.py - extract_hash(): Found: {0} '
'-- {1}'.format(source_sum['hash_type'],
source_sum['hsum']))
if re.search(name_sought, line):
source_sum = {'hsum': hash_array[0], 'hash_type': hash_variant[0]}
log.debug('modules.file.py - extract_hash: For {0} -- '
'returning the {1} hash "{2}".'.format(
name_sought,
source_sum['hash_type'],
source_sum['hsum']))
return source_sum
if partial_id:
log.debug('modules.file.py - extract_hash: Returning the partially '
'identified {0} hash "{1}".'.format(
source_sum['hash_type'], source_sum['hsum']))
hash_len = HASHES.get(hash_type)
if hash_len is None:
if hash_type:
log.warning(
'file.extract_hash: Unsupported hash_type \'%s\', falling '
'back to matching any supported hash_type', hash_type
)
hash_type = ''
hash_len_expr = '{0},{1}'.format(min(six.itervalues(HASHES)),
max(six.itervalues(HASHES)))
else:
log.debug('modules.file.py - extract_hash: Returning None.')
return source_sum
hash_len_expr = str(hash_len)
filename_separators = string.whitespace + r'\/'
if source_hash_name is not None:
#if not isinstance(source_hash_name, six.string_types):
# source_hash_name = str(source_hash_name)
if not isinstance(source_hash_name, six.string_types):
source_hash_name = str(source_hash_name)
source_hash_name_idx = (len(source_hash_name) + 1) * -1
log.debug(
'file.extract_hash: Extracting %s hash for file matching '
'source_hash_name \'%s\'',
'any supported' if not hash_type else hash_type,
source_hash_name
)
else:
if not isinstance(file_name, six.string_types):
file_name = str(file_name)
if not isinstance(source, six.string_types):
source = str(source)
urlparsed_source = _urlparse(source)
source_basename = os.path.basename(
urlparsed_source.path or urlparsed_source.netloc
)
source_idx = (len(source_basename) + 1) * -1
file_name_basename = os.path.basename(file_name)
file_name_idx = (len(file_name_basename) + 1) * -1
searches = [x for x in (file_name, source) if x]
if searches:
log.debug(
'file.extract_hash: Extracting %s hash for file matching%s: %s',
'any supported' if not hash_type else hash_type,
'' if len(searches) == 1 else ' either of the following',
', '.join(searches)
)
partial = None
found = {}
hashes_revmap = dict([(y, x) for x, y in six.iteritems(HASHES)])
with salt.utils.fopen(hash_fn, 'r') as fp_:
for line in fp_:
line = line.strip()
hash_re = r'(?i)(?<![a-z0-9])([a-f0-9]{' + hash_len_expr + '})(?![a-z0-9])'
hash_match = re.search(hash_re, line)
matched = None
if hash_match:
matched_hsum = hash_match.group(1)
if matched_hsum is not None:
matched_type = hashes_revmap.get(len(matched_hsum))
if matched_type is None:
# There was a match, but it's not of the correct length
# to match one of the supported hash types.
matched = None
else:
matched = {'hsum': matched_hsum,
'hash_type': matched_type}
if matched is None:
log.debug(
'file.extract_hash: In line \'%s\', no %shash found',
line,
'' if not hash_type else hash_type + ' '
)
continue
if partial is None:
partial = matched
def _add_to_matches(found, line, match_type, value, matched):
log.debug(
'file.extract_hash: Line \'%s\' matches %s \'%s\'',
line, match_type, value
)
found.setdefault(match_type, []).append(matched)
hash_matched = False
if source_hash_name is not None:
if line.endswith(source_hash_name):
# Checking the character before where source_hash_name should
# start for whitespace (unlike the file_name and source checks
# below, a path separator is not accepted here). We can't just
# rsplit on spaces/whitespace, because the name may contain spaces.
try:
if line[source_hash_name_idx] in string.whitespace:
_add_to_matches(found, line, 'source_hash_name',
source_hash_name, matched)
hash_matched = True
except IndexError:
pass
elif re.match(source_hash_name.replace('.', r'\.') + r'\s+',
line):
_add_to_matches(found, line, 'source_hash_name',
source_hash_name, matched)
hash_matched = True
else:
if file_name:
if line.endswith(file_name_basename):
# Checking the character before where the basename
# should start for either whitespace or a path
# separator. We can't just rsplit on spaces/whitespace,
# because the filename may contain spaces.
try:
if line[file_name_idx] in filename_separators:
_add_to_matches(found, line, 'file_name',
file_name, matched)
hash_matched = True
except IndexError:
pass
elif re.match(file_name.replace('.', r'\.') + r'\s+', line):
_add_to_matches(found, line, 'file_name',
file_name, matched)
hash_matched = True
if source:
if line.endswith(source_basename):
# Same as above, we can't just do an rsplit here.
try:
if line[source_idx] in filename_separators:
_add_to_matches(found, line, 'source',
source, matched)
hash_matched = True
except IndexError:
pass
elif re.match(source.replace('.', r'\.') + r'\s+', line):
_add_to_matches(found, line, 'source', source, matched)
hash_matched = True
if not hash_matched:
log.debug(
'file.extract_hash: Line \'%s\' contains %s hash '
'\'%s\', but line did not meet the search criteria',
line, matched['hash_type'], matched['hsum']
)
for found_type, found_str in (('source_hash_name', source_hash_name),
('file_name', file_name),
('source', source)):
if found_type in found:
if len(found[found_type]) > 1:
log.debug(
'file.extract_hash: Multiple matches for %s: %s',
found_str,
', '.join(
['{0} ({1})'.format(x['hsum'], x['hash_type'])
for x in found[found_type]]
)
)
ret = found[found_type][0]
log.debug(
'file.extract_hash: Returning %s hash \'%s\' as a match of %s',
ret['hash_type'], ret['hsum'], found_str
)
return ret
if partial:
log.debug(
'file.extract_hash: Returning the partially identified %s hash '
'\'%s\'', partial['hash_type'], partial['hsum']
)
return partial
log.debug('file.extract_hash: No matches, returning None')
return None
def check_perms(name, ret, user, group, mode, follow_symlinks=False):