Source code for NIWLittleUtils.filesystem

import os
import sys
import re
import codecs
from unicodedata import normalize

try:
    from os import scandir
except ImportError:
    from scandir import scandir

# Reserved DOS device names. Windows refuses to treat these as ordinary files,
# with or without an extension, so a sanitised filename must never start with
# one of them. Entries are upper-case; upper-case a candidate before testing
# membership.
_windows_device_files = (
    ('CON', 'AUX', 'PRN', 'NUL')
    + tuple('COM%d' % port for port in range(10))
    + tuple('LPT%d' % port for port in range(10))
)

# Characters dropped outright from a sanitised name: shell/path metacharacters
# plus the C0/C1 control ranges (NUL included). Compiled once at import time
# instead of on every call.
_unsafe_filename_characters = re.compile(
    r'[\\/:,\[\]{}()!;*?\'"<>|\x00-\x1f\x7f-\x9f]')


def secure_filename(filename, codec='utf8'):
    r"""Pass it a filename and it will return a secure version of it.

    This function is a modified version of |werkzeug-secure_filename|_.

    .. |werkzeug-secure_filename| replace:: ``werkzeug.utils.secure_filename``
    .. _werkzeug-secure_filename: http://werkzeug.pocoo.org/docs/0.9/utils/#werkzeug.utils.secure_filename

    The filename can then safely be stored on a regular file system and
    passed to :func:`os.path.join`.

    Use the parameter ``codec`` to choose the codec the filename must be
    encodable in. Only *utf8* and *ascii* are accepted. If you need high
    portability, set ``codec`` to ``'ascii'``. It is ``'utf8'`` by default.
    The extension is always reduced to ASCII, whatever ``codec`` is.

    Path separators and runs of whitespace become single underscores,
    shell/path metacharacters and control characters (including NUL) are
    removed, and leading/trailing dots and underscores are stripped.

    On Windows systems the function also makes sure that the file is not
    named after one of the special device files.

    >>> secure_filename("My cool movie.mov")
    'My_cool_movie.mov'
    >>> secure_filename("../../../etc/passwd")
    'etc_passwd'
    >>> secure_filename('i contain cool \xfcml\xe4uts.txt')
    'i_contain_cool_ümläuts.txt'
    >>> secure_filename('i contain cool \xfcml\xe4uts.txt', 'ascii')
    'i_contain_cool_umlauts.txt'

    The function might return an empty filename. It's your responsibility
    to ensure that the filename is unique and that you generate a random
    filename if the function returned an empty one.

    Remember that this function only makes the *filename* safe; it cannot
    make the file itself safe. For example, if a user uploads a file named
    ``test.sh``, this function returns the same filename. If that script
    is malicious and you execute it... well, good luck.

    :param filename: the filename to secure
    :param codec: ``'utf8'`` (default) or ``'ascii'``; any alias understood
        by :func:`codecs.lookup` for one of those two codecs is accepted
    :returns: the sanitised filename, possibly an empty string
    :raises LookupError: if ``codec`` is not a known codec name
    :raises ValueError: if ``codec`` is neither *utf8* nor *ascii*
    :raises TypeError: if ``filename`` is not a :class:`str`
    """
    # Resolve aliases ('utf8', 'UTF-8', 'us-ascii', ...) to canonical names.
    codec = codecs.lookup(codec).name
    if codec not in ('utf-8', 'ascii'):
        raise ValueError('Argument ``codec`` should be *utf8* or *ascii*.')
    if not isinstance(filename, str):
        raise TypeError('Filename should be a instance of str.')

    # NFKD splits accented letters into base letter + combining mark so the
    # ASCII encoder can keep the base letter; NFC keeps them composed.
    normal_form = 'NFC' if codec == 'utf-8' else 'NFKD'

    stem, ext = os.path.splitext(filename)
    stem = normalize(normal_form, stem).encode(codec, 'ignore').decode(codec)
    ext = normalize('NFKD', ext).encode('ascii', 'ignore').decode('ascii')

    # Turn path separators into whitespace so that they end up as underscores
    # below instead of silently gluing path components together.
    for sep in (os.path.sep, os.path.altsep):
        if sep:
            stem = stem.replace(sep, ' ')
            ext = ext.replace(sep, ' ')

    cleaned = '_'.join(stem.split()) + '_'.join(ext.split())
    # Stripping dots and underscores at both ends also defuses '..' prefixes
    # and hidden-file names.
    cleaned = _unsafe_filename_characters.sub('', cleaned).strip('._')

    # Windows treats e.g. 'CON.txt' as the console device, so prefix such
    # names to turn them into ordinary files.
    if (os.name == 'nt' and cleaned
            and cleaned.split('.')[0].upper() in _windows_device_files):
        cleaned = '_' + cleaned

    return cleaned
def scantree(path='.'):
    r"""Recursively yield DirEntry objects for given directory.

    Python 3.5 and higher has :func:`os.scandir` to get the files and
    folders, together with their file attribute information, in the folder
    given by *path*. It does not descend into sub-folders, however; this
    generator does. Every entry is yielded before the entries found
    below it.

    Notice that this function will not follow symlinks, to avoid symlink
    loops: a symlink to a directory is yielded but not descended into.

    The underlying directory iterator is closed as soon as the generator
    finishes or is closed, so abandoning the iteration early does not leave
    a directory handle open until garbage collection.

    :param path: Path to folder which you want to scan.
    :returns: a generator of ``DirEntry`` objects
    """
    entries = scandir(path)
    try:
        for entry in entries:
            yield entry
            if entry.is_dir(follow_symlinks=False):
                yield from scantree(entry.path)
    finally:
        # Not every scandir implementation this module may import exposes
        # close(), so look it up defensively rather than using ``with``.
        close = getattr(entries, 'close', None)
        if close is not None:
            close()