import os
import sys
import re
import codecs
from unicodedata import normalize
try:
from os import scandir
except ImportError:
from scandir import scandir
# Base names Windows reserves for devices; a file may not use any of them,
# with or without an extension.  Compared against the upper-cased part of
# the filename that precedes its first dot.
_windows_device_files = (
    'CON', 'AUX', 'PRN', 'NUL',
    'COM0', 'COM1', 'COM2', 'COM3', 'COM4',
    'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
    'LPT0', 'LPT1', 'LPT2', 'LPT3', 'LPT4',
    'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9',
    # Superscript digits one, two and three are reserved as well.
    'COM\xb9', 'COM\xb2', 'COM\xb3',
    'LPT\xb9', 'LPT\xb2', 'LPT\xb3',
)
# Characters removed outright from the sanitized name: shell/path
# metacharacters plus the C0, DEL and C1 control characters (an embedded NUL
# or other control character is never valid in a stored filename).
# Whitespace controls never reach this pattern; they are collapsed to '_'
# beforehand.
_unsafe_filename_characters = re.compile(
    r'[\\/:,\[\]{}()!;*?\'"<>|\x00-\x1f\x7f-\x9f]')


def secure_filename(filename, codec='utf8'):
    r"""Pass it a filename and it will return a secure version of it.

    This function is a modified version of |werkzeug-secure_filename|_.

    .. |werkzeug-secure_filename| replace:: ``werkzeug.utils.secure_filename``
    .. _werkzeug-secure_filename: http://werkzeug.pocoo.org/docs/0.9/utils/#werkzeug.utils.secure_filename

    The filename can then safely be stored on a regular file system and passed
    to :func:`os.path.join`.

    You can use parameter ``codec`` to specify the codec which is used
    to encode the filename. The ``codec`` could only be *utf8* or *ascii*.
    If you need high portability, you should set ``codec`` to ``'ascii'``.
    It will be ``'utf8'`` by default.

    Path separators and runs of whitespace are replaced by a single
    underscore, special characters and control characters are removed, and
    leading/trailing dots and underscores are stripped. The extension is
    always reduced to ASCII.

    On windows systems the function also makes sure that the file is not
    named after one of the special device files.

    >>> secure_filename("My cool movie.mov")
    'My_cool_movie.mov'
    >>> secure_filename("../../../etc/passwd")
    'etc_passwd'
    >>> secure_filename('i contain cool \xfcml\xe4uts.txt')
    'i_contain_cool_ümläuts.txt'
    >>> secure_filename('i contain cool \xfcml\xe4uts.txt', 'ascii')
    'i_contain_cool_umlauts.txt'
    >>> secure_filename('evil\x00name.txt')
    'evilname.txt'

    The function might return an empty filename. It's your responsibility
    to ensure that the filename is unique and that you generate a random
    filename if the function returned an empty one.

    Remember that this function only makes sure the filename is secure;
    it cannot make sure the file itself is secure. For example, if a user
    uploads a file named ``test.sh``, this function will return the same
    filename. If that file contains a malicious script and you execute
    it... well, good luck.

    :param filename: the filename to secure
    :param codec: name of the codec the result must be encodable with;
                  any alias of *utf8* or *ascii* is accepted
    :returns: the sanitized filename, possibly an empty string
    :raises LookupError: if ``codec`` is not a known codec name
    :raises ValueError: if ``codec`` is neither *utf8* nor *ascii*
    :raises TypeError: if ``filename`` is not a :class:`str`
    """
    # Resolve aliases ('utf8', 'UTF-8', 'us-ascii', ...) to the canonical name.
    codec = codecs.lookup(codec).name
    if codec not in ('utf-8', 'ascii'):
        raise ValueError('Argument ``codec`` should be *utf8* or *ascii*.')
    if not isinstance(filename, str):
        raise TypeError('Filename should be a instance of str.')

    # NFKD decomposes accented letters so that dropping the non-ASCII
    # combining marks leaves the base letter; NFC keeps them composed.
    normal_form = 'NFC' if codec == 'utf-8' else 'NFKD'
    stem, ext = os.path.splitext(filename)
    stem = normalize(normal_form, stem).encode(codec, 'ignore').decode(codec)
    ext = normalize('NFKD', ext).encode('ascii', 'ignore').decode('ascii')

    # Turn path separators into whitespace so they collapse to '_' below.
    for sep in (os.path.sep, os.path.altsep):
        if sep:
            stem = stem.replace(sep, ' ')
            ext = ext.replace(sep, ' ')

    secured = '_'.join(stem.split()) + '_'.join(ext.split())
    # Stripping '.' and '_' also removes any leftover '..' path components.
    secured = _unsafe_filename_characters.sub('', secured).strip('._')

    if (os.name == 'nt' and secured
            and secured.split('.')[0].upper() in _windows_device_files):
        secured = '_' + secured
    return secured
def scantree(path='.'):
    r"""Recursively yield DirEntry objects for given directory.

    Python 3.5 and higher has :func:`os.scandir()` to get the files and
    folders, together with their file attribute information, in the folder
    given by *path*. :func:`os.scandir()` itself does not descend into
    sub-folders; this function does.

    Every entry is yielded before the entries below it, so a directory
    always appears ahead of its own contents.

    Notice that this function will not follow symlinks to avoid
    symlink loops.

    :param path: Path to folder which you want to scan.
    :returns: a generator of DirEntry objects for everything below *path*
    """
    entries = scandir(path)
    try:
        for entry in entries:
            yield entry
            if entry.is_dir(follow_symlinks=False):
                yield from scantree(entry.path)
    finally:
        # Release the directory handle even when the caller stops iterating
        # early.  Looked up with getattr because an older scandir
        # implementation may not provide close().
        close = getattr(entries, 'close', None)
        if close is not None:
            close()