extract_tar.py 2.39 KB
Newer Older
Julian Arni committed
1 2 3 4 5 6 7 8
"""
Safe version of tarfile.extractall which refuses to extract files that would
end up outside of, or link to a file outside of, the directory extracted into.

Adapted from:
http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python
"""
import logging
9 10 11 12 13
from os.path import join as joinpath
from os.path import abspath, dirname, realpath

from django.conf import settings
from django.core.exceptions import SuspiciousOperation
Julian Arni committed
14

15 16
log = logging.getLogger(__name__)

Julian Arni committed
17 18 19 20 21 22 23

def resolved(rpath):
    """
    Return `rpath` made absolute and with symbolic links resolved.

    The path is first normalised with `abspath` (relative to the current
    working directory) and the result is then passed through `realpath`.
    """
    absolute = abspath(rpath)
    return realpath(absolute)

24

Julian Arni committed
25 26 27 28 29 30
def _is_bad_path(path, base):
    """
    Is (the canonical absolute path of) `path` outside `base`?

    `path` is joined onto `base` before being canonicalised, so a relative
    `path` is interpreted relative to `base` while an absolute `path`
    replaces it. `base` itself is canonicalised as well, so the comparison
    is made between two paths of the same form.

    Returns False when the result is `base` itself or lies beneath it, and
    True otherwise.
    """
    root = resolved(base)
    target = resolved(joinpath(base, path))
    if target == root:
        return False
    # A bare string-prefix test would treat a sibling such as "/data/foobar"
    # as being inside "/data/foo". Compare against `root` with a trailing
    # separator instead; joining with "" appends the separator only when it
    # is not already there (e.g. for the filesystem root).
    return not target.startswith(joinpath(root, ""))

31

Julian Arni committed
32 33
def _is_bad_link(info, base):
    """
    Does the sym- or hard-link `info` point outside the directory holding it?

    The directory containing the link (`dirname(info.name)` under `base`) is
    canonicalised and then used as the containment root when checking
    `info.linkname`.
    """
    # A link target is interpreted relative to the directory the link lives in
    link_dir = joinpath(base, dirname(info.name))
    return _is_bad_path(info.linkname, base=resolved(link_dir))

40

41
def safemembers(members, base):
    """
    Check that all elements of a tar file are safe.

    `members` is iterated once; each element must provide `name`, `linkname`
    and the `issym()`, `islnk()` and `isdev()` predicates. `base` is the
    directory the archive is going to be extracted into.

    Returns `members` unchanged when every element passes.

    Raises `SuspiciousOperation` when `base` is not `settings.DATA_DIR` or a
    directory beneath it, or as soon as one element has an illegal path, is a
    symbolic or hard link rejected by `_is_bad_link`, or is a device/FIFO
    entry.
    """
    base = resolved(base)

    # check that we're not trying to import outside of the data_dir
    # (compare with a trailing separator so that a sibling directory sharing
    # the same name prefix, e.g. "<data_dir>_other", is not accepted)
    data_dir = resolved(settings.DATA_DIR)
    if base != data_dir and not base.startswith(joinpath(data_dir, "")):
        raise SuspiciousOperation("Attempted to import course outside of data dir")

    for finfo in members:
        if _is_bad_path(finfo.name, base):
            log.debug("File %r is blocked (illegal path)", finfo.name)
            raise SuspiciousOperation("Illegal path")
        elif finfo.issym() and _is_bad_link(finfo, base):
            # issym() is true for symbolic links
            log.debug("File %r is blocked: Symlink to %r", finfo.name,
                      finfo.linkname)
            raise SuspiciousOperation("Symlink")
        elif finfo.islnk() and _is_bad_link(finfo, base):
            # islnk() is true for hard links
            log.debug("File %r is blocked: Hard link to %r", finfo.name,
                      finfo.linkname)
            raise SuspiciousOperation("Hard link")
        elif finfo.isdev():
            log.debug("File %r is blocked: FIFO, device or character file",
                      finfo.name)
            raise SuspiciousOperation("Dev file")

    return members

70

Adam Palay committed
71
def safetar_extractall(tar_file, path=".", members=None):  # pylint: disable=unused-argument
    """
    Safe version of `tar_file.extractall()`.

    Every member of `tar_file` is first vetted by `safemembers` against
    `path`; extraction only happens if that check does not raise. The
    `members` argument is accepted but not used: the whole archive is always
    checked and extracted.
    """
    vetted = safemembers(tar_file, path)
    return tar_file.extractall(path, vetted)