Commit 602eb876 by jkarni

Merge pull request #1227 from edx/jkarni/hotfix/safetar

Use wrapped tar extraction
parents cbebb42c dadc766f
......@@ -6,6 +6,7 @@ import shutil
import tarfile
import tempfile
import copy
from path import path
from uuid import uuid4
from pymongo import MongoClient
......@@ -19,6 +20,7 @@ from xmodule.contentstore.django import _CONTENTSTORE
TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
TEST_DATA_CONTENTSTORE['OPTIONS']['db'] = 'test_xcontent_%s' % uuid4().hex
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE)
class ImportTestCase(CourseTestCase):
"""
......@@ -32,7 +34,7 @@ class ImportTestCase(CourseTestCase):
'course': self.course.location.course,
'name': self.course.location.name,
})
self.content_dir = tempfile.mkdtemp()
self.content_dir = path(tempfile.mkdtemp())
def touch(name):
""" Equivalent to shell's 'touch'"""
......@@ -60,11 +62,15 @@ class ImportTestCase(CourseTestCase):
with tarfile.open(self.bad_tar, "w:gz") as btar:
btar.add(bad_dir)
self.unsafe_common_dir = path(tempfile.mkdtemp(dir=self.content_dir))
def tearDown(self):
shutil.rmtree(self.content_dir)
MongoClient().drop_database(TEST_DATA_CONTENTSTORE['OPTIONS']['db'])
_CONTENTSTORE.clear()
def test_no_coursexml(self):
"""
Check that the response for a tar.gz import without a course.xml is
......@@ -92,3 +98,88 @@ class ImportTestCase(CourseTestCase):
"course-data": [gtar]
})
self.assertEquals(resp.status_code, 200)
## Unsafe tar methods #####################################################
# Each of these methods creates a tarfile with a single type of unsafe
# content.
def _fifo_tar(self):
"""
Tar file with FIFO
"""
fifop = self.unsafe_common_dir / "fifo.file"
fifo_tar = self.unsafe_common_dir / "fifo.tar.gz"
os.mkfifo(fifop)
with tarfile.open(fifo_tar, "w:gz") as tar:
tar.add(fifop)
return fifo_tar
def _symlink_tar(self):
"""
Tarfile with symlink to path outside directory.
"""
outsidep = self.unsafe_common_dir / "unsafe_file.txt"
symlinkp = self.unsafe_common_dir / "symlink.txt"
symlink_tar = self.unsafe_common_dir / "symlink.tar.gz"
outsidep.symlink(symlinkp)
with tarfile.open(symlink_tar, "w:gz" ) as tar:
tar.add(symlinkp)
return symlink_tar
def _outside_tar(self):
"""
Tarfile with file that extracts to outside directory.
Extracting this tarfile in directory <dir> will put its contents
directly in <dir> (rather than <dir/tarname>).
"""
outside_tar = self.unsafe_common_dir / "unsafe_file.tar.gz"
with tarfile.open(outside_tar, "w:gz") as tar:
tar.addfile(tarfile.TarInfo(str(self.content_dir / "a_file")))
return outside_tar
def _outside_tar2(self):
"""
Tarfile with file that extracts to outside directory.
The path here matches the basename (`self.unsafe_common_dir`), but
then "cd's out". E.g. "/usr/../etc" == "/etc", but the naive basename
of the first (but not the second) is "/usr"
Extracting this tarfile in directory <dir> will also put its contents
directly in <dir> (rather than <dir/tarname>).
"""
outside_tar = self.unsafe_common_dir / "unsafe_file.tar.gz"
with tarfile.open(outside_tar, "w:gz") as tar:
tar.addfile(tarfile.TarInfo(str(self.unsafe_common_dir / "../a_file")))
return outside_tar
def test_unsafe_tar(self):
"""
Check that safety measure work.
This includes:
'tarbombs' which include files or symlinks with paths
outside or directly in the working directory,
'special files' (character device, block device or FIFOs),
all raise exceptions/400s.
"""
def try_tar(tarpath):
with open(tarpath) as tar:
resp = self.client.post(
self.url,
{ "name": tarpath, "course-data": [tar] }
)
self.assertEquals(resp.status_code, 400)
self.assertTrue("SuspiciousFileOperation" in resp.content)
try_tar(self._fifo_tar())
try_tar(self._symlink_tar())
try_tar(self._outside_tar())
try_tar(self._outside_tar2())
......@@ -19,6 +19,7 @@ from django.core.urlresolvers import reverse
from django.core.servers.basehttp import FileWrapper
from django.core.files.temp import NamedTemporaryFile
from django.views.decorators.http import require_http_methods
from django.core.exceptions import SuspiciousOperation
from mitxmako.shortcuts import render_to_response
from auth.authz import create_all_course_groups
......@@ -32,6 +33,7 @@ from xmodule.exceptions import SerializationError
from .access import get_location_and_verify_access
from util.json_request import JsonResponse
from extract_tar import safetar_extractall
__all__ = ['import_course', 'generate_export_course', 'export_course']
......@@ -154,7 +156,16 @@ def import_course(request, org, course, name):
sf.write("Extracting")
tar_file = tarfile.open(temp_filepath)
tar_file.extractall((course_dir + '/').encode('utf-8'))
try:
safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
except SuspiciousOperation as exc:
return JsonResponse(
{
'ErrMsg': 'Unsafe tar file. Aborting import.',
'SuspiciousFileOperationMsg': exc.args[0]
},
status=400
)
with open(status_file, 'w+') as sf:
sf.write("Verifying")
......
"""
Safe version of tarfile.extractall which does not extract any files that would
be, or symlink to a file that is, outside of the directory extracted in.
Adapted from:
http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python
"""
from os.path import abspath, realpath, dirname, join as joinpath
from django.core.exceptions import SuspiciousOperation
import logging
log = logging.getLogger(__name__) #pylint: disable=C0103
def resolved(rpath):
"""
Returns the canonical absolute path of `rpath`.
"""
return realpath(abspath(rpath))
def _is_bad_path(path, base):
"""
Is (the canonical absolute path of) `path` outside `base`?
"""
return not resolved(joinpath(base, path)).startswith(base)
def _is_bad_link(info, base):
"""
Does the file sym- ord hard-link to files outside `base`?
"""
# Links are interpreted relative to the directory containing the link
tip = resolved(joinpath(base, dirname(info.name)))
return _is_bad_path(info.linkname, base=tip)
def safemembers(members):
"""
Check that all elements of a tar file are safe.
"""
base = resolved(".")
for finfo in members:
if _is_bad_path(finfo.name, base):
log.debug("File %r is blocked (illegal path)", finfo.name)
raise SuspiciousOperation("Illegal path")
elif finfo.issym() and _is_bad_link(finfo, base):
log.debug( "File %r is blocked: Hard link to %r", finfo.name, finfo.linkname)
raise SuspiciousOperation("Hard link")
elif finfo.islnk() and _is_bad_link(finfo, base):
log.debug("File %r is blocked: Symlink to %r", finfo.name,
finfo.linkname)
raise SuspiciousOperation("Symlink")
elif finfo.isdev():
log.debug("File %r is blocked: FIFO, device or character file",
finfo.name)
raise SuspiciousOperation("Dev file")
return members
def safetar_extractall(tarf, *args, **kwargs):
"""
Safe version of `tarf.extractall()`.
"""
return tarf.extractall(members=safemembers(tarf), *args, **kwargs)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment