Commit 602eb876 by jkarni

Merge pull request #1227 from edx/jkarni/hotfix/safetar

Use wrapped tar extraction
parents cbebb42c dadc766f
...@@ -6,6 +6,7 @@ import shutil ...@@ -6,6 +6,7 @@ import shutil
import tarfile import tarfile
import tempfile import tempfile
import copy import copy
from path import path
from uuid import uuid4 from uuid import uuid4
from pymongo import MongoClient from pymongo import MongoClient
...@@ -19,6 +20,7 @@ from xmodule.contentstore.django import _CONTENTSTORE ...@@ -19,6 +20,7 @@ from xmodule.contentstore.django import _CONTENTSTORE
TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE) TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
TEST_DATA_CONTENTSTORE['OPTIONS']['db'] = 'test_xcontent_%s' % uuid4().hex TEST_DATA_CONTENTSTORE['OPTIONS']['db'] = 'test_xcontent_%s' % uuid4().hex
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE) @override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE)
class ImportTestCase(CourseTestCase): class ImportTestCase(CourseTestCase):
""" """
...@@ -32,7 +34,7 @@ class ImportTestCase(CourseTestCase): ...@@ -32,7 +34,7 @@ class ImportTestCase(CourseTestCase):
'course': self.course.location.course, 'course': self.course.location.course,
'name': self.course.location.name, 'name': self.course.location.name,
}) })
self.content_dir = tempfile.mkdtemp() self.content_dir = path(tempfile.mkdtemp())
def touch(name): def touch(name):
""" Equivalent to shell's 'touch'""" """ Equivalent to shell's 'touch'"""
...@@ -60,11 +62,15 @@ class ImportTestCase(CourseTestCase): ...@@ -60,11 +62,15 @@ class ImportTestCase(CourseTestCase):
with tarfile.open(self.bad_tar, "w:gz") as btar: with tarfile.open(self.bad_tar, "w:gz") as btar:
btar.add(bad_dir) btar.add(bad_dir)
self.unsafe_common_dir = path(tempfile.mkdtemp(dir=self.content_dir))
def tearDown(self): def tearDown(self):
shutil.rmtree(self.content_dir) shutil.rmtree(self.content_dir)
MongoClient().drop_database(TEST_DATA_CONTENTSTORE['OPTIONS']['db']) MongoClient().drop_database(TEST_DATA_CONTENTSTORE['OPTIONS']['db'])
_CONTENTSTORE.clear() _CONTENTSTORE.clear()
def test_no_coursexml(self): def test_no_coursexml(self):
""" """
Check that the response for a tar.gz import without a course.xml is Check that the response for a tar.gz import without a course.xml is
...@@ -92,3 +98,88 @@ class ImportTestCase(CourseTestCase): ...@@ -92,3 +98,88 @@ class ImportTestCase(CourseTestCase):
"course-data": [gtar] "course-data": [gtar]
}) })
self.assertEquals(resp.status_code, 200) self.assertEquals(resp.status_code, 200)
## Unsafe tar methods #####################################################
# Each of these methods creates a tarfile with a single type of unsafe
# content.
def _fifo_tar(self):
"""
Tar file with FIFO
"""
fifop = self.unsafe_common_dir / "fifo.file"
fifo_tar = self.unsafe_common_dir / "fifo.tar.gz"
os.mkfifo(fifop)
with tarfile.open(fifo_tar, "w:gz") as tar:
tar.add(fifop)
return fifo_tar
def _symlink_tar(self):
"""
Tarfile with symlink to path outside directory.
"""
outsidep = self.unsafe_common_dir / "unsafe_file.txt"
symlinkp = self.unsafe_common_dir / "symlink.txt"
symlink_tar = self.unsafe_common_dir / "symlink.tar.gz"
outsidep.symlink(symlinkp)
with tarfile.open(symlink_tar, "w:gz" ) as tar:
tar.add(symlinkp)
return symlink_tar
def _outside_tar(self):
"""
Tarfile with file that extracts to outside directory.
Extracting this tarfile in directory <dir> will put its contents
directly in <dir> (rather than <dir/tarname>).
"""
outside_tar = self.unsafe_common_dir / "unsafe_file.tar.gz"
with tarfile.open(outside_tar, "w:gz") as tar:
tar.addfile(tarfile.TarInfo(str(self.content_dir / "a_file")))
return outside_tar
def _outside_tar2(self):
"""
Tarfile with file that extracts to outside directory.
The path here matches the basename (`self.unsafe_common_dir`), but
then "cd's out". E.g. "/usr/../etc" == "/etc", but the naive basename
of the first (but not the second) is "/usr"
Extracting this tarfile in directory <dir> will also put its contents
directly in <dir> (rather than <dir/tarname>).
"""
outside_tar = self.unsafe_common_dir / "unsafe_file.tar.gz"
with tarfile.open(outside_tar, "w:gz") as tar:
tar.addfile(tarfile.TarInfo(str(self.unsafe_common_dir / "../a_file")))
return outside_tar
def test_unsafe_tar(self):
"""
Check that safety measure work.
This includes:
'tarbombs' which include files or symlinks with paths
outside or directly in the working directory,
'special files' (character device, block device or FIFOs),
all raise exceptions/400s.
"""
def try_tar(tarpath):
with open(tarpath) as tar:
resp = self.client.post(
self.url,
{ "name": tarpath, "course-data": [tar] }
)
self.assertEquals(resp.status_code, 400)
self.assertTrue("SuspiciousFileOperation" in resp.content)
try_tar(self._fifo_tar())
try_tar(self._symlink_tar())
try_tar(self._outside_tar())
try_tar(self._outside_tar2())
...@@ -19,6 +19,7 @@ from django.core.urlresolvers import reverse ...@@ -19,6 +19,7 @@ from django.core.urlresolvers import reverse
from django.core.servers.basehttp import FileWrapper from django.core.servers.basehttp import FileWrapper
from django.core.files.temp import NamedTemporaryFile from django.core.files.temp import NamedTemporaryFile
from django.views.decorators.http import require_http_methods from django.views.decorators.http import require_http_methods
from django.core.exceptions import SuspiciousOperation
from mitxmako.shortcuts import render_to_response from mitxmako.shortcuts import render_to_response
from auth.authz import create_all_course_groups from auth.authz import create_all_course_groups
...@@ -32,6 +33,7 @@ from xmodule.exceptions import SerializationError ...@@ -32,6 +33,7 @@ from xmodule.exceptions import SerializationError
from .access import get_location_and_verify_access from .access import get_location_and_verify_access
from util.json_request import JsonResponse from util.json_request import JsonResponse
from extract_tar import safetar_extractall
__all__ = ['import_course', 'generate_export_course', 'export_course'] __all__ = ['import_course', 'generate_export_course', 'export_course']
...@@ -154,7 +156,16 @@ def import_course(request, org, course, name): ...@@ -154,7 +156,16 @@ def import_course(request, org, course, name):
sf.write("Extracting") sf.write("Extracting")
tar_file = tarfile.open(temp_filepath) tar_file = tarfile.open(temp_filepath)
tar_file.extractall((course_dir + '/').encode('utf-8')) try:
safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
except SuspiciousOperation as exc:
return JsonResponse(
{
'ErrMsg': 'Unsafe tar file. Aborting import.',
'SuspiciousFileOperationMsg': exc.args[0]
},
status=400
)
with open(status_file, 'w+') as sf: with open(status_file, 'w+') as sf:
sf.write("Verifying") sf.write("Verifying")
......
"""
Safe version of tarfile.extractall which does not extract any files that would
be, or symlink to a file that is, outside of the directory extracted in.
Adapted from:
http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python
"""
from os.path import abspath, realpath, dirname, join as joinpath
from django.core.exceptions import SuspiciousOperation
import logging
log = logging.getLogger(__name__) #pylint: disable=C0103
def resolved(rpath):
"""
Returns the canonical absolute path of `rpath`.
"""
return realpath(abspath(rpath))
def _is_bad_path(path, base):
"""
Is (the canonical absolute path of) `path` outside `base`?
"""
return not resolved(joinpath(base, path)).startswith(base)
def _is_bad_link(info, base):
"""
Does the file sym- ord hard-link to files outside `base`?
"""
# Links are interpreted relative to the directory containing the link
tip = resolved(joinpath(base, dirname(info.name)))
return _is_bad_path(info.linkname, base=tip)
def safemembers(members):
"""
Check that all elements of a tar file are safe.
"""
base = resolved(".")
for finfo in members:
if _is_bad_path(finfo.name, base):
log.debug("File %r is blocked (illegal path)", finfo.name)
raise SuspiciousOperation("Illegal path")
elif finfo.issym() and _is_bad_link(finfo, base):
log.debug( "File %r is blocked: Hard link to %r", finfo.name, finfo.linkname)
raise SuspiciousOperation("Hard link")
elif finfo.islnk() and _is_bad_link(finfo, base):
log.debug("File %r is blocked: Symlink to %r", finfo.name,
finfo.linkname)
raise SuspiciousOperation("Symlink")
elif finfo.isdev():
log.debug("File %r is blocked: FIFO, device or character file",
finfo.name)
raise SuspiciousOperation("Dev file")
return members
def safetar_extractall(tarf, *args, **kwargs):
"""
Safe version of `tarf.extractall()`.
"""
return tarf.extractall(members=safemembers(tarf), *args, **kwargs)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment