Commit 0e2d833a by jkarni

Merge pull request #830 from edx/jkarni/fix/unicode-import

Fix import errors with unicode filenames
parents d35a03a0 2aaade0f
...@@ -154,7 +154,7 @@ def import_course(request, org, course, name): ...@@ -154,7 +154,7 @@ def import_course(request, org, course, name):
sf.write("Extracting") sf.write("Extracting")
tar_file = tarfile.open(temp_filepath) tar_file = tarfile.open(temp_filepath)
tar_file.extractall(course_dir + '/') tar_file.extractall((course_dir + '/').encode('utf-8'))
with open(status_file, 'w+') as sf: with open(status_file, 'w+') as sf:
sf.write("Verifying") sf.write("Verifying")
......
...@@ -2,8 +2,10 @@ from pprint import pprint ...@@ -2,8 +2,10 @@ from pprint import pprint
# pylint: disable=E0611 # pylint: disable=E0611
from nose.tools import assert_equals, assert_raises, \ from nose.tools import assert_equals, assert_raises, \
assert_not_equals, assert_false assert_not_equals, assert_false
from itertools import ifilter
# pylint: enable=E0611 # pylint: enable=E0611
import pymongo import pymongo
import logging
from uuid import uuid4 from uuid import uuid4
from xblock.fields import Scope from xblock.fields import Scope
...@@ -19,6 +21,7 @@ from xmodule.contentstore.mongo import MongoContentStore ...@@ -19,6 +21,7 @@ from xmodule.contentstore.mongo import MongoContentStore
from xmodule.modulestore.tests.test_modulestore import check_path_to_location from xmodule.modulestore.tests.test_modulestore import check_path_to_location
log = logging.getLogger(__name__)
HOST = 'localhost' HOST = 'localhost'
PORT = 27017 PORT = 27017
...@@ -59,7 +62,7 @@ class TestMongoModuleStore(object): ...@@ -59,7 +62,7 @@ class TestMongoModuleStore(object):
# #
draft_store = DraftModuleStore(HOST, DB, COLLECTION, FS_ROOT, RENDER_TEMPLATE, default_class=DEFAULT_CLASS) draft_store = DraftModuleStore(HOST, DB, COLLECTION, FS_ROOT, RENDER_TEMPLATE, default_class=DEFAULT_CLASS)
# Explicitly list the courses to load (don't want the big one) # Explicitly list the courses to load (don't want the big one)
courses = ['toy', 'simple', 'simple_with_draft'] courses = ['toy', 'simple', 'simple_with_draft', 'test_unicode']
import_from_xml(store, DATA_DIR, courses, draft_store=draft_store, static_content_store=content_store) import_from_xml(store, DATA_DIR, courses, draft_store=draft_store, static_content_store=content_store)
# also test a course with no importing of static content # also test a course with no importing of static content
...@@ -86,6 +89,19 @@ class TestMongoModuleStore(object): ...@@ -86,6 +89,19 @@ class TestMongoModuleStore(object):
def tearDown(self): def tearDown(self):
pass pass
def get_course_by_id(self, name):
"""
Returns the first course with `id` of `name`, or `None` if there are none.
"""
courses = self.store.get_courses()
return next(ifilter(lambda x: x.id == name, courses), None)
def course_with_id_exists(self, name):
"""
Returns true iff there exists some course with `id` of `name`.
"""
return (self.get_course_by_id(name) is not None)
def test_init(self): def test_init(self):
'''Make sure the db loads, and print all the locations in the db. '''Make sure the db loads, and print all the locations in the db.
Call this directly from failing tests to see what is loaded''' Call this directly from failing tests to see what is loaded'''
...@@ -100,12 +116,12 @@ class TestMongoModuleStore(object): ...@@ -100,12 +116,12 @@ class TestMongoModuleStore(object):
def test_get_courses(self): def test_get_courses(self):
'''Make sure the course objects loaded properly''' '''Make sure the course objects loaded properly'''
courses = self.store.get_courses() courses = self.store.get_courses()
assert_equals(len(courses), 4) assert_equals(len(courses), 5)
courses.sort(key=lambda c: c.id) assert self.course_with_id_exists('edX/simple/2012_Fall')
assert_equals(courses[0].id, 'edX/simple/2012_Fall') assert self.course_with_id_exists('edX/simple_with_draft/2012_Fall')
assert_equals(courses[1].id, 'edX/simple_with_draft/2012_Fall') assert self.course_with_id_exists('edX/test_import_course/2012_Fall')
assert_equals(courses[2].id, 'edX/test_import_course/2012_Fall') assert self.course_with_id_exists('edX/test_unicode/2012_Fall')
assert_equals(courses[3].id, 'edX/toy/2012_Fall') assert self.course_with_id_exists('edX/toy/2012_Fall')
def test_loads(self): def test_loads(self):
assert_not_equals( assert_not_equals(
...@@ -120,6 +136,22 @@ class TestMongoModuleStore(object): ...@@ -120,6 +136,22 @@ class TestMongoModuleStore(object):
self.store.get_item("i4x://edX/toy/video/Welcome"), self.store.get_item("i4x://edX/toy/video/Welcome"),
None) None)
def test_unicode_loads(self):
assert_not_equals(
self.store.get_item("i4x://edX/test_unicode/course/2012_Fall"),
None)
# All items with ascii-only filenames should load properly.
assert_not_equals(
self.store.get_item("i4x://edX/test_unicode/video/Welcome"),
None)
assert_not_equals(
self.store.get_item("i4x://edX/test_unicode/video/Welcome"),
None)
assert_not_equals(
self.store.get_item("i4x://edX/test_unicode/chapter/Overview"),
None)
def test_find_one(self): def test_find_one(self):
assert_not_equals( assert_not_equals(
self.store._find_one(Location("i4x://edX/toy/course/2012_Fall")), self.store._find_one(Location("i4x://edX/toy/course/2012_Fall")),
...@@ -153,15 +185,15 @@ class TestMongoModuleStore(object): ...@@ -153,15 +185,15 @@ class TestMongoModuleStore(object):
) )
def test_static_tab_names(self): def test_static_tab_names(self):
courses = self.store.get_courses()
def get_tab_name(index): def get_tab_name(index):
""" """
Helper function for pulling out the name of a given static tab. Helper function for pulling out the name of a given static tab.
Assumes the information is desired for courses[1] ('toy' course). Assumes the information is desired for courses[4] ('toy' course).
""" """
return courses[2].tabs[index]['name'] course = self.get_course_by_id('edX/toy/2012_Fall')
return course.tabs[index]['name']
# There was a bug where model.save was not getting called after the static tab name # There was a bug where model.save was not getting called after the static tab name
# was set set for tabs that have a URL slug. 'Syllabus' and 'Resources' fall into that # was set set for tabs that have a URL slug. 'Syllabus' and 'Resources' fall into that
......
...@@ -173,7 +173,7 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem): ...@@ -173,7 +173,7 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
# Didn't load properly. Fall back on loading as an error # Didn't load properly. Fall back on loading as an error
# descriptor. This should never error due to formatting. # descriptor. This should never error due to formatting.
msg = "Error loading from xml. " + str(err)[:200] msg = "Error loading from xml. " + unicode(err)[:200]
log.warning(msg) log.warning(msg)
# Normally, we don't want lots of exception traces in our logs from common # Normally, we don't want lots of exception traces in our logs from common
# content problems. But if you're debugging the xml loading code itself, # content problems. But if you're debugging the xml loading code itself,
...@@ -317,7 +317,8 @@ class XMLModuleStore(ModuleStoreBase): ...@@ -317,7 +317,8 @@ class XMLModuleStore(ModuleStoreBase):
try: try:
course_descriptor = self.load_course(course_dir, errorlog.tracker) course_descriptor = self.load_course(course_dir, errorlog.tracker)
except Exception as e: except Exception as e:
msg = "ERROR: Failed to load course '{0}': {1}".format(course_dir, str(e)) msg = "ERROR: Failed to load course '{0}': {1}".format(course_dir.encode("utf-8"),
unicode(e))
log.exception(msg) log.exception(msg)
errorlog.tracker(msg) errorlog.tracker(msg)
...@@ -493,8 +494,9 @@ class XMLModuleStore(ModuleStoreBase): ...@@ -493,8 +494,9 @@ class XMLModuleStore(ModuleStoreBase):
module.save() module.save()
self.modules[course_descriptor.id][module.location] = module self.modules[course_descriptor.id][module.location] = module
except Exception, e: except Exception, e:
logging.exception("Failed to load {0}. Skipping... Exception: {1}".format(filepath, str(e))) logging.exception("Failed to load %s. Skipping... \
system.error_tracker("ERROR: " + str(e)) Exception: %s", filepath, unicode(e))
system.error_tracker("ERROR: " + unicode(e))
def get_instance(self, course_id, location, depth=0): def get_instance(self, course_id, location, depth=0):
""" """
......
...@@ -31,7 +31,7 @@ def import_static_content(modules, course_loc, course_data_path, static_content_ ...@@ -31,7 +31,7 @@ def import_static_content(modules, course_loc, course_data_path, static_content_
try: try:
content_path = os.path.join(dirname, filename) content_path = os.path.join(dirname, filename)
if verbose: if verbose:
log.debug('importing static content {0}...'.format(content_path)) log.debug('importing static content %s...', content_path)
fullname_with_subpath = content_path.replace(static_dir, '') # strip away leading path from the name fullname_with_subpath = content_path.replace(static_dir, '') # strip away leading path from the name
if fullname_with_subpath.startswith('/'): if fullname_with_subpath.startswith('/'):
......
...@@ -133,7 +133,7 @@ class SequenceDescriptor(SequenceFields, MakoModuleDescriptor, XmlDescriptor): ...@@ -133,7 +133,7 @@ class SequenceDescriptor(SequenceFields, MakoModuleDescriptor, XmlDescriptor):
except Exception as e: except Exception as e:
log.exception("Unable to load child when parsing Sequence. Continuing...") log.exception("Unable to load child when parsing Sequence. Continuing...")
if system.error_tracker is not None: if system.error_tracker is not None:
system.error_tracker("ERROR: " + str(e)) system.error_tracker("ERROR: " + unicode(e))
continue continue
return {}, children return {}, children
......
...@@ -369,6 +369,32 @@ class ImportTestCase(BaseCourseTestCase): ...@@ -369,6 +369,32 @@ class ImportTestCase(BaseCourseTestCase):
html = modulestore.get_instance(course_id, loc) html = modulestore.get_instance(course_id, loc)
self.assertEquals(html.display_name, "Toy lab") self.assertEquals(html.display_name, "Toy lab")
def test_unicode(self):
"""Check that courses with unicode characters in filenames and in
org/course/name import properly. Currently, this means: (a) Having
files with unicode names does not prevent import; (b) if files are not
loaded because of unicode filenames, there are appropriate
exceptions/errors to that effect."""
print("Starting import")
modulestore = XMLModuleStore(DATA_DIR, course_dirs=['test_unicode'])
courses = modulestore.get_courses()
self.assertEquals(len(courses), 1)
course = courses[0]
print("course errors:")
# Expect to find an error/exception about characters in "®esources"
expect = "Invalid characters in '®esources'"
errors = [(msg.encode("utf-8"), err.encode("utf-8"))
for msg, err in
modulestore.get_item_errors(course.location)]
self.assertTrue(any(expect in msg or expect in err
for msg, err in errors))
chapters = course.get_children()
self.assertEqual(len(chapters), 3)
def test_url_name_mangling(self): def test_url_name_mangling(self):
""" """
Make sure that url_names are only mangled once. Make sure that url_names are only mangled once.
......
<chapter display_name="åñ html ƒile">
<sequential display_name="åñ html ƒile">
<html display_name="åñ html ƒile">
<p> This is upside down text:</p>
<p>˙ʇxəʇ uʍop əpısdn sı sıɥʇ</p>
</html>
</sequential>
</chapter>
<course org="edX" course="test_unicode" url_name="2012_Fall"/>
<course>
<chapter display_name="Overview">
<video url_name="Welcome" youtube_id_1_0="p2Q6BrNhdh8" display_name="Welcome"/>
</chapter>
<chapter url_name="sîmple_√ideo"/>
<chapter url_name="simple_html"/>
<chapter url_name="vertical_container"/>
</course>
{
"course/2012_Fall": {
"graceperiod": "2 days 5 hours 59 minutes 59 seconds",
"start": "2015-07-17T12:00",
"display_name": "Üñîçø∂e †es† Course",
"graded": "true",
"tabs": [
{"type": "courseware"},
{"type": "course_info", "name": "Course Info"},
{"type": "static_tab", "url_slug": "syllabus", "name": "ßyllabus"},
{"type": "static_tab", "url_slug": "resources", "name": "®esources"},
{"type": "discussion", "name": "∂iscussion"},
{"type": "wiki", "name": "∑iki"},
{"type": "progress", "name": "πrogress"}
]
},
"chapter/Overview": {
"display_name": "O√erview"
},
"video/Welcome": {
"display_name": "Welcome"
}
}
<sequential>
<vertical filename="vertical_test" slug="vertical_test" />
<html slug="unicode"></html>
</sequential>
\ No newline at end of file
Upside down unicode text (http://www.sunnyneo.com/upsidedowntext.php?)
˙ʇʇɐld uoɯəl plos əɥs ˙pəʌıl əuɹʎq ʎʇʇəq əɹəɥʍ pɐoɹ əɥʇ uʍop əɯɐɔ ʍoɔ-ooɯ əɥʇ
˙ooʞɔnʇ ʎqɐq sɐʍ əɥ ˙əɔɐɟ ʎɹıɐɥ ɐ pɐɥ əɥ :ssɐlƃ ɐ ɥƃnoɹɥʇ ɯıɥ ʇɐ pəʞool ɹəɥʇɐɟ
sıɥ :ʎɹoʇs ʇɐɥʇ ɯıɥ ploʇ ɹəɥʇɐɟ sıɥ
˙˙˙ooʞɔnʇ ʎqɐq pəɯɐu ʎoq əlʇʇıl suəɔıu ɐ ʇəɯ pɐoɹ əɥʇ ƃuolɐ uʍop ƃuıɯoɔ sɐʍ ʇɐɥʇ
ʍoɔ-ooɯ sıɥʇ puɐ pɐoɹ əɥʇ ƃuolɐ uʍop ƃuıɯoɔ ʍoɔ-ooɯ ɐ sɐʍ əɹəɥʇ sɐʍ ʇı əɯıʇ pooƃ
ʎɹəʌ ɐ puɐ əɯıʇ ɐ uodn əɔuo
<div>resources!</div>
<div>resources!</div>
<vertical>
<video url_name="separate_file_video"/>
</vertical>
...@@ -16,5 +16,5 @@ ...@@ -16,5 +16,5 @@
# Our libraries: # Our libraries:
-e git+https://github.com/edx/XBlock.git@aa0d60627#egg=XBlock -e git+https://github.com/edx/XBlock.git@aa0d60627#egg=XBlock
-e git+https://github.com/edx/codejail.git@0a1b468#egg=codejail -e git+https://github.com/edx/codejail.git@0a1b468#egg=codejail
-e git+https://github.com/edx/diff-cover.git@v0.2.2#egg=diff_cover -e git+https://github.com/edx/diff-cover.git@v0.2.3#egg=diff_cover
-e git+https://github.com/edx/js-test-tool.git@v0.0.7#egg=js_test_tool -e git+https://github.com/edx/js-test-tool.git@v0.0.7#egg=js_test_tool
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment