Commit 0e2d833a by jkarni

Merge pull request #830 from edx/jkarni/fix/unicode-import

Fix import errors with unicode filenames
parents d35a03a0 2aaade0f
......@@ -154,7 +154,7 @@ def import_course(request, org, course, name):
sf.write("Extracting")
tar_file = tarfile.open(temp_filepath)
tar_file.extractall(course_dir + '/')
tar_file.extractall((course_dir + '/').encode('utf-8'))
with open(status_file, 'w+') as sf:
sf.write("Verifying")
......
......@@ -2,8 +2,10 @@ from pprint import pprint
# pylint: disable=E0611
from nose.tools import assert_equals, assert_raises, \
assert_not_equals, assert_false
from itertools import ifilter
# pylint: enable=E0611
import pymongo
import logging
from uuid import uuid4
from xblock.fields import Scope
......@@ -19,6 +21,7 @@ from xmodule.contentstore.mongo import MongoContentStore
from xmodule.modulestore.tests.test_modulestore import check_path_to_location
log = logging.getLogger(__name__)
HOST = 'localhost'
PORT = 27017
......@@ -59,7 +62,7 @@ class TestMongoModuleStore(object):
#
draft_store = DraftModuleStore(HOST, DB, COLLECTION, FS_ROOT, RENDER_TEMPLATE, default_class=DEFAULT_CLASS)
# Explicitly list the courses to load (don't want the big one)
courses = ['toy', 'simple', 'simple_with_draft']
courses = ['toy', 'simple', 'simple_with_draft', 'test_unicode']
import_from_xml(store, DATA_DIR, courses, draft_store=draft_store, static_content_store=content_store)
# also test a course with no importing of static content
......@@ -86,6 +89,19 @@ class TestMongoModuleStore(object):
def tearDown(self):
pass
def get_course_by_id(self, name):
"""
Returns the first course with `id` of `name`, or `None` if there are none.
"""
courses = self.store.get_courses()
return next(ifilter(lambda x: x.id == name, courses), None)
def course_with_id_exists(self, name):
"""
Returns true iff there exists some course with `id` of `name`.
"""
return (self.get_course_by_id(name) is not None)
def test_init(self):
'''Make sure the db loads, and print all the locations in the db.
Call this directly from failing tests to see what is loaded'''
......@@ -100,12 +116,12 @@ class TestMongoModuleStore(object):
def test_get_courses(self):
'''Make sure the course objects loaded properly'''
courses = self.store.get_courses()
assert_equals(len(courses), 4)
courses.sort(key=lambda c: c.id)
assert_equals(courses[0].id, 'edX/simple/2012_Fall')
assert_equals(courses[1].id, 'edX/simple_with_draft/2012_Fall')
assert_equals(courses[2].id, 'edX/test_import_course/2012_Fall')
assert_equals(courses[3].id, 'edX/toy/2012_Fall')
assert_equals(len(courses), 5)
assert self.course_with_id_exists('edX/simple/2012_Fall')
assert self.course_with_id_exists('edX/simple_with_draft/2012_Fall')
assert self.course_with_id_exists('edX/test_import_course/2012_Fall')
assert self.course_with_id_exists('edX/test_unicode/2012_Fall')
assert self.course_with_id_exists('edX/toy/2012_Fall')
def test_loads(self):
assert_not_equals(
......@@ -120,6 +136,22 @@ class TestMongoModuleStore(object):
self.store.get_item("i4x://edX/toy/video/Welcome"),
None)
def test_unicode_loads(self):
assert_not_equals(
self.store.get_item("i4x://edX/test_unicode/course/2012_Fall"),
None)
# All items with ascii-only filenames should load properly.
assert_not_equals(
self.store.get_item("i4x://edX/test_unicode/video/Welcome"),
None)
assert_not_equals(
self.store.get_item("i4x://edX/test_unicode/video/Welcome"),
None)
assert_not_equals(
self.store.get_item("i4x://edX/test_unicode/chapter/Overview"),
None)
def test_find_one(self):
assert_not_equals(
self.store._find_one(Location("i4x://edX/toy/course/2012_Fall")),
......@@ -153,15 +185,15 @@ class TestMongoModuleStore(object):
)
def test_static_tab_names(self):
courses = self.store.get_courses()
def get_tab_name(index):
"""
Helper function for pulling out the name of a given static tab.
Assumes the information is desired for courses[1] ('toy' course).
Assumes the information is desired for courses[4] ('toy' course).
"""
return courses[2].tabs[index]['name']
course = self.get_course_by_id('edX/toy/2012_Fall')
return course.tabs[index]['name']
# There was a bug where model.save was not getting called after the static tab name
# was set set for tabs that have a URL slug. 'Syllabus' and 'Resources' fall into that
......
......@@ -173,7 +173,7 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
# Didn't load properly. Fall back on loading as an error
# descriptor. This should never error due to formatting.
msg = "Error loading from xml. " + str(err)[:200]
msg = "Error loading from xml. " + unicode(err)[:200]
log.warning(msg)
# Normally, we don't want lots of exception traces in our logs from common
# content problems. But if you're debugging the xml loading code itself,
......@@ -317,7 +317,8 @@ class XMLModuleStore(ModuleStoreBase):
try:
course_descriptor = self.load_course(course_dir, errorlog.tracker)
except Exception as e:
msg = "ERROR: Failed to load course '{0}': {1}".format(course_dir, str(e))
msg = "ERROR: Failed to load course '{0}': {1}".format(course_dir.encode("utf-8"),
unicode(e))
log.exception(msg)
errorlog.tracker(msg)
......@@ -493,8 +494,9 @@ class XMLModuleStore(ModuleStoreBase):
module.save()
self.modules[course_descriptor.id][module.location] = module
except Exception, e:
logging.exception("Failed to load {0}. Skipping... Exception: {1}".format(filepath, str(e)))
system.error_tracker("ERROR: " + str(e))
logging.exception("Failed to load %s. Skipping... \
Exception: %s", filepath, unicode(e))
system.error_tracker("ERROR: " + unicode(e))
def get_instance(self, course_id, location, depth=0):
"""
......
......@@ -31,7 +31,7 @@ def import_static_content(modules, course_loc, course_data_path, static_content_
try:
content_path = os.path.join(dirname, filename)
if verbose:
log.debug('importing static content {0}...'.format(content_path))
log.debug('importing static content %s...', content_path)
fullname_with_subpath = content_path.replace(static_dir, '') # strip away leading path from the name
if fullname_with_subpath.startswith('/'):
......
......@@ -133,7 +133,7 @@ class SequenceDescriptor(SequenceFields, MakoModuleDescriptor, XmlDescriptor):
except Exception as e:
log.exception("Unable to load child when parsing Sequence. Continuing...")
if system.error_tracker is not None:
system.error_tracker("ERROR: " + str(e))
system.error_tracker("ERROR: " + unicode(e))
continue
return {}, children
......
......@@ -369,6 +369,32 @@ class ImportTestCase(BaseCourseTestCase):
html = modulestore.get_instance(course_id, loc)
self.assertEquals(html.display_name, "Toy lab")
def test_unicode(self):
"""Check that courses with unicode characters in filenames and in
org/course/name import properly. Currently, this means: (a) Having
files with unicode names does not prevent import; (b) if files are not
loaded because of unicode filenames, there are appropriate
exceptions/errors to that effect."""
print("Starting import")
modulestore = XMLModuleStore(DATA_DIR, course_dirs=['test_unicode'])
courses = modulestore.get_courses()
self.assertEquals(len(courses), 1)
course = courses[0]
print("course errors:")
# Expect to find an error/exception about characters in "®esources"
expect = "Invalid characters in '®esources'"
errors = [(msg.encode("utf-8"), err.encode("utf-8"))
for msg, err in
modulestore.get_item_errors(course.location)]
self.assertTrue(any(expect in msg or expect in err
for msg, err in errors))
chapters = course.get_children()
self.assertEqual(len(chapters), 3)
def test_url_name_mangling(self):
"""
Make sure that url_names are only mangled once.
......
<chapter display_name="åñ html ƒile">
<sequential display_name="åñ html ƒile">
<html display_name="åñ html ƒile">
<p> This is upside down text:</p>
<p>˙ʇxəʇ uʍop əpısdn sı sıɥʇ</p>
</html>
</sequential>
</chapter>
<course org="edX" course="test_unicode" url_name="2012_Fall"/>
<course>
<chapter display_name="Overview">
<video url_name="Welcome" youtube_id_1_0="p2Q6BrNhdh8" display_name="Welcome"/>
</chapter>
<chapter url_name="sîmple_√ideo"/>
<chapter url_name="simple_html"/>
<chapter url_name="vertical_container"/>
</course>
{
"course/2012_Fall": {
"graceperiod": "2 days 5 hours 59 minutes 59 seconds",
"start": "2015-07-17T12:00",
"display_name": "Üñîçø∂e †es† Course",
"graded": "true",
"tabs": [
{"type": "courseware"},
{"type": "course_info", "name": "Course Info"},
{"type": "static_tab", "url_slug": "syllabus", "name": "ßyllabus"},
{"type": "static_tab", "url_slug": "resources", "name": "®esources"},
{"type": "discussion", "name": "∂iscussion"},
{"type": "wiki", "name": "∑iki"},
{"type": "progress", "name": "πrogress"}
]
},
"chapter/Overview": {
"display_name": "O√erview"
},
"video/Welcome": {
"display_name": "Welcome"
}
}
<sequential>
<vertical filename="vertical_test" slug="vertical_test" />
<html slug="unicode"></html>
</sequential>
\ No newline at end of file
Upside down unicode text (http://www.sunnyneo.com/upsidedowntext.php?)
˙ʇʇɐld uoɯəl plos əɥs ˙pəʌıl əuɹʎq ʎʇʇəq əɹəɥʍ pɐoɹ əɥʇ uʍop əɯɐɔ ʍoɔ-ooɯ əɥʇ
˙ooʞɔnʇ ʎqɐq sɐʍ əɥ ˙əɔɐɟ ʎɹıɐɥ ɐ pɐɥ əɥ :ssɐlƃ ɐ ɥƃnoɹɥʇ ɯıɥ ʇɐ pəʞool ɹəɥʇɐɟ
sıɥ :ʎɹoʇs ʇɐɥʇ ɯıɥ ploʇ ɹəɥʇɐɟ sıɥ
˙˙˙ooʞɔnʇ ʎqɐq pəɯɐu ʎoq əlʇʇıl suəɔıu ɐ ʇəɯ pɐoɹ əɥʇ ƃuolɐ uʍop ƃuıɯoɔ sɐʍ ʇɐɥʇ
ʍoɔ-ooɯ sıɥʇ puɐ pɐoɹ əɥʇ ƃuolɐ uʍop ƃuıɯoɔ ʍoɔ-ooɯ ɐ sɐʍ əɹəɥʇ sɐʍ ʇı əɯıʇ pooƃ
ʎɹəʌ ɐ puɐ əɯıʇ ɐ uodn əɔuo
<div>resources!</div>
<div>resources!</div>
<vertical>
<video url_name="separate_file_video"/>
</vertical>
......@@ -16,5 +16,5 @@
# Our libraries:
-e git+https://github.com/edx/XBlock.git@aa0d60627#egg=XBlock
-e git+https://github.com/edx/codejail.git@0a1b468#egg=codejail
-e git+https://github.com/edx/diff-cover.git@v0.2.2#egg=diff_cover
-e git+https://github.com/edx/diff-cover.git@v0.2.3#egg=diff_cover
-e git+https://github.com/edx/js-test-tool.git@v0.0.7#egg=js_test_tool
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment