Commit 62b23235 by Ned Batchelder

Merge pull request #94 from edx/ned/fix-silly-xml-encoding-errors

Fix encoding errors when hashing XML
parents 0b4e03e8 fea67926
......@@ -87,7 +87,7 @@ class ErrorDescriptor(ErrorFields, JSONEditingDescriptor):
# but url_names aren't guaranteed to be unique between descriptor types,
# and ErrorDescriptor can wrap any type. When the wrapped module is fixed,
# it will be written out with the original url_name.
name=hashlib.sha1(contents).hexdigest()
name=hashlib.sha1(contents.encode('utf8')).hexdigest()
)
# real metadata stays in the content, but add a display name
......
from xmodule.modulestore import Location
import os.path
from xmodule.course_module import CourseDescriptor
from xmodule.modulestore.xml import XMLModuleStore
from xmodule.modulestore.xml_importer import import_from_xml
from nose.tools import assert_raises
from .test_modulestore import check_path_to_location
from . import DATA_DIR
......@@ -15,3 +18,22 @@ class TestXMLModuleStore(object):
print "finished import"
check_path_to_location(modulestore)
def test_unicode_chars_in_xml_content(self):
# edX/full/6.002_Spring_2012 has non-ASCII chars, and during
# uniquification of names, would raise a UnicodeError. It no longer does.
# Ensure that there really is a non-ASCII character in the course.
with open(os.path.join(DATA_DIR, "full/sequential/Administrivia_and_Circuit_Elements.xml")) as xmlf:
xml = xmlf.read()
with assert_raises(UnicodeDecodeError):
xml.decode('ascii')
# Load the course, but don't make error modules. This will succeed,
# but will record the errors.
modulestore = XMLModuleStore(DATA_DIR, course_dirs=['full'], load_error_modules=False)
# Look up the errors during load. There should be none.
location = CourseDescriptor.id_to_location("edX/full/6.002_Spring_2012")
errors = modulestore.get_item_errors(location)
assert errors == []
......@@ -108,7 +108,8 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
orig_name = orig_name[len(tag) + 1:-12]
# append the hash of the content--the first 12 bytes should be plenty.
orig_name = "_" + orig_name if orig_name not in (None, "") else ""
return tag + orig_name + "_" + hashlib.sha1(xml).hexdigest()[:12]
xml_bytes = xml.encode('utf8')
return tag + orig_name + "_" + hashlib.sha1(xml_bytes).hexdigest()[:12]
# Fallback if there was nothing we could use:
if url_name is None or url_name == "":
......@@ -322,7 +323,7 @@ class XMLModuleStore(ModuleStoreBase):
'''
String representation - for debugging
'''
return '<XMLModuleStore>data_dir=%s, %d courses, %d modules' % (
return '<XMLModuleStore data_dir=%r, %d courses, %d modules>' % (
self.data_dir, len(self.courses), len(self.modules))
def load_policy(self, policy_path, tracker):
......
......@@ -33,8 +33,8 @@ def test_system():
"""
Construct a test ModuleSystem instance.
By default, the render_template() method simply returns the context it is
passed as a string. You can override this behavior by monkey patching::
By default, the render_template() method simply returns the repr of the
context it is passed. You can override this behavior by monkey patching::
system = test_system()
system.render_template = my_render_func
......@@ -46,7 +46,7 @@ def test_system():
ajax_url='courses/course_id/modx/a_location',
track_function=Mock(),
get_module=Mock(),
render_template=lambda template, context: str(context),
render_template=lambda template, context: repr(context),
replace_urls=lambda html: str(html),
user=Mock(is_staff=False),
filestore=Mock(),
......
......@@ -18,8 +18,7 @@ class TestErrorModule(unittest.TestCase):
self.org = "org"
self.course = "course"
self.location = Location(['i4x', self.org, self.course, None, None])
self.valid_xml = "<problem />"
self.broken_xml = "<problem>"
self.valid_xml = u"<problem>ABC \N{SNOWMAN}</problem>"
self.error_msg = "Error"
def test_error_module_xml_rendering(self):
......@@ -27,9 +26,9 @@ class TestErrorModule(unittest.TestCase):
self.valid_xml, self.system, self.org, self.course, self.error_msg)
self.assertTrue(isinstance(descriptor, error_module.ErrorDescriptor))
module = descriptor.xmodule(self.system)
rendered_html = module.get_html()
self.assertIn(self.error_msg, rendered_html)
self.assertIn(self.valid_xml, rendered_html)
context_repr = module.get_html()
self.assertIn(self.error_msg, context_repr)
self.assertIn(repr(self.valid_xml), context_repr)
def test_error_module_from_descriptor(self):
descriptor = MagicMock([XModuleDescriptor],
......@@ -41,9 +40,9 @@ class TestErrorModule(unittest.TestCase):
descriptor, self.error_msg)
self.assertTrue(isinstance(error_descriptor, error_module.ErrorDescriptor))
module = error_descriptor.xmodule(self.system)
rendered_html = module.get_html()
self.assertIn(self.error_msg, rendered_html)
self.assertIn(str(descriptor), rendered_html)
context_repr = module.get_html()
self.assertIn(self.error_msg, context_repr)
self.assertIn(repr(descriptor), context_repr)
class TestNonStaffErrorModule(TestErrorModule):
......@@ -60,9 +59,9 @@ class TestNonStaffErrorModule(TestErrorModule):
descriptor = error_module.NonStaffErrorDescriptor.from_xml(
self.valid_xml, self.system, self.org, self.course)
module = descriptor.xmodule(self.system)
rendered_html = module.get_html()
self.assertNotIn(self.error_msg, rendered_html)
self.assertNotIn(self.valid_xml, rendered_html)
context_repr = module.get_html()
self.assertNotIn(self.error_msg, context_repr)
self.assertNotIn(repr(self.valid_xml), context_repr)
def test_error_module_from_descriptor(self):
descriptor = MagicMock([XModuleDescriptor],
......@@ -74,6 +73,6 @@ class TestNonStaffErrorModule(TestErrorModule):
descriptor, self.error_msg)
self.assertTrue(isinstance(error_descriptor, error_module.ErrorDescriptor))
module = error_descriptor.xmodule(self.system)
rendered_html = module.get_html()
self.assertNotIn(self.error_msg, rendered_html)
self.assertNotIn(str(descriptor), rendered_html)
context_repr = module.get_html()
self.assertNotIn(self.error_msg, context_repr)
self.assertNotIn(str(descriptor), context_repr)
......@@ -41,7 +41,7 @@ class DummySystem(ImportSystem):
)
def render_template(self, template, context):
raise Exception("Shouldn't be called")
raise Exception("Shouldn't be called")
class BaseCourseTestCase(unittest.TestCase):
......@@ -66,13 +66,13 @@ class ImportTestCase(BaseCourseTestCase):
def test_fallback(self):
'''Check that malformed xml loads as an ErrorDescriptor.'''
bad_xml = '''<sequential display_name="oops"><video url="hi"></sequential>'''
# Use an exotic character to also flush out Unicode issues.
bad_xml = u'''<sequential display_name="oops\N{SNOWMAN}"><video url="hi"></sequential>'''
system = self.get_system()
descriptor = system.process_xml(bad_xml)
self.assertEqual(descriptor.__class__.__name__,
'ErrorDescriptor')
self.assertEqual(descriptor.__class__.__name__, 'ErrorDescriptor')
def test_unique_url_names(self):
'''Check that each error gets its very own url_name'''
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment