Commit ef134df8 by Calen Pennington

Allow the HTML module to use the lxml HTML parser when parsing html file includes

parent 2223aaa6
import logging
from lxml import etree
from xmodule.x_module import XModule
from xmodule.raw_module import RawDescriptor
......@@ -26,3 +27,8 @@ class HtmlDescriptor(RawDescriptor):
js = {'coffee': [resource_string(__name__, 'js/module/html.coffee')]}
js_module = 'HTML'
@classmethod
def file_to_xml(cls, file_object):
    """
    Parse a file object using lxml's HTMLParser.

    Returns the root lxml Element of the parsed document
    """
    html_tree = etree.parse(file_object, etree.HTMLParser())
    return html_tree.getroot()
......@@ -91,6 +91,16 @@ class XmlDescriptor(XModuleDescriptor):
del xml_object.attrib[attr]
@classmethod
def file_to_xml(cls, file_object):
    """
    Parse a file object into the xml that will be converted to the
    definition for this module.

    Returns the root lxml Element of the parsed document
    """
    parsed_tree = etree.parse(file_object)
    return parsed_tree.getroot()
@classmethod
def from_xml(cls, xml_data, system, org=None, course=None):
"""
Creates an instance of this descriptor from the supplied xml_data.
......@@ -127,7 +137,7 @@ class XmlDescriptor(XModuleDescriptor):
filepath = cls._format_filepath(xml_object.tag, filename)
with system.resources_fs.open(filepath) as file:
try:
definition_xml = etree.parse(file).getroot()
definition_xml = cls.file_to_xml(file)
except:
log.exception("Failed to parse xml in file %s" % filepath)
raise
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment