Commit 1b118024 by ichuang

Merge pull request #174 from MITx/cpennington/cms-backcompat

CMS import backwards compatibility

 - edx4edx and 8.01x course repo errors fixed and verified
 - Moving forward, it would be nice if the LMS+CMS XML import failed more gracefully when it encounters errors in the XML files while in debug or authoring mode. For example, an inadvertent XML error in one problem (e.g. one created by a mistake in an import tool used in the CMS) should not bring down the whole LMS. The same applies if a single problem is deleted from MongoDB or the filesystem.
parents 926fdd01 b29649c4
...@@ -6,6 +6,7 @@ import logging ...@@ -6,6 +6,7 @@ import logging
import traceback import traceback
import re import re
import StringIO import StringIO
import os
from datetime import timedelta from datetime import timedelta
from lxml import etree from lxml import etree
...@@ -503,3 +504,12 @@ class CapaDescriptor(RawDescriptor): ...@@ -503,3 +504,12 @@ class CapaDescriptor(RawDescriptor):
""" """
module_class = CapaModule module_class = CapaModule
# TODO (cpennington): Delete this method once all fall 2012 courses are being
# edited in the cms
@classmethod
def backcompat_paths(cls, path):
    """
    Return alternative locations to try when `path` does not exist.

    The first 8 characters of `path` are dropped (presumably the
    'problem/' directory prefix -- confirm against the caller), and the
    remainder is offered both under 'problems/' and on its own, in that
    order.
    """
    remainder = path[8:]
    under_problems = 'problems/' + remainder
    return [under_problems, remainder]
import logging import logging
import os
from lxml import etree from lxml import etree
from xmodule.x_module import XModule from xmodule.x_module import XModule
...@@ -28,6 +29,19 @@ class HtmlDescriptor(RawDescriptor): ...@@ -28,6 +29,19 @@ class HtmlDescriptor(RawDescriptor):
js = {'coffee': [resource_string(__name__, 'js/module/html.coffee')]} js = {'coffee': [resource_string(__name__, 'js/module/html.coffee')]}
js_module = 'HTML' js_module = 'HTML'
# TODO (cpennington): Delete this method once all fall 2012 courses are being
# edited in the cms
@classmethod
def backcompat_paths(cls, path):
    """
    Return alternative locations to try when `path` does not exist.

    A doubled '.html.html' extension is first collapsed to a single
    '.html'. The result is then every suffix of the path obtained by
    repeatedly dropping the leading directory component, starting with
    the full path itself. A path with no `os.sep` left in it is never
    included, so a bare file name yields an empty list.
    """
    if path.endswith('.html.html'):
        # Drop the trailing 5 characters, i.e. the second '.html'.
        path = path[:-5]

    found = []
    remaining = path
    while True:
        _, separator, tail = remaining.partition(os.sep)
        if not separator:
            # No directory component left to strip.
            break
        found.append(remaining)
        remaining = tail
    return found
@classmethod @classmethod
def file_to_xml(cls, file_object): def file_to_xml(cls, file_object):
parser = etree.HTMLParser() parser = etree.HTMLParser()
......
...@@ -5,7 +5,9 @@ from lxml import etree ...@@ -5,7 +5,9 @@ from lxml import etree
from path import path from path import path
from xmodule.x_module import XModuleDescriptor, XMLParsingSystem from xmodule.x_module import XModuleDescriptor, XMLParsingSystem
from xmodule.mako_module import MakoDescriptorSystem from xmodule.mako_module import MakoDescriptorSystem
from cStringIO import StringIO
import os import os
import re
from . import ModuleStore, Location from . import ModuleStore, Location
from .exceptions import ItemNotFoundError from .exceptions import ItemNotFoundError
...@@ -16,6 +18,13 @@ etree.set_default_parser(etree.XMLParser(dtd_validation=False, load_dtd=False, ...@@ -16,6 +18,13 @@ etree.set_default_parser(etree.XMLParser(dtd_validation=False, load_dtd=False,
log = logging.getLogger('mitx.' + __name__) log = logging.getLogger('mitx.' + __name__)
# TODO (cpennington): Remove this once all fall 2012 courses have been imported into the cms from xml
def clean_out_mako_templating(xml_string):
    """
    Strip mako templating directives from `xml_string` and return the result.

    Two transformations are applied, in order:

    1. Every occurrence of '%include' becomes 'include', so those lines
       survive the next step.
    2. Every line whose first non-whitespace character is '%' is blanked
       out (the line's content is removed; its trailing newline stays).
       Because the leading-whitespace match may span newlines, blank
       lines directly before such a line are consumed as well.
    """
    xml_string = xml_string.replace('%include', 'include')
    # Raw string: '\s' is not a valid escape in an ordinary string literal.
    xml_string = re.sub(r"^\s*%.*$", '', xml_string, flags=re.MULTILINE)
    return xml_string
class XMLModuleStore(ModuleStore): class XMLModuleStore(ModuleStore):
""" """
An XML backed ModuleStore An XML backed ModuleStore
...@@ -54,8 +63,11 @@ class XMLModuleStore(ModuleStore): ...@@ -54,8 +63,11 @@ class XMLModuleStore(ModuleStore):
if not os.path.exists(self.data_dir / course_dir / "course.xml"): if not os.path.exists(self.data_dir / course_dir / "course.xml"):
continue continue
course_descriptor = self.load_course(course_dir) try:
self.courses[course_dir] = course_descriptor course_descriptor = self.load_course(course_dir)
self.courses[course_dir] = course_descriptor
except:
log.exception("Failed to load course %s" % course_dir)
def load_course(self, course_dir): def load_course(self, course_dir):
""" """
...@@ -65,6 +77,9 @@ class XMLModuleStore(ModuleStore): ...@@ -65,6 +77,9 @@ class XMLModuleStore(ModuleStore):
with open(self.data_dir / course_dir / "course.xml") as course_file: with open(self.data_dir / course_dir / "course.xml") as course_file:
# TODO (cpennington): Remove this once all fall 2012 courses have been imported into the cms from xml
course_file = StringIO(clean_out_mako_templating(course_file.read()))
course_data = etree.parse(course_file).getroot() course_data = etree.parse(course_file).getroot()
org = course_data.get('org') org = course_data.get('org')
...@@ -91,6 +106,8 @@ class XMLModuleStore(ModuleStore): ...@@ -91,6 +106,8 @@ class XMLModuleStore(ModuleStore):
def process_xml(xml): def process_xml(xml):
try: try:
# TODO (cpennington): Remove this once all fall 2012 courses have been imported into the cms from xml
xml = clean_out_mako_templating(xml)
xml_data = etree.fromstring(xml) xml_data = etree.fromstring(xml)
except: except:
log.exception("Unable to parse xml: {xml}".format(xml=xml)) log.exception("Unable to parse xml: {xml}".format(xml=xml))
......
...@@ -4,6 +4,8 @@ from lxml import etree ...@@ -4,6 +4,8 @@ from lxml import etree
import copy import copy
import logging import logging
from collections import namedtuple from collections import namedtuple
from fs.errors import ResourceNotFoundError
import os
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -154,13 +156,30 @@ class XmlDescriptor(XModuleDescriptor): ...@@ -154,13 +156,30 @@ class XmlDescriptor(XModuleDescriptor):
definition_xml = copy.deepcopy(xml_object) definition_xml = copy.deepcopy(xml_object)
else: else:
filepath = cls._format_filepath(xml_object.tag, filename) filepath = cls._format_filepath(xml_object.tag, filename)
log.debug('filepath=%s, resources_fs=%s' % (filepath,system.resources_fs))
with system.resources_fs.open(filepath) as file: # TODO (cpennington): If the file doesn't exist at the right path,
try: # give the class a chance to fix it up. The file will be written out again
definition_xml = cls.file_to_xml(file) # in the correct format.
except: # This should go away once the CMS is online and has imported all current (fall 2012)
log.exception("Failed to parse xml in file %s" % filepath) # courses from xml
raise if not system.resources_fs.exists(filepath) and hasattr(cls, 'backcompat_paths'):
candidates = cls.backcompat_paths(filepath)
for candidate in candidates:
if system.resources_fs.exists(candidate):
filepath = candidate
break
log.debug('filepath=%s, resources_fs=%s' % (filepath, system.resources_fs))
try:
with system.resources_fs.open(filepath) as file:
try:
definition_xml = cls.file_to_xml(file)
except:
log.exception("Failed to parse xml in file %s" % filepath)
raise
except ResourceNotFoundError:
log.exception('Unable to load file contents at path %s' % filepath)
return {'data': 'Error loading file contents at path %s' % filepath}
cls.clean_metadata_from_xml(definition_xml) cls.clean_metadata_from_xml(definition_xml)
return cls.definition_from_xml(definition_xml, system) return cls.definition_from_xml(definition_xml, system)
...@@ -200,7 +219,7 @@ class XmlDescriptor(XModuleDescriptor): ...@@ -200,7 +219,7 @@ class XmlDescriptor(XModuleDescriptor):
if len(list(xml_object.iter())) > 5: if len(list(xml_object.iter())) > 5:
filepath = self.__class__._format_filepath(self.category, self.name) filepath = self.__class__._format_filepath(self.category, self.name)
resource_fs.makedir(self.category, allow_recreate=True) resource_fs.makedir(os.path.dirname(filepath), allow_recreate=True)
with resource_fs.open(filepath, 'w') as file: with resource_fs.open(filepath, 'w') as file:
file.write(etree.tostring(xml_object, pretty_print=True)) file.write(etree.tostring(xml_object, pretty_print=True))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment