Commit 5f84e619 by Victor Shnayder

Add hook for error handling during xml import

* add error_handler member to DescriptorSystem
* call it where import errors happen
* also includes some refactoring in xml.py
* some more line length and docstring cleanups
parent 63f34f2e
...@@ -29,6 +29,7 @@ def process_includes(fn): ...@@ -29,6 +29,7 @@ def process_includes(fn):
etree.tostring(next_include, pretty_print=True)) etree.tostring(next_include, pretty_print=True))
msg += 'Cannot find file %s in %s' % (file, dir) msg += 'Cannot find file %s in %s' % (file, dir)
log.exception(msg) log.exception(msg)
system.error_handler(msg)
raise raise
try: try:
# read in and convert to XML # read in and convert to XML
...@@ -38,6 +39,7 @@ def process_includes(fn): ...@@ -38,6 +39,7 @@ def process_includes(fn):
etree.tostring(next_include, pretty_print=True)) etree.tostring(next_include, pretty_print=True))
msg += 'Cannot parse XML in %s' % (file) msg += 'Cannot parse XML in %s' % (file)
log.exception(msg) log.exception(msg)
system.error_handler(msg)
raise raise
# insert new XML into tree in place of inlcude # insert new XML into tree in place of inlcude
parent = next_include.getparent() parent = next_include.getparent()
......
import sys
def strict_error_handler(msg, exc_info=None):
'''
Do not let errors pass. If exc_info is not None, ignore msg, and just
re-raise. Otherwise, check if we are in an exception-handling context.
If so, re-raise. Otherwise, raise Exception(msg).
Meant for use in validation, where any errors should trap.
'''
if exc_info is not None:
raise exc_info[0], exc_info[1], exc_info[2]
# Check if there is an exception being handled somewhere up the stack
if sys.exc_info() != (None, None, None):
raise
raise Exception(msg)
def ignore_errors_handler(msg, exc_info=None):
'''Ignore all errors, relying on the caller to workaround.
Meant for use in the LMS, where an error in one part of the course
shouldn't bring down the whole system'''
pass
...@@ -3,6 +3,7 @@ from fs.osfs import OSFS ...@@ -3,6 +3,7 @@ from fs.osfs import OSFS
from importlib import import_module from importlib import import_module
from lxml import etree from lxml import etree
from path import path from path import path
from xmodule.errorhandlers import strict_error_handler
from xmodule.x_module import XModuleDescriptor, XMLParsingSystem from xmodule.x_module import XModuleDescriptor, XMLParsingSystem
from xmodule.mako_module import MakoDescriptorSystem from xmodule.mako_module import MakoDescriptorSystem
from cStringIO import StringIO from cStringIO import StringIO
...@@ -18,32 +19,112 @@ etree.set_default_parser(etree.XMLParser(dtd_validation=False, load_dtd=False, ...@@ -18,32 +19,112 @@ etree.set_default_parser(etree.XMLParser(dtd_validation=False, load_dtd=False,
log = logging.getLogger('mitx.' + __name__) log = logging.getLogger('mitx.' + __name__)
# TODO (cpennington): Remove this once all fall 2012 courses have been imported into the cms from xml # VS[compat]
# TODO (cpennington): Remove this once all fall 2012 courses have been imported
# into the cms from xml
def clean_out_mako_templating(xml_string): def clean_out_mako_templating(xml_string):
xml_string = xml_string.replace('%include', 'include') xml_string = xml_string.replace('%include', 'include')
xml_string = re.sub("(?m)^\s*%.*$", '', xml_string) xml_string = re.sub("(?m)^\s*%.*$", '', xml_string)
return xml_string return xml_string
class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
def __init__(self, xmlstore, org, course, course_dir,
error_handler):
"""
A class that handles loading from xml. Does some munging to ensure that
all elements have unique slugs.
xmlstore: the XMLModuleStore to store the loaded modules in
"""
self.unnamed_modules = 0
self.used_slugs = set()
def process_xml(xml):
try:
# VS[compat]
# TODO (cpennington): Remove this once all fall 2012 courses
# have been imported into the cms from xml
xml = clean_out_mako_templating(xml)
xml_data = etree.fromstring(xml)
except:
log.exception("Unable to parse xml: {xml}".format(xml=xml))
raise
# TODO (vshnayder): is the slug munging permanent, or also intended
# to be taken out?
if xml_data.get('slug') is None:
if xml_data.get('name'):
slug = Location.clean(xml_data.get('name'))
else:
self.unnamed_modules += 1
slug = '{tag}_{count}'.format(tag=xml_data.tag,
count=self.unnamed_modules)
if slug in self.used_slugs:
self.unnamed_modules += 1
slug = '{slug}_{count}'.format(slug=slug,
count=self.unnamed_modules)
self.used_slugs.add(slug)
# log.debug('-> slug=%s' % slug)
xml_data.set('slug', slug)
module = XModuleDescriptor.load_from_xml(
etree.tostring(xml_data), self, org,
course, xmlstore.default_class)
log.debug('==> importing module location %s' % repr(module.location))
module.metadata['data_dir'] = course_dir
xmlstore.modules[module.location] = module
if xmlstore.eager:
module.get_children()
return module
system_kwargs = dict(
render_template=lambda: '',
load_item=xmlstore.get_item,
resources_fs=OSFS(xmlstore.data_dir / course_dir),
process_xml=process_xml,
error_handler=error_handler,
)
MakoDescriptorSystem.__init__(self, **system_kwargs)
XMLParsingSystem.__init__(self, **system_kwargs)
class XMLModuleStore(ModuleStore): class XMLModuleStore(ModuleStore):
""" """
An XML backed ModuleStore An XML backed ModuleStore
""" """
def __init__(self, data_dir, default_class=None, eager=False, course_dirs=None): def __init__(self, data_dir, default_class=None, eager=False,
course_dirs=None,
error_handler=strict_error_handler):
""" """
Initialize an XMLModuleStore from data_dir Initialize an XMLModuleStore from data_dir
data_dir: path to data directory containing the course directories data_dir: path to data directory containing the course directories
default_class: dot-separated string defining the default descriptor class to use if non is specified in entry_points
eager: If true, load the modules children immediately to force the entire course tree to be parsed default_class: dot-separated string defining the default descriptor
course_dirs: If specified, the list of course_dirs to load. Otherwise, load class to use if none is specified in entry_points
all course dirs
eager: If true, load the modules children immediately to force the
entire course tree to be parsed
course_dirs: If specified, the list of course_dirs to load. Otherwise,
load all course dirs
error_handler: The error handler used here and in the underlying
DescriptorSystem. By default, raise exceptions for all errors.
See the comments in x_module.py:DescriptorSystem
""" """
self.eager = eager self.eager = eager
self.data_dir = path(data_dir) self.data_dir = path(data_dir)
self.modules = {} # location -> XModuleDescriptor self.modules = {} # location -> XModuleDescriptor
self.courses = {} # course_dir -> XModuleDescriptor for the course self.courses = {} # course_dir -> XModuleDescriptor for the course
self.error_handler = error_handler
if default_class is None: if default_class is None:
self.default_class = None self.default_class = None
...@@ -54,110 +135,63 @@ class XMLModuleStore(ModuleStore): ...@@ -54,110 +135,63 @@ class XMLModuleStore(ModuleStore):
self.default_class = class_ self.default_class = class_
# TODO (cpennington): We need a better way of selecting specific sets of debug messages to enable. These were drowning out important messages # TODO (cpennington): We need a better way of selecting specific sets of debug messages to enable. These were drowning out important messages
#log.debug('XMLModuleStore: eager=%s, data_dir = %s' % (eager, self.data_dir)) log.debug('XMLModuleStore: eager=%s, data_dir = %s' % (eager, self.data_dir))
#log.debug('default_class = %s' % self.default_class) log.debug('default_class = %s' % self.default_class)
for course_dir in os.listdir(self.data_dir): # If we are specifically asked for missing courses, that should
if course_dirs is not None and course_dir not in course_dirs: # be an error. If we are asked for "all" courses, find the ones
continue # that have a course.xml
if course_dirs is None:
if not os.path.exists(self.data_dir / course_dir / "course.xml"): course_dirs = [d for d in os.listdir(self.data_dir) if
continue os.path.exists(self.data_dir / d / "course.xml")]
for course_dir in course_dirs:
try: try:
course_descriptor = self.load_course(course_dir) course_descriptor = self.load_course(course_dir)
self.courses[course_dir] = course_descriptor self.courses[course_dir] = course_descriptor
except: except:
log.exception("Failed to load course %s" % course_dir) msg = "Failed to load course '%s'" % course_dir
log.exception(msg)
error_handler(msg)
def load_course(self, course_dir): def load_course(self, course_dir):
""" """
Load a course into this module store Load a course into this module store
course_path: Course directory name course_path: Course directory name
returns a CourseDescriptor for the course
""" """
with open(self.data_dir / course_dir / "course.xml") as course_file: with open(self.data_dir / course_dir / "course.xml") as course_file:
# TODO (cpennington): Remove this once all fall 2012 courses have been imported # VS[compat]
# into the cms from xml # TODO (cpennington): Remove this once all fall 2012 courses have
# been imported into the cms from xml
course_file = StringIO(clean_out_mako_templating(course_file.read())) course_file = StringIO(clean_out_mako_templating(course_file.read()))
course_data = etree.parse(course_file).getroot() course_data = etree.parse(course_file).getroot()
org = course_data.get('org') org = course_data.get('org')
if org is None: if org is None:
log.error( log.error("No 'org' attribute set for course in {dir}. "
"No 'org' attribute set for course in {dir}. Using default 'edx'".format( "Using default 'edx'".format(dir=course_dir))
dir=course_dir))
org = 'edx' org = 'edx'
course = course_data.get('course') course = course_data.get('course')
if course is None: if course is None:
log.error( log.error("No 'course' attribute set for course in {dir}."
"No 'course' attribute set for course in {dir}. Using default '{default}'".format( " Using default '{default}'".format(
dir=course_dir, dir=course_dir,
default=course_dir default=course_dir
)) ))
course = course_dir course = course_dir
class ImportSystem(XMLParsingSystem, MakoDescriptorSystem): system = ImportSystem(self, org, course, course_dir,
def __init__(self, xmlstore): self.error_handler)
"""
xmlstore: the XMLModuleStore to store the loaded modules in
"""
self.unnamed_modules = 0
self.used_slugs = set()
def process_xml(xml):
try:
# TODO (cpennington): Remove this once all fall 2012 courses
# have been imported into the cms from xml
xml = clean_out_mako_templating(xml)
xml_data = etree.fromstring(xml)
except:
log.exception("Unable to parse xml: {xml}".format(xml=xml))
raise
if xml_data.get('slug') is None:
if xml_data.get('name'):
slug = Location.clean(xml_data.get('name'))
else:
self.unnamed_modules += 1
slug = '{tag}_{count}'.format(tag=xml_data.tag,
count=self.unnamed_modules)
if slug in self.used_slugs:
self.unnamed_modules += 1
slug = '{slug}_{count}'.format(slug=slug,
count=self.unnamed_modules)
self.used_slugs.add(slug)
# log.debug('-> slug=%s' % slug)
xml_data.set('slug', slug)
module = XModuleDescriptor.load_from_xml(
etree.tostring(xml_data), self, org,
course, xmlstore.default_class)
log.debug('==> importing module location %s' % repr(module.location))
module.metadata['data_dir'] = course_dir
xmlstore.modules[module.location] = module
if xmlstore.eager: course_descriptor = system.process_xml(etree.tostring(course_data))
module.get_children()
return module
system_kwargs = dict(
render_template=lambda: '',
load_item=xmlstore.get_item,
resources_fs=OSFS(xmlstore.data_dir / course_dir),
process_xml=process_xml
)
MakoDescriptorSystem.__init__(self, **system_kwargs)
XMLParsingSystem.__init__(self, **system_kwargs)
course_descriptor = ImportSystem(self).process_xml(etree.tostring(course_data))
log.debug('========> Done with course import') log.debug('========> Done with course import')
return course_descriptor return course_descriptor
...@@ -169,7 +203,9 @@ class XMLModuleStore(ModuleStore): ...@@ -169,7 +203,9 @@ class XMLModuleStore(ModuleStore):
If any segment of the location is None except revision, raises If any segment of the location is None except revision, raises
xmodule.modulestore.exceptions.InsufficientSpecificationError xmodule.modulestore.exceptions.InsufficientSpecificationError
If no object is found at that location, raises xmodule.modulestore.exceptions.ItemNotFoundError
If no object is found at that location, raises
xmodule.modulestore.exceptions.ItemNotFoundError
location: Something that can be passed to Location location: Something that can be passed to Location
""" """
......
...@@ -31,8 +31,11 @@ class RawDescriptor(MakoModuleDescriptor, XmlDescriptor): ...@@ -31,8 +31,11 @@ class RawDescriptor(MakoModuleDescriptor, XmlDescriptor):
except etree.XMLSyntaxError as err: except etree.XMLSyntaxError as err:
lines = self.definition['data'].split('\n') lines = self.definition['data'].split('\n')
line, offset = err.position line, offset = err.position
log.exception("Unable to create xml for problem {loc}. Context: '{context}'".format( msg = ("Unable to create xml for problem {loc}. "
"Context: '{context}'".format(
context=lines[line-1][offset - 40:offset + 40], context=lines[line-1][offset - 40:offset + 40],
loc=self.location loc=self.location))
)) log.exception(msg)
self.system.error_handler(msg)
# no workaround possible, so just re-raise
raise raise
...@@ -203,13 +203,16 @@ class XModule(HTMLSnippet): ...@@ -203,13 +203,16 @@ class XModule(HTMLSnippet):
Return module instances for all the children of this module. Return module instances for all the children of this module.
''' '''
if self._loaded_children is None: if self._loaded_children is None:
self._loaded_children = [self.system.get_module(child) for child in self.definition.get('children', [])] self._loaded_children = [
self.system.get_module(child)
for child in self.definition.get('children', [])]
return self._loaded_children return self._loaded_children
def get_display_items(self): def get_display_items(self):
''' '''
Returns a list of descendent module instances that will display immediately Returns a list of descendent module instances that will display
inside this module immediately inside this module
''' '''
items = [] items = []
for child in self.get_children(): for child in self.get_children():
...@@ -219,8 +222,8 @@ class XModule(HTMLSnippet): ...@@ -219,8 +222,8 @@ class XModule(HTMLSnippet):
def displayable_items(self): def displayable_items(self):
''' '''
Returns list of displayable modules contained by this module. If this module Returns list of displayable modules contained by this module. If this
is visible, should return [self] module is visible, should return [self]
''' '''
return [self] return [self]
...@@ -439,16 +442,19 @@ class XModuleDescriptor(Plugin, HTMLSnippet): ...@@ -439,16 +442,19 @@ class XModuleDescriptor(Plugin, HTMLSnippet):
on the contents of xml_data. on the contents of xml_data.
xml_data must be a string containing valid xml xml_data must be a string containing valid xml
system is an XMLParsingSystem system is an XMLParsingSystem
org and course are optional strings that will be used in the generated modules
url identifiers org and course are optional strings that will be used in the generated
modules url identifiers
""" """
class_ = XModuleDescriptor.load_class( class_ = XModuleDescriptor.load_class(
etree.fromstring(xml_data).tag, etree.fromstring(xml_data).tag,
default_class default_class
) )
# leave next line in code, commented out - useful for low-level debugging # leave next line, commented out - useful for low-level debugging
# log.debug('[XModuleDescriptor.load_from_xml] tag=%s, class_=%s' % (etree.fromstring(xml_data).tag,class_)) log.debug('[XModuleDescriptor.load_from_xml] tag=%s, class_=%s' % (
etree.fromstring(xml_data).tag,class_))
return class_.from_xml(xml_data, system, org, course) return class_.from_xml(xml_data, system, org, course)
@classmethod @classmethod
...@@ -457,35 +463,42 @@ class XModuleDescriptor(Plugin, HTMLSnippet): ...@@ -457,35 +463,42 @@ class XModuleDescriptor(Plugin, HTMLSnippet):
Creates an instance of this descriptor from the supplied xml_data. Creates an instance of this descriptor from the supplied xml_data.
This may be overridden by subclasses This may be overridden by subclasses
xml_data: A string of xml that will be translated into data and children for xml_data: A string of xml that will be translated into data and children
this module for this module
system is an XMLParsingSystem system is an XMLParsingSystem
org and course are optional strings that will be used in the generated modules
url identifiers org and course are optional strings that will be used in the generated
module's url identifiers
""" """
raise NotImplementedError('Modules must implement from_xml to be parsable from xml') raise NotImplementedError(
'Modules must implement from_xml to be parsable from xml')
def export_to_xml(self, resource_fs): def export_to_xml(self, resource_fs):
""" """
Returns an xml string representing this module, and all modules underneath it. Returns an xml string representing this module, and all modules
May also write required resources out to resource_fs underneath it. May also write required resources out to resource_fs
Assumes that modules have single parantage (that no module appears twice in the same course), Assumes that modules have single parentage (that no module appears twice
and that it is thus safe to nest modules as xml children as appropriate. in the same course), and that it is thus safe to nest modules as xml
children as appropriate.
The returned XML should be able to be parsed back into an identical XModuleDescriptor The returned XML should be able to be parsed back into an identical
using the from_xml method with the same system, org, and course XModuleDescriptor using the from_xml method with the same system, org,
and course
""" """
raise NotImplementedError('Modules must implement export_to_xml to enable xml export') raise NotImplementedError(
'Modules must implement export_to_xml to enable xml export')
# =============================== Testing =================================== # =============================== Testing ==================================
def get_sample_state(self): def get_sample_state(self):
""" """
Return a list of tuples of instance_state, shared_state. Each tuple defines a sample case for this module Return a list of tuples of instance_state, shared_state. Each tuple
defines a sample case for this module
""" """
return [('{}', '{}')] return [('{}', '{}')]
# =============================== BUILTIN METHODS =========================== # =============================== BUILTIN METHODS ==========================
def __eq__(self, other): def __eq__(self, other):
eq = (self.__class__ == other.__class__ and eq = (self.__class__ == other.__class__ and
all(getattr(self, attr, None) == getattr(other, attr, None) all(getattr(self, attr, None) == getattr(other, attr, None)
...@@ -493,38 +506,76 @@ class XModuleDescriptor(Plugin, HTMLSnippet): ...@@ -493,38 +506,76 @@ class XModuleDescriptor(Plugin, HTMLSnippet):
if not eq: if not eq:
for attr in self.equality_attributes: for attr in self.equality_attributes:
print getattr(self, attr, None), getattr(other, attr, None), getattr(self, attr, None) == getattr(other, attr, None) print(getattr(self, attr, None),
getattr(other, attr, None),
getattr(self, attr, None) == getattr(other, attr, None))
return eq return eq
def __repr__(self): def __repr__(self):
return "{class_}({system!r}, {definition!r}, location={location!r}, metadata={metadata!r})".format( return ("{class_}({system!r}, {definition!r}, location={location!r},"
" metadata={metadata!r})".format(
class_=self.__class__.__name__, class_=self.__class__.__name__,
system=self.system, system=self.system,
definition=self.definition, definition=self.definition,
location=self.location, location=self.location,
metadata=self.metadata metadata=self.metadata
) ))
class DescriptorSystem(object): class DescriptorSystem(object):
def __init__(self, load_item, resources_fs, **kwargs): def __init__(self, load_item, resources_fs,
error_handler,
**kwargs):
""" """
load_item: Takes a Location and returns an XModuleDescriptor load_item: Takes a Location and returns an XModuleDescriptor
resources_fs: A Filesystem object that contains all of the resources_fs: A Filesystem object that contains all of the
resources needed for the course resources needed for the course
error_handler: A hook for handling errors in loading the descriptor.
Must be a function of (error_msg, exc_info=None).
See errorhandlers.py for some simple ones.
Patterns for using the error handler:
try:
x = access_some_resource()
check_some_format(x)
except SomeProblem:
msg = 'Grommet {0} is broken'.format(x)
log.exception(msg) # don't rely on handler to log
self.system.error_handler(msg)
# if we get here, work around if possible
raise # if no way to work around
OR
return 'Oops, couldn't load grommet'
OR, if not in an exception context:
if not check_something(thingy):
msg = "thingy {0} is broken".format(thingy)
log.critical(msg)
error_handler(msg)
# if we get here, work around
pass # e.g. if no workaround needed
""" """
self.load_item = load_item self.load_item = load_item
self.resources_fs = resources_fs self.resources_fs = resources_fs
self.error_handler = error_handler
class XMLParsingSystem(DescriptorSystem): class XMLParsingSystem(DescriptorSystem):
def __init__(self, load_item, resources_fs, process_xml, **kwargs): def __init__(self, load_item, resources_fs, process_xml, **kwargs):
""" """
process_xml: Takes an xml string, and returns the the XModuleDescriptor created from that xml load_item: Takes a Location and returns an XModuleDescriptor
process_xml: Takes an xml string, and returns a XModuleDescriptor
created from that xml
""" """
DescriptorSystem.__init__(self, load_item, resources_fs) DescriptorSystem.__init__(self, load_item, resources_fs, **kwargs)
self.process_xml = process_xml self.process_xml = process_xml
......
...@@ -187,7 +187,10 @@ class XmlDescriptor(XModuleDescriptor): ...@@ -187,7 +187,10 @@ class XmlDescriptor(XModuleDescriptor):
with system.resources_fs.open(filepath) as file: with system.resources_fs.open(filepath) as file:
definition_xml = cls.file_to_xml(file) definition_xml = cls.file_to_xml(file)
except (ResourceNotFoundError, etree.XMLSyntaxError): except (ResourceNotFoundError, etree.XMLSyntaxError):
log.exception('Unable to load file contents at path %s' % filepath) msg = 'Unable to load file contents at path %s' % filepath
log.exception(msg)
system.error_handler(msg)
# if error_handler didn't reraise, work around it.
return {'data': 'Error loading file contents at path %s' % filepath} return {'data': 'Error loading file contents at path %s' % filepath}
cls.clean_metadata_from_xml(definition_xml) cls.clean_metadata_from_xml(definition_xml)
...@@ -206,20 +209,24 @@ class XmlDescriptor(XModuleDescriptor): ...@@ -206,20 +209,24 @@ class XmlDescriptor(XModuleDescriptor):
@classmethod @classmethod
def _format_filepath(cls, category, name): def _format_filepath(cls, category, name):
return u'{category}/{name}.{ext}'.format(category=category, name=name, ext=cls.filename_extension) return u'{category}/{name}.{ext}'.format(category=category,
name=name,
ext=cls.filename_extension)
def export_to_xml(self, resource_fs): def export_to_xml(self, resource_fs):
""" """
Returns an xml string representing this module, and all modules underneath it. Returns an xml string representing this module, and all modules
May also write required resources out to resource_fs underneath it. May also write required resources out to resource_fs
Assumes that modules have single parantage (that no module appears twice in the same course), Assumes that modules have single parentage (that no module appears twice
and that it is thus safe to nest modules as xml children as appropriate. in the same course), and that it is thus safe to nest modules as xml
children as appropriate.
The returned XML should be able to be parsed back into an identical XModuleDescriptor The returned XML should be able to be parsed back into an identical
using the from_xml method with the same system, org, and course XModuleDescriptor using the from_xml method with the same system, org,
and course
resource_fs is a pyfilesystem office (from the fs package) resource_fs is a pyfilesystem object (from the fs package)
""" """
xml_object = self.definition_to_xml(resource_fs) xml_object = self.definition_to_xml(resource_fs)
self.__class__.clean_metadata_from_xml(xml_object) self.__class__.clean_metadata_from_xml(xml_object)
...@@ -244,7 +251,8 @@ class XmlDescriptor(XModuleDescriptor): ...@@ -244,7 +251,8 @@ class XmlDescriptor(XModuleDescriptor):
attr_map = self.xml_attribute_map.get(attr, AttrMap(attr)) attr_map = self.xml_attribute_map.get(attr, AttrMap(attr))
metadata_key = attr_map.metadata_key metadata_key = attr_map.metadata_key
if metadata_key not in self.metadata or metadata_key in self._inherited_metadata: if (metadata_key not in self.metadata or
metadata_key in self._inherited_metadata):
continue continue
val = attr_map.from_metadata(self.metadata[metadata_key]) val = attr_map.from_metadata(self.metadata[metadata_key])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment