Commit 5f84e619 by Victor Shnayder

Add hook for error handling during xml import

* add error_handler member to DescriptorSystem
* call it where import errors happen
* also includes some refactoring in xml.py
* some more line length and docstring cleanups
parent 63f34f2e
......@@ -29,6 +29,7 @@ def process_includes(fn):
etree.tostring(next_include, pretty_print=True))
msg += 'Cannot find file %s in %s' % (file, dir)
log.exception(msg)
system.error_handler(msg)
raise
try:
# read in and convert to XML
......@@ -38,6 +39,7 @@ def process_includes(fn):
etree.tostring(next_include, pretty_print=True))
msg += 'Cannot parse XML in %s' % (file)
log.exception(msg)
system.error_handler(msg)
raise
# insert new XML into tree in place of inlcude
parent = next_include.getparent()
......
import sys
def strict_error_handler(msg, exc_info=None):
'''
Do not let errors pass. If exc_info is not None, ignore msg, and just
re-raise. Otherwise, check if we are in an exception-handling context.
If so, re-raise. Otherwise, raise Exception(msg).
Meant for use in validation, where any errors should trap.
'''
if exc_info is not None:
raise exc_info[0], exc_info[1], exc_info[2]
# Check if there is an exception being handled somewhere up the stack
if sys.exc_info() != (None, None, None):
raise
raise Exception(msg)
def ignore_errors_handler(msg, exc_info=None):
'''Ignore all errors, relying on the caller to workaround.
Meant for use in the LMS, where an error in one part of the course
shouldn't bring down the whole system'''
pass
......@@ -3,6 +3,7 @@ from fs.osfs import OSFS
from importlib import import_module
from lxml import etree
from path import path
from xmodule.errorhandlers import strict_error_handler
from xmodule.x_module import XModuleDescriptor, XMLParsingSystem
from xmodule.mako_module import MakoDescriptorSystem
from cStringIO import StringIO
......@@ -18,32 +19,112 @@ etree.set_default_parser(etree.XMLParser(dtd_validation=False, load_dtd=False,
log = logging.getLogger('mitx.' + __name__)
# TODO (cpennington): Remove this once all fall 2012 courses have been imported into the cms from xml
# VS[compat]
# TODO (cpennington): Remove this once all fall 2012 courses have been imported
# into the cms from xml
def clean_out_mako_templating(xml_string):
xml_string = xml_string.replace('%include', 'include')
xml_string = re.sub("(?m)^\s*%.*$", '', xml_string)
return xml_string
class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
def __init__(self, xmlstore, org, course, course_dir,
error_handler):
"""
A class that handles loading from xml. Does some munging to ensure that
all elements have unique slugs.
xmlstore: the XMLModuleStore to store the loaded modules in
"""
self.unnamed_modules = 0
self.used_slugs = set()
def process_xml(xml):
try:
# VS[compat]
# TODO (cpennington): Remove this once all fall 2012 courses
# have been imported into the cms from xml
xml = clean_out_mako_templating(xml)
xml_data = etree.fromstring(xml)
except:
log.exception("Unable to parse xml: {xml}".format(xml=xml))
raise
# TODO (vshnayder): is the slug munging permanent, or also intended
# to be taken out?
if xml_data.get('slug') is None:
if xml_data.get('name'):
slug = Location.clean(xml_data.get('name'))
else:
self.unnamed_modules += 1
slug = '{tag}_{count}'.format(tag=xml_data.tag,
count=self.unnamed_modules)
if slug in self.used_slugs:
self.unnamed_modules += 1
slug = '{slug}_{count}'.format(slug=slug,
count=self.unnamed_modules)
self.used_slugs.add(slug)
# log.debug('-> slug=%s' % slug)
xml_data.set('slug', slug)
module = XModuleDescriptor.load_from_xml(
etree.tostring(xml_data), self, org,
course, xmlstore.default_class)
log.debug('==> importing module location %s' % repr(module.location))
module.metadata['data_dir'] = course_dir
xmlstore.modules[module.location] = module
if xmlstore.eager:
module.get_children()
return module
system_kwargs = dict(
render_template=lambda: '',
load_item=xmlstore.get_item,
resources_fs=OSFS(xmlstore.data_dir / course_dir),
process_xml=process_xml,
error_handler=error_handler,
)
MakoDescriptorSystem.__init__(self, **system_kwargs)
XMLParsingSystem.__init__(self, **system_kwargs)
class XMLModuleStore(ModuleStore):
"""
An XML backed ModuleStore
"""
def __init__(self, data_dir, default_class=None, eager=False, course_dirs=None):
def __init__(self, data_dir, default_class=None, eager=False,
course_dirs=None,
error_handler=strict_error_handler):
"""
Initialize an XMLModuleStore from data_dir
data_dir: path to data directory containing the course directories
default_class: dot-separated string defining the default descriptor class to use if non is specified in entry_points
eager: If true, load the modules children immediately to force the entire course tree to be parsed
course_dirs: If specified, the list of course_dirs to load. Otherwise, load
all course dirs
default_class: dot-separated string defining the default descriptor
class to use if none is specified in entry_points
eager: If true, load the modules children immediately to force the
entire course tree to be parsed
course_dirs: If specified, the list of course_dirs to load. Otherwise,
load all course dirs
error_handler: The error handler used here and in the underlying
DescriptorSystem. By default, raise exceptions for all errors.
See the comments in x_module.py:DescriptorSystem
"""
self.eager = eager
self.data_dir = path(data_dir)
self.modules = {} # location -> XModuleDescriptor
self.courses = {} # course_dir -> XModuleDescriptor for the course
self.error_handler = error_handler
if default_class is None:
self.default_class = None
......@@ -54,110 +135,63 @@ class XMLModuleStore(ModuleStore):
self.default_class = class_
# TODO (cpennington): We need a better way of selecting specific sets of debug messages to enable. These were drowning out important messages
#log.debug('XMLModuleStore: eager=%s, data_dir = %s' % (eager, self.data_dir))
#log.debug('default_class = %s' % self.default_class)
for course_dir in os.listdir(self.data_dir):
if course_dirs is not None and course_dir not in course_dirs:
continue
log.debug('XMLModuleStore: eager=%s, data_dir = %s' % (eager, self.data_dir))
log.debug('default_class = %s' % self.default_class)
if not os.path.exists(self.data_dir / course_dir / "course.xml"):
continue
# If we are specifically asked for missing courses, that should
# be an error. If we are asked for "all" courses, find the ones
# that have a course.xml
if course_dirs is None:
course_dirs = [d for d in os.listdir(self.data_dir) if
os.path.exists(self.data_dir / d / "course.xml")]
for course_dir in course_dirs:
try:
course_descriptor = self.load_course(course_dir)
self.courses[course_dir] = course_descriptor
except:
log.exception("Failed to load course %s" % course_dir)
msg = "Failed to load course '%s'" % course_dir
log.exception(msg)
error_handler(msg)
def load_course(self, course_dir):
"""
Load a course into this module store
course_path: Course directory name
returns a CourseDescriptor for the course
"""
with open(self.data_dir / course_dir / "course.xml") as course_file:
# TODO (cpennington): Remove this once all fall 2012 courses have been imported
# into the cms from xml
# VS[compat]
# TODO (cpennington): Remove this once all fall 2012 courses have
# been imported into the cms from xml
course_file = StringIO(clean_out_mako_templating(course_file.read()))
course_data = etree.parse(course_file).getroot()
org = course_data.get('org')
if org is None:
log.error(
"No 'org' attribute set for course in {dir}. Using default 'edx'".format(
dir=course_dir))
log.error("No 'org' attribute set for course in {dir}. "
"Using default 'edx'".format(dir=course_dir))
org = 'edx'
course = course_data.get('course')
if course is None:
log.error(
"No 'course' attribute set for course in {dir}. Using default '{default}'".format(
dir=course_dir,
default=course_dir
))
log.error("No 'course' attribute set for course in {dir}."
" Using default '{default}'".format(
dir=course_dir,
default=course_dir
))
course = course_dir
class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
def __init__(self, xmlstore):
"""
xmlstore: the XMLModuleStore to store the loaded modules in
"""
self.unnamed_modules = 0
self.used_slugs = set()
def process_xml(xml):
try:
# TODO (cpennington): Remove this once all fall 2012 courses
# have been imported into the cms from xml
xml = clean_out_mako_templating(xml)
xml_data = etree.fromstring(xml)
except:
log.exception("Unable to parse xml: {xml}".format(xml=xml))
raise
if xml_data.get('slug') is None:
if xml_data.get('name'):
slug = Location.clean(xml_data.get('name'))
else:
self.unnamed_modules += 1
slug = '{tag}_{count}'.format(tag=xml_data.tag,
count=self.unnamed_modules)
if slug in self.used_slugs:
self.unnamed_modules += 1
slug = '{slug}_{count}'.format(slug=slug,
count=self.unnamed_modules)
self.used_slugs.add(slug)
# log.debug('-> slug=%s' % slug)
xml_data.set('slug', slug)
module = XModuleDescriptor.load_from_xml(
etree.tostring(xml_data), self, org,
course, xmlstore.default_class)
log.debug('==> importing module location %s' % repr(module.location))
module.metadata['data_dir'] = course_dir
xmlstore.modules[module.location] = module
if xmlstore.eager:
module.get_children()
return module
system_kwargs = dict(
render_template=lambda: '',
load_item=xmlstore.get_item,
resources_fs=OSFS(xmlstore.data_dir / course_dir),
process_xml=process_xml
)
MakoDescriptorSystem.__init__(self, **system_kwargs)
XMLParsingSystem.__init__(self, **system_kwargs)
course_descriptor = ImportSystem(self).process_xml(etree.tostring(course_data))
system = ImportSystem(self, org, course, course_dir,
self.error_handler)
course_descriptor = system.process_xml(etree.tostring(course_data))
log.debug('========> Done with course import')
return course_descriptor
......@@ -169,7 +203,9 @@ class XMLModuleStore(ModuleStore):
If any segment of the location is None except revision, raises
xmodule.modulestore.exceptions.InsufficientSpecificationError
If no object is found at that location, raises xmodule.modulestore.exceptions.ItemNotFoundError
If no object is found at that location, raises
xmodule.modulestore.exceptions.ItemNotFoundError
location: Something that can be passed to Location
"""
......
......@@ -31,8 +31,11 @@ class RawDescriptor(MakoModuleDescriptor, XmlDescriptor):
except etree.XMLSyntaxError as err:
lines = self.definition['data'].split('\n')
line, offset = err.position
log.exception("Unable to create xml for problem {loc}. Context: '{context}'".format(
context=lines[line-1][offset - 40:offset + 40],
loc=self.location
))
msg = ("Unable to create xml for problem {loc}. "
"Context: '{context}'".format(
context=lines[line-1][offset - 40:offset + 40],
loc=self.location))
log.exception(msg)
self.system.error_handler(msg)
# no workaround possible, so just re-raise
raise
......@@ -203,13 +203,16 @@ class XModule(HTMLSnippet):
Return module instances for all the children of this module.
'''
if self._loaded_children is None:
self._loaded_children = [self.system.get_module(child) for child in self.definition.get('children', [])]
self._loaded_children = [
self.system.get_module(child)
for child in self.definition.get('children', [])]
return self._loaded_children
def get_display_items(self):
'''
Returns a list of descendent module instances that will display immediately
inside this module
Returns a list of descendent module instances that will display
immediately inside this module
'''
items = []
for child in self.get_children():
......@@ -219,8 +222,8 @@ class XModule(HTMLSnippet):
def displayable_items(self):
'''
Returns list of displayable modules contained by this module. If this module
is visible, should return [self]
Returns list of displayable modules contained by this module. If this
module is visible, should return [self]
'''
return [self]
......@@ -439,16 +442,19 @@ class XModuleDescriptor(Plugin, HTMLSnippet):
on the contents of xml_data.
xml_data must be a string containing valid xml
system is an XMLParsingSystem
org and course are optional strings that will be used in the generated modules
url identifiers
org and course are optional strings that will be used in the generated
modules url identifiers
"""
class_ = XModuleDescriptor.load_class(
etree.fromstring(xml_data).tag,
default_class
)
# leave next line in code, commented out - useful for low-level debugging
# log.debug('[XModuleDescriptor.load_from_xml] tag=%s, class_=%s' % (etree.fromstring(xml_data).tag,class_))
# leave next line, commented out - useful for low-level debugging
log.debug('[XModuleDescriptor.load_from_xml] tag=%s, class_=%s' % (
etree.fromstring(xml_data).tag,class_))
return class_.from_xml(xml_data, system, org, course)
@classmethod
......@@ -457,35 +463,42 @@ class XModuleDescriptor(Plugin, HTMLSnippet):
Creates an instance of this descriptor from the supplied xml_data.
This may be overridden by subclasses
xml_data: A string of xml that will be translated into data and children for
this module
xml_data: A string of xml that will be translated into data and children
for this module
system is an XMLParsingSystem
org and course are optional strings that will be used in the generated modules
url identifiers
org and course are optional strings that will be used in the generated
module's url identifiers
"""
raise NotImplementedError('Modules must implement from_xml to be parsable from xml')
raise NotImplementedError(
'Modules must implement from_xml to be parsable from xml')
def export_to_xml(self, resource_fs):
"""
Returns an xml string representing this module, and all modules underneath it.
May also write required resources out to resource_fs
Returns an xml string representing this module, and all modules
underneath it. May also write required resources out to resource_fs
Assumes that modules have single parantage (that no module appears twice in the same course),
and that it is thus safe to nest modules as xml children as appropriate.
Assumes that modules have single parentage (that no module appears twice
in the same course), and that it is thus safe to nest modules as xml
children as appropriate.
The returned XML should be able to be parsed back into an identical XModuleDescriptor
using the from_xml method with the same system, org, and course
The returned XML should be able to be parsed back into an identical
XModuleDescriptor using the from_xml method with the same system, org,
and course
"""
raise NotImplementedError('Modules must implement export_to_xml to enable xml export')
raise NotImplementedError(
'Modules must implement export_to_xml to enable xml export')
# =============================== Testing ===================================
# =============================== Testing ==================================
def get_sample_state(self):
"""
Return a list of tuples of instance_state, shared_state. Each tuple defines a sample case for this module
Return a list of tuples of instance_state, shared_state. Each tuple
defines a sample case for this module
"""
return [('{}', '{}')]
# =============================== BUILTIN METHODS ===========================
# =============================== BUILTIN METHODS ==========================
def __eq__(self, other):
eq = (self.__class__ == other.__class__ and
all(getattr(self, attr, None) == getattr(other, attr, None)
......@@ -493,38 +506,76 @@ class XModuleDescriptor(Plugin, HTMLSnippet):
if not eq:
for attr in self.equality_attributes:
print getattr(self, attr, None), getattr(other, attr, None), getattr(self, attr, None) == getattr(other, attr, None)
print(getattr(self, attr, None),
getattr(other, attr, None),
getattr(self, attr, None) == getattr(other, attr, None))
return eq
def __repr__(self):
return "{class_}({system!r}, {definition!r}, location={location!r}, metadata={metadata!r})".format(
return ("{class_}({system!r}, {definition!r}, location={location!r},"
" metadata={metadata!r})".format(
class_=self.__class__.__name__,
system=self.system,
definition=self.definition,
location=self.location,
metadata=self.metadata
)
))
class DescriptorSystem(object):
def __init__(self, load_item, resources_fs, **kwargs):
def __init__(self, load_item, resources_fs,
error_handler,
**kwargs):
"""
load_item: Takes a Location and returns an XModuleDescriptor
resources_fs: A Filesystem object that contains all of the
resources needed for the course
error_handler: A hook for handling errors in loading the descriptor.
Must be a function of (error_msg, exc_info=None).
See errorhandlers.py for some simple ones.
Patterns for using the error handler:
try:
x = access_some_resource()
check_some_format(x)
except SomeProblem:
msg = 'Grommet {0} is broken'.format(x)
log.exception(msg) # don't rely on handler to log
self.system.error_handler(msg)
# if we get here, work around if possible
raise # if no way to work around
OR
return 'Oops, couldn't load grommet'
OR, if not in an exception context:
if not check_something(thingy):
msg = "thingy {0} is broken".format(thingy)
log.critical(msg)
error_handler(msg)
# if we get here, work around
pass # e.g. if no workaround needed
"""
self.load_item = load_item
self.resources_fs = resources_fs
self.error_handler = error_handler
class XMLParsingSystem(DescriptorSystem):
def __init__(self, load_item, resources_fs, process_xml, **kwargs):
"""
process_xml: Takes an xml string, and returns the the XModuleDescriptor created from that xml
load_item: Takes a Location and returns an XModuleDescriptor
process_xml: Takes an xml string, and returns a XModuleDescriptor
created from that xml
"""
DescriptorSystem.__init__(self, load_item, resources_fs)
DescriptorSystem.__init__(self, load_item, resources_fs, **kwargs)
self.process_xml = process_xml
......
......@@ -187,7 +187,10 @@ class XmlDescriptor(XModuleDescriptor):
with system.resources_fs.open(filepath) as file:
definition_xml = cls.file_to_xml(file)
except (ResourceNotFoundError, etree.XMLSyntaxError):
log.exception('Unable to load file contents at path %s' % filepath)
msg = 'Unable to load file contents at path %s' % filepath
log.exception(msg)
system.error_handler(msg)
# if error_handler didn't reraise, work around it.
return {'data': 'Error loading file contents at path %s' % filepath}
cls.clean_metadata_from_xml(definition_xml)
......@@ -206,20 +209,24 @@ class XmlDescriptor(XModuleDescriptor):
@classmethod
def _format_filepath(cls, category, name):
return u'{category}/{name}.{ext}'.format(category=category, name=name, ext=cls.filename_extension)
return u'{category}/{name}.{ext}'.format(category=category,
name=name,
ext=cls.filename_extension)
def export_to_xml(self, resource_fs):
"""
Returns an xml string representing this module, and all modules underneath it.
May also write required resources out to resource_fs
Returns an xml string representing this module, and all modules
underneath it. May also write required resources out to resource_fs
Assumes that modules have single parantage (that no module appears twice in the same course),
and that it is thus safe to nest modules as xml children as appropriate.
Assumes that modules have single parentage (that no module appears twice
in the same course), and that it is thus safe to nest modules as xml
children as appropriate.
The returned XML should be able to be parsed back into an identical XModuleDescriptor
using the from_xml method with the same system, org, and course
The returned XML should be able to be parsed back into an identical
XModuleDescriptor using the from_xml method with the same system, org,
and course
resource_fs is a pyfilesystem office (from the fs package)
resource_fs is a pyfilesystem object (from the fs package)
"""
xml_object = self.definition_to_xml(resource_fs)
self.__class__.clean_metadata_from_xml(xml_object)
......@@ -244,7 +251,8 @@ class XmlDescriptor(XModuleDescriptor):
attr_map = self.xml_attribute_map.get(attr, AttrMap(attr))
metadata_key = attr_map.metadata_key
if metadata_key not in self.metadata or metadata_key in self._inherited_metadata:
if (metadata_key not in self.metadata or
metadata_key in self._inherited_metadata):
continue
val = attr_map.from_metadata(self.metadata[metadata_key])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment