Commit be45209d by Calen Pennington

Make imports into the CMS ignore broken modules, while still loading bad xml as…

Make imports into the CMS ignore broken modules, while still loading bad xml as error modules in the LMS
parent 89fba018
......@@ -26,4 +26,4 @@ class Command(BaseCommand):
print "Importing. Data_dir={data}, course_dirs={courses}".format(
data=data_dir,
courses=course_dirs)
import_from_xml(modulestore(), data_dir, course_dirs)
import_from_xml(modulestore(), data_dir, course_dirs, load_error_modules=False)
......@@ -195,6 +195,7 @@ STATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'
# prep it for use in pipeline js
from xmodule.x_module import XModuleDescriptor
from xmodule.raw_module import RawDescriptor
from xmodule.error_module import ErrorDescriptor
js_file_dir = PROJECT_ROOT / "static" / "coffee" / "module"
css_file_dir = PROJECT_ROOT / "static" / "sass" / "module"
module_styles_path = css_file_dir / "_module-styles.scss"
......@@ -210,7 +211,7 @@ for dir_ in (js_file_dir, css_file_dir):
js_fragments = set()
css_fragments = defaultdict(set)
for _, descriptor in XModuleDescriptor.load_classes() + [(None, RawDescriptor)]:
for _, descriptor in XModuleDescriptor.load_classes() + [(None, RawDescriptor), (None, ErrorDescriptor)]:
descriptor_js = descriptor.get_javascript()
module_js = descriptor.module_class.get_javascript()
......
import json
import random
import logging
from lxml import etree
from xmodule.x_module import XModule
......@@ -10,6 +11,9 @@ from xmodule.exceptions import InvalidDefinitionError
DEFAULT = "_DEFAULT_GROUP"
log = logging.getLogger(__name__)
def group_from_value(groups, v):
"""
Given group: (('a', 0.3), ('b', 0.4), ('c', 0.3)) and random value v
......@@ -127,10 +131,13 @@ class ABTestDescriptor(RawDescriptor, XmlDescriptor):
name = group.get('name')
definition['data']['group_portions'][name] = float(group.get('portion', 0))
child_content_urls = [
system.process_xml(etree.tostring(child)).location.url()
for child in group
]
child_content_urls = []
for child in group:
try:
child_content_urls.append(system.process_xml(etree.tostring(child)).location.url())
except:
log.exception("Unable to load child when parsing ABTest. Continuing...")
continue
definition['data']['group_content'][name] = child_content_urls
definition['children'].extend(child_content_urls)
......
......@@ -3,16 +3,16 @@ import json
import logging
import os
import re
import sys
from collections import defaultdict
from cStringIO import StringIO
from fs.osfs import OSFS
from importlib import import_module
from lxml import etree
from lxml.html import HtmlComment
from path import path
from xmodule.errortracker import ErrorLog, make_error_tracker
from xmodule.errortracker import make_error_tracker, exc_info_to_str
from xmodule.course_module import CourseDescriptor
from xmodule.mako_module import MakoDescriptorSystem
from xmodule.x_module import XModuleDescriptor, XMLParsingSystem
......@@ -27,6 +27,7 @@ etree.set_default_parser(edx_xml_parser)
log = logging.getLogger('mitx.' + __name__)
# VS[compat]
# TODO (cpennington): Remove this once all fall 2012 courses have been imported
# into the cms from xml
......@@ -35,9 +36,11 @@ def clean_out_mako_templating(xml_string):
xml_string = re.sub("(?m)^\s*%.*$", '', xml_string)
return xml_string
class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
def __init__(self, xmlstore, course_id, course_dir,
policy, error_tracker, parent_tracker, **kwargs):
policy, error_tracker, parent_tracker,
load_error_modules=True, **kwargs):
"""
A class that handles loading from xml. Does some munging to ensure that
all elements have unique slugs.
......@@ -47,6 +50,7 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
self.unnamed = defaultdict(int) # category -> num of new url_names for that category
self.used_names = defaultdict(set) # category -> set of used url_names
self.org, self.course, self.url_name = course_id.split('/')
self.load_error_modules = load_error_modules
def process_xml(xml):
"""Takes an xml string, and returns a XModuleDescriptor created from
......@@ -93,7 +97,7 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
because we want it to be lazy."""
if looks_like_fallback(orig_name):
# We're about to re-hash, in case something changed, so get rid of the tag_ and hash
orig_name = orig_name[len(tag)+1:-12]
orig_name = orig_name[len(tag) + 1:-12]
# append the hash of the content--the first 12 bytes should be plenty.
orig_name = "_" + orig_name if orig_name not in (None, "") else ""
return tag + orig_name + "_" + hashlib.sha1(xml).hexdigest()[:12]
......@@ -144,16 +148,40 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
# have been imported into the cms from xml
xml = clean_out_mako_templating(xml)
xml_data = etree.fromstring(xml)
make_name_unique(xml_data)
descriptor = XModuleDescriptor.load_from_xml(
etree.tostring(xml_data), self, self.org,
self.course, xmlstore.default_class)
except Exception as err:
log.warning("Unable to parse xml: {err}, xml: {xml}".format(
err=str(err), xml=xml))
raise
print err, self.load_error_modules
if not self.load_error_modules:
raise
make_name_unique(xml_data)
# Didn't load properly. Fall back on loading as an error
# descriptor. This should never error due to formatting.
# Put import here to avoid circular import errors
from xmodule.error_module import ErrorDescriptor
msg = "Error loading from xml. " + str(err)[:200]
log.warning(msg)
# Normally, we don't want lots of exception traces in our logs from common
# content problems. But if you're debugging the xml loading code itself,
# uncomment the next line.
# log.exception(msg)
self.error_tracker(msg)
err_msg = msg + "\n" + exc_info_to_str(sys.exc_info())
descriptor = ErrorDescriptor.from_xml(
xml,
self,
self.org,
self.course,
err_msg
)
descriptor = XModuleDescriptor.load_from_xml(
etree.tostring(xml_data), self, self.org,
self.course, xmlstore.default_class)
descriptor.metadata['data_dir'] = course_dir
xmlstore.modules[course_id][descriptor.location] = descriptor
......@@ -219,7 +247,7 @@ class XMLModuleStore(ModuleStoreBase):
"""
An XML backed ModuleStore
"""
def __init__(self, data_dir, default_class=None, course_dirs=None):
def __init__(self, data_dir, default_class=None, course_dirs=None, load_error_modules=True):
"""
Initialize an XMLModuleStore from data_dir
......@@ -238,6 +266,8 @@ class XMLModuleStore(ModuleStoreBase):
self.courses = {} # course_dir -> XModuleDescriptor for the course
self.errored_courses = {} # course_dir -> errorlog, for dirs that failed to load
self.load_error_modules = load_error_modules
if default_class is None:
self.default_class = None
else:
......@@ -396,7 +426,15 @@ class XMLModuleStore(ModuleStoreBase):
course_id = CourseDescriptor.make_id(org, course, url_name)
system = ImportSystem(self, course_id, course_dir, policy, tracker, self.parent_tracker)
system = ImportSystem(
self,
course_id,
course_dir,
policy,
tracker,
self.parent_tracker,
self.load_error_modules,
)
course_descriptor = system.process_xml(etree.tostring(course_data))
......
......@@ -7,7 +7,8 @@ log = logging.getLogger(__name__)
def import_from_xml(store, data_dir, course_dirs=None,
default_class='xmodule.raw_module.RawDescriptor'):
default_class='xmodule.raw_module.RawDescriptor',
load_error_modules=True):
"""
Import the specified xml data_dir into the "store" modulestore,
using org and course as the location org and course.
......@@ -19,7 +20,8 @@ def import_from_xml(store, data_dir, course_dirs=None,
module_store = XMLModuleStore(
data_dir,
default_class=default_class,
course_dirs=course_dirs
course_dirs=course_dirs,
load_error_modules=load_error_modules,
)
for course_id in module_store.modules.keys():
for module in module_store.modules[course_id].itervalues():
......
......@@ -120,10 +120,14 @@ class SequenceDescriptor(MakoModuleDescriptor, XmlDescriptor):
@classmethod
def definition_from_xml(cls, xml_object, system):
return {'children': [
system.process_xml(etree.tostring(child_module)).location.url()
for child_module in xml_object
]}
children = []
for child in xml_object:
try:
children.append(system.process_xml(etree.tostring(child)).location.url())
except:
log.exception("Unable to load child when parsing Sequence. Continuing...")
continue
return {'children': children}
def definition_to_xml(self, resource_fs):
xml_object = etree.Element('sequential')
......
......@@ -3,12 +3,14 @@ import unittest
from fs.memoryfs import MemoryFS
from lxml import etree
from mock import Mock, patch
from collections import defaultdict
from xmodule.x_module import XMLParsingSystem, XModuleDescriptor
from xmodule.xml_module import is_pointer_tag
from xmodule.errortracker import make_error_tracker
from xmodule.modulestore import Location
from xmodule.modulestore.xml import XMLModuleStore
from xmodule.modulestore.xml import ImportSystem, XMLModuleStore
from xmodule.modulestore.exceptions import ItemNotFoundError
from .test_export import DATA_DIR
......@@ -16,35 +18,28 @@ from .test_export import DATA_DIR
ORG = 'test_org'
COURSE = 'test_course'
class DummySystem(XMLParsingSystem):
def __init__(self):
self.modules = {}
self.resources_fs = MemoryFS()
self.errorlog = make_error_tracker()
class DummySystem(ImportSystem):
def load_item(loc):
loc = Location(loc)
if loc in self.modules:
return self.modules[loc]
print "modules: "
print self.modules
raise ItemNotFoundError("Can't find item at loc: {0}".format(loc))
def process_xml(xml):
print "loading {0}".format(xml)
descriptor = XModuleDescriptor.load_from_xml(xml, self, ORG, COURSE, None)
# Need to save module so we can find it later
self.modules[descriptor.location] = descriptor
# always eager
descriptor.get_children()
return descriptor
@patch('xmodule.modulestore.xml.OSFS', lambda dir: MemoryFS())
def __init__(self, load_error_modules):
xmlstore = XMLModuleStore("data_dir", course_dirs=[], load_error_modules=load_error_modules)
course_id = "/".join([ORG, COURSE, 'test_run'])
course_dir = "test_dir"
policy = {}
XMLParsingSystem.__init__(self, load_item, self.resources_fs,
self.errorlog.tracker, process_xml, policy)
error_tracker = Mock()
parent_tracker = Mock()
super(DummySystem, self).__init__(
xmlstore,
course_id,
course_dir,
policy,
error_tracker,
parent_tracker,
load_error_modules=load_error_modules,
)
def render_template(self, template, context):
raise Exception("Shouldn't be called")
......@@ -53,9 +48,9 @@ class DummySystem(XMLParsingSystem):
class ImportTestCase(unittest.TestCase):
'''Make sure module imports work properly, including for malformed inputs'''
@staticmethod
def get_system():
def get_system(load_error_modules=True):
'''Get a dummy system'''
return DummySystem()
return DummySystem(load_error_modules)
def test_fallback(self):
'''Check that malformed xml loads as an ErrorDescriptor.'''
......@@ -63,8 +58,7 @@ class ImportTestCase(unittest.TestCase):
bad_xml = '''<sequential display_name="oops"><video url="hi"></sequential>'''
system = self.get_system()
descriptor = XModuleDescriptor.load_from_xml(bad_xml, system, 'org', 'course',
None)
descriptor = system.process_xml(bad_xml)
self.assertEqual(descriptor.__class__.__name__,
'ErrorDescriptor')
......@@ -76,11 +70,8 @@ class ImportTestCase(unittest.TestCase):
bad_xml2 = '''<sequential url_name="oops"><video url="hi"></sequential>'''
system = self.get_system()
descriptor1 = XModuleDescriptor.load_from_xml(bad_xml, system, 'org',
'course', None)
descriptor2 = XModuleDescriptor.load_from_xml(bad_xml2, system, 'org',
'course', None)
descriptor1 = system.process_xml(bad_xml)
descriptor2 = system.process_xml(bad_xml2)
self.assertNotEqual(descriptor1.location, descriptor2.location)
......@@ -91,13 +82,12 @@ class ImportTestCase(unittest.TestCase):
self.maxDiff = None
bad_xml = '''<sequential display_name="oops"><video url="hi"></sequential>'''
system = self.get_system()
descriptor = XModuleDescriptor.load_from_xml(bad_xml, system, 'org', 'course',
None)
descriptor = system.process_xml(bad_xml)
resource_fs = None
tag_xml = descriptor.export_to_xml(resource_fs)
re_import_descriptor = XModuleDescriptor.load_from_xml(tag_xml, system,
'org', 'course',
None)
re_import_descriptor = system.process_xml(tag_xml)
self.assertEqual(re_import_descriptor.__class__.__name__,
'ErrorDescriptor')
......@@ -116,8 +106,8 @@ class ImportTestCase(unittest.TestCase):
# load it
system = self.get_system()
descriptor = XModuleDescriptor.load_from_xml(xml_str_in, system, 'org', 'course',
None)
descriptor = system.process_xml(xml_str_in)
# export it
resource_fs = None
xml_str_out = descriptor.export_to_xml(resource_fs)
......@@ -133,8 +123,6 @@ class ImportTestCase(unittest.TestCase):
"""
system = self.get_system()
v = '1 hour'
org = 'foo'
course = 'bbhh'
url_name = 'test1'
start_xml = '''
<course org="{org}" course="{course}"
......@@ -142,9 +130,8 @@ class ImportTestCase(unittest.TestCase):
<chapter url="hi" url_name="ch" display_name="CH">
<html url_name="h" display_name="H">Two houses, ...</html>
</chapter>
</course>'''.format(grace=v, org=org, course=course, url_name=url_name)
descriptor = XModuleDescriptor.load_from_xml(start_xml, system,
org, course)
</course>'''.format(grace=v, org=ORG, course=COURSE, url_name=url_name)
descriptor = system.process_xml(start_xml)
print descriptor, descriptor.metadata
self.assertEqual(descriptor.metadata['graceperiod'], v)
......@@ -166,8 +153,8 @@ class ImportTestCase(unittest.TestCase):
pointer = etree.fromstring(exported_xml)
self.assertTrue(is_pointer_tag(pointer))
# but it's a special case course pointer
self.assertEqual(pointer.attrib['course'], course)
self.assertEqual(pointer.attrib['org'], org)
self.assertEqual(pointer.attrib['course'], COURSE)
self.assertEqual(pointer.attrib['org'], ORG)
# Does the course still have unicorns?
with resource_fs.open('course/{url_name}.xml'.format(url_name=url_name)) as f:
......@@ -317,3 +304,10 @@ class ImportTestCase(unittest.TestCase):
self.assertEqual(len(video.url_name), len('video_') + 12)
def test_error_on_import(self):
'''Check that when load_error_module is false, an exception is raised, rather than returning an ErrorModule'''
bad_xml = '''<sequential display_name="oops"><video url="hi"></sequential>'''
system = self.get_system(False)
self.assertRaises(etree.XMLSyntaxError, system.process_xml, bad_xml)
import logging
import pkg_resources
import sys
import json
import os
from fs.errors import ResourceNotFoundError
from functools import partial
from lxml import etree
from lxml.etree import XMLSyntaxError
from pprint import pprint
from collections import namedtuple
from pkg_resources import resource_listdir, resource_string, resource_isdir
from xmodule.errortracker import exc_info_to_str
from xmodule.modulestore import Location
from xmodule.timeparse import parse_time
......@@ -219,6 +215,7 @@ class XModule(HTMLSnippet):
'''
return self.metadata.get('display_name',
self.url_name.replace('_', ' '))
def __unicode__(self):
return '<x_module(id={0})>'.format(self.id)
......@@ -332,8 +329,6 @@ def policy_key(location):
Template = namedtuple("Template", "metadata data children")
class ResourceTemplates(object):
@classmethod
def templates(cls):
......@@ -378,9 +373,9 @@ class XModuleDescriptor(Plugin, HTMLSnippet, ResourceTemplates):
module_class = XModule
# Attributes for inpsection of the descriptor
stores_state = False # Indicates whether the xmodule state should be
stores_state = False # Indicates whether the xmodule state should be
# stored in a database (independent of shared state)
has_score = False # This indicates whether the xmodule is a problem-type.
has_score = False # This indicates whether the xmodule is a problem-type.
# It should respond to max_score() and grade(). It can be graded or ungraded
# (like a practice problem).
......@@ -485,10 +480,9 @@ class XModuleDescriptor(Plugin, HTMLSnippet, ResourceTemplates):
"""
Return the metadata that is not inherited, but was defined on this module.
"""
return dict((k,v) for k,v in self.metadata.items()
return dict((k, v) for k, v in self.metadata.items()
if k not in self._inherited_metadata)
@staticmethod
def compute_inherited_metadata(node):
"""Given a descriptor, traverse all of its descendants and do metadata
......@@ -529,7 +523,6 @@ class XModuleDescriptor(Plugin, HTMLSnippet, ResourceTemplates):
return self._child_instances
def get_child_by_url_name(self, url_name):
"""
Return a child XModuleDescriptor with the specified url_name, if it exists, and None otherwise.
......@@ -613,36 +606,15 @@ class XModuleDescriptor(Plugin, HTMLSnippet, ResourceTemplates):
org and course are optional strings that will be used in the generated
module's url identifiers
"""
try:
class_ = XModuleDescriptor.load_class(
etree.fromstring(xml_data).tag,
default_class
)
# leave next line, commented out - useful for low-level debugging
# log.debug('[XModuleDescriptor.load_from_xml] tag=%s, class_=%s' % (
# etree.fromstring(xml_data).tag,class_))
descriptor = class_.from_xml(xml_data, system, org, course)
except Exception as err:
# Didn't load properly. Fall back on loading as an error
# descriptor. This should never error due to formatting.
# Put import here to avoid circular import errors
from xmodule.error_module import ErrorDescriptor
msg = "Error loading from xml."
log.warning(msg + " " + str(err)[:200])
# Normally, we don't want lots of exception traces in our logs from common
# content problems. But if you're debugging the xml loading code itself,
# uncomment the next line.
# log.exception(msg)
system.error_tracker(msg)
err_msg = msg + "\n" + exc_info_to_str(sys.exc_info())
descriptor = ErrorDescriptor.from_xml(xml_data, system, org, course,
err_msg)
return descriptor
class_ = XModuleDescriptor.load_class(
etree.fromstring(xml_data).tag,
default_class
)
# leave next line, commented out - useful for low-level debugging
# log.debug('[XModuleDescriptor.load_from_xml] tag=%s, class_=%s' % (
# etree.fromstring(xml_data).tag,class_))
return class_.from_xml(xml_data, system, org, course)
@classmethod
def from_xml(cls, xml_data, system, org=None, course=None):
......@@ -727,7 +699,6 @@ class XModuleDescriptor(Plugin, HTMLSnippet, ResourceTemplates):
return None
class DescriptorSystem(object):
def __init__(self, load_item, resources_fs, error_tracker, **kwargs):
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment