Commit ff96fbf5 by ichuang

Merge pull request #146 from MITx/cpennington/cms-xml-processing

These changes make xml import into the cms use XModuleDescriptors
parents 81a1dd89 3c60d1a9
###
###
### One-off script for importing courseware from XML format
###
#import mitxmako.middleware
#from courseware import content_parser
#from django.contrib.auth.models import User
import os.path
from StringIO import StringIO
from mako.template import Template
from django.core.management.base import BaseCommand, CommandError
from keystore.django import keystore
from raw_module import RawDescriptor
from lxml import etree
from fs.osfs import OSFS
from mako.lookup import TemplateLookup
from collections import defaultdict
from django.core.management.base import BaseCommand
from keystore.django import keystore
from path import path
from x_module import XModuleDescriptor, XMLParsingSystem
unnamed_modules = 0
etree.set_default_parser(etree.XMLParser(dtd_validation=False, load_dtd=False,
remove_comments=True))
from lxml import etree
class Command(BaseCommand):
help = \
''' Run FTP server.'''
'''Import the specified data directory into the default keystore'''
def handle(self, *args, **options):
print args
data_dir = args[0]
parser = etree.XMLParser(remove_comments = True)
if len(args) != 3:
raise CommandError("import requires 3 arguments: <org> <course> <data directory>")
org, course, data_dir = args
data_dir = path(data_dir)
class ImportSystem(XMLParsingSystem):
def __init__(self):
self.load_item = keystore().get_item
self.fs = OSFS(data_dir)
def process_xml(self, xml):
try:
xml_data = etree.fromstring(xml)
except:
raise CommandError("Unable to parse xml: " + xml)
if not xml_data.get('name'):
global unnamed_modules
unnamed_modules += 1
xml_data.set('name', '{tag}_{count}'.format(tag=xml_data.tag, count=unnamed_modules))
module = XModuleDescriptor.load_from_xml(etree.tostring(xml_data), self, org, course, RawDescriptor)
keystore().create_item(module.url)
if 'data' in module.definition:
keystore().update_item(module.url, module.definition['data'])
if 'children' in module.definition:
keystore().update_children(module.url, module.definition['children'])
return module
lookup = TemplateLookup(directories=[data_dir])
template = lookup.get_template("course.xml")
course_string = template.render(groups=[])
course = etree.parse(StringIO(course_string), parser=parser)
elements = list(course.iter())
tag_to_category = {
# Custom tags
'videodev': 'Custom',
'slides': 'Custom',
'book': 'Custom',
'image': 'Custom',
'discuss': 'Custom',
# Simple lists
'chapter': 'Week',
'course': 'Course',
'section': defaultdict(lambda: 'Section', {
'Lab': 'Lab',
'Lecture Sequence': 'LectureSequence',
'Homework': 'Homework',
'Tutorial Index': 'TutorialIndex',
'Video': 'VideoSegment',
'Midterm': 'Exam',
'Final': 'Exam',
'Problems': 'ProblemSet',
}),
'videosequence': 'VideoSequence',
'problemset': 'ProblemSet',
'vertical': 'Section',
'sequential': 'Section',
'tab': 'Section',
# True types
'video': 'VideoSegment',
'html': 'HTML',
'problem': 'Problem',
}
name_index = 0
for e in elements:
name = e.attrib.get('name', None)
for f in elements:
if f != e and f.attrib.get('name', None) == name:
name = None
if not name:
name = "{tag}_{index}".format(tag=e.tag, index=name_index)
name_index = name_index + 1
if e.tag in tag_to_category:
category = tag_to_category[e.tag]
if isinstance(category, dict):
category = category[e.get('format')]
category = category.replace('/', '-')
name = name.replace('/', '-')
e.set('url', 'i4x://mit.edu/6002xs12/{category}/{name}'.format(
category=category,
name=name))
else:
print "Skipping element with tag", e.tag
def handle_skip(e):
print "Skipping ", e
results = {}
def handle_custom(e):
data = {'type':'i4x://mit.edu/6002xs12/tag/{tag}'.format(tag=e.tag),
'attrib':dict(e.attrib)}
results[e.attrib['url']] = {'data':data}
def handle_list(e):
if e.attrib.get("class", None) == "tutorials":
return
children = [le.attrib['url'] for le in e.getchildren()]
results[e.attrib['url']] = {'children':children}
def handle_video(e):
url = e.attrib['url']
clip_url = url.replace('VideoSegment', 'VideoClip')
# Take: 0.75:izygArpw-Qo,1.0:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8
# Make: [(0.75, 'izygArpw-Qo'), (1.0, 'p2Q6BrNhdh8'), (1.25, '1EeWXzPdhSA'), (1.5, 'rABDYkeK0x8')]
youtube_str = e.attrib['youtube']
youtube_list = [(float(x), y) for x,y in map(lambda x:x.split(':'), youtube_str.split(','))]
clip_infos = [{ "status": "ready",
"format": "youtube",
"sane": True,
"location": "youtube",
"speed": speed,
"id": youtube_id,
"size": None} \
for (speed, youtube_id) \
in youtube_list]
results[clip_url] = {'data':{'clip_infos':clip_infos}}
results[url] = {'children' : [{'url':clip_url}]}
def handle_html(e):
if 'src' in e.attrib:
text = open(data_dir+'html/'+e.attrib['src']).read()
else:
textlist=[e.text]+[etree.tostring(elem) for elem in e]+[e.tail]
textlist=[i for i in textlist if type(i)==str]
text = "".join(textlist)
results[e.attrib['url']] = {'data':{'text':text}}
def handle_problem(e):
data = open(os.path.join(data_dir, 'problems', e.attrib['filename']+'.xml')).read()
results[e.attrib['url']] = {'data':{'statement':data}}
element_actions = {# Inside HTML ==> Skip these
'a': handle_skip,
'h1': handle_skip,
'h2': handle_skip,
'hr': handle_skip,
'strong': handle_skip,
'ul': handle_skip,
'li': handle_skip,
'p': handle_skip,
# Custom tags
'videodev': handle_custom,
'slides': handle_custom,
'book': handle_custom,
'image': handle_custom,
'discuss': handle_custom,
# Simple lists
'chapter': handle_list,
'course': handle_list,
'sequential': handle_list,
'vertical': handle_list,
'section': handle_list,
'videosequence': handle_list,
'problemset': handle_list,
'tab': handle_list,
# True types
'video': handle_video,
'html': handle_html,
'problem': handle_problem,
}
for e in elements:
element_actions[e.tag](e)
for k in results:
keystore().create_item(k, 'Piotr Mitros')
if 'data' in results[k]:
keystore().update_item(k, results[k]['data'])
if 'children' in results[k]:
keystore().update_children(k, results[k]['children'])
ImportSystem().process_xml(course_string)
......@@ -11,7 +11,7 @@ def index(request):
org = 'mit.edu'
course = '6002xs12'
name = '6.002 Spring 2012'
course = keystore().get_item(['i4x', org, course, 'Course', name])
course = keystore().get_item(['i4x', org, course, 'course', name])
weeks = course.get_children()
return render_to_response('index.html', {'weeks': weeks})
......@@ -21,6 +21,7 @@ def edit_item(request):
item = keystore().get_item(item_id)
return render_to_response('unit.html', {
'contents': item.get_html(),
'js_module': item.js_module_name(),
'type': item.type,
'name': item.name,
})
......
......@@ -158,6 +158,7 @@ PIPELINE_CSS = {
PIPELINE_ALWAYS_RECOMPILE = ['sass/base-style.scss']
from xmodule.x_module import XModuleDescriptor
from xmodule.raw_module import RawDescriptor
js_file_dir = PROJECT_ROOT / "static" / "coffee" / "module"
try:
os.makedirs(js_file_dir)
......@@ -168,7 +169,7 @@ except OSError as exc:
raise
module_js_sources = []
for xmodule in XModuleDescriptor.load_classes():
for xmodule in XModuleDescriptor.load_classes() + [RawDescriptor]:
js = xmodule.get_javascript()
for filetype in ('coffee', 'js'):
for idx, fragment in enumerate(js.get(filetype, [])):
......
class @CMS
@setHeight = =>
windowHeight = $(this).height()
@contentHeight = windowHeight - 29
@bind = =>
$('a.module-edit').click ->
CMS.edit_item($(this).attr('id'))
return false
$(window).bind('resize', CMS.setHeight)
@edit_item = (id) =>
$.get('/edit_item', {id: id}, (data) =>
$('#module-html').empty().append(data)
CMS.bind()
$('section.edit-pane').show()
$('body.content .cal').css('height', @contentHeight)
$('body').addClass('content')
$('section.edit-pane').show()
new Unit('unit-wrapper', id)
)
......@@ -78,6 +84,7 @@ $ ->
$('.problem-new a').click ->
$('section.edit-pane').show()
return false
CMS.setHeight()
CMS.bind()
<section id="unit-wrapper" class="${type}">
<section id="unit-wrapper" class="${js_module}">
<header>
<section>
<h1 class="editable">${name}</h1>
......
......@@ -38,7 +38,7 @@
% for week in weeks:
<li>
<header>
<h1><a href="#" class="week-edit" id="${week.url}">${week.name}</a></h1>
<h1><a href="#" class="module-edit" id="${week.url}">${week.name}</a></h1>
<ul>
% if week.goals:
% for goal in week.goals:
......
## Raw editing widget: shows a module's raw data in an editable textarea next to a read-only preview.
<section class="raw-edit">
  <section class="meta wip">
    <section class="status-settings">
      <ul>
        <li><a href="#" class="current">Scrap</a></li>
        <li><a href="#">Draft</a></li>
        <li><a href="#">Proofed</a></li>
        <li><a href="#">Published</a></li>
      </ul>
      <a href="#" class="settings">Settings</a>
    </section>
    <section class="author">
      <dl>
        <dt>Last modified:</dt>
        <dd>mm/dd/yy</dd>
        <dt>By</dt>
        <dd>Anant Agarwal</dd>
      </dl>
    </section>
    <section class="tags">
      <div>
        <h2>Tags:</h2>
        <p class="editable">Click to edit</p>
      </div>
      <div>
        <h2>Goal</h2>
        <p class="editable">Click to edit</p>
      </div>
    </section>
  </section>
  ## The data must be HTML-escaped here as well: textarea content is still entity-decoded
  ## by the browser, and a literal </textarea> inside the data would end the element early.
  <textarea name="" class="edit-box" rows="8" cols="40">${data | h}</textarea>
  <pre class="preview">${data | h}</pre>
  <div class="actions wip">
    <a href="" class="save-update">Save &amp; Update</a>
    <a href="#" class="cancel">Cancel</a>
  </div>
  <%include file="notes.html"/>
</section>
......@@ -125,7 +125,7 @@ class ModuleStore(object):
"""
An abstract interface for a database backend that stores XModuleDescriptor instances
"""
def get_item(self, location):
def get_item(self, location, default_class=None):
"""
Returns an XModuleDescriptor instance for the item at location.
If location.revision is None, returns the item with the most
......@@ -136,6 +136,8 @@ class ModuleStore(object):
If no object is found at that location, raises keystore.exceptions.ItemNotFoundError
location: Something that can be passed to Location
default_class: An XModuleDescriptor subclass to use if no plugin matching the
location is found
"""
raise NotImplementedError
......
......@@ -8,6 +8,7 @@ from __future__ import absolute_import
from django.conf import settings
from .mongo import MongoModuleStore
from raw_module import RawDescriptor
_KEYSTORES = {}
......@@ -16,6 +17,9 @@ def keystore(name='default'):
global _KEYSTORES
if name not in _KEYSTORES:
_KEYSTORES[name] = MongoModuleStore(**settings.KEYSTORE[name])
# TODO (cpennington): Load the default class from a string
_KEYSTORES[name] = MongoModuleStore(
default_class=RawDescriptor,
**settings.KEYSTORE[name])
return _KEYSTORES[name]
......@@ -8,7 +8,7 @@ class MongoModuleStore(ModuleStore):
"""
A Mongodb backed ModuleStore
"""
def __init__(self, host, db, collection, port=27017):
def __init__(self, host, db, collection, port=27017, default_class=None):
self.collection = pymongo.connection.Connection(
host=host,
port=port
......@@ -16,6 +16,7 @@ class MongoModuleStore(ModuleStore):
# Force mongo to report errors, at the expense of performance
self.collection.safe = True
self.default_class = default_class
def get_item(self, location):
"""
......@@ -28,6 +29,8 @@ class MongoModuleStore(ModuleStore):
If no object is found at that location, raises keystore.exceptions.ItemNotFoundError
location: Something that can be passed to Location
default_class: An XModuleDescriptor subclass to use if no plugin matching the
location is found
"""
query = {}
......@@ -45,9 +48,10 @@ class MongoModuleStore(ModuleStore):
if item is None:
raise ItemNotFoundError(location)
return XModuleDescriptor.load_from_json(item, DescriptorSystem(self.get_item))
return XModuleDescriptor.load_from_json(
item, DescriptorSystem(self.get_item), self.default_class)
def create_item(self, location, editor):
def create_item(self, location):
"""
Create an empty item at the specified location with the supplied editor
......@@ -55,7 +59,6 @@ class MongoModuleStore(ModuleStore):
"""
self.collection.insert({
'location': Location(location).dict(),
'editor': editor
})
def update_item(self, location, data):
......
......@@ -16,9 +16,31 @@ class HtmlModuleDescriptor(MakoModuleDescriptor):
"""
mako_template = "widgets/html-edit.html"
# TODO (cpennington): Make this into a proper module
js = {'coffee': [resource_string(__name__, 'js/module/html.coffee')]}
js_module = 'HTML'
@classmethod
def from_xml(cls, xml_data, system, org=None, course=None):
    """
    Build a descriptor of this class from a string of xml.

    The xml is parsed only to derive the location (root tag and `name`
    attribute); the unparsed xml string itself is stored as the module text.

    xml_data: A string of xml that will be translated into data and children for
        this module
    system: An XModuleSystem for interacting with external resources
    org and course are optional strings that will be used in the generated modules
        url identifiers
    """
    root = etree.fromstring(xml_data)
    location = ['i4x', org, course, root.tag, root.get('name')]
    definition = {'data': {'text': xml_data}}
    return cls(system, definition=definition, location=location)
class Module(XModule):
id_attribute = 'filename'
......
# Editor widget for raw module data: keeps the preview pane in sync with
# the edit box, and exposes the edit box contents through save().
class @Raw
  constructor: (@id) ->
    @edit_box = $("##{@id} .edit-box")
    @preview = $("##{@id} .preview")
    # Mirror every edit into the preview as plain text
    @edit_box.on 'input', =>
      @preview.empty().text(@edit_box.val())

  # Returns the current contents of the edit box
  save: ->
    @edit_box.val()
......@@ -12,7 +12,11 @@ class MakoModuleDescriptor(XModuleDescriptor):
the descriptor as the `module` parameter to that template
"""
def get_context(self):
    """
    Build the dictionary of names made available to the mako template.

    The descriptor itself is exposed to the template as `module`.
    """
    context = {'module': self}
    return context
def get_html(self):
return render_to_string(self.mako_template, {
'module': self
})
return render_to_string(self.mako_template, self.get_context())
from pkg_resources import resource_string
from mako_module import MakoModuleDescriptor
from lxml import etree
class RawDescriptor(MakoModuleDescriptor):
    """
    Module that provides a raw editing view of its data and children
    """
    mako_template = "widgets/raw-edit.html"

    js = {'coffee': [resource_string(__name__, 'js/module/raw.coffee')]}
    js_module = 'Raw'

    def get_context(self):
        """
        Return the template context: the descriptor as `module`, plus the
        module's definition data as `data`.
        """
        context = {'module': self}
        context['data'] = self.definition['data']
        return context

    @classmethod
    def from_xml(cls, xml_data, system, org=None, course=None):
        """
        Build a descriptor of this class from a string of xml.

        The xml is parsed only to derive the location (root tag and `name`
        attribute); the unparsed xml string itself becomes the module data.

        xml_data: A string of xml that will be translated into data and children for
            this module
        system: An XModuleSystem for interacting with external resources
        org and course are optional strings that will be used in the generated modules
            url identifiers
        """
        root = etree.fromstring(xml_data)
        location = ['i4x', org, course, root.tag, root.get('name')]
        return cls(system, definition={'data': xml_data}, location=location)
......@@ -115,5 +115,23 @@ class Module(XModule):
self.rendered = False
class SectionDescriptor(MakoModuleDescriptor):
class SequenceDescriptor(MakoModuleDescriptor):
mako_template = 'widgets/sequence-edit.html'
@classmethod
def from_xml(cls, xml_data, system, org=None, course=None):
    """
    Build a sequence descriptor from a string of xml.

    Every direct child element is serialized and handed to
    system.process_xml; the url of each resulting module is recorded, in
    document order, as this module's children.

    xml_data: A string of xml for this module and its children
    system: The parsing system; must provide process_xml
    org and course are optional strings used in the generated location
    """
    root = etree.fromstring(xml_data)

    child_urls = []
    for child_element in root:
        child_module = system.process_xml(etree.tostring(child_element))
        child_urls.append(child_module.url)

    location = ['i4x', org, course, root.tag, root.get('name')]
    return cls(system, {'children': child_urls}, location=location)
......@@ -13,18 +13,14 @@ setup(
# for a description of entry_points
entry_points={
'xmodule.v1': [
"Course = seq_module:SectionDescriptor",
"Week = seq_module:SectionDescriptor",
"Section = seq_module:SectionDescriptor",
"LectureSequence = seq_module:SectionDescriptor",
"Lab = seq_module:SectionDescriptor",
"Homework = seq_module:SectionDescriptor",
"TutorialIndex = seq_module:SectionDescriptor",
"Exam = seq_module:SectionDescriptor",
"VideoSegment = video_module:VideoSegmentDescriptor",
"ProblemSet = seq_module:SectionDescriptor",
"Problem = capa_module:CapaModuleDescriptor",
"HTML = html_module:HtmlModuleDescriptor",
"chapter = seq_module:SequenceDescriptor",
"course = seq_module:SequenceDescriptor",
"html = html_module:HtmlModuleDescriptor",
"section = translation_module:SemanticSectionDescriptor",
"sequential = seq_module:SequenceDescriptor",
"vertical = seq_module:SequenceDescriptor",
"problemset = seq_module:SequenceDescriptor",
"videosequence = seq_module:SequenceDescriptor",
]
}
)
"""
These modules exist to translate old format XML into newer, semantic forms
"""
from x_module import XModuleDescriptor
from lxml import etree
from functools import wraps
import logging
log = logging.getLogger(__name__)
def process_includes(fn):
    """
    Wraps a XModuleDescriptor.from_xml method, and modifies xml_data to replace
    any immediate child <include> items with the contents of the file that they are
    supposed to include.

    The file named by each <include>'s `file` attribute is opened through
    system.fs and parsed as xml. An <include> without a `file` attribute is
    simply removed. Errors opening or parsing an included file are logged
    and re-raised.
    """
    @wraps(fn)
    def from_xml(cls, xml_data, system, org=None, course=None):
        xml_object = etree.fromstring(xml_data)
        next_include = xml_object.find('include')
        while next_include is not None:
            filename = next_include.get('file')
            # Looked up unconditionally so that an include with no file
            # attribute can still be removed below
            parent = next_include.getparent()

            if filename is not None:
                try:
                    ifp = system.fs.open(filename)
                except Exception:
                    log.exception('Error in problem xml include: %s',
                                  etree.tostring(next_include, pretty_print=True))
                    log.exception('Cannot find file %s in %s', filename, system.fs)
                    raise

                try:
                    # read in and convert to XML
                    incxml = etree.XML(ifp.read())
                except Exception:
                    log.exception('Error in problem xml include: %s',
                                  etree.tostring(next_include, pretty_print=True))
                    log.exception('Cannot parse XML in %s', filename)
                    raise
                finally:
                    # Don't leak the handle, whether or not parsing succeeded
                    ifp.close()

                # insert new XML into tree in place of include
                parent.insert(parent.index(next_include), incxml)

            # Always drop the processed <include> so the loop makes progress
            parent.remove(next_include)
            next_include = xml_object.find('include')

        return fn(cls, etree.tostring(xml_object), system, org, course)
    return from_xml
class SemanticSectionDescriptor(XModuleDescriptor):
    """
    Translates old-format <section> elements into the module they really
    represent, delegating the actual parsing back to system.process_xml.
    """

    @classmethod
    @process_includes
    def from_xml(cls, xml_data, system, org=None, course=None):
        """
        Replace a section that has exactly one child element with that child.

        The section's attributes (except `format`) are copied onto the child
        before it is processed. A section with any other number of children
        is retagged and processed as a whole.
        """
        section = etree.fromstring(xml_data)

        if len(section) != 1:
            # NOTE(review): retagged as 'sequence' -- confirm a descriptor is
            # registered under that name (vs. 'sequential').
            section.tag = 'sequence'
            return system.process_xml(etree.tostring(section))

        only_child = section[0]
        for key, val in section.items():
            if key != 'format':
                only_child.set(key, val)
        return system.process_xml(etree.tostring(only_child))
......@@ -15,8 +15,24 @@ class ModuleMissingError(Exception):
class Plugin(object):
"""
Base class for a system that uses entry_points to load plugins.
Implementing classes are expected to have the following attributes:
entry_point: The name of the entry point to load plugins from
"""
@classmethod
def load_class(cls, identifier):
def load_class(cls, identifier, default=None):
"""
Loads a single class intance specified by identifier. If identifier
specifies more than a single class, then logs a warning and returns the first
class identified.
If default is not None, will return default if no entry_point matching identifier
is found. Otherwise, will raise a ModuleMissingError
"""
identifier = identifier.lower()
classes = list(pkg_resources.iter_entry_points(cls.entry_point, name=identifier))
if len(classes) > 1:
log.warning("Found multiple classes for {entry_point} with identifier {id}: {classes}. Returning the first one.".format(
......@@ -25,6 +41,8 @@ class Plugin(object):
classes=", ".join(class_.module_name for class_ in classes)))
if len(classes) == 0:
if default is not None:
return default
raise ModuleMissingError(identifier)
return classes[0].load()
......@@ -160,9 +178,10 @@ class XModuleDescriptor(Plugin):
"""
entry_point = "xmodule.v1"
js = {}
js_module = None
@staticmethod
def load_from_json(json_data, system):
def load_from_json(json_data, system, default_class=None):
"""
This method instantiates the correct subclass of XModuleDescriptor based
on the contents of json_data.
......@@ -170,7 +189,10 @@ class XModuleDescriptor(Plugin):
json_data must contain a 'location' element, and must be suitable to be
passed into the subclasses `from_json` method.
"""
class_ = XModuleDescriptor.load_class(json_data['location']['category'])
class_ = XModuleDescriptor.load_class(
json_data['location']['category'],
default_class
)
return class_.from_json(json_data, system)
@classmethod
......@@ -184,6 +206,37 @@ class XModuleDescriptor(Plugin):
"""
return cls(system=system, **json_data)
@staticmethod
def load_from_xml(xml_data, system, org=None, course=None, default_class=None):
    """
    Instantiate the XModuleDescriptor subclass that matches xml_data.

    The descriptor class is looked up by the tag of the root xml element,
    and default_class is passed to load_class as the fallback.

    xml_data must be a string containing valid xml
    system is an XMLParsingSystem
    org and course are optional strings that will be used in the generated modules
        url identifiers
    """
    root_tag = etree.fromstring(xml_data).tag
    descriptor_class = XModuleDescriptor.load_class(root_tag, default_class)
    return descriptor_class.from_xml(xml_data, system, org, course)
@classmethod
def from_xml(cls, xml_data, system, org=None, course=None):
    """
    Create an instance of this descriptor from the supplied xml_data.

    This base implementation always raises NotImplementedError.

    xml_data: A string of xml that will be translated into data and children for
        this module
    system is an XMLParsingSystem
    org and course are optional strings that will be used in the generated modules
        url identifiers
    """
    message = 'Modules must implement from_xml to be parsable from xml'
    raise NotImplementedError(message)
@classmethod
def get_javascript(cls):
"""
......@@ -196,6 +249,12 @@ class XModuleDescriptor(Plugin):
"""
return cls.js
def js_module_name(self):
    """
    Name of the javascript class that should be instantiated when this
    module descriptor is loaded for editing (the `js_module` attribute).
    """
    module_name = self.js_module
    return module_name
def __init__(self,
system,
......@@ -230,15 +289,12 @@ class XModuleDescriptor(Plugin):
self._child_instances = None
def get_children(self, categories=None):
def get_children(self):
"""Returns a list of XModuleDescriptor instances for the children of this module"""
if self._child_instances is None:
self._child_instances = [self.system.load_item(child) for child in self.definition['children']]
self._child_instances = [self.system.load_item(child) for child in self.definition.get('children', [])]
if categories is None:
return self._child_instances
else:
return [child for child in self._child_instances if child.type in categories]
return self._child_instances
def get_html(self):
"""
......@@ -277,7 +333,18 @@ class XModuleDescriptor(Plugin):
class DescriptorSystem(object):
def __init__(self, load_item):
"""
load_item: Takes a Location and returns and XModuleDescriptor
load_item: Takes a Location and returns an XModuleDescriptor
"""
self.load_item = load_item
class XMLParsingSystem(DescriptorSystem):
    """
    A DescriptorSystem that can additionally turn xml into descriptors and
    read the files that the xml refers to.
    """
    def __init__(self, load_item, process_xml, fs):
        """
        load_item: Takes a Location and returns an XModuleDescriptor
        process_xml: Takes an xml string, and returns the XModuleDescriptor created from that xml
        fs: A Filesystem object that contains all of the xml resources needed to parse
            the course
        """
        # Let the base class store load_item; previously the argument was
        # accepted but dropped, leaving self.load_item unset.
        DescriptorSystem.__init__(self, load_item)
        self.process_xml = process_xml
        self.fs = fs
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment