Commit 919b3e21 by Calen Pennington

Merge pull request #429 from MITx/feature/victor/strip-metadata

Feature/victor/strip metadata
parents 053d52e8 61478f4b
...@@ -21,6 +21,7 @@ def process_includes(fn): ...@@ -21,6 +21,7 @@ def process_includes(fn):
xml_object = etree.fromstring(xml_data) xml_object = etree.fromstring(xml_data)
next_include = xml_object.find('include') next_include = xml_object.find('include')
while next_include is not None: while next_include is not None:
system.error_tracker("WARNING: the <include> tag is deprecated, and will go away.")
file = next_include.get('file') file = next_include.get('file')
parent = next_include.getparent() parent = next_include.getparent()
...@@ -67,6 +68,8 @@ class SemanticSectionDescriptor(XModuleDescriptor): ...@@ -67,6 +68,8 @@ class SemanticSectionDescriptor(XModuleDescriptor):
the child element the child element
""" """
xml_object = etree.fromstring(xml_data) xml_object = etree.fromstring(xml_data)
system.error_tracker("WARNING: the <{}> tag is deprecated. Please do not use in new content."
.format(xml_object.tag))
if len(xml_object) == 1: if len(xml_object) == 1:
for (key, val) in xml_object.items(): for (key, val) in xml_object.items():
...@@ -74,7 +77,7 @@ class SemanticSectionDescriptor(XModuleDescriptor): ...@@ -74,7 +77,7 @@ class SemanticSectionDescriptor(XModuleDescriptor):
return system.process_xml(etree.tostring(xml_object[0])) return system.process_xml(etree.tostring(xml_object[0]))
else: else:
xml_object.tag = 'sequence' xml_object.tag = 'sequential'
return system.process_xml(etree.tostring(xml_object)) return system.process_xml(etree.tostring(xml_object))
...@@ -83,10 +86,14 @@ class TranslateCustomTagDescriptor(XModuleDescriptor): ...@@ -83,10 +86,14 @@ class TranslateCustomTagDescriptor(XModuleDescriptor):
def from_xml(cls, xml_data, system, org=None, course=None): def from_xml(cls, xml_data, system, org=None, course=None):
""" """
Transforms the xml_data from <$custom_tag attr="" attr=""/> to Transforms the xml_data from <$custom_tag attr="" attr=""/> to
<customtag attr="" attr=""><impl>$custom_tag</impl></customtag> <customtag attr="" attr="" impl="$custom_tag"/>
""" """
xml_object = etree.fromstring(xml_data) xml_object = etree.fromstring(xml_data)
system.error_tracker('WARNING: the <{tag}> tag is deprecated. '
'Instead, use <customtag impl="{tag}" attr1="..." attr2="..."/>. '
.format(tag=xml_object.tag))
tag = xml_object.tag tag = xml_object.tag
xml_object.tag = 'customtag' xml_object.tag = 'customtag'
xml_object.attrib['impl'] = tag xml_object.attrib['impl'] = tag
......
...@@ -237,7 +237,7 @@ class CapaModule(XModule): ...@@ -237,7 +237,7 @@ class CapaModule(XModule):
else: else:
raise raise
content = {'name': self.metadata['display_name'], content = {'name': self.display_name,
'html': html, 'html': html,
'weight': self.weight, 'weight': self.weight,
} }
...@@ -467,7 +467,7 @@ class CapaModule(XModule): ...@@ -467,7 +467,7 @@ class CapaModule(XModule):
return {'success': msg} return {'success': msg}
log.exception("Error in capa_module problem checking") log.exception("Error in capa_module problem checking")
raise Exception("error in capa_module") raise Exception("error in capa_module")
self.attempts = self.attempts + 1 self.attempts = self.attempts + 1
self.lcp.done = True self.lcp.done = True
......
...@@ -140,7 +140,7 @@ class CourseDescriptor(SequenceDescriptor): ...@@ -140,7 +140,7 @@ class CourseDescriptor(SequenceDescriptor):
@property @property
def title(self): def title(self):
return self.metadata['display_name'] return self.display_name
@property @property
def number(self): def number(self):
......
...@@ -112,8 +112,8 @@ class TestMongoModuleStore(object): ...@@ -112,8 +112,8 @@ class TestMongoModuleStore(object):
should_work = ( should_work = (
("i4x://edX/toy/video/Welcome", ("i4x://edX/toy/video/Welcome",
("edX/toy/2012_Fall", "Overview", "Welcome", None)), ("edX/toy/2012_Fall", "Overview", "Welcome", None)),
("i4x://edX/toy/html/toylab", ("i4x://edX/toy/chapter/Overview",
("edX/toy/2012_Fall", "Overview", "Toy_Videos", None)), ("edX/toy/2012_Fall", "Overview", None, None)),
) )
for location, expected in should_work: for location, expected in should_work:
assert_equals(path_to_location(self.store, location), expected) assert_equals(path_to_location(self.store, location), expected)
......
...@@ -31,7 +31,8 @@ def clean_out_mako_templating(xml_string): ...@@ -31,7 +31,8 @@ def clean_out_mako_templating(xml_string):
return xml_string return xml_string
class ImportSystem(XMLParsingSystem, MakoDescriptorSystem): class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
def __init__(self, xmlstore, org, course, course_dir, error_tracker, **kwargs): def __init__(self, xmlstore, org, course, course_dir,
policy, error_tracker, **kwargs):
""" """
A class that handles loading from xml. Does some munging to ensure that A class that handles loading from xml. Does some munging to ensure that
all elements have unique slugs. all elements have unique slugs.
...@@ -97,7 +98,7 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem): ...@@ -97,7 +98,7 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
MakoDescriptorSystem.__init__(self, load_item, resources_fs, MakoDescriptorSystem.__init__(self, load_item, resources_fs,
error_tracker, render_template, **kwargs) error_tracker, render_template, **kwargs)
XMLParsingSystem.__init__(self, load_item, resources_fs, XMLParsingSystem.__init__(self, load_item, resources_fs,
error_tracker, process_xml, **kwargs) error_tracker, process_xml, policy, **kwargs)
class XMLModuleStore(ModuleStoreBase): class XMLModuleStore(ModuleStoreBase):
...@@ -184,6 +185,7 @@ class XMLModuleStore(ModuleStoreBase): ...@@ -184,6 +185,7 @@ class XMLModuleStore(ModuleStoreBase):
if not os.path.exists(policy_path): if not os.path.exists(policy_path):
return {} return {}
try: try:
log.debug("Loading policy from {}".format(policy_path))
with open(policy_path) as f: with open(policy_path) as f:
return json.load(f) return json.load(f)
except (IOError, ValueError) as err: except (IOError, ValueError) as err:
...@@ -232,13 +234,16 @@ class XMLModuleStore(ModuleStoreBase): ...@@ -232,13 +234,16 @@ class XMLModuleStore(ModuleStoreBase):
tracker(msg) tracker(msg)
course = course_dir course = course_dir
system = ImportSystem(self, org, course, course_dir, tracker) url_name = course_data.get('url_name')
if url_name:
policy_path = self.data_dir / course_dir / 'policies' / '{}.json'.format(url_name)
policy = self.load_policy(policy_path, tracker)
else:
policy = {}
course_descriptor = system.process_xml(etree.tostring(course_data)) system = ImportSystem(self, org, course, course_dir, policy, tracker)
policy_path = self.data_dir / course_dir / 'policy.json'
policy = self.load_policy(policy_path, tracker) course_descriptor = system.process_xml(etree.tostring(course_data))
XModuleDescriptor.apply_policy(course_descriptor, policy)
# NOTE: The descriptors end up loading somewhat bottom up, which # NOTE: The descriptors end up loading somewhat bottom up, which
# breaks metadata inheritance via get_children(). Instead # breaks metadata inheritance via get_children(). Instead
......
...@@ -76,7 +76,7 @@ class SequenceModule(XModule): ...@@ -76,7 +76,7 @@ class SequenceModule(XModule):
contents.append({ contents.append({
'content': child.get_html(), 'content': child.get_html(),
'title': "\n".join( 'title': "\n".join(
grand_child.metadata['display_name'].strip() grand_child.display_name.strip()
for grand_child in child.get_children() for grand_child in child.get_children()
if 'display_name' in grand_child.metadata if 'display_name' in grand_child.metadata
), ),
...@@ -107,7 +107,7 @@ class SequenceModule(XModule): ...@@ -107,7 +107,7 @@ class SequenceModule(XModule):
class SequenceDescriptor(MakoModuleDescriptor, XmlDescriptor): class SequenceDescriptor(MakoModuleDescriptor, XmlDescriptor):
mako_template = 'widgets/sequence-edit.html' mako_template = 'widgets/sequence-edit.html'
module_class = SequenceModule module_class = SequenceModule
stores_state = True # For remembering where in the sequence the student is stores_state = True # For remembering where in the sequence the student is
@classmethod @classmethod
......
...@@ -42,9 +42,9 @@ class DummySystem(XMLParsingSystem): ...@@ -42,9 +42,9 @@ class DummySystem(XMLParsingSystem):
descriptor.get_children() descriptor.get_children()
return descriptor return descriptor
policy = {}
XMLParsingSystem.__init__(self, load_item, self.resources_fs, XMLParsingSystem.__init__(self, load_item, self.resources_fs,
self.errorlog.tracker, process_xml) self.errorlog.tracker, process_xml, policy)
def render_template(self, template, context): def render_template(self, template, context):
raise Exception("Shouldn't be called") raise Exception("Shouldn't be called")
......
...@@ -426,23 +426,6 @@ class XModuleDescriptor(Plugin, HTMLSnippet): ...@@ -426,23 +426,6 @@ class XModuleDescriptor(Plugin, HTMLSnippet):
@staticmethod @staticmethod
def apply_policy(node, policy):
"""
Given a descriptor, traverse all its descendants and update its metadata
with the policy.
Notes:
- this does not propagate inherited metadata. The caller should
call compute_inherited_metadata after applying the policy.
- metadata specified in the policy overrides metadata in the xml
"""
k = policy_key(node.location)
if k in policy:
node.metadata.update(policy[k])
for c in node.get_children():
XModuleDescriptor.apply_policy(c, policy)
@staticmethod
def compute_inherited_metadata(node): def compute_inherited_metadata(node):
"""Given a descriptor, traverse all of its descendants and do metadata """Given a descriptor, traverse all of its descendants and do metadata
inheritance. Should be called on a CourseDescriptor after importing a inheritance. Should be called on a CourseDescriptor after importing a
...@@ -697,16 +680,19 @@ class DescriptorSystem(object): ...@@ -697,16 +680,19 @@ class DescriptorSystem(object):
class XMLParsingSystem(DescriptorSystem): class XMLParsingSystem(DescriptorSystem):
def __init__(self, load_item, resources_fs, error_tracker, process_xml, **kwargs): def __init__(self, load_item, resources_fs, error_tracker, process_xml, policy, **kwargs):
""" """
load_item, resources_fs, error_tracker: see DescriptorSystem load_item, resources_fs, error_tracker: see DescriptorSystem
policy: a policy dictionary for overriding xml metadata
process_xml: Takes an xml string, and returns a XModuleDescriptor process_xml: Takes an xml string, and returns a XModuleDescriptor
created from that xml created from that xml
""" """
DescriptorSystem.__init__(self, load_item, resources_fs, error_tracker, DescriptorSystem.__init__(self, load_item, resources_fs, error_tracker,
**kwargs) **kwargs)
self.process_xml = process_xml self.process_xml = process_xml
self.policy = policy
class ModuleSystem(object): class ModuleSystem(object):
......
from xmodule.x_module import XModuleDescriptor from xmodule.x_module import (XModuleDescriptor, policy_key)
from xmodule.modulestore import Location from xmodule.modulestore import Location
from lxml import etree from lxml import etree
import json import json
...@@ -270,6 +270,11 @@ class XmlDescriptor(XModuleDescriptor): ...@@ -270,6 +270,11 @@ class XmlDescriptor(XModuleDescriptor):
log.debug('Error %s in loading metadata %s' % (err,dmdata)) log.debug('Error %s in loading metadata %s' % (err,dmdata))
metadata['definition_metadata_err'] = str(err) metadata['definition_metadata_err'] = str(err)
# Set/override any metadata specified by policy
k = policy_key(location)
if k in system.policy:
metadata.update(system.policy[k])
return cls( return cls(
system, system,
definition, definition,
......
<course name="Toy Course" org="edX" course="toy" graceperiod="1 day 5 hours 59 minutes 59 seconds" slug="2012_Fall" start="2015-07-17T12:00">
<chapter name="Overview">
<videosequence format="Lecture Sequence" name="Toy Videos">
<html name="toylab" filename="toylab"/>
<video name="Video Resources" youtube="1.0:1bK-WdDi6Qw"/>
</videosequence>
<video name="Welcome" youtube="1.0:p2Q6BrNhdh8"/>
</chapter>
</course>
roots/2012_Fall.xml
\ No newline at end of file
<course>
<chapter url_name="Overview">
<videosequence url_name="Toy_Videos">
<html url_name="toylab"/>
<video url_name="Video_Resources" youtube="1.0:1bK-WdDi6Qw"/>
</videosequence>
<video url_name="Welcome" youtube="1.0:p2Q6BrNhdh8"/>
</chapter>
</course>
{
"course/2012_Fall": {
"graceperiod": "2 days 5 hours 59 minutes 59 seconds",
"start": "2015-07-17T12:00",
"display_name": "Toy Course"
},
"chapter/Overview": {
"display_name": "Overview"
},
"videosequence/Toy_Videos": {
"display_name": "Toy Videos",
"format": "Lecture Sequence"
},
"html/toylab": {
"display_name": "Toy lab"
},
"video/Video_Resources": {
"display_name": "Video Resources"
},
"video/Welcome": {
"display_name": "Welcome"
}
}
<course org="edX" course="toy" url_name="2012_Fall"/>
\ No newline at end of file
#!/usr/bin/env python
"""
Victor's xml cleanup script. A big pile of useful hacks. Do not use
without carefully reading the code and deciding that this is what you want.
In particular, the remove-meta option is only intended to be used after pulling out a policy
using the metadata_to_json management command.
"""
import os, fnmatch, re, sys
from lxml import etree
from collections import defaultdict
INVALID_CHARS = re.compile(r"[^\w.-]")
def clean(value):
"""
Return value, made into a form legal for locations
"""
return re.sub('_+', '_', INVALID_CHARS.sub('_', value))
# category -> set of url_names for that category that we've already seen
used_names = defaultdict(set)
def clean_unique(category, name):
cleaned = clean(name)
if cleaned not in used_names[category]:
used_names[category].add(cleaned)
return cleaned
x = 1
while cleaned + str(x) in used_names[category]:
x += 1
# Found one!
cleaned = cleaned + str(x)
used_names[category].add(cleaned)
return cleaned
def cleanup(filepath, remove_meta):
# Keys that are exported to the policy file, and so
# can be removed from the xml afterward
to_remove = ('format', 'display_name',
'graceperiod', 'showanswer', 'rerandomize',
'start', 'due', 'graded', 'hide_from_toc',
'ispublic', 'xqa_key')
try:
print "Cleaning {}".format(filepath)
with open(filepath) as f:
parser = etree.XMLParser(remove_comments=False)
xml = etree.parse(filepath, parser=parser)
except:
print "Error parsing file {}".format(filepath)
return
for node in xml.iter(tag=etree.Element):
attrs = node.attrib
if 'url_name' in attrs:
used_names[node.tag].add(attrs['url_name'])
if 'name' in attrs:
# Replace name with an identical display_name, and a unique url_name
name = attrs['name']
attrs['display_name'] = name
attrs['url_name'] = clean_unique(node.tag, name)
del attrs['name']
if 'url_name' in attrs and 'slug' in attrs:
print "WARNING: {} has both slug and url_name"
if ('url_name' in attrs and 'filename' in attrs and
len(attrs)==2 and attrs['url_name'] == attrs['filename']):
# This is a pointer tag in disguise. Get rid of the filename.
print 'turning {}.{} into a pointer tag'.format(node.tag, attrs['url_name'])
del attrs['filename']
if remove_meta:
for attr in to_remove:
if attr in attrs:
del attrs[attr]
with open(filepath, "w") as f:
f.write(etree.tostring(xml))
def find_replace(directory, filePattern, remove_meta):
for path, dirs, files in os.walk(os.path.abspath(directory)):
for filename in fnmatch.filter(files, filePattern):
filepath = os.path.join(path, filename)
cleanup(filepath, remove_meta)
def main(args):
usage = "xml_cleanup [dir] [remove-meta]"
n = len(args)
if n < 1 or n > 2 or (n == 2 and args[1] != 'remove-meta'):
print usage
return
remove_meta = False
if n == 2:
remove_meta = True
find_replace(args[0], '*.xml', remove_meta)
if __name__ == '__main__':
main(sys.argv[1:])
...@@ -35,7 +35,7 @@ def manage_modulestores(request,reload_dir=None): ...@@ -35,7 +35,7 @@ def manage_modulestores(request,reload_dir=None):
ip = request.META.get('HTTP_X_REAL_IP','') # nginx reverse proxy ip = request.META.get('HTTP_X_REAL_IP','') # nginx reverse proxy
if not ip: if not ip:
ip = request.META.get('REMOTE_ADDR','None') ip = request.META.get('REMOTE_ADDR','None')
if LOCAL_DEBUG: if LOCAL_DEBUG:
html += '<h3>IP address: %s ' % ip html += '<h3>IP address: %s ' % ip
html += '<h3>User: %s ' % request.user html += '<h3>User: %s ' % request.user
...@@ -48,7 +48,7 @@ def manage_modulestores(request,reload_dir=None): ...@@ -48,7 +48,7 @@ def manage_modulestores(request,reload_dir=None):
html += 'Permission denied' html += 'Permission denied'
html += "</body></html>" html += "</body></html>"
log.debug('request denied, ALLOWED_IPS=%s' % ALLOWED_IPS) log.debug('request denied, ALLOWED_IPS=%s' % ALLOWED_IPS)
return HttpResponse(html) return HttpResponse(html)
#---------------------------------------- #----------------------------------------
# reload course if specified # reload course if specified
...@@ -74,10 +74,10 @@ def manage_modulestores(request,reload_dir=None): ...@@ -74,10 +74,10 @@ def manage_modulestores(request,reload_dir=None):
#---------------------------------------- #----------------------------------------
dumpfields = ['definition','location','metadata'] dumpfields = ['definition','location','metadata']
for cdir, course in def_ms.courses.items(): for cdir, course in def_ms.courses.items():
html += '<hr width="100%"/>' html += '<hr width="100%"/>'
html += '<h2>Course: %s (%s)</h2>' % (course.metadata['display_name'],cdir) html += '<h2>Course: %s (%s)</h2>' % (course.display_name,cdir)
for field in dumpfields: for field in dumpfields:
data = getattr(course,field) data = getattr(course,field)
...@@ -89,7 +89,7 @@ def manage_modulestores(request,reload_dir=None): ...@@ -89,7 +89,7 @@ def manage_modulestores(request,reload_dir=None):
html += '</ul>' html += '</ul>'
else: else:
html += '<ul><li>%s</li></ul>' % escape(data) html += '<ul><li>%s</li></ul>' % escape(data)
#---------------------------------------- #----------------------------------------
...@@ -107,4 +107,4 @@ def manage_modulestores(request,reload_dir=None): ...@@ -107,4 +107,4 @@ def manage_modulestores(request,reload_dir=None):
log.debug('def_ms=%s' % unicode(def_ms)) log.debug('def_ms=%s' % unicode(def_ms))
html += "</body></html>" html += "</body></html>"
return HttpResponse(html) return HttpResponse(html)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment