Commit 919b3e21 by Calen Pennington

Merge pull request #429 from MITx/feature/victor/strip-metadata

Feature/victor/strip metadata
parents 053d52e8 61478f4b
......@@ -21,6 +21,7 @@ def process_includes(fn):
xml_object = etree.fromstring(xml_data)
next_include = xml_object.find('include')
while next_include is not None:
system.error_tracker("WARNING: the <include> tag is deprecated, and will go away.")
file = next_include.get('file')
parent = next_include.getparent()
......@@ -67,6 +68,8 @@ class SemanticSectionDescriptor(XModuleDescriptor):
the child element
"""
xml_object = etree.fromstring(xml_data)
system.error_tracker("WARNING: the <{}> tag is deprecated. Please do not use in new content."
.format(xml_object.tag))
if len(xml_object) == 1:
for (key, val) in xml_object.items():
......@@ -74,7 +77,7 @@ class SemanticSectionDescriptor(XModuleDescriptor):
return system.process_xml(etree.tostring(xml_object[0]))
else:
xml_object.tag = 'sequence'
xml_object.tag = 'sequential'
return system.process_xml(etree.tostring(xml_object))
......@@ -83,10 +86,14 @@ class TranslateCustomTagDescriptor(XModuleDescriptor):
def from_xml(cls, xml_data, system, org=None, course=None):
"""
Transforms the xml_data from <$custom_tag attr="" attr=""/> to
<customtag attr="" attr=""><impl>$custom_tag</impl></customtag>
<customtag attr="" attr="" impl="$custom_tag"/>
"""
xml_object = etree.fromstring(xml_data)
system.error_tracker('WARNING: the <{tag}> tag is deprecated. '
'Instead, use <customtag impl="{tag}" attr1="..." attr2="..."/>. '
.format(tag=xml_object.tag))
tag = xml_object.tag
xml_object.tag = 'customtag'
xml_object.attrib['impl'] = tag
......
......@@ -237,7 +237,7 @@ class CapaModule(XModule):
else:
raise
content = {'name': self.metadata['display_name'],
content = {'name': self.display_name,
'html': html,
'weight': self.weight,
}
......@@ -467,7 +467,7 @@ class CapaModule(XModule):
return {'success': msg}
log.exception("Error in capa_module problem checking")
raise Exception("error in capa_module")
self.attempts = self.attempts + 1
self.lcp.done = True
......
......@@ -140,7 +140,7 @@ class CourseDescriptor(SequenceDescriptor):
@property
def title(self):
return self.metadata['display_name']
return self.display_name
@property
def number(self):
......
......@@ -112,8 +112,8 @@ class TestMongoModuleStore(object):
should_work = (
("i4x://edX/toy/video/Welcome",
("edX/toy/2012_Fall", "Overview", "Welcome", None)),
("i4x://edX/toy/html/toylab",
("edX/toy/2012_Fall", "Overview", "Toy_Videos", None)),
("i4x://edX/toy/chapter/Overview",
("edX/toy/2012_Fall", "Overview", None, None)),
)
for location, expected in should_work:
assert_equals(path_to_location(self.store, location), expected)
......
......@@ -31,7 +31,8 @@ def clean_out_mako_templating(xml_string):
return xml_string
class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
def __init__(self, xmlstore, org, course, course_dir, error_tracker, **kwargs):
def __init__(self, xmlstore, org, course, course_dir,
policy, error_tracker, **kwargs):
"""
A class that handles loading from xml. Does some munging to ensure that
all elements have unique slugs.
......@@ -97,7 +98,7 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
MakoDescriptorSystem.__init__(self, load_item, resources_fs,
error_tracker, render_template, **kwargs)
XMLParsingSystem.__init__(self, load_item, resources_fs,
error_tracker, process_xml, **kwargs)
error_tracker, process_xml, policy, **kwargs)
class XMLModuleStore(ModuleStoreBase):
......@@ -184,6 +185,7 @@ class XMLModuleStore(ModuleStoreBase):
if not os.path.exists(policy_path):
return {}
try:
log.debug("Loading policy from {}".format(policy_path))
with open(policy_path) as f:
return json.load(f)
except (IOError, ValueError) as err:
......@@ -232,13 +234,16 @@ class XMLModuleStore(ModuleStoreBase):
tracker(msg)
course = course_dir
system = ImportSystem(self, org, course, course_dir, tracker)
url_name = course_data.get('url_name')
if url_name:
policy_path = self.data_dir / course_dir / 'policies' / '{}.json'.format(url_name)
policy = self.load_policy(policy_path, tracker)
else:
policy = {}
course_descriptor = system.process_xml(etree.tostring(course_data))
policy_path = self.data_dir / course_dir / 'policy.json'
system = ImportSystem(self, org, course, course_dir, policy, tracker)
policy = self.load_policy(policy_path, tracker)
XModuleDescriptor.apply_policy(course_descriptor, policy)
course_descriptor = system.process_xml(etree.tostring(course_data))
# NOTE: The descriptors end up loading somewhat bottom up, which
# breaks metadata inheritance via get_children(). Instead
......
......@@ -76,7 +76,7 @@ class SequenceModule(XModule):
contents.append({
'content': child.get_html(),
'title': "\n".join(
grand_child.metadata['display_name'].strip()
grand_child.display_name.strip()
for grand_child in child.get_children()
if 'display_name' in grand_child.metadata
),
......@@ -107,7 +107,7 @@ class SequenceModule(XModule):
class SequenceDescriptor(MakoModuleDescriptor, XmlDescriptor):
mako_template = 'widgets/sequence-edit.html'
module_class = SequenceModule
stores_state = True # For remembering where in the sequence the student is
@classmethod
......
......@@ -42,9 +42,9 @@ class DummySystem(XMLParsingSystem):
descriptor.get_children()
return descriptor
policy = {}
XMLParsingSystem.__init__(self, load_item, self.resources_fs,
self.errorlog.tracker, process_xml)
self.errorlog.tracker, process_xml, policy)
def render_template(self, template, context):
raise Exception("Shouldn't be called")
......
......@@ -426,23 +426,6 @@ class XModuleDescriptor(Plugin, HTMLSnippet):
@staticmethod
def apply_policy(node, policy):
"""
Given a descriptor, traverse all its descendants and update its metadata
with the policy.
Notes:
- this does not propagate inherited metadata. The caller should
call compute_inherited_metadata after applying the policy.
- metadata specified in the policy overrides metadata in the xml
"""
k = policy_key(node.location)
if k in policy:
node.metadata.update(policy[k])
for c in node.get_children():
XModuleDescriptor.apply_policy(c, policy)
@staticmethod
def compute_inherited_metadata(node):
"""Given a descriptor, traverse all of its descendants and do metadata
inheritance. Should be called on a CourseDescriptor after importing a
......@@ -697,16 +680,19 @@ class DescriptorSystem(object):
class XMLParsingSystem(DescriptorSystem):
def __init__(self, load_item, resources_fs, error_tracker, process_xml, **kwargs):
def __init__(self, load_item, resources_fs, error_tracker, process_xml, policy, **kwargs):
"""
load_item, resources_fs, error_tracker: see DescriptorSystem
policy: a policy dictionary for overriding xml metadata
process_xml: Takes an xml string, and returns a XModuleDescriptor
created from that xml
"""
DescriptorSystem.__init__(self, load_item, resources_fs, error_tracker,
**kwargs)
self.process_xml = process_xml
self.policy = policy
class ModuleSystem(object):
......
from xmodule.x_module import XModuleDescriptor
from xmodule.x_module import (XModuleDescriptor, policy_key)
from xmodule.modulestore import Location
from lxml import etree
import json
......@@ -270,6 +270,11 @@ class XmlDescriptor(XModuleDescriptor):
log.debug('Error %s in loading metadata %s' % (err,dmdata))
metadata['definition_metadata_err'] = str(err)
# Set/override any metadata specified by policy
k = policy_key(location)
if k in system.policy:
metadata.update(system.policy[k])
return cls(
system,
definition,
......
<course name="Toy Course" org="edX" course="toy" graceperiod="1 day 5 hours 59 minutes 59 seconds" slug="2012_Fall" start="2015-07-17T12:00">
<chapter name="Overview">
<videosequence format="Lecture Sequence" name="Toy Videos">
<html name="toylab" filename="toylab"/>
<video name="Video Resources" youtube="1.0:1bK-WdDi6Qw"/>
</videosequence>
<video name="Welcome" youtube="1.0:p2Q6BrNhdh8"/>
</chapter>
</course>
roots/2012_Fall.xml
\ No newline at end of file
<course>
<chapter url_name="Overview">
<videosequence url_name="Toy_Videos">
<html url_name="toylab"/>
<video url_name="Video_Resources" youtube="1.0:1bK-WdDi6Qw"/>
</videosequence>
<video url_name="Welcome" youtube="1.0:p2Q6BrNhdh8"/>
</chapter>
</course>
{
"course/2012_Fall": {
"graceperiod": "2 days 5 hours 59 minutes 59 seconds",
"start": "2015-07-17T12:00",
"display_name": "Toy Course"
},
"chapter/Overview": {
"display_name": "Overview"
},
"videosequence/Toy_Videos": {
"display_name": "Toy Videos",
"format": "Lecture Sequence"
},
"html/toylab": {
"display_name": "Toy lab"
},
"video/Video_Resources": {
"display_name": "Video Resources"
},
"video/Welcome": {
"display_name": "Welcome"
}
}
<course org="edX" course="toy" url_name="2012_Fall"/>
\ No newline at end of file
#!/usr/bin/env python
"""
Victor's xml cleanup script. A big pile of useful hacks. Do not use
without carefully reading the code and deciding that this is what you want.
In particular, the remove-meta option is only intended to be used after pulling out a policy
using the metadata_to_json management command.
"""
import os, fnmatch, re, sys
from lxml import etree
from collections import defaultdict
INVALID_CHARS = re.compile(r"[^\w.-]")
def clean(value):
"""
Return value, made into a form legal for locations
"""
return re.sub('_+', '_', INVALID_CHARS.sub('_', value))
# category -> set of url_names for that category that we've already seen
used_names = defaultdict(set)
def clean_unique(category, name):
cleaned = clean(name)
if cleaned not in used_names[category]:
used_names[category].add(cleaned)
return cleaned
x = 1
while cleaned + str(x) in used_names[category]:
x += 1
# Found one!
cleaned = cleaned + str(x)
used_names[category].add(cleaned)
return cleaned
def cleanup(filepath, remove_meta):
# Keys that are exported to the policy file, and so
# can be removed from the xml afterward
to_remove = ('format', 'display_name',
'graceperiod', 'showanswer', 'rerandomize',
'start', 'due', 'graded', 'hide_from_toc',
'ispublic', 'xqa_key')
try:
print "Cleaning {}".format(filepath)
with open(filepath) as f:
parser = etree.XMLParser(remove_comments=False)
xml = etree.parse(filepath, parser=parser)
except:
print "Error parsing file {}".format(filepath)
return
for node in xml.iter(tag=etree.Element):
attrs = node.attrib
if 'url_name' in attrs:
used_names[node.tag].add(attrs['url_name'])
if 'name' in attrs:
# Replace name with an identical display_name, and a unique url_name
name = attrs['name']
attrs['display_name'] = name
attrs['url_name'] = clean_unique(node.tag, name)
del attrs['name']
if 'url_name' in attrs and 'slug' in attrs:
print "WARNING: {} has both slug and url_name"
if ('url_name' in attrs and 'filename' in attrs and
len(attrs)==2 and attrs['url_name'] == attrs['filename']):
# This is a pointer tag in disguise. Get rid of the filename.
print 'turning {}.{} into a pointer tag'.format(node.tag, attrs['url_name'])
del attrs['filename']
if remove_meta:
for attr in to_remove:
if attr in attrs:
del attrs[attr]
with open(filepath, "w") as f:
f.write(etree.tostring(xml))
def find_replace(directory, filePattern, remove_meta):
for path, dirs, files in os.walk(os.path.abspath(directory)):
for filename in fnmatch.filter(files, filePattern):
filepath = os.path.join(path, filename)
cleanup(filepath, remove_meta)
def main(args):
usage = "xml_cleanup [dir] [remove-meta]"
n = len(args)
if n < 1 or n > 2 or (n == 2 and args[1] != 'remove-meta'):
print usage
return
remove_meta = False
if n == 2:
remove_meta = True
find_replace(args[0], '*.xml', remove_meta)
if __name__ == '__main__':
main(sys.argv[1:])
......@@ -35,7 +35,7 @@ def manage_modulestores(request,reload_dir=None):
ip = request.META.get('HTTP_X_REAL_IP','') # nginx reverse proxy
if not ip:
ip = request.META.get('REMOTE_ADDR','None')
if LOCAL_DEBUG:
html += '<h3>IP address: %s ' % ip
html += '<h3>User: %s ' % request.user
......@@ -48,7 +48,7 @@ def manage_modulestores(request,reload_dir=None):
html += 'Permission denied'
html += "</body></html>"
log.debug('request denied, ALLOWED_IPS=%s' % ALLOWED_IPS)
return HttpResponse(html)
return HttpResponse(html)
#----------------------------------------
# reload course if specified
......@@ -74,10 +74,10 @@ def manage_modulestores(request,reload_dir=None):
#----------------------------------------
dumpfields = ['definition','location','metadata']
for cdir, course in def_ms.courses.items():
html += '<hr width="100%"/>'
html += '<h2>Course: %s (%s)</h2>' % (course.metadata['display_name'],cdir)
html += '<h2>Course: %s (%s)</h2>' % (course.display_name,cdir)
for field in dumpfields:
data = getattr(course,field)
......@@ -89,7 +89,7 @@ def manage_modulestores(request,reload_dir=None):
html += '</ul>'
else:
html += '<ul><li>%s</li></ul>' % escape(data)
#----------------------------------------
......@@ -107,4 +107,4 @@ def manage_modulestores(request,reload_dir=None):
log.debug('def_ms=%s' % unicode(def_ms))
html += "</body></html>"
return HttpResponse(html)
return HttpResponse(html)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment