Merge pull request #429 from MITx/feature/victor/strip-metadata

Feature/victor/strip metadata

Merge pull request #429 from MITx/feature/victor/strip-metadata
Feature/victor/strip metadata
919b3e21 · Calen Pennington · 053d52e8 · 61478f4b · 919b3e21 · 919b3e21
Commit 919b3e21 authored Aug 16, 2012 by Calen Pennington
16 changed files
--- a/common/lib/xmodule/xmodule/backcompat_module.py
+++ b/common/lib/xmodule/xmodule/backcompat_module.py
@@ -21,6 +21,7 @@ def process_includes(fn):
        xml_object = etree.fromstring(xml_data)
        next_include = xml_object.find('include')
        while next_include is not None:
+            system.error_tracker("WARNING: the <include> tag is deprecated, and will go away.")
            file = next_include.get('file')
            parent = next_include.getparent()

@@ -67,6 +68,8 @@ class SemanticSectionDescriptor(XModuleDescriptor):
        the child element
        """
        xml_object = etree.fromstring(xml_data)
+        system.error_tracker("WARNING: the <{}> tag is deprecated.  Please do not use in new content."
+                             .format(xml_object.tag))

        if len(xml_object) == 1:
            for (key, val) in xml_object.items():
@@ -74,7 +77,7 @@ class SemanticSectionDescriptor(XModuleDescriptor):

            return system.process_xml(etree.tostring(xml_object[0]))
        else:
-            xml_object.tag = 'sequence'
+            xml_object.tag = 'sequential'
            return system.process_xml(etree.tostring(xml_object))


@@ -83,10 +86,14 @@ class TranslateCustomTagDescriptor(XModuleDescriptor):
    def from_xml(cls, xml_data, system, org=None, course=None):
        """
        Transforms the xml_data from <$custom_tag attr="" attr=""/> to
-        <customtag attr="" attr=""><impl>$custom_tag</impl></customtag>
+        <customtag attr="" attr="" impl="$custom_tag"/>
        """

        xml_object = etree.fromstring(xml_data)
+        system.error_tracker('WARNING: the <{tag}> tag is deprecated.  '
+                             'Instead, use <customtag impl="{tag}" attr1="..." attr2="..."/>. '
+                             .format(tag=xml_object.tag))
+
        tag = xml_object.tag
        xml_object.tag = 'customtag'
        xml_object.attrib['impl'] = tag

--- a/common/lib/xmodule/xmodule/capa_module.py
+++ b/common/lib/xmodule/xmodule/capa_module.py
@@ -237,7 +237,7 @@ class CapaModule(XModule):
            else:
                raise

-        content = {'name': self.metadata['display_name'],
+        content = {'name': self.display_name,
                   'html': html,
                   'weight': self.weight,
                   }
@@ -467,7 +467,7 @@ class CapaModule(XModule):
                return {'success': msg}
            log.exception("Error in capa_module problem checking")
            raise Exception("error in capa_module")
-        
+
        self.attempts = self.attempts + 1
        self.lcp.done = True


--- a/common/lib/xmodule/xmodule/course_module.py
+++ b/common/lib/xmodule/xmodule/course_module.py
@@ -140,7 +140,7 @@ class CourseDescriptor(SequenceDescriptor):

    @property
    def title(self):
-        return self.metadata['display_name']
+        return self.display_name

    @property
    def number(self):

--- a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py
+++ b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py
@@ -112,8 +112,8 @@ class TestMongoModuleStore(object):
        should_work = (
            ("i4x://edX/toy/video/Welcome",
             ("edX/toy/2012_Fall", "Overview", "Welcome", None)),
-            ("i4x://edX/toy/html/toylab",
-             ("edX/toy/2012_Fall", "Overview", "Toy_Videos", None)),
+            ("i4x://edX/toy/chapter/Overview",
+             ("edX/toy/2012_Fall", "Overview", None, None)),
            )
        for location, expected in should_work:
            assert_equals(path_to_location(self.store, location), expected)

--- a/common/lib/xmodule/xmodule/modulestore/xml.py
+++ b/common/lib/xmodule/xmodule/modulestore/xml.py
@@ -31,7 +31,8 @@ def clean_out_mako_templating(xml_string):
    return xml_string

 class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
-    def __init__(self, xmlstore, org, course, course_dir, error_tracker, **kwargs):
+    def __init__(self, xmlstore, org, course, course_dir,
+                 policy, error_tracker, **kwargs):
        """
        A class that handles loading from xml.  Does some munging to ensure that
        all elements have unique slugs.
@@ -97,7 +98,7 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
        MakoDescriptorSystem.__init__(self, load_item, resources_fs,
                                      error_tracker, render_template, **kwargs)
        XMLParsingSystem.__init__(self, load_item, resources_fs,
-                                  error_tracker, process_xml, **kwargs)
+                                  error_tracker, process_xml, policy, **kwargs)


 class XMLModuleStore(ModuleStoreBase):
@@ -184,6 +185,7 @@ class XMLModuleStore(ModuleStoreBase):
        if not os.path.exists(policy_path):
            return {}
        try:
+            log.debug("Loading policy from {}".format(policy_path))
            with open(policy_path) as f:
                return json.load(f)
        except (IOError, ValueError) as err:
@@ -232,13 +234,16 @@ class XMLModuleStore(ModuleStoreBase):
                tracker(msg)
                course = course_dir

-            system = ImportSystem(self, org, course, course_dir, tracker)
+            url_name = course_data.get('url_name')
+            if url_name:
+                policy_path = self.data_dir / course_dir / 'policies' / '{}.json'.format(url_name)
+                policy = self.load_policy(policy_path, tracker)
+            else:
+                policy = {}

-            course_descriptor = system.process_xml(etree.tostring(course_data))
-            policy_path = self.data_dir / course_dir / 'policy.json'
+            system = ImportSystem(self, org, course, course_dir, policy, tracker)

-            policy = self.load_policy(policy_path, tracker)
-            XModuleDescriptor.apply_policy(course_descriptor, policy)
+            course_descriptor = system.process_xml(etree.tostring(course_data))

            # NOTE: The descriptors end up loading somewhat bottom up, which
            # breaks metadata inheritance via get_children().  Instead

--- a/common/lib/xmodule/xmodule/seq_module.py
+++ b/common/lib/xmodule/xmodule/seq_module.py
@@ -76,7 +76,7 @@ class SequenceModule(XModule):
            contents.append({
                'content': child.get_html(),
                'title': "\n".join(
-                    grand_child.metadata['display_name'].strip()
+                    grand_child.display_name.strip()
                    for grand_child in child.get_children()
                    if 'display_name'  in grand_child.metadata
                ),
@@ -107,7 +107,7 @@ class SequenceModule(XModule):
 class SequenceDescriptor(MakoModuleDescriptor, XmlDescriptor):
    mako_template = 'widgets/sequence-edit.html'
    module_class = SequenceModule
-    
+
    stores_state = True # For remembering where in the sequence the student is

    @classmethod

--- a/common/lib/xmodule/xmodule/tests/test_import.py
+++ b/common/lib/xmodule/xmodule/tests/test_import.py
@@ -42,9 +42,9 @@ class DummySystem(XMLParsingSystem):
            descriptor.get_children()
            return descriptor

-
+        policy = {}
        XMLParsingSystem.__init__(self, load_item, self.resources_fs,
-                                  self.errorlog.tracker, process_xml)
+                                  self.errorlog.tracker, process_xml, policy)

    def render_template(self, template, context):
            raise Exception("Shouldn't be called")

--- a/common/lib/xmodule/xmodule/x_module.py
+++ b/common/lib/xmodule/xmodule/x_module.py
@@ -426,23 +426,6 @@ class XModuleDescriptor(Plugin, HTMLSnippet):


    @staticmethod
-    def apply_policy(node, policy):
-        """
-        Given a descriptor, traverse all its descendants and update its metadata
-        with the policy.
-
-        Notes:
-          - this does not propagate inherited metadata.  The caller should
-            call compute_inherited_metadata after applying the policy.
-          - metadata specified in the policy overrides metadata in the xml
-        """
-        k = policy_key(node.location)
-        if k in policy:
-            node.metadata.update(policy[k])
-        for c in node.get_children():
-            XModuleDescriptor.apply_policy(c, policy)
-
-    @staticmethod
    def compute_inherited_metadata(node):
        """Given a descriptor, traverse all of its descendants and do metadata
        inheritance.  Should be called on a CourseDescriptor after importing a
@@ -697,16 +680,19 @@ class DescriptorSystem(object):


 class XMLParsingSystem(DescriptorSystem):
-    def __init__(self, load_item, resources_fs, error_tracker, process_xml, **kwargs):
+    def __init__(self, load_item, resources_fs, error_tracker, process_xml, policy, **kwargs):
        """
        load_item, resources_fs, error_tracker: see DescriptorSystem

+        policy: a policy dictionary for overriding xml metadata
+
        process_xml: Takes an xml string, and returns a XModuleDescriptor
            created from that xml
        """
        DescriptorSystem.__init__(self, load_item, resources_fs, error_tracker,
                                  **kwargs)
        self.process_xml = process_xml
+        self.policy = policy


 class ModuleSystem(object):

--- a/common/lib/xmodule/xmodule/xml_module.py
+++ b/common/lib/xmodule/xmodule/xml_module.py
-from xmodule.x_module import XModuleDescriptor
+from xmodule.x_module import (XModuleDescriptor, policy_key)
 from xmodule.modulestore import Location
 from lxml import etree
 import json
@@ -270,6 +270,11 @@ class XmlDescriptor(XModuleDescriptor):
                log.debug('Error %s in loading metadata %s' % (err,dmdata))
                metadata['definition_metadata_err'] = str(err)

+        # Set/override any metadata specified by policy
+        k = policy_key(location)
+        if k in system.policy:
+            metadata.update(system.policy[k])
+
        return cls(
            system,
            definition,

--- a/common/test/data/toy/course.xml
+++ b/common/test/data/toy/course.xml
-<course name="Toy Course" org="edX" course="toy" graceperiod="1 day 5 hours 59 minutes 59 seconds" slug="2012_Fall" start="2015-07-17T12:00">
-  <chapter name="Overview">
-    <videosequence format="Lecture Sequence" name="Toy Videos">
-      <html name="toylab" filename="toylab"/>
-      <video name="Video Resources" youtube="1.0:1bK-WdDi6Qw"/>
-    </videosequence>
-    <video name="Welcome" youtube="1.0:p2Q6BrNhdh8"/>
-  </chapter>
-</course>
--- a/common/test/data/toy/course.xml
+++ b/common/test/data/toy/course.xml
+roots/2012_Fall.xml
\ No newline at end of file
--- a/common/test/data/toy/course/2012_Fall.xml
+++ b/common/test/data/toy/course/2012_Fall.xml
+<course>
+  <chapter url_name="Overview">
+    <videosequence url_name="Toy_Videos">
+      <html url_name="toylab"/>
+      <video url_name="Video_Resources" youtube="1.0:1bK-WdDi6Qw"/>
+    </videosequence>
+    <video url_name="Welcome" youtube="1.0:p2Q6BrNhdh8"/>
+  </chapter>
+</course>
--- a/common/test/data/toy/policies/2012_Fall.json
+++ b/common/test/data/toy/policies/2012_Fall.json
+{
+    "course/2012_Fall": {
+        "graceperiod": "2 days 5 hours 59 minutes 59 seconds",
+        "start": "2015-07-17T12:00",
+        "display_name": "Toy Course"
+    },
+    "chapter/Overview": {
+        "display_name": "Overview"
+    },
+    "videosequence/Toy_Videos": {
+        "display_name": "Toy Videos",
+        "format": "Lecture Sequence"
+    },
+    "html/toylab": {
+        "display_name": "Toy lab"
+    },
+    "video/Video_Resources": {
+        "display_name": "Video Resources"
+    },
+    "video/Welcome": {
+        "display_name": "Welcome"
+    }
+}
--- a/common/test/data/toy/roots/2012_Fall.xml
+++ b/common/test/data/toy/roots/2012_Fall.xml
+<course org="edX" course="toy" url_name="2012_Fall"/>
\ No newline at end of file
--- a/common/xml_cleanup.py
+++ b/common/xml_cleanup.py
+#!/usr/bin/env python
+
+"""
+Victor's xml cleanup script.  A big pile of useful hacks.  Do not use
+without carefully reading the code and deciding that this is what you want.
+
+In particular, the remove-meta option is only intended to be used after pulling out a policy
+using the metadata_to_json management command.
+"""
+
+import os, fnmatch, re, sys
+from lxml import etree
+from collections import defaultdict
+
+INVALID_CHARS = re.compile(r"[^\w.-]")
+
+def clean(value):
+    """
+    Return value, made into a form legal for locations
+    """
+    return re.sub('_+', '_', INVALID_CHARS.sub('_', value))
+
+
+# category -> set of url_names for that category that we've already seen
+used_names = defaultdict(set)
+
+def clean_unique(category, name):
+    cleaned = clean(name)
+    if cleaned not in used_names[category]:
+        used_names[category].add(cleaned)
+        return cleaned
+    x = 1
+    while cleaned + str(x) in used_names[category]:
+        x += 1
+
+    # Found one!
+    cleaned = cleaned + str(x)
+    used_names[category].add(cleaned)
+    return cleaned
+
+def cleanup(filepath, remove_meta):
+    # Keys that are exported to the policy file, and so
+    # can be removed from the xml afterward
+    to_remove = ('format', 'display_name',
+                 'graceperiod', 'showanswer', 'rerandomize',
+                 'start', 'due', 'graded', 'hide_from_toc',
+                 'ispublic', 'xqa_key')
+
+    try:
+        print "Cleaning {}".format(filepath)
+        with open(filepath) as f:
+            parser = etree.XMLParser(remove_comments=False)
+            xml = etree.parse(filepath, parser=parser)
+    except:
+        print "Error parsing file {}".format(filepath)
+        return
+
+    for node in xml.iter(tag=etree.Element):
+        attrs = node.attrib
+        if 'url_name' in attrs:
+            used_names[node.tag].add(attrs['url_name'])
+        if 'name' in attrs:
+            # Replace name with an identical display_name, and a unique url_name
+            name = attrs['name']
+            attrs['display_name'] = name
+            attrs['url_name'] = clean_unique(node.tag, name)
+            del attrs['name']
+
+        if 'url_name' in attrs and 'slug' in attrs:
+            print "WARNING: {} has both slug and url_name"
+
+        if ('url_name' in attrs and 'filename' in attrs and
+            len(attrs)==2 and attrs['url_name'] == attrs['filename']):
+            # This is a pointer tag in disguise.  Get rid of the filename.
+            print 'turning {}.{} into a pointer tag'.format(node.tag, attrs['url_name'])
+            del attrs['filename']
+
+        if remove_meta:
+            for attr in to_remove:
+                if attr in attrs:
+                    del attrs[attr]
+
+
+    with open(filepath, "w") as f:
+        f.write(etree.tostring(xml))
+
+
+def find_replace(directory, filePattern, remove_meta):
+    for path, dirs, files in os.walk(os.path.abspath(directory)):
+        for filename in fnmatch.filter(files, filePattern):
+            filepath = os.path.join(path, filename)
+            cleanup(filepath, remove_meta)
+
+
+def main(args):
+    usage = "xml_cleanup [dir] [remove-meta]"
+    n = len(args)
+    if n < 1 or n > 2 or (n == 2 and args[1] != 'remove-meta'):
+        print usage
+        return
+
+    remove_meta = False
+    if n == 2:
+        remove_meta = True
+
+    find_replace(args[0], '*.xml', remove_meta)
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
+
+
--- a/lms/djangoapps/lms_migration/migrate.py
+++ b/lms/djangoapps/lms_migration/migrate.py
@@ -35,7 +35,7 @@ def manage_modulestores(request,reload_dir=None):
    ip = request.META.get('HTTP_X_REAL_IP','')	# nginx reverse proxy
    if not ip:
        ip = request.META.get('REMOTE_ADDR','None')
-    
+
    if LOCAL_DEBUG:
        html += '<h3>IP address: %s ' % ip
        html += '<h3>User: %s ' % request.user
@@ -48,7 +48,7 @@ def manage_modulestores(request,reload_dir=None):
            html += 'Permission denied'
            html += "</body></html>"
            log.debug('request denied, ALLOWED_IPS=%s' % ALLOWED_IPS)
-            return HttpResponse(html)    
+            return HttpResponse(html)

    #----------------------------------------
    # reload course if specified
@@ -74,10 +74,10 @@ def manage_modulestores(request,reload_dir=None):
    #----------------------------------------

    dumpfields = ['definition','location','metadata']
-    
+
    for cdir, course in def_ms.courses.items():
        html += '<hr width="100%"/>'
-        html += '<h2>Course: %s (%s)</h2>' % (course.metadata['display_name'],cdir)
+        html += '<h2>Course: %s (%s)</h2>' % (course.display_name,cdir)

        for field in dumpfields:
            data = getattr(course,field)
@@ -89,7 +89,7 @@ def manage_modulestores(request,reload_dir=None):
                html += '</ul>'
            else:
                html += '<ul><li>%s</li></ul>' % escape(data)
-        
+

    #----------------------------------------

@@ -107,4 +107,4 @@ def manage_modulestores(request,reload_dir=None):
    log.debug('def_ms=%s' % unicode(def_ms))

    html += "</body></html>"
-    return HttpResponse(html)    
+    return HttpResponse(html)