Two content_parser modules by accident

95842602 · Piotr Mitros · 97ebb750 · 97ebb750 · 95842602
Commit 95842602 authored Jan 20, 2012 by Piotr Mitros
Hide whitespace changes
Inline Side-by-side

Showing with 17 additions and 121 deletions

courseware/capa/content_parser.py
+0 -117

courseware/content_parser.py
+17 -4

No files found.
--- a/courseware/capa/content_parser.py
+++ b/courseware/capa/content_parser.py
-try: 
-    from django.conf import settings
-    from auth.models import UserProfile
-except: 
-    settings = None 
-
-from xml.dom.minidom import parse, parseString
-
-from lxml import etree
-
-''' This file will eventually form an abstraction layer between the
-course XML file and the rest of the system. 
-
-TODO: Shift everything from xml.dom.minidom to XPath (or XQuery)
-'''
-
-def xpath(xml, query_string, **args):
-    ''' Safe xpath query into an xml tree:
-        * xml is the tree.
-        * query_string is the query
-        * args are the parameters. Substitute for {params}. 
-        We should remove this with the move to lxml. 
-        We should also use lxml argument passing. '''
-    doc = etree.fromstring(xml)
-    print type(doc)
-    def escape(x):
-        # TODO: This should escape the string. For now, we just assume it's made of valid characters. 
-        # Couldn't figure out how to escape for lxml in a few quick Googles
-        valid_chars="".join(map(chr, range(ord('a'),ord('z')+1)+range(ord('A'),ord('Z')+1)+range(ord('0'), ord('9')+1)))+"_ "
-        for e in x:
-            if e not in valid_chars:
-                raise Exception("Invalid char in xpath expression. TODO: Escape")
-        return x
-
-    args=dict( ((k, escape(args[k])) for k in args) )
-    print args
-    results = doc.xpath(query_string.format(**args))
-    return results
-
-def xpath_remove(tree, path):
-    ''' Remove all items matching path from lxml tree.  Works in
-        place.'''
-    items = tree.xpath(path)
-    for item in items: 
-        item.getparent().remove(item)
-    return tree
-
-if __name__=='__main__':
-    print xpath('<html><problem name="Bob"></problem></html>', '/{search}/problem[@name="{name}"]', search='html', name="Bob")
-
-def item(l, default="", process=lambda x:x):
-    if len(l)==0:
-        return default
-    elif len(l)==1:
-        return process(l[0])
-    else:
-        raise Exception('Malformed XML')
-    
-
-def course_file(user):
-    # TODO: Cache. Also, return the libxml2 object. 
-    return settings.DATA_DIR+UserProfile.objects.get(user=user).courseware
-
-def module_xml(coursefile, module, id_tag, module_id):
-    ''' Get XML for a module based on module and module_id. Assumes
-        module occurs once in courseware XML file.. '''
-    doc = etree.parse(coursefile)
-
-    # Sanitize input
-    if not module.isalnum():
-        raise Exception("Module is not alphanumeric")
-    if not module_id.isalnum():
-        raise Exception("Module ID is not alphanumeric")
-    xpath_search='//*/{module}[(@{id_tag} = "{id}") or (@id = "{id}")]'.format(module=module, 
-                                                           id_tag=id_tag,
-                                                           id=module_id)
-    #result_set=doc.xpathEval(xpath_search)
-    result_set=doc.xpath(xpath_search)
-    if len(result_set)>1:
-        print "WARNING: Potentially malformed course file", module, module_id
-    if len(result_set)==0:
-        return None
-    return etree.tostring(result_set[0])
-    #return result_set[0].serialize()
-
-def toc_from_xml(coursefile, active_chapter, active_section):
-    dom=parse(coursefile)
-
-    course = dom.getElementsByTagName('course')[0]
-    name=course.getAttribute("name")
-    chapters = course.getElementsByTagName('chapter')
-    ch=list()
-    for c in chapters:
-        if c.getAttribute("name") == 'hidden':
-            continue
-        sections=list()
-        for s in c.getElementsByTagName('section'):
-            sections.append({'name':s.getAttribute("name"), 
-                             'time':s.getAttribute("time"), 
-                             'format':s.getAttribute("format"), 
-                             'due':s.getAttribute("due"),
-                             'active':(c.getAttribute("name")==active_chapter and \
-                                           s.getAttribute("name")==active_section)})
-        ch.append({'name':c.getAttribute("name"), 
-                   'sections':sections,
-                   'active':(c.getAttribute("name")==active_chapter)})
-    return ch
-
-def dom_select(dom, element_type, element_name):
-    if dom==None:
-        return None
-    elements=dom.getElementsByTagName(element_type)
-    for e in elements:
-        if e.getAttribute("name")==element_name:
-            return e
-    return None
-
--- a/courseware/content_parser.py
+++ b/courseware/content_parser.py
-from django.conf import settings
+try: 
+    from django.conf import settings
+    from auth.models import UserProfile
+except: 
+    settings = None 
+
 from xml.dom.minidom import parse, parseString

 from lxml import etree

-from auth.models import UserProfile
-
 ''' This file will eventually form an abstraction layer between the
 course XML file and the rest of the system. 

@@ -15,7 +18,9 @@ def xpath(xml, query_string, **args):
    ''' Safe xpath query into an xml tree:
        * xml is the tree.
        * query_string is the query
-        * args are the parameters. Substitute for {params}. '''
+        * args are the parameters. Substitute for {params}. 
+        We should remove this with the move to lxml. 
+        We should also use lxml argument passing. '''
    doc = etree.fromstring(xml)
    print type(doc)
    def escape(x):
@@ -32,6 +37,14 @@ def xpath(xml, query_string, **args):
    results = doc.xpath(query_string.format(**args))
    return results

+def xpath_remove(tree, path):
+    ''' Remove all items matching path from lxml tree.  Works in
+        place.'''
+    items = tree.xpath(path)
+    for item in items: 
+        item.getparent().remove(item)
+    return tree
+
 if __name__=='__main__':
    print xpath('<html><problem name="Bob"></problem></html>', '/{search}/problem[@name="{name}"]', search='html', name="Bob")