Commit 95842602 by Piotr Mitros

Two content_parser modules by accident

parent 97ebb750
from django.conf import settings
from auth.models import UserProfile
settings = None
from xml.dom.minidom import parse, parseString
from lxml import etree
''' This file will eventually form an abstraction layer between the
course XML file and the rest of the system.
TODO: Shift everything from xml.dom.minidom to XPath (or XQuery) '''
def xpath(xml, query_string, **args):
    ''' Safe xpath query into an xml document:
        * xml is the XML source; it is parsed here with etree.fromstring.
        * query_string is the query, with {param} placeholders.
        * args are the parameters. Substitute for {params}.

    Each parameter value is checked character by character and may only
    contain ASCII letters, digits, underscore and space. Anything else
    raises Exception, because the values are pasted into the query text
    with str.format and are not escaped.

    Returns the result of doc.xpath() on the formatted query.

    We should remove this with the move to lxml.
    We should also use lxml argument passing. '''
    # Imported locally so this block does not depend on a module-level import.
    import logging
    log = logging.getLogger(__name__)

    # TODO: This should escape the string. For now, we just assume it's made of valid characters.
    # Couldn't figure out how to escape for lxml in a few quick Googles
    # Kept as a plain string (not a set) so the membership test behaves
    # exactly as before for every kind of element.
    valid_chars = ("abcdefghijklmnopqrstuvwxyz"
                   "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                   "0123456789"
                   "_ ")

    def escape(x):
        for e in x:
            if e not in valid_chars:
                raise Exception("Invalid char in xpath expression. TODO: Escape")
        return x

    # Parse first, validate second: same order of failure as before.
    doc = etree.fromstring(xml)
    # Diagnostics go to the logger instead of stdout.
    log.debug("xpath: parsed document of type %s", type(doc))

    args = dict((k, escape(v)) for k, v in args.items())
    log.debug("xpath: query parameters %s", args)

    results = doc.xpath(query_string.format(**args))
    return results
def xpath_remove(tree, path):
''' Remove all items matching path from lxml tree. Works in
items = tree.xpath(path)
for item in items:
return tree
if __name__=='__main__':
    # Quick manual check: look up the <problem> named "Bob" under <html>
    # and print whatever xpath() returns.
    sample_xml = '<html><problem name="Bob"></problem></html>'
    sample_query = '/{search}/problem[@name="{name}"]'
    print(xpath(sample_xml, sample_query, search='html', name="Bob"))
def item(l, default="", process=lambda x:x):
    ''' Collapse a sequence of at most one element into a single value.
        * l is the sequence to inspect.
        * default is returned when l is empty.
        * process is applied to the only element when l has exactly one.
    Raises Exception('Malformed XML') when l has more than one element. '''
    count = len(l)
    if count > 1:
        raise Exception('Malformed XML')
    if count == 1:
        return process(l[0])
    return default
def course_file(user):
    ''' Return settings.DATA_DIR concatenated with the courseware
    attribute of the UserProfile whose user is the given user.
    (Presumably the location of that user's course XML file -- confirm
    against callers.) '''
    # TODO: Cache. Also, return the libxml2 object.
    # DATA_DIR is read before the profile lookup, as in the one-line form.
    data_dir = settings.DATA_DIR
    profile = UserProfile.objects.get(user=user)
    return data_dir + profile.courseware
def module_xml(coursefile, module, id_tag, module_id):
''' Get XML for a module based on module and module_id. Assumes
module occurs once in courseware XML file.. '''
doc = etree.parse(coursefile)
# Sanitize input
if not module.isalnum():
raise Exception("Module is not alphanumeric")
if not module_id.isalnum():
raise Exception("Module ID is not alphanumeric")
xpath_search='//*/{module}[(@{id_tag} = "{id}") or (@id = "{id}")]'.format(module=module,
if len(result_set)>1:
print "WARNING: Potentially malformed course file", module, module_id
if len(result_set)==0:
return None
return etree.tostring(result_set[0])
#return result_set[0].serialize()
def toc_from_xml(coursefile, active_chapter, active_section):
course = dom.getElementsByTagName('course')[0]
chapters = course.getElementsByTagName('chapter')
for c in chapters:
if c.getAttribute("name") == 'hidden':
for s in c.getElementsByTagName('section'):
'active':(c.getAttribute("name")==active_chapter and \
return ch
def dom_select(dom, element_type, element_name):
if dom==None:
return None
for e in elements:
if e.getAttribute("name")==element_name:
return e
return None
from django.conf import settings
from django.conf import settings
from auth.models import UserProfile
settings = None
from xml.dom.minidom import parse, parseString
from lxml import etree
from auth.models import UserProfile
''' This file will eventually form an abstraction layer between the
course XML file and the rest of the system.
......@@ -15,7 +18,9 @@ def xpath(xml, query_string, **args):
''' Safe xpath query into an xml tree:
* xml is the tree.
* query_string is the query
* args are the parameters. Substitute for {params}. '''
* args are the parameters. Substitute for {params}.
We should remove this with the move to lxml.
We should also use lxml argument passing. '''
doc = etree.fromstring(xml)
print type(doc)
def escape(x):
......@@ -32,6 +37,14 @@ def xpath(xml, query_string, **args):
results = doc.xpath(query_string.format(**args))
return results
def xpath_remove(tree, path):
''' Remove all items matching path from lxml tree. Works in
items = tree.xpath(path)
for item in items:
return tree
if __name__=='__main__':
print xpath('<html><problem name="Bob"></problem></html>', '/{search}/problem[@name="{name}"]', search='html', name="Bob")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment