Major refactor of infrastructure for grading problems

--HG-- rename : courseware/calc.py => courseware/capa/calc.py

Major refactor of infrastructure for grading problems
--HG-- rename : courseware/calc.py => courseware/capa/calc.py
18dc8cc5 · Piotr Mitros · e58aca28 · 18dc8cc5 · 18dc8cc5 · 18dc8cc5
Commit 18dc8cc5 authored Jan 10, 2012 by Piotr Mitros
10 changed files
--- a/courseware/capa/__init__.py
+++ b/courseware/capa/__init__.py
--- a/courseware/calc.py
+++ b/courseware/calc.py
--- a/courseware/capa/capa_problem.py
+++ b/courseware/capa/capa_problem.py
+import random, numpy, math, scipy
+import struct, os
+import re
+from lxml import etree
+from lxml.etree import Element
+import copy
+from mako.template import Template
+from content_parser import xpath_remove
+from util import contextualize_text
+from inputtypes import textline, schematic
+from responsetypes import numericalresponse, formularesponse
+# Maps each supported response tag to the class used to grade it.
+# NOTE(review): 'customresponse' maps to None, so looking it up and calling
+# the result (as the grading code in this file does) would fail for a problem
+# containing a <customresponse> tag -- confirm whether a real class should be
+# registered here.
+response_types = {'numericalresponse':numericalresponse, 
+                  'formularesponse':formularesponse,
+                  'customresponse':None}
+# Tags of the input widgets a student fills in
+entry_types = ['textline', 'schematic']
+# Tags that parameterize a response and are kept when purifying the problem tree
+response_properties = ["responseparam", "answer"]
+# How to convert from original XML to HTML
+# We should do this with XSLT later
+html_transforms = {'problem': {'tag':'div'},
+                   "numericalresponse": {'tag':'span'}, 
+                   "customresponse": {'tag':'span'}, 
+                   "formularesponse": {'tag':'span'}, 
+                   "text": {'tag':'span'}}
+# Globals made available to the code in a problem's <script> tags
+global_context={'random':random,
+                'numpy':numpy,
+                'math':math,
+                'scipy':scipy}
+# These should be removed from HTML output, including all subelements
+html_problem_semantics = ["responseparam", "answer", "script"]
+# These should be removed from HTML output, but keeping subelements
+# (in this file the list is only referenced from commented-out code)
+html_skip = ["numericalresponse", "customresponse", "formularesponse", "text"]
+# These should be transformed
+html_special_response = {"textline":textline.render,
+                         "schematic":schematic.render}
+class LoncapaProblem(object):
+    def __init__(self, filename, id=None, state=None):
+        ## Initialize class variables from state
+        self.seed = None
+        self.student_answers = dict()
+        self.correct_map = dict()
+        self.done = False
+        self.filename = filename
+        if id!=None:
+            self.problem_id = id
+        else:
+            self.problem_id = filename
+        if state!=None:
+            if 'seed' in state:
+                self.seed = state['seed']
+            if 'student_answers' in state:
+                self.student_answers = state['student_answers']
+            if 'correct_map' in state:
+                self.correct_map = state['correct_map']
+            if 'done' in state:
+                self.done = state['done']
+        # TODO: Does this deplete the Linux entropy pool? Is this fast enough?
+        if self.seed == None:
+            self.seed=struct.unpack('i', os.urandom(4))[0]
+        ## Parse XML file
+        file_text = open(filename).read()
+        # Convert startouttext and endouttext to proper <text></text>
+        # TODO: Do with XML operations
+        file_text = re.sub("startouttext\s*/","text",file_text)
+        file_text = re.sub("endouttext\s*/","/text",file_text)
+        self.tree = etree.XML(file_text)
+        self.preprocess_problem(self.tree, correct_map=self.correct_map, answer_map = self.student_answers)
+        self.context = self.extract_context(self.tree, seed=self.seed)
+    def get_state(self):
+        ''' Stored per-user session data neeeded to: 
+            1) Recreate the problem
+            2) Populate any student answers. '''
+        return {'seed':self.seed, 
+                'student_answers':self.student_answers,
+                'correct_map':self.correct_map, 
+                'done':self.done}
+    def get_max_score(self):
+        sum = 0 
+        for et in entry_types: 
+            sum = sum + self.tree.xpath('count(//'+et+')')
+        return int(sum)
+    def get_score(self):
+        correct=0
+        for key in self.correct_map:
+            if self.correct_map[key] == u'correct':
+                correct += 1
+        if self.student_answers == None or len(self.student_answers)==0:
+            return {'score':0,
+                    'total':self.get_max_score()}
+        else:
+            return {'score':correct,
+                    'total':self.get_max_score()}
+    def grade_answers(self, answers):
+        self.student_answers = answers
+        context=self.extract_context(self.tree)
+        self.correct_map = dict()
+        problems_simple = self.extract_problems(self.tree)
+        for response in problems_simple:
+            grader = response_types[response.tag](response, self.context)
+            results = grader.grade(answers)
+            self.correct_map.update(results)
+        return self.correct_map
+    def get_question_answers(self):
+        context=self.extract_context(self.tree)
+        answer_map = dict()
+        problems_simple = self.extract_problems(self.tree)
+        for response in problems_simple:
+            responder = response_types[response.tag](response, self.context)
+            results = responder.get_answers()
+            answer_map.update(results)
+        return answer_map
+    # ======= Private ========
+    def extract_context(self, tree, seed = struct.unpack('i', os.urandom(4))[0]):  # private
+        ''' Problem XML goes to Python execution context. Runs everything in script tags '''
+        random.seed(self.seed)
+        context = dict()
+        for script in tree.xpath('/problem/script'):
+            exec script.text in global_context, context
+        return context
+    def get_html(self):
+        return contextualize_text(etree.tostring(self.extract_html(self.tree)[0]), self.context)
+    def extract_html(self, problemtree):  # private
+        ''' Helper function for get_html. Recursively converts XML tree to HTML
+        '''
+        if problemtree.tag in html_problem_semantics:
+            return
+        if problemtree.tag in html_special_response:
+            status = "unsubmitted"
+            if problemtree.get('id') in self.correct_map:
+                status = self.correct_map[problemtree.get('id')]
+            value = ""
+            if self.student_answers != None and problemtree.get('id') in self.student_answers:
+                value = self.student_answers[problemtree.get('id')]
+            return html_special_response[problemtree.tag](problemtree, value, status) #TODO
+        tree=Element(problemtree.tag)
+        for item in problemtree:
+            subitems = self.extract_html(item)
+            if subitems != None: 
+                for subitem in subitems:
+                    tree.append(subitem)
+        for (key,value) in problemtree.items():
+            tree.set(key, value)
+        tree.text=problemtree.text
+        tree.tail=problemtree.tail
+        if problemtree.tag in html_transforms:
+            tree.tag=html_transforms[problemtree.tag]['tag']
+        # TODO: Fix. This loses Element().tail
+        #if problemtree.tag in html_skip:
+        #    return tree
+        return [tree]
+    def preprocess_problem(self, tree, correct_map=dict(), answer_map=dict()): # private
+        ''' Assign IDs to all the responses 
+        Assign sub-IDs to all entries (textline, schematic, etc.)
+        Annoted correctness and value
+        In-place transformation
+        '''
+        response_id = 1
+        for response in tree.xpath('//'+"|//".join(response_types)):
+            response_id_str=self.problem_id+"_"+str(response_id)
+            response.attrib['id']=response_id_str
+            if response_id not in correct_map:
+                correct = 'unsubmitted'
+            response.attrib['state'] = correct
+            response_id = response_id + 1
+            answer_id = 1
+            for entry in tree.xpath("|".join(['//'+response.tag+'[@id=$id]//'+x for x in entry_types]), 
+                                    id=response_id_str):
+                entry.attrib['response_id'] = str(response_id)
+                entry.attrib['answer_id'] = str(answer_id)
+                entry.attrib['id'] = "%s_%i_%i"%(self.problem_id, response_id, answer_id)
+                answer_id=answer_id+1
+    def extract_problems(self, problem_tree):
+        ''' Remove layout from the problem, and give a purified XML tree of just the problems '''
+        problem_tree=copy.deepcopy(problem_tree)
+        tree=Element('problem')
+        for response in problem_tree.xpath("//"+"|//".join(response_types)):
+            newresponse = copy.copy(response)
+            for e in newresponse: 
+                newresponse.remove(e)
+            # copy.copy is needed to make xpath work right. Otherwise, it starts at the root
+            # of the tree. We should figure out if there's some work-around
+            for e in copy.copy(response).xpath("//"+"|//".join(response_properties+entry_types)):
+                newresponse.append(e)
+            tree.append(newresponse)
+        return tree
+if __name__=='__main__':
+    problem_id='simpleFormula'
+    filename = 'simpleFormula.xml'
+    problem_id='resistor'
+    filename = 'resistor.xml'
+    lcp = LoncapaProblem(filename, problem_id)
+    context = lcp.extract_context(lcp.tree)
+    problem = lcp.extract_problems(lcp.tree)
+    print lcp.grade_problems({'resistor_2_1':'1.0','resistor_3_1':'2.0'})
+    #print lcp.grade_problems({'simpleFormula_2_1':'3*x^3'})
+#numericalresponse(problem, context)
+#print etree.tostring((lcp.tree))
+    print '============'
+    print
+#print etree.tostring(lcp.extract_problems(lcp.tree))
+    print lcp.get_html()
+#print extract_context(tree)
+    # def handle_fr(self, element):
+    #     problem={"answer":self.contextualize_text(answer),
+    #              "type":"formularesponse",
+    #              "tolerance":evaluator({},{},self.contextualize_text(tolerance)),
+    #              "sample_range":dict(zip(variables, sranges)),
+    #              "samples_count": numsamples,
+    #              "id":id,
+    #     self.questions[self.lid]=problem        
--- a/courseware/capa/content_parser.py
+++ b/courseware/capa/content_parser.py
+try: 
+    from django.conf import settings
+    from auth.models import UserProfile
+except: 
+    settings = None 
+from xml.dom.minidom import parse, parseString
+from lxml import etree
+''' This file will eventually form an abstraction layer between the
+course XML file and the rest of the system. 
+TODO: Shift everything from xml.dom.minidom to XPath (or XQuery)
+'''
+def xpath(xml, query_string, **args):
+    ''' Safe xpath query into an xml tree:
+        * xml is the tree.
+        * query_string is the query
+        * args are the parameters. Substitute for {params}. 
+        We should remove this with the move to lxml. 
+        We should also use lxml argument passing. '''
+    doc = etree.fromstring(xml)
+    print type(doc)
+    def escape(x):
+        # TODO: This should escape the string. For now, we just assume it's made of valid characters. 
+        # Couldn't figure out how to escape for lxml in a few quick Googles
+        valid_chars="".join(map(chr, range(ord('a'),ord('z')+1)+range(ord('A'),ord('Z')+1)+range(ord('0'), ord('9')+1)))+"_ "
+        for e in x:
+            if e not in valid_chars:
+                raise Exception("Invalid char in xpath expression. TODO: Escape")
+        return x
+    args=dict( ((k, escape(args[k])) for k in args) )
+    print args
+    results = doc.xpath(query_string.format(**args))
+    return results
+def xpath_remove(tree, path):
+    ''' Remove all items matching path from lxml tree.  Works in
+        place.'''
+    items = tree.xpath(path)
+    for item in items: 
+        item.getparent().remove(item)
+    return tree
+# Manual smoke test for xpath() when this file is run as a script.
+# NOTE: this guard sits above the remaining definitions of the file; it only
+# uses xpath(), which is already defined at this point.
+if __name__=='__main__':
+    print xpath('<html><problem name="Bob"></problem></html>', '/{search}/problem[@name="{name}"]', search='html', name="Bob")
+def item(l, default="", process=lambda x:x):
+    if len(l)==0:
+        return default
+    elif len(l)==1:
+        return process(l[0])
+    else:
+        raise Exception('Malformed XML')
+def course_file(user):
+    # TODO: Cache. Also, return the libxml2 object. 
+    return settings.DATA_DIR+UserProfile.objects.get(user=user).courseware
+def module_xml(coursefile, module, id_tag, module_id):
+    ''' Get XML for a module based on module and module_id. Assumes
+        module occurs once in courseware XML file.. '''
+    doc = etree.parse(coursefile)
+    # Sanitize input
+    if not module.isalnum():
+        raise Exception("Module is not alphanumeric")
+    if not module_id.isalnum():
+        raise Exception("Module ID is not alphanumeric")
+    xpath_search='//*/{module}[(@{id_tag} = "{id}") or (@id = "{id}")]'.format(module=module, 
+                                                           id_tag=id_tag,
+                                                           id=module_id)
+    #result_set=doc.xpathEval(xpath_search)
+    result_set=doc.xpath(xpath_search)
+    if len(result_set)>1:
+        print "WARNING: Potentially malformed course file", module, module_id
+    if len(result_set)==0:
+        return None
+    return etree.tostring(result_set[0])
+    #return result_set[0].serialize()
+def toc_from_xml(coursefile, active_chapter, active_section):
+    dom=parse(coursefile)
+    course = dom.getElementsByTagName('course')[0]
+    name=course.getAttribute("name")
+    chapters = course.getElementsByTagName('chapter')
+    ch=list()
+    for c in chapters:
+        if c.getAttribute("name") == 'hidden':
+            continue
+        sections=list()
+        for s in c.getElementsByTagName('section'):
+            sections.append({'name':s.getAttribute("name"), 
+                             'time':s.getAttribute("time"), 
+                             'format':s.getAttribute("format"), 
+                             'due':s.getAttribute("due"),
+                             'active':(c.getAttribute("name")==active_chapter and \
+                                           s.getAttribute("name")==active_section)})
+        ch.append({'name':c.getAttribute("name"), 
+                   'sections':sections,
+                   'active':(c.getAttribute("name")==active_chapter)})
+    return ch
+def dom_select(dom, element_type, element_name):
+    if dom==None:
+        return None
+    elements=dom.getElementsByTagName(element_type)
+    for e in elements:
+        if e.getAttribute("name")==element_name:
+            return e
+    return None
--- a/courseware/capa/inputtypes.py
+++ b/courseware/capa/inputtypes.py
+from djangomako.shortcuts import render_to_response, render_to_string
+from lxml.etree import Element
+from lxml import etree
+class textline(object):
+    @staticmethod
+    def render(element, value, state):
+        eid=element.get('id')
+        context = {'id':eid, 'value':value, 'state':state}
+        html=render_to_string("textinput.html", context)
+        return etree.XML(html)
+class schematic(object):
+    @staticmethod
+    def render(element, value, state):
+        eid = element.get('id')
+        height = element.get('height')
+        width = element.get('width')
+        context = {'id':eid, 'value':value, 'state':state, 'width':width, 'height':height}
+        html=render_to_string("schematicinput.html", context)
+        return etree.XML(html)
--- a/courseware/capa/responsetypes.py
+++ b/courseware/capa/responsetypes.py
+from util import contextualize_text
+from calc import evaluator
+import random, math
+class numericalresponse(object):
+    def __init__(self, xml, context):
+        self.xml = xml
+        self.correct_answer = contextualize_text(xml.get('answer'), context)
+        self.correct_answer = float(self.correct_answer)
+        self.tolerance = xml.xpath('//*[@id=$id]//responseparam[@type="tolerance"]/@default',
+                                   id=xml.get('id'))[0]
+        self.tolerance = contextualize_text(self.tolerance, context)
+        self.tolerance = evaluator(dict(),dict(),self.tolerance)
+        self.answer_id = xml.xpath('//*[@id=$id]//textline/@id',
+                                   id=xml.get('id'))[0]
+    def grade(self, student_answers):
+        ''' Display HTML for a numeric response '''
+        student_answer = student_answers[self.answer_id]
+        error = abs(evaluator(dict(),dict(),student_answer) - self.correct_answer)
+        allowed_error = abs(self.correct_answer*self.tolerance)
+        if error <= allowed_error:
+            return {self.answer_id:'correct'}
+        else:
+            return {self.answer_id:'incorrect'}
+    def get_answers(self):
+        return {self.answer_id:self.correct_answer}
+class customresponse(object):
+    def __init__(self, xml, context):
+        self.xml = xml
+        self.answer_id = xml.xpath('//*[@id=$id]//textline/@id',
+                                   id=xml.get('id'))[0]
+        return {self.answer_id:'correct'}
+    def grade(self, student_answers):
+        return {self.answer_id:'correct'}
+    def get_answers(self):
+        return {self.answer_id:'correct'}
+class formularesponse(object):
+    def __init__(self, xml, context):
+        self.xml = xml
+        self.correct_answer = contextualize_text(xml.get('answer'), context)
+        self.samples = contextualize_text(xml.get('samples'), context)
+        self.tolerance = xml.xpath('//*[@id=$id]//responseparam[@type="tolerance"]/@default',
+                                   id=xml.get('id'))[0]
+        self.tolerance = contextualize_text(self.tolerance, context)
+        self.tolerance = evaluator(dict(),dict(),self.tolerance)
+        self.answer_id = xml.xpath('//*[@id=$id]//textline/@id',
+                                   id=xml.get('id'))[0]
+        self.context = context
+    def grade(self, student_answers):
+        variables=self.samples.split('@')[0].split(',')
+        numsamples=int(self.samples.split('@')[1].split('#')[1])
+        sranges=zip(*map(lambda x:map(float, x.split(",")), 
+                         self.samples.split('@')[1].split('#')[0].split(':')))
+        ranges=dict(zip(variables, sranges))
+        correct = True
+        for i in range(numsamples):
+            instructor_variables = self.strip_dict(dict(self.context))
+            student_variables = dict()
+            for var in ranges:
+                value = random.uniform(*ranges[var])
+                instructor_variables[str(var)] = value
+                student_variables[str(var)] = value
+            instructor_result = evaluator(instructor_variables,dict(),self.correct_answer)
+            student_result = evaluator(student_variables,dict(),student_answers[self.answer_id])
+            if math.isnan(student_result) or math.isinf(student_result):
+                return {self.answer_id:"incorrect"}
+            if abs( student_result - instructor_result ) > self.tolerance:
+                return {self.answer_id:"incorrect"}
+        return {self.answer_id:"correct"}
+    def strip_dict(self, d):
+        ''' Takes a dict. Returns an identical dict, with all non-word
+        keys and all non-numeric values stripped out. All values also
+        converted to float. Used so we can safely use Python contexts.
+        ''' 
+        d=dict([(k, float(d[k])) for k in d if type(k)==str and \
+                    k.isalnum() and \
+                    (type(d[k]) == float or type(d[k]) == int) ])
+        return d
+    def get_answers(self):
+        return {self.answer_id:self.correct_answer}
--- a/courseware/capa/util.py
+++ b/courseware/capa/util.py
+def contextualize_text(text, context): # private
+    ''' Takes a string with variables. E.g. $a+$b. 
+    Does a substitution of those variables from the context '''
+    for key in sorted(context, lambda x,y:cmp(len(y),len(x))):
+        text=text.replace('$'+key, str(context[key]))
+    return text
--- a/courseware/capa_module.py
+++ b/courseware/capa_module.py
 import random, numpy, math, scipy, sys, StringIO, os, struct, json
 from x_module import XModule
+import sys
-from capa_problem import LoncapaProblem
+from capa.capa_problem import LoncapaProblem
 from django.http import Http404
 import dateutil
@@ -34,7 +35,7 @@ class LoncapaModule(XModule):
        return self.lcp.get_score()
    def max_score(self):
-        return len(self.lcp.questions)
+        return self.lcp.get_max_score()
    def get_html(self):
        return render_to_string('problem_ajax.html', 
@@ -162,7 +163,6 @@ class LoncapaModule(XModule):
            return json.dumps({"error":"Past due date"})
        elif dispatch=='problem_check': 
            response = self.check_problem(get)
-            print response
        elif dispatch=='problem_reset':
            response = self.reset_problem(get)
        elif dispatch=='problem_save':
@@ -238,16 +238,12 @@ class LoncapaModule(XModule):
            answers['_'.join(key.split('_')[1:])]=get[key]
        try:
-            print "A"
+            old_state = self.lcp.get_state()
-            ocm = self.lcp.correct_map
+            lcp_id = self.lcp.problem_id
-            print "."
+            filename = self.lcp.filename
-            oa = self.lcp.answers
-            print "."
            correct_map = self.lcp.grade_answers(answers)
-            print "."
        except: 
-            self.lcp.correct_map = ocm # HACK: Reset state
+            self.lcp = LoncapaProblem(filename, id=lcp_id, state=old_state)
-            self.lcp.answers = oa
            return json.dumps({'success':'syntax'})
        self.attempts = self.attempts + 1
@@ -279,7 +275,7 @@ class LoncapaModule(XModule):
        for key in get:
            answers['_'.join(key.split('_')[1:])]=get[key]
-        self.lcp.answers=answers
+        self.lcp.student_answers=answers
        return json.dumps({'success':True})

--- a/courseware/capa_problem.py
+++ b/courseware/capa_problem.py
--- a/util/views.py
+++ b/util/views.py
@@ -6,7 +6,7 @@ import json
 from django.conf import settings
 from django.core.context_processors import csrf
 from django.http import Http404
-import courseware.calc
+import courseware.capa.calc
 from django.core.mail import send_mail
 from django.conf import settings
 import datetime