Add chemcalc to capa package, to context for customresponse

d10b568c · Victor Shnayder · c30f5702 · d10b568c · d10b568c · d10b568c
Commit d10b568c authored Oct 08, 2012 by Victor Shnayder
Hide whitespace changes
Inline Side-by-side

Showing with 546 additions and 2 deletions

common/lib/capa/capa/capa_problem.py
+5 -2

common/lib/capa/capa/chem/__init__.py
+1 -0

common/lib/capa/capa/chem/chemcalc.py
+540 -0

No files found.
--- a/common/lib/capa/capa/capa_problem.py
+++ b/common/lib/capa/capa/capa_problem.py
@@ -30,6 +30,8 @@ import sys
 from lxml import etree
 from xml.sax.saxutils import unescape
+import chem
+import chem.chemcalc
 import calc
 from correctmap import CorrectMap
 import eia
@@ -72,7 +74,8 @@ global_context = {'random': random,
                  'math': math,
                  'scipy': scipy,
                  'calc': calc,
-                  'eia': eia}
+                  'eia': eia,
+                  'chemcalc': chem.chemcalc}
 # These should be removed from HTML output, including all subelements
 html_problem_semantics = ["codeparam", "responseparam", "answer", "script", "hintgroup"]
@@ -436,7 +439,7 @@ class LoncapaProblem(object):
            sys.path = original_path + self._extract_system_path(script)
            stype = script.get('type')
            if stype:
                if 'javascript' in stype:
                    continue    # skip javascript

--- a/common/lib/capa/capa/chem/__init__.py
+++ b/common/lib/capa/capa/chem/__init__.py
--- a/common/lib/capa/capa/chem/chemcalc.py
+++ b/common/lib/capa/capa/chem/chemcalc.py
+from __future__ import division
+import copy
+import logging
+import math
+import operator
+import re
+import unittest
+import numpy
+import numbers
+import scipy.constants
+from pyparsing import Literal, Keyword, Word, nums, StringEnd, Optional, Forward, OneOrMore
+from pyparsing import ParseException
+import nltk
+from nltk.tree import Tree
+local_debug = None
+def log(s, output_type=None):
+    if local_debug:
+        print s
+        if output_type == 'html':
+            f.write(s + '\n<br>\n')
+## Defines a simple pyparsing tokenizer for chemical equations
+elements = ['Ac','Ag','Al','Am','Ar','As','At','Au','B','Ba','Be',
+            'Bh','Bi','Bk','Br','C','Ca','Cd','Ce','Cf','Cl','Cm',
+            'Cn','Co','Cr','Cs','Cu','Db','Ds','Dy','Er','Es','Eu',
+            'F','Fe','Fl','Fm','Fr','Ga','Gd','Ge','H','He','Hf',
+            'Hg','Ho','Hs','I','In','Ir','K','Kr','La','Li','Lr',
+            'Lu','Lv','Md','Mg','Mn','Mo','Mt','N','Na','Nb','Nd',
+            'Ne','Ni','No','Np','O','Os','P','Pa','Pb','Pd','Pm',
+            'Po','Pr','Pt','Pu','Ra','Rb','Re','Rf','Rg','Rh','Rn',
+            'Ru','S','Sb','Sc','Se','Sg','Si','Sm','Sn','Sr','Ta',
+            'Tb','Tc','Te','Th','Ti','Tl','Tm','U','Uuo','Uup',
+            'Uus','Uut','V','W','Xe','Y','Yb','Zn','Zr']
+digits = map(str, range(10))
+symbols = list("[](){}^+-/")
+phases = ["(s)", "(l)", "(g)", "(aq)"]
+tokens = reduce(lambda a, b: a ^ b, map(Literal, elements + digits + symbols + phases))
+tokenizer = OneOrMore(tokens) + StringEnd()
+def orjoin(l):
+    return "'" + "' | '".join(l) + "'"
+## Defines an NLTK parser for tokenized equations
+grammar = """
+  S -> multimolecule | multimolecule '+' S
+  multimolecule -> count molecule | molecule
+  count -> number | number '/' number
+  molecule -> unphased | unphased phase
+  unphased -> group | paren_group_round | paren_group_square
+  element -> """ + orjoin(elements) + """
+  digit -> """ + orjoin(digits) + """
+  phase -> """ + orjoin(phases) + """
+  number -> digit | digit number
+  group -> suffixed | suffixed group
+  paren_group_round -> '(' group ')'
+  paren_group_square -> '[' group ']'
+  plus_minus -> '+' | '-'
+  number_suffix -> number
+  ion_suffix -> '^' number plus_minus | '^' plus_minus
+  suffix -> number_suffix | number_suffix ion_suffix | ion_suffix
+  unsuffixed -> element | paren_group_round | paren_group_square
+  suffixed -> unsuffixed | unsuffixed suffix
+"""
+parser = nltk.ChartParser(nltk.parse_cfg(grammar))
+def clean_parse_tree(tree):
+    ''' The parse tree contains a lot of redundant
+    nodes. E.g. paren_groups have groups as children, etc. This will
+    clean up the tree.
+    '''
+    def unparse_number(n):
+        ''' Go from a number parse tree to a number '''
+        if len(n) == 1:
+            rv = n[0][0]
+        else:
+            rv = n[0][0] + unparse_number(n[1])
+        return rv
+    def null_tag(n):
+        ''' Remove a tag '''
+        return n[0]
+    def ion_suffix(n):
+        '''1. "if" part handles special case
+           2. "else" part is general behaviour '''
+        if n[1:][0].node == 'number' and n[1:][0][0][0] == '1':
+            # if suffix is explicitly 1, like ^1-
+            # strip 1, leave only sign: ^-
+            return nltk.tree.Tree(n.node, n[2:])
+        else:
+            return nltk.tree.Tree(n.node, n[1:])
+    dispatch = {'number': lambda x: nltk.tree.Tree("number", [unparse_number(x)]),
+                'unphased': null_tag,
+                'unsuffixed': null_tag,
+                'number_suffix': lambda x: nltk.tree.Tree('number_suffix', [unparse_number(x[0])]),
+                'suffixed': lambda x: len(x) > 1 and x or x[0],
+                'ion_suffix': ion_suffix,
+                'paren_group_square': lambda x: nltk.tree.Tree(x.node, x[1]),
+                'paren_group_round': lambda x: nltk.tree.Tree(x.node, x[1])}
+    if type(tree) == str:
+        return tree
+    old_node = None
+    ## This loop means that if a node is processed, and returns a child,
+    ## the child will be processed.
+    while tree.node in dispatch and tree.node != old_node:
+        old_node = tree.node
+        tree = dispatch[tree.node](tree)
+    children = []
+    for child in tree:
+        child = clean_parse_tree(child)
+        children.append(child)
+    tree = nltk.tree.Tree(tree.node, children)
+    return tree
+def merge_children(tree, tags):
+    ''' nltk, by documentation, cannot do arbitrary length
+    groups. Instead of:
+    (group 1 2 3 4)
+    It has to handle this recursively:
+    (group 1 (group 2 (group 3 (group 4))))
+    We do the cleanup of converting from the latter to the former (as a
+    '''
+    if type(tree) == str:
+        return tree
+    merged_children = []
+    done = False
+    #print '00000', tree
+    ## Merge current tag
+    while not done:
+        done = True
+        for child in tree:
+            if type(child) == nltk.tree.Tree and child.node == tree.node and tree.node in tags:
+                merged_children = merged_children + list(child)
+                done = False
+            else:
+                merged_children = merged_children + [child]
+        tree = nltk.tree.Tree(tree.node, merged_children)
+        merged_children = []
+    #print '======',tree
+    # And recurse
+    children = []
+    for child in tree:
+        children.append(merge_children(child, tags))
+    #return tree
+    return nltk.tree.Tree(tree.node, children)
+def render_to_html(tree):
+    ''' Renders a cleaned tree to HTML '''
+    def molecule_count(tree, children):
+        # If an integer, return that integer
+        if len(tree) == 1:
+            return tree[0][0]
+        # If a fraction, return the fraction
+        if len(tree) == 3:
+            return " <sup>{num}</sup>&frasl;<sub>{den}</sub> ".format(num=tree[0][0], den=tree[2][0])
+        return "Error"
+    def subscript(tree, children):
+        return "<sub>{sub}</sub>".format(sub=children)
+    def superscript(tree, children):
+        return "<sup>{sup}</sup>".format(sup=children)
+    def round_brackets(tree, children):
+        return "({insider})".format(insider=children)
+    def square_brackets(tree, children):
+        return "[{insider}]".format(insider=children)
+    dispatch = {'count': molecule_count,
+                'number_suffix': subscript,
+                'ion_suffix': superscript,
+                'paren_group_round': round_brackets,
+                'paren_group_square': square_brackets}
+    if type(tree) == str:
+        return tree
+    else:
+        children = "".join(map(render_to_html, tree))
+        if tree.node in dispatch:
+            return dispatch[tree.node](tree, children)
+        else:
+            return children.replace(' ', '')
+def clean_and_render_to_html(s):
+    ''' render a string to html '''
+    status = render_to_html(get_finale_tree(s))
+    return status
+def get_finale_tree(s):
+    '''  return final tree after merge and clean  '''
+    tokenized = tokenizer.parseString(s)
+    parsed = parser.parse(tokenized)
+    merged = merge_children(parsed, {'S','group'})
+    final = clean_parse_tree(merged)
+    return final
+def check_equality(tuple1, tuple2):
+    ''' return True if tuples of multimolecules are equal '''
+    list1 = list(tuple1)
+    list2 = list(tuple2)
+    # Hypo: trees where are levels count+molecule vs just molecule
+    # cannot be sorted properly (tested on test_complex_additivity)
+    # But without factors and phases sorting seems to work.
+    # Also for lists of multimolecules without factors and phases
+    # sorting seems to work fine.
+    list1.sort()
+    list2.sort()
+    return list1 == list2
+def compare_chemical_expression(s1, s2, ignore_state=False):
+    ''' It does comparison between two equations.
+        It uses divide_chemical_expression and check if division is 1
+    '''
+    return divide_chemical_expression(s1, s2, ignore_state) == 1
+def divide_chemical_expression(s1, s2, ignore_state=False):
+    ''' Compare chemical equations for difference
+    in factors. Ideas:
+        - extract factors and phases to standalone lists,
+        - compare equations without factors and phases,
+        - divide lists of factors for each other and check
+             for equality of every element in list,
+        - return result of factor division '''
+    # parsed final trees
+    treedic = {}
+    treedic['1'] = get_finale_tree(s1)
+    treedic['2'] = get_finale_tree(s2)
+    # strip phases and factors
+    # collect factors in list
+    for i in ('1', '2'):
+        treedic[i + ' cleaned_mm_list'] = []
+        treedic[i + ' factors'] = []
+        treedic[i + ' phases'] = []
+        for el in treedic[i].subtrees(filter=lambda t: t.node == 'multimolecule'):
+            count_subtree = [t for t in el.subtrees() if t.node == 'count']
+            group_subtree = [t for t in el.subtrees() if t.node == 'group']
+            phase_subtree = [t for t in el.subtrees() if t.node == 'phase']
+            if count_subtree:
+                if len(count_subtree[0]) > 1:
+                    treedic[i + ' factors'].append(
+                        int(count_subtree[0][0][0]) /
+                        int(count_subtree[0][2][0]))
+                else:
+                    treedic[i + ' factors'].append(int(count_subtree[0][0][0]))
+            else:
+                treedic[i + ' factors'].append(1.0)
+            if phase_subtree:
+                treedic[i + ' phases'].append(phase_subtree[0][0])
+            else:
+                treedic[i + ' phases'].append(' ')
+            treedic[i + ' cleaned_mm_list'].append(
+                Tree('multimolecule', [Tree('molecule', group_subtree)]))
+    # order of factors and phases must mirror the order of multimolecules,
+    # use 'decorate, sort, undecorate' pattern
+    treedic['1 cleaned_mm_list'], treedic['1 factors'], treedic['1 phases'] = zip(
+        *sorted(zip(treedic['1 cleaned_mm_list'], treedic['1 factors'], treedic['1 phases'])))
+    treedic['2 cleaned_mm_list'], treedic['2 factors'], treedic['2 phases'] = zip(
+        *sorted(zip(treedic['2 cleaned_mm_list'], treedic['2 factors'], treedic['2 phases'])))
+    # check if equations are correct without factors
+    if not check_equality(treedic['1 cleaned_mm_list'], treedic['2 cleaned_mm_list']):
+        return False
+    # phases are ruled by ingore_state flag
+    if not ignore_state:  # phases matters
+        if treedic['1 phases'] != treedic['2 phases']:
+            return False
+    if any(map(lambda x, y: x / y - treedic['1 factors'][0] / treedic['2 factors'][0],
+                                         treedic['1 factors'], treedic['2 factors'])):
+        log('factors are not proportional')
+        return False
+    else:  # return ratio
+        return int(max(treedic['1 factors'][0] / treedic['2 factors'][0],
+                    treedic['2 factors'][0] / treedic['1 factors'][0]))
+class Test_Compare_Equations(unittest.TestCase):
+        def test_compare_incorrect_order_of_atoms_in_molecule(self):
+            self.assertFalse(compare_chemical_expression("H2O + CO2", "O2C + OH2"))
+        def test_compare_same_order_no_phases_no_factors_no_ions(self):
+            self.assertTrue(compare_chemical_expression("H2O + CO2", "CO2+H2O"))
+        def test_compare_different_order_no_phases_no_factors_no_ions(self):
+            self.assertTrue(compare_chemical_expression("H2O + CO2", "CO2 + H2O"))
+        def test_compare_different_order_three_multimolecule(self):
+            self.assertTrue(compare_chemical_expression("H2O + Fe(OH)3 +  CO2", "CO2 + H2O + Fe(OH)3"))
+        def test_compare_same_factors(self):
+            self.assertTrue(compare_chemical_expression("3H2O +  2CO2", "2CO2 + 3H2O "))
+        def test_compare_different_factors(self):
+            self.assertFalse(compare_chemical_expression("2H2O +  3CO2", "2CO2 + 3H2O "))
+        def test_compare_correct_ions(self):
+            self.assertTrue(compare_chemical_expression("H^+ + OH^-", " OH^- + H^+ "))
+        def test_compare_wrong_ions(self):
+            self.assertFalse(compare_chemical_expression("H^+ + OH^-", " OH^- + H^- "))
+        def test_compare_parent_groups_ions(self):
+            self.assertTrue(compare_chemical_expression("Fe(OH)^2- + (OH)^-", " (OH)^- + Fe(OH)^2- "))
+        def test_compare_correct_factors_ions_and_one(self):
+            self.assertTrue(compare_chemical_expression("3H^+ + 2OH^-", " 2OH^- + 3H^+ "))
+        def test_compare_wrong_factors_ions(self):
+            self.assertFalse(compare_chemical_expression("2H^+ + 3OH^-", " 2OH^- + 3H^+ "))
+        def test_compare_float_factors(self):
+            self.assertTrue(compare_chemical_expression("7/2H^+ + 3/5OH^-", " 3/5OH^- + 7/2H^+ "))
+        # Phases tests
+        def test_compare_phases_ignored(self):
+            self.assertTrue(compare_chemical_expression(
+                "H2O(s) + CO2", "H2O+CO2", ignore_state=True))
+        def test_compare_phases_not_ignored_explicitly(self):
+            self.assertFalse(compare_chemical_expression(
+                "H2O(s) + CO2", "H2O+CO2", ignore_state=False))
+        def test_compare_phases_not_ignored(self):  # same as previous
+            self.assertFalse(compare_chemical_expression(
+                "H2O(s) + CO2", "H2O+CO2"))
+        def test_compare_phases_not_ignored_explicitly(self):
+            self.assertTrue(compare_chemical_expression(
+                "H2O(s) + CO2", "H2O(s)+CO2", ignore_state=False))
+        # all in one cases
+        def test_complex_additivity(self):
+            self.assertTrue(compare_chemical_expression(
+                "5(H1H212)^70010- + 2H20 + 7/2HCl + H2O",
+                "7/2HCl + 2H20 + H2O + 5(H1H212)^70010-"))
+        def test_complex_additivity_wrong(self):
+            self.assertFalse(compare_chemical_expression(
+                "5(H1H212)^70010- + 2H20 + 7/2HCl + H2O",
+                "2H20 + 7/2HCl + H2O + 5(H1H212)^70011-"))
+        def test_complex_all_grammar(self):
+            self.assertTrue(compare_chemical_expression(
+                "5[Ni(NH3)4]^2+ + 5/2SO4^2-",
+                "5/2SO4^2- + 5[Ni(NH3)4]^2+"))
+        # special cases
+        def test_compare_one_superscript_explicitly_set(self):
+            self.assertTrue(compare_chemical_expression("H^+ + OH^1-", " OH^- + H^+ "))
+        def test_compare_equal_factors_differently_set(self):
+            self.assertTrue(compare_chemical_expression("6/2H^+ + OH^-", " OH^- + 3H^+ "))
+        def test_compare_one_subscript_explicitly_set(self):
+            self.assertFalse(compare_chemical_expression("H2 + CO2", "H2 + C102"))
+class Test_Divide_Equations(unittest.TestCase):
+    ''' as compare_ use divide_,
+    tests here must consider different
+    division (not equality) cases '''
+    def test_divide_wrong_factors(self):
+        self.assertFalse(divide_chemical_expression(
+            "5(H1H212)^70010- + 10H2O", "5H2O + 10(H1H212)^70010-"))
+    def test_divide_right(self):
+        self.assertEqual(divide_chemical_expression(
+            "5(H1H212)^70010- + 10H2O", "10H2O + 5(H1H212)^70010-"), 1)
+    def test_divide_wrong_reagents(self):
+        self.assertFalse(divide_chemical_expression(
+            "H2O + CO2", "CO2"))
+    def test_divide_right_simple(self):
+        self.assertEqual(divide_chemical_expression(
+            "H2O + CO2", "H2O+CO2"), 1)
+    def test_divide_right_phases(self):
+        self.assertEqual(divide_chemical_expression(
+            "H2O(s) + CO2", "2H2O(s)+2CO2"), 2)
+    def test_divide_wrong_phases(self):
+        self.assertFalse(divide_chemical_expression(
+            "H2O(s) + CO2", "2H2O+2CO2(s)"))
+    def test_divide_wrong_phases_but_phases_ignored(self):
+        self.assertEqual(divide_chemical_expression(
+            "H2O(s) + CO2", "2H2O+2CO2(s)", ignore_state=True), 2)
+    def test_divide_order(self):
+        self.assertEqual(divide_chemical_expression(
+            "2CO2 + H2O", "2H2O+4CO2"), 2)
+    def test_divide_fract_to_int(self):
+        self.assertEqual(divide_chemical_expression(
+            "3/2CO2 + H2O", "2H2O+3CO2"), 2)
+    def test_divide_fract_to_frac(self):
+        self.assertEqual(divide_chemical_expression(
+            "3/4CO2 + H2O", "2H2O+9/6CO2"), 2)
+    def test_divide_fract_to_frac_wrog(self):
+        self.assertFalse(divide_chemical_expression(
+            "6/2CO2 + H2O", "2H2O+9/6CO2"), 2)
+class Test_Render_Equations(unittest.TestCase):
+        def test_render1(self):
+            s = "H2O + CO2"
+            out = clean_and_render_to_html(s)
+            correct = "H<sub>2</sub>O+CO<sub>2</sub>"
+            log(out + ' ------- ' + correct, 'html')
+            self.assertEqual(out, correct)
+        def test_render_uncorrect_reaction(self):
+            s = "O2C + OH2"
+            out = clean_and_render_to_html(s)
+            correct = "O<sub>2</sub>C+OH<sub>2</sub>"
+            log(out + ' ------- ' + correct, 'html')
+            self.assertEqual(out, correct)
+        def test_render2(self):
+            s = "CO2 + H2O + Fe(OH)3"
+            out = clean_and_render_to_html(s)
+            correct = "CO<sub>2</sub>+H<sub>2</sub>O+Fe(OH)<sub>3</sub>"
+            log(out + ' ------- ' + correct, 'html')
+            self.assertEqual(out, correct)
+        def test_render3(self):
+            s = "3H2O + 2CO2"
+            out = clean_and_render_to_html(s)
+            correct = "3H<sub>2</sub>O+2CO<sub>2</sub>"
+            log(out + ' ------- ' + correct, 'html')
+            self.assertEqual(out, correct)
+        def test_render4(self):
+            s = "H^+ + OH^-"
+            out = clean_and_render_to_html(s)
+            correct = "H<sup>+</sup>+OH<sup>-</sup>"
+            log(out + ' ------- ' + correct, 'html')
+            self.assertEqual(out, correct)
+        def test_render5(self):
+            s = "Fe(OH)^2- + (OH)^-"
+            out = clean_and_render_to_html(s)
+            correct = "Fe(OH)<sup>2-</sup>+(OH)<sup>-</sup>"
+            log(out + ' ------- ' + correct, 'html')
+            self.assertEqual(out, correct)
+        def test_render6(self):
+            s = "7/2H^+ + 3/5OH^-"
+            out = clean_and_render_to_html(s)
+            correct = "<sup>7</sup>&frasl;<sub>2</sub>H<sup>+</sup>+<sup>3</sup>&frasl;<sub>5</sub>OH<sup>-</sup>"
+            log(out + ' ------- ' + correct, 'html')
+            self.assertEqual(out, correct)
+        def test_render7(self):
+            s = "5(H1H212)^70010- + 2H2O + 7/2HCl + H2O"
+            out = clean_and_render_to_html(s)
+            correct = "5(H<sub>1</sub>H<sub>212</sub>)<sup>70010-</sup>+2H<sub>2</sub>O+<sup>7</sup>&frasl;<sub>2</sub>HCl+H<sub>2</sub>O"
+            log(out + ' ------- ' + correct, 'html')
+            self.assertEqual(out, correct)
+        def test_render8(self):
+            s = "H2O(s) + CO2"
+            out = clean_and_render_to_html(s)
+            correct = "H<sub>2</sub>O(s)+CO<sub>2</sub>"
+            log(out + ' ------- ' + correct, 'html')
+            self.assertEqual(out, correct)
+        def test_render9(self):
+            s = "5[Ni(NH3)4]^2+ + 5/2SO4^2-"
+            #import ipdb; ipdb.set_trace()
+            out = clean_and_render_to_html(s)
+            correct = "5[Ni(NH<sub>3</sub>)<sub>4</sub>]<sup>2+</sup>+<sup>5</sup>&frasl;<sub>2</sub>SO<sub>4</sub><sup>2-</sup>"
+            log(out + ' ------- ' + correct, 'html')
+            self.assertEqual(out, correct)
+        def test_render_error(self):
+            s = "5.2H20"
+            self.assertRaises(ParseException, clean_and_render_to_html, s)
+        def test_render_simple_brackets(self):
+            s = "(Ar)"
+            out = clean_and_render_to_html(s)
+            correct = "(Ar)"
+            log(out + ' ------- ' + correct, 'html')
+            self.assertEqual(out, correct)
+def suite():
+    testcases = [Test_Compare_Equations, Test_Divide_Equations, Test_Render_Equations]
+    suites = []
+    for testcase in testcases:
+        suites.append(unittest.TestLoader().loadTestsFromTestCase(testcase))
+    return unittest.TestSuite(suites)
+if __name__ == "__main__":
+    local_debug = True
+    with open('render.html', 'w') as f:
+        unittest.TextTestRunner(verbosity=2).run(suite())
+    # open render.html to look at rendered equations