Commit 8211600b by Peter Baratta

Merge pull request #726 from edx/peterb/numerical/evaluate-staff

Use calc's evaluator for staff answers in `Numerical` type
parents f14e9302 e0f0ab09
......@@ -7,6 +7,10 @@ the top. Include a label indicating the component affected.
Blades: Took videoalpha out of alpha, replacing the old video player
Common: Allow instructors to input complicated expressions as answers to
`NumericalResponse`s. Prior to the change, only numbers were allowed; now any
answer from '1/3' to 'sqrt(12)*(1-1/3^2+1/5/3^2)' is valid (see the sketch below).
LMS: Enable beta instructor dashboard. The beta dashboard is a rearchitecture
of the existing instructor dashboard and is available by clicking a link at
the top right of the existing dashboard.
......
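For a concrete sense of the change: `calc`'s `evaluator` takes a variables dict, a functions dict, and the expression string (the same call shape used throughout the diff below). A minimal sketch, assuming `evaluator` is importable from `calc` as the tests' `import calc` suggests:

from calc import evaluator

print evaluator({}, {}, '1/3')                          # 0.3333...
print evaluator({}, {}, 'sqrt(12)*(1-1/3^2+1/5/3^2)')   # ~3.156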
......@@ -17,6 +17,7 @@ import logging
import numbers
import numpy
import os
from pyparsing import ParseException
import sys
import random
import re
......@@ -826,45 +827,89 @@ class NumericalResponse(LoncapaResponse):
required_attributes = ['answer']
max_inputfields = 1
def __init__(self, *args, **kwargs):
self.correct_answer = ''
self.tolerance = '0' # Default value
super(NumericalResponse, self).__init__(*args, **kwargs)
def setup_response(self):
xml = self.xml
context = self.context
self.correct_answer = contextualize_text(xml.get('answer'), context)
# Find the tolerance
tolerance_xml = xml.xpath(
'//*[@id=$id]//responseparam[@type="tolerance"]/@default',
id=xml.get('id')
)
if tolerance_xml: # If it isn't an empty list...
self.tolerance = contextualize_text(tolerance_xml[0], context)
def get_staff_ans(self):
"""
Given the staff answer as a string, find its float value.
Use `evaluator` for this, but for backward compatibility, try the
built-in method `complex` (which used to be the standard).
"""
        try:
            correct_ans = complex(self.correct_answer)
except ValueError:
# When `correct_answer` is not of the form X+Yj, it raises a
# `ValueError`. Then test if instead it is a math expression.
            # `complex` seems to raise only `ValueError`, so catch just that.
try:
correct_ans = evaluator({}, {}, self.correct_answer)
except Exception:
log.debug("Content error--answer '%s' is not a valid number", self.correct_answer)
raise StudentInputError(
"There was a problem with the staff answer to this problem"
)
return correct_ans
def get_score(self, student_answers):
        """Grade a numeric response."""
student_answer = student_answers[self.answer_id]
        correct_float = self.get_staff_ans()
        general_exception = StudentInputError(
            u"Could not interpret '{0}' as a number".format(cgi.escape(student_answer))
        )
# Begin `evaluator` block
# Catch a bunch of exceptions and give nicer messages to the student.
try:
student_float = evaluator({}, {}, student_answer)
except UndefinedVariable as undef_var:
raise StudentInputError(
u"You may not use variables ({0}) in numerical problems".format(undef_var.message)
)
except ValueError as val_err:
if 'factorial' in val_err.message:
# This is thrown when fact() or factorial() is used in an answer
# that evaluates on negative and/or non-integer inputs
                # val_err.message will be: `factorial() only accepts integral values` or
# `factorial() not defined for negative values`
raise StudentInputError(
("factorial function evaluated outside its domain:"
"'{0}'").format(cgi.escape(student_answer))
)
else:
raise general_exception
except ParseException:
raise StudentInputError(
u"Invalid math syntax: '{0}'".format(cgi.escape(student_answer))
)
except Exception:
raise general_exception
# End `evaluator` block -- we figured out the student's answer!
correct = compare_with_tolerance(
student_float, correct_float, self.tolerance
)
if correct:
return CorrectMap(self.answer_id, 'correct')
        else:
            return CorrectMap(self.answer_id, 'incorrect')
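A rough sketch (not part of the diff) of how the two parsers in `get_staff_ans` divide the work, reusing `evaluator` from `calc`:

from calc import evaluator

complex('1+1j')                      # (1+1j) -- the legacy staff-answer form
try:
    complex('1/3')                   # raises ValueError: not of the form X+Yj
except ValueError:
    print evaluator({}, {}, '1/3')   # 0.3333... -- the new expression path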
......@@ -1691,18 +1736,26 @@ class FormulaResponse(LoncapaResponse):
required_attributes = ['answer', 'samples']
max_inputfields = 1
def __init__(self, *args, **kwargs):
self.correct_answer = ''
self.samples = ''
self.tolerance = '1e-5' # Default value
self.case_sensitive = False
super(FormulaResponse, self).__init__(*args, **kwargs)
def setup_response(self):
xml = self.xml
context = self.context
self.correct_answer = contextualize_text(xml.get('answer'), context)
self.samples = contextualize_text(xml.get('samples'), context)
        # Find the tolerance
        tolerance_xml = xml.xpath(
            '//*[@id=$id]//responseparam[@type="tolerance"]/@default',
            id=xml.get('id')
        )
        if tolerance_xml:  # If it isn't an empty list...
            self.tolerance = contextualize_text(tolerance_xml[0], context)
ts = xml.get('type')
if ts is None:
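In isolation, the new tolerance lookup is just an emptiness check on the list that `xpath` returns, replacing the old index-and-catch-`IndexError` approach. A sketch, using a hypothetical `xml` element like the one in `setup_response`:

matches = xml.xpath('//responseparam[@type="tolerance"]/@default')
if matches:  # empty list -> keep the default tolerance set in __init__
    tolerance = matches[0]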
......@@ -1734,7 +1787,7 @@ class FormulaResponse(LoncapaResponse):
ranges = dict(zip(variables, sranges))
for _ in range(numsamples):
instructor_variables = self.strip_dict(dict(self.context))
            student_variables = {}
# ranges give numerical ranges for testing
for var in ranges:
# TODO: allow specified ranges (i.e. integers and complex numbers) for random variables
......@@ -1746,7 +1799,7 @@ class FormulaResponse(LoncapaResponse):
# Call `evaluator` on the instructor's answer and get a number
instructor_result = evaluator(
                instructor_variables, {},
expected, case_sensitive=self.case_sensitive
)
try:
......@@ -1756,7 +1809,7 @@ class FormulaResponse(LoncapaResponse):
# Call `evaluator` on the student's answer; look for exceptions
student_result = evaluator(
student_variables,
                    {},
given,
case_sensitive=self.case_sensitive
)
......@@ -2422,7 +2475,7 @@ class ChoiceTextResponse(LoncapaResponse):
# if all that is important is verifying numericality
try:
partial_correct = compare_with_tolerance(
                evaluator({}, {}, answer_value),
correct_ans,
tolerance
)
......
......@@ -5,12 +5,14 @@ Tests of responsetypes
from datetime import datetime
import json
import os
import pyparsing
import random
import unittest
import textwrap
import mock
from . import new_loncapa_problem, test_system
import calc
from capa.responsetypes import LoncapaProblemError, \
StudentInputError, ResponseError
......@@ -22,7 +24,7 @@ from pytz import UTC
class ResponseTest(unittest.TestCase):
""" Base class for tests of capa responses."""
"""Base class for tests of capa responses."""
xml_factory_class = None
......@@ -442,91 +444,6 @@ class FormulaResponseTest(ResponseTest):
self.assert_grade(problem, '2*x', 'correct')
self.assert_grade(problem, '3*x', 'incorrect')
def test_parallel_resistors(self):
"""
Test parallel resistors
"""
sample_dict = {'R1': (10, 10), 'R2': (2, 2), 'R3': (5, 5), 'R4': (1, 1)}
# Test problem
problem = self.build_problem(sample_dict=sample_dict,
num_samples=10,
tolerance=0.01,
answer="R1||R2")
# Expect answer to be marked correct
input_formula = "R1||R2"
self.assert_grade(problem, input_formula, "correct")
# Expect random number to be marked incorrect
input_formula = "13"
self.assert_grade(problem, input_formula, "incorrect")
# Expect incorrect answer marked incorrect
input_formula = "R3||R4"
self.assert_grade(problem, input_formula, "incorrect")
def test_default_variables(self):
"""
Test the default variables provided in calc.py
which are: j (complex number), e, pi, k, c, T, q
"""
# Sample x in the range [-10,10]
sample_dict = {'x': (-10, 10)}
default_variables = [('j', 2, 3), ('e', 2, 3), ('pi', 2, 3), ('c', 2, 3), ('T', 2, 3),
('k', 2 * 10 ** 23, 3 * 10 ** 23), # note k = scipy.constants.k = 1.3806488e-23
                             ('q', 2 * 10 ** 19, 3 * 10 ** 19)]  # note q = scipy.constants.e = 1.602176565e-19
for (var, cscalar, iscalar) in default_variables:
# The expected solution is numerically equivalent to cscalar*var
correct = '{0}*x*{1}'.format(cscalar, var)
incorrect = '{0}*x*{1}'.format(iscalar, var)
problem = self.build_problem(sample_dict=sample_dict,
num_samples=10,
tolerance=0.01,
answer=correct)
# Expect that the inputs are graded correctly
self.assert_grade(problem, correct, 'correct',
msg="Failed on variable {0}; the given, correct answer was {1} but graded 'incorrect'".format(var, correct))
self.assert_grade(problem, incorrect, 'incorrect',
msg="Failed on variable {0}; the given, incorrect answer was {1} but graded 'correct'".format(var, incorrect))
def test_default_functions(self):
"""
Test the default functions provided in common/lib/capa/capa/calc.py
which are:
sin, cos, tan, sqrt, log10, log2, ln,
arccos, arcsin, arctan, abs,
fact, factorial
"""
w = random.randint(3, 10)
sample_dict = {'x': (-10, 10), # Sample x in the range [-10,10]
'y': (1, 10), # Sample y in the range [1,10] - logs, arccos need positive inputs
                       'z': (-1, 1),   # Sample z in the range [-1,1] - for arcsin, arctan
'w': (w, w)} # Sample w is a random, positive integer - factorial needs a positive, integer input,
# and the way formularesponse is defined, we can only specify a float range
default_functions = [('sin', 2, 3, 'x'), ('cos', 2, 3, 'x'), ('tan', 2, 3, 'x'), ('sqrt', 2, 3, 'y'), ('log10', 2, 3, 'y'),
('log2', 2, 3, 'y'), ('ln', 2, 3, 'y'), ('arccos', 2, 3, 'z'), ('arcsin', 2, 3, 'z'), ('arctan', 2, 3, 'x'),
('abs', 2, 3, 'x'), ('fact', 2, 3, 'w'), ('factorial', 2, 3, 'w')]
for (func, cscalar, iscalar, var) in default_functions:
print 'func is: {0}'.format(func)
# The expected solution is numerically equivalent to cscalar*func(var)
correct = '{0}*{1}({2})'.format(cscalar, func, var)
incorrect = '{0}*{1}({2})'.format(iscalar, func, var)
problem = self.build_problem(sample_dict=sample_dict,
num_samples=10,
tolerance=0.01,
answer=correct)
# Expect that the inputs are graded correctly
self.assert_grade(problem, correct, 'correct',
msg="Failed on function {0}; the given, correct answer was {1} but graded 'incorrect'".format(func, correct))
self.assert_grade(problem, incorrect, 'incorrect',
msg="Failed on function {0}; the given, incorrect answer was {1} but graded 'correct'".format(func, incorrect))
def test_grade_infinity(self):
"""
Test that a large input on a problem with relative tolerance isn't
......@@ -885,92 +802,118 @@ class NumericalResponseTest(ResponseTest):
from capa.tests.response_xml_factory import NumericalResponseXMLFactory
xml_factory_class = NumericalResponseXMLFactory
    # We blur the line between integration (using evaluator) and exclusively
    # unit testing the NumericalResponse (mocking out the evaluator);
    # for simple things it's not worth the effort.
def test_grade_exact(self):
problem = self.build_problem(question_text="What is 2 + 2?",
explanation="The answer is 4",
answer=4)
problem = self.build_problem(answer=4)
correct_responses = ["4", "4.0", "4.00"]
incorrect_responses = ["", "3.9", "4.1", "0"]
self.assert_multiple_grade(problem, correct_responses, incorrect_responses)
def test_grade_decimal_tolerance(self):
problem = self.build_problem(question_text="What is 2 + 2 approximately?",
explanation="The answer is 4",
answer=4,
tolerance=0.1)
problem = self.build_problem(answer=4, tolerance=0.1)
correct_responses = ["4.0", "4.00", "4.09", "3.91"]
incorrect_responses = ["", "4.11", "3.89", "0"]
self.assert_multiple_grade(problem, correct_responses, incorrect_responses)
def test_grade_percent_tolerance(self):
problem = self.build_problem(question_text="What is 2 + 2 approximately?",
explanation="The answer is 4",
answer=4,
tolerance="10%")
problem = self.build_problem(answer=4, tolerance="10%")
correct_responses = ["4.0", "4.3", "3.7", "4.30", "3.70"]
incorrect_responses = ["", "4.5", "3.5", "0"]
self.assert_multiple_grade(problem, correct_responses, incorrect_responses)
def test_grade_with_script(self):
script_text = "computed_response = math.sqrt(4)"
problem = self.build_problem(question_text="What is sqrt(4)?",
explanation="The answer is 2",
answer="$computed_response",
script=script_text)
problem = self.build_problem(answer="$computed_response", script=script_text)
correct_responses = ["2", "2.0"]
incorrect_responses = ["", "2.01", "1.99", "0"]
self.assert_multiple_grade(problem, correct_responses, incorrect_responses)
def test_grade_with_script_and_tolerance(self):
script_text = "computed_response = math.sqrt(4)"
problem = self.build_problem(question_text="What is sqrt(4)?",
explanation="The answer is 2",
answer="$computed_response",
tolerance="0.1",
script=script_text)
correct_responses = ["2", "2.0", "2.05", "1.95"]
incorrect_responses = ["", "2.11", "1.89", "0"]
self.assert_multiple_grade(problem, correct_responses, incorrect_responses)
def test_raises_zero_division_err(self):
"""See if division by zero is handled correctly."""
problem = self.build_problem(answer="1") # Answer doesn't matter
input_dict = {'1_2_1': '1/0'}
with self.assertRaises(StudentInputError):
problem.grade_answers(input_dict)
def test_exponential_answer(self):
problem = self.build_problem(question_text="What 5 * 10?",
explanation="The answer is 50",
answer="5e+1")
correct_responses = ["50", "50.0", "5e1", "5e+1", "50e0", "500e-1"]
incorrect_responses = ["", "3.9", "4.1", "0", "5.01e1"]
def test_staff_inputs_expressions(self):
"""Test that staff may enter in an expression as the answer."""
problem = self.build_problem(answer="1/3", tolerance=1e-3)
correct_responses = ["1/3", "0.333333"]
incorrect_responses = []
self.assert_multiple_grade(problem, correct_responses, incorrect_responses)
def test_staff_inputs_expressions_legacy(self):
"""Test that staff may enter in a complex number as the answer."""
problem = self.build_problem(answer="1+1j", tolerance=1e-3)
self.assert_grade(problem, '1+j', 'correct')
@mock.patch('capa.responsetypes.log')
def test_staff_inputs_bad_syntax(self, mock_log):
"""Test that staff may enter in a complex number as the answer."""
staff_ans = "clearly bad syntax )[+1e"
problem = self.build_problem(answer=staff_ans, tolerance=1e-3)
msg = "There was a problem with the staff answer to this problem"
with self.assertRaisesRegexp(StudentInputError, msg):
self.assert_grade(problem, '1+j', 'correct')
mock_log.debug.assert_called_once_with(
"Content error--answer '%s' is not a valid number", staff_ans
)
def test_grade_infinity(self):
"""
Check that infinity doesn't automatically get marked correct.
This resolves a bug where a problem with relative tolerance would
pass with any arbitrarily large student answer.
"""
mapping = {
'some big input': float('inf'),
'some neg input': -float('inf'),
'weird NaN input': float('nan'),
'4': 4
}
def evaluator_side_effect(_, __, math_string):
"""Look up the given response for `math_string`."""
return mapping[math_string]
problem = self.build_problem(answer=4, tolerance='10%')
with mock.patch('capa.responsetypes.evaluator') as mock_eval:
mock_eval.side_effect = evaluator_side_effect
self.assert_grade(problem, 'some big input', 'incorrect')
self.assert_grade(problem, 'some neg input', 'incorrect')
self.assert_grade(problem, 'weird NaN input', 'incorrect')
def test_err_handling(self):
"""
See that `StudentInputError`s are raised when things go wrong.
"""
problem = self.build_problem(answer=4)
errors = [ # (exception raised, message to student)
(calc.UndefinedVariable("x"), r"You may not use variables \(x\) in numerical problems"),
(ValueError("factorial() mess-up"), "factorial function evaluated outside its domain"),
(ValueError(), "Could not interpret '.*' as a number"),
(pyparsing.ParseException("oopsie"), "Invalid math syntax"),
(ZeroDivisionError(), "Could not interpret '.*' as a number")
]
with mock.patch('capa.responsetypes.evaluator') as mock_eval:
for err, msg_regex in errors:
def evaluator_side_effect(_, __, math_string):
"""Raise an error only for the student input."""
if math_string != '4':
raise err
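                # Note: the closure reads `err` late, but the mock is called
                # within the same loop iteration, so the late binding is safe.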
mock_eval.side_effect = evaluator_side_effect
with self.assertRaisesRegexp(StudentInputError, msg_regex):
problem.grade_answers({'1_2_1': 'foobar'})
class CustomResponseTest(ResponseTest):
......
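For reference, both response types ultimately grade through `compare_with_tolerance`, which lives in `capa.util` and is not shown in this diff. A rough, illustrative model of its behavior (hypothetical name; the real helper also accepts tolerance expressions and may differ in detail):

import numpy

def compare_with_tolerance_model(v1, v2, tol):
    """Illustrative only: '%' tolerances are relative, others absolute."""
    if str(tol).endswith('%'):
        bound = float(str(tol)[:-1]) * 0.01 * max(abs(v1), abs(v2))
    else:
        bound = float(str(tol))
    if numpy.isinf(v1) or numpy.isinf(v2):
        # 'inf <= inf' would mark an infinite answer correct under a
        # relative tolerance; compare directly instead.
        return v1 == v2
    # NaN comparisons are False, so a NaN student value grades 'incorrect'.
    return abs(v1 - v2) <= bound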