Commit 554d342f by Peter Baratta

Merge pull request #2018 from edx/fix/peter/numerical_infinite_bug

Fix infinity bug for numericalresponse; add tests
parents e122efc5 d8002447
...@@ -2,7 +2,8 @@ ...@@ -2,7 +2,8 @@
# File: courseware/capa/responsetypes.py # File: courseware/capa/responsetypes.py
# #
''' '''
Problem response evaluation. Handles checking of student responses, of a variety of types. Problem response evaluation. Handles checking of student responses,
of a variety of types.
Used by capa_problem.py Used by capa_problem.py
''' '''
...@@ -35,7 +36,7 @@ from datetime import datetime ...@@ -35,7 +36,7 @@ from datetime import datetime
from .util import * from .util import *
from lxml import etree from lxml import etree
from lxml.html.soupparser import fromstring as fromstring_bs # uses Beautiful Soup!!! FIXME? from lxml.html.soupparser import fromstring as fromstring_bs # uses Beautiful Soup!!! FIXME?
import xqueue_interface import capa.xqueue_interface
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -300,7 +301,7 @@ class LoncapaResponse(object): ...@@ -300,7 +301,7 @@ class LoncapaResponse(object):
# response # response
aid = self.answer_ids[-1] aid = self.answer_ids[-1]
new_cmap.set_hint_and_mode(aid, hint_text, hintmode) new_cmap.set_hint_and_mode(aid, hint_text, hintmode)
log.debug('after hint: new_cmap = %s' % new_cmap) log.debug('after hint: new_cmap = %s', new_cmap)
@abc.abstractmethod @abc.abstractmethod
def get_score(self, student_answers): def get_score(self, student_answers):
...@@ -790,6 +791,10 @@ class OptionResponse(LoncapaResponse): ...@@ -790,6 +791,10 @@ class OptionResponse(LoncapaResponse):
class NumericalResponse(LoncapaResponse): class NumericalResponse(LoncapaResponse):
'''
This response type expects a number or formulaic expression that evaluates
to a number (e.g. `4+5/2^2`), and accepts with a tolerance.
'''
response_tag = 'numericalresponse' response_tag = 'numericalresponse'
hint_tag = 'numericalhint' hint_tag = 'numericalhint'
...@@ -806,12 +811,12 @@ class NumericalResponse(LoncapaResponse): ...@@ -806,12 +811,12 @@ class NumericalResponse(LoncapaResponse):
'//*[@id=$id]//responseparam[@type="tolerance"]/@default', '//*[@id=$id]//responseparam[@type="tolerance"]/@default',
id=xml.get('id'))[0] id=xml.get('id'))[0]
self.tolerance = contextualize_text(self.tolerance_xml, context) self.tolerance = contextualize_text(self.tolerance_xml, context)
except Exception: except IndexError: # xpath found an empty list, so (...)[0] is the error
self.tolerance = '0' self.tolerance = '0'
try: try:
self.answer_id = xml.xpath('//*[@id=$id]//textline/@id', self.answer_id = xml.xpath('//*[@id=$id]//textline/@id',
id=xml.get('id'))[0] id=xml.get('id'))[0]
except Exception: except IndexError: # Same as above
self.answer_id = None self.answer_id = None
def get_score(self, student_answers): def get_score(self, student_answers):
...@@ -836,7 +841,6 @@ class NumericalResponse(LoncapaResponse): ...@@ -836,7 +841,6 @@ class NumericalResponse(LoncapaResponse):
except: except:
# Use the traceback-preserving version of re-raising with a # Use the traceback-preserving version of re-raising with a
# different type # different type
import sys
type, value, traceback = sys.exc_info() type, value, traceback = sys.exc_info()
raise StudentInputError, ("Could not interpret '%s' as a number" % raise StudentInputError, ("Could not interpret '%s' as a number" %
...@@ -1869,8 +1873,6 @@ class FormulaResponse(LoncapaResponse): ...@@ -1869,8 +1873,6 @@ class FormulaResponse(LoncapaResponse):
log.debug('formularesponse: error %s in formula' % err) log.debug('formularesponse: error %s in formula' % err)
raise StudentInputError("Invalid input: Could not parse '%s' as a formula" % raise StudentInputError("Invalid input: Could not parse '%s' as a formula" %
cgi.escape(given)) cgi.escape(given))
if numpy.isnan(student_result) or numpy.isinf(student_result):
return "incorrect"
if not compare_with_tolerance(student_result, instructor_result, self.tolerance): if not compare_with_tolerance(student_result, instructor_result, self.tolerance):
return "incorrect" return "incorrect"
return "correct" return "correct"
......
...@@ -438,6 +438,43 @@ class FormulaResponseTest(ResponseTest): ...@@ -438,6 +438,43 @@ class FormulaResponseTest(ResponseTest):
self.assert_grade(problem, incorrect, 'incorrect', self.assert_grade(problem, incorrect, 'incorrect',
msg="Failed on function {0}; the given, incorrect answer was {1} but graded 'correct'".format(func, incorrect)) msg="Failed on function {0}; the given, incorrect answer was {1} but graded 'correct'".format(func, incorrect))
def test_grade_infinity(self):
    # Regression test: with a relative tolerance, an arbitrarily large
    # student answer used to be graded as passing.
    problem = self.build_problem(sample_dict={'x': (1, 2)},
                                 num_samples=10,
                                 tolerance="1%",
                                 answer="x")

    # First a very large positive answer, then the matching very large
    # negative one; both must be marked incorrect.
    for huge_formula in ("x*1e999", "-x*1e999"):
        self.assert_grade(problem, huge_formula, "incorrect")
def test_grade_nan(self):
    # Try to make the student's formula evaluate to nan and check that
    # the grader resolves it correctly: such a submission must not pass.
    problem = self.build_problem(sample_dict={'x': (1, 2)},
                                 num_samples=10,
                                 tolerance="1%",
                                 answer="x")

    nan_formulas = (
        # An incorrect answer (+ nan). Right now this evaluates to 'nan'
        # for a given x (Python implementation-dependent).
        "10*x + 0*1e999",
        # A correct answer (+ nan) is still expected to be rejected.
        "x + 0*1e999",
    )
    for nan_formula in nan_formulas:
        self.assert_grade(problem, nan_formula, "incorrect")
class StringResponseTest(ResponseTest): class StringResponseTest(ResponseTest):
from response_xml_factory import StringResponseXMLFactory from response_xml_factory import StringResponseXMLFactory
...@@ -714,6 +751,30 @@ class NumericalResponseTest(ResponseTest): ...@@ -714,6 +751,30 @@ class NumericalResponseTest(ResponseTest):
incorrect_responses = ["", "4.5", "3.5", "0"] incorrect_responses = ["", "4.5", "3.5", "0"]
self.assert_multiple_grade(problem, correct_responses, incorrect_responses) self.assert_multiple_grade(problem, correct_responses, incorrect_responses)
def test_grade_infinity(self):
    # Regression test: with a relative tolerance, an arbitrarily large
    # student answer used to be graded as passing.
    problem_settings = {
        'question_text': "What is 2 + 2 approximately?",
        'explanation': "The answer is 4",
        'answer': 4,
        'tolerance': "10%",
    }
    problem = self.build_problem(**problem_settings)

    # No response is expected to be graded correct here; the very large
    # positive and negative answers must both be graded incorrect.
    should_pass = []
    should_fail = ["1e999", "-1e999"]
    self.assert_multiple_grade(problem, should_pass, should_fail)
def test_grade_nan(self):
    # Try to make the student's answer evaluate to nan and check that
    # the grader resolves it correctly.
    problem_settings = {
        'question_text': "What is 2 + 2 approximately?",
        'explanation': "The answer is 4",
        'answer': 4,
        'tolerance': "10%",
    }
    problem = self.build_problem(**problem_settings)

    # Right now both of these evaluate to `nan`; in particular
    # `4 + nan` should be graded incorrect.
    should_pass = []
    should_fail = ["0*1e999", "4 + 0*1e999"]
    self.assert_multiple_grade(problem, should_pass, should_fail)
def test_grade_with_script(self): def test_grade_with_script(self):
script_text = "computed_response = math.sqrt(4)" script_text = "computed_response = math.sqrt(4)"
problem = self.build_problem(question_text="What is sqrt(4)?", problem = self.build_problem(question_text="What is sqrt(4)?",
......
from .calc import evaluator, UndefinedVariable from .calc import evaluator, UndefinedVariable
from cmath import isinf
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# #
...@@ -20,7 +21,14 @@ def compare_with_tolerance(v1, v2, tol): ...@@ -20,7 +21,14 @@ def compare_with_tolerance(v1, v2, tol):
tolerance = tolerance_rel * max(abs(v1), abs(v2)) tolerance = tolerance_rel * max(abs(v1), abs(v2))
else: else:
tolerance = evaluator(dict(), dict(), tol) tolerance = evaluator(dict(), dict(), tol)
return abs(v1 - v2) <= tolerance
if isinf(v1) or isinf(v2):
# If an input is infinite, we can end up with `abs(v1-v2)` and
# `tolerance` both equal to infinity. Then, below we would have
# `inf <= inf`, which is True and would wrongly accept the answer.
# Instead, compare directly.
return v1 == v2
else:
return abs(v1 - v2) <= tolerance
def contextualize_text(text, context): # private def contextualize_text(text, context): # private
...@@ -51,7 +59,8 @@ def convert_files_to_filenames(answers): ...@@ -51,7 +59,8 @@ def convert_files_to_filenames(answers):
new_answers = dict() new_answers = dict()
for answer_id in answers.keys(): for answer_id in answers.keys():
answer = answers[answer_id] answer = answers[answer_id]
if is_list_of_files(answer): # Files are stored as a list, even if one file # Files are stored as a list, even if one file
if is_list_of_files(answer):
new_answers[answer_id] = [f.name for f in answer] new_answers[answer_id] = [f.name for f in answer]
else: else:
new_answers[answer_id] = answers[answer_id] new_answers[answer_id] = answers[answer_id]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment