Commit 1d536256 by Vik Paruchuri

Add in ability to mock a server, a lot more testing code for open ended

parent 5c55595e
......@@ -20,7 +20,7 @@ from xmodule.x_module import ModuleSystem
from mock import Mock
open_ended_grading_interface = {
'url': 'blah',
'url': 'blah/',
'username': 'incorrect_user',
'password': 'incorrect_pass',
'staff_grading' : 'staff_grading',
......
from threading import Thread
import socket
import threading
import SimpleHTTPServer
import SocketServer
class ThreadedRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
def handle(self):
data = self.request.recv(1024)
cur_thread = threading.current_thread()
response = "{}: {}".format(cur_thread.name, data)
self.request.sendall(response)
return
class ThreadedTCPServer(SocketServer.ThreadingMixIn, SocketServer.TCPServer):
pass
def create_server(host,port):
"""
Mock a server to be used for the open ended grading tests
@param host: the hostname ie "localhost" or "127.0.0.1"
@param port: the integer of the port to open a connection on
@return: The created server object
"""
server = ThreadedTCPServer((host,port), ThreadedRequestHandler)
# Start a thread with the server -- that thread will then start one
# more thread for each request
server_thread = threading.Thread(target=server.serve_forever)
# Exit the server thread when the main thread terminates
server_thread.daemon = True
server_thread.start()
return server
\ No newline at end of file
......@@ -20,6 +20,8 @@ import capa.xqueue_interface as xqueue_interface
from datetime import datetime
import logging
from mock_server import create_server
log = logging.getLogger(__name__)
from . import test_system
......@@ -29,6 +31,18 @@ COURSE = 'open_ended' # name of directory with course data
import test_util_open_ended
class MockQueryDict(dict):
"""
Mock a query set so that it can be used with default authorization
"""
def getlist(self, key, default=None):
try:
return super(MockQueryDict, self).__getitem__(key)
except KeyError:
if default is None:
return []
return default
class DummySystem(ImportSystem):
@patch('xmodule.modulestore.xml.OSFS', lambda dir: MemoryFS())
......@@ -471,9 +485,6 @@ class CombinedOpenEndedModuleTest(unittest.TestCase):
changed = combinedoe.update_task_states()
self.assertFalse(changed)
def test_ajax_actions(self):
self.combinedoe_container.handle_ajax('save_answer', {'student_answer' : "This is my answer"})
def test_get_score_realistic(self):
instance_state = r"""{"ready_to_reset": false, "skip_spelling_checks": true, "current_task_number": 1, "weight": 5.0, "graceperiod": "1 day 12 hours 59 minutes 59 seconds", "is_graded": "True", "task_states": ["{\"child_created\": false, \"child_attempts\": 4, \"version\": 1, \"child_history\": [{\"answer\": \"The students\\u2019 data are recorded in the table below.\\r\\n\\r\\nStarting Mass (g)\\tEnding Mass (g)\\tDifference in Mass (g)\\r\\nMarble\\t 9.8\\t 9.4\\t\\u20130.4\\r\\nLimestone\\t10.4\\t 9.1\\t\\u20131.3\\r\\nWood\\t11.2\\t11.2\\t 0.0\\r\\nPlastic\\t 7.2\\t 7.1\\t\\u20130.1\\r\\nAfter reading the group\\u2019s procedure, describe what additional information you would need in order to replicate the expe\", \"post_assessment\": \"{\\\"submission_id\\\": 3097, \\\"score\\\": 0, \\\"feedback\\\": \\\"{\\\\\\\"spelling\\\\\\\": \\\\\\\"Spelling: Ok.\\\\\\\", \\\\\\\"grammar\\\\\\\": \\\\\\\"Grammar: More grammar errors than average.\\\\\\\", \\\\\\\"markup-text\\\\\\\": \\\\\\\"the students data are recorded in the <bg>table below . starting mass</bg> g ending mass g difference in mass g marble . . . limestone . . . wood . . . plastic . . . after reading the groups <bg>procedure , describe what additional</bg> information you would need in order to replicate the <bs>expe</bs>\\\\\\\"}\\\", \\\"success\\\": true, \\\"grader_id\\\": 3233, \\\"grader_type\\\": \\\"ML\\\", \\\"rubric_scores_complete\\\": true, \\\"rubric_xml\\\": \\\"<rubric><category><description>Response Quality</description><score>0</score><option points='0'>The response is not a satisfactory answer to the question. It either fails to address the question or does so in a limited way, with no evidence of higher-order thinking.</option><option points='1'>The response is a marginal answer to the question. It may contain some elements of a proficient response, but it is inaccurate or incomplete.</option><option points='2'>The response is a proficient answer to the question. It is generally correct, although it may contain minor inaccuracies. There is limited evidence of higher-order thinking.</option><option points='3'>The response is correct, complete, and contains evidence of higher-order thinking.</option></category></rubric>\\\"}\", \"score\": 0}, {\"answer\": \"After 24 hours, remove the samples from the containers and rinse each sample with distilled water.\\r\\nAllow the samples to sit and dry for 30 minutes.\\r\\nDetermine the mass of each sample.\\r\\nThe students\\u2019 data are recorded in the table below.\\r\\n\\r\\nStarting Mass (g)\\tEnding Mass (g)\\tDifference in Mass (g)\\r\\nMarble\\t 9.8\\t 9.4\\t\\u20130.4\\r\\nLimestone\\t10.4\\t 9.1\\t\\u20131.3\\r\\nWood\\t11.2\\t11.2\\t 0.0\\r\\nPlastic\\t 7.2\\t 7.1\\t\\u20130.1\\r\\nAfter reading the\", \"post_assessment\": \"[3]\", \"score\": 3}, {\"answer\": \"To replicate the experiment, the procedure would require more detail. One piece of information that is omitted is the amount of vinegar used in the experiment. It is also important to know what temperature the experiment was kept at during the 24 hours. Finally, the procedure needs to include details about the experiment, for example if the whole sample must be submerged.\", \"post_assessment\": \"[3]\", \"score\": 3}, {\"answer\": \"e the mass of four different samples.\\r\\nPour vinegar in each of four separate, but identical, containers.\\r\\nPlace a sample of one material into one container and label. Repeat with remaining samples, placing a single sample into a single container.\\r\\nAfter 24 hours, remove the samples from the containers and rinse each sample with distilled water.\\r\\nAllow the samples to sit and dry for 30 minutes.\\r\\nDetermine the mass of each sample.\\r\\nThe students\\u2019 data are recorded in the table below.\\r\\n\", \"post_assessment\": \"[3]\", \"score\": 3}, {\"answer\": \"\", \"post_assessment\": \"[3]\", \"score\": 3}], \"max_score\": 3, \"child_state\": \"done\"}", "{\"child_created\": false, \"child_attempts\": 0, \"version\": 1, \"child_history\": [{\"answer\": \"The students\\u2019 data are recorded in the table below.\\r\\n\\r\\nStarting Mass (g)\\tEnding Mass (g)\\tDifference in Mass (g)\\r\\nMarble\\t 9.8\\t 9.4\\t\\u20130.4\\r\\nLimestone\\t10.4\\t 9.1\\t\\u20131.3\\r\\nWood\\t11.2\\t11.2\\t 0.0\\r\\nPlastic\\t 7.2\\t 7.1\\t\\u20130.1\\r\\nAfter reading the group\\u2019s procedure, describe what additional information you would need in order to replicate the expe\", \"post_assessment\": \"{\\\"submission_id\\\": 3097, \\\"score\\\": 0, \\\"feedback\\\": \\\"{\\\\\\\"spelling\\\\\\\": \\\\\\\"Spelling: Ok.\\\\\\\", \\\\\\\"grammar\\\\\\\": \\\\\\\"Grammar: More grammar errors than average.\\\\\\\", \\\\\\\"markup-text\\\\\\\": \\\\\\\"the students data are recorded in the <bg>table below . starting mass</bg> g ending mass g difference in mass g marble . . . limestone . . . wood . . . plastic . . . after reading the groups <bg>procedure , describe what additional</bg> information you would need in order to replicate the <bs>expe</bs>\\\\\\\"}\\\", \\\"success\\\": true, \\\"grader_id\\\": 3233, \\\"grader_type\\\": \\\"ML\\\", \\\"rubric_scores_complete\\\": true, \\\"rubric_xml\\\": \\\"<rubric><category><description>Response Quality</description><score>0</score><option points='0'>The response is not a satisfactory answer to the question. It either fails to address the question or does so in a limited way, with no evidence of higher-order thinking.</option><option points='1'>The response is a marginal answer to the question. It may contain some elements of a proficient response, but it is inaccurate or incomplete.</option><option points='2'>The response is a proficient answer to the question. It is generally correct, although it may contain minor inaccuracies. There is limited evidence of higher-order thinking.</option><option points='3'>The response is correct, complete, and contains evidence of higher-order thinking.</option></category></rubric>\\\"}\", \"score\": 0}, {\"answer\": \"After 24 hours, remove the samples from the containers and rinse each sample with distilled water.\\r\\nAllow the samples to sit and dry for 30 minutes.\\r\\nDetermine the mass of each sample.\\r\\nThe students\\u2019 data are recorded in the table below.\\r\\n\\r\\nStarting Mass (g)\\tEnding Mass (g)\\tDifference in Mass (g)\\r\\nMarble\\t 9.8\\t 9.4\\t\\u20130.4\\r\\nLimestone\\t10.4\\t 9.1\\t\\u20131.3\\r\\nWood\\t11.2\\t11.2\\t 0.0\\r\\nPlastic\\t 7.2\\t 7.1\\t\\u20130.1\\r\\nAfter reading the\", \"post_assessment\": \"{\\\"submission_id\\\": 3098, \\\"score\\\": 0, \\\"feedback\\\": \\\"{\\\\\\\"spelling\\\\\\\": \\\\\\\"Spelling: Ok.\\\\\\\", \\\\\\\"grammar\\\\\\\": \\\\\\\"Grammar: Ok.\\\\\\\", \\\\\\\"markup-text\\\\\\\": \\\\\\\"after hours , remove the samples from the containers and rinse each sample with distilled water . allow the samples to sit and dry for minutes . determine the mass of each sample . the students data are recorded in the <bg>table below . starting mass</bg> g ending mass g difference in mass g marble . . . limestone . . . wood . . . plastic . . . after reading the\\\\\\\"}\\\", \\\"success\\\": true, \\\"grader_id\\\": 3235, \\\"grader_type\\\": \\\"ML\\\", \\\"rubric_scores_complete\\\": true, \\\"rubric_xml\\\": \\\"<rubric><category><description>Response Quality</description><score>0</score><option points='0'>The response is not a satisfactory answer to the question. It either fails to address the question or does so in a limited way, with no evidence of higher-order thinking.</option><option points='1'>The response is a marginal answer to the question. It may contain some elements of a proficient response, but it is inaccurate or incomplete.</option><option points='2'>The response is a proficient answer to the question. It is generally correct, although it may contain minor inaccuracies. There is limited evidence of higher-order thinking.</option><option points='3'>The response is correct, complete, and contains evidence of higher-order thinking.</option></category></rubric>\\\"}\", \"score\": 0}, {\"answer\": \"To replicate the experiment, the procedure would require more detail. One piece of information that is omitted is the amount of vinegar used in the experiment. It is also important to know what temperature the experiment was kept at during the 24 hours. Finally, the procedure needs to include details about the experiment, for example if the whole sample must be submerged.\", \"post_assessment\": \"{\\\"submission_id\\\": 3099, \\\"score\\\": 3, \\\"feedback\\\": \\\"{\\\\\\\"spelling\\\\\\\": \\\\\\\"Spelling: Ok.\\\\\\\", \\\\\\\"grammar\\\\\\\": \\\\\\\"Grammar: Ok.\\\\\\\", \\\\\\\"markup-text\\\\\\\": \\\\\\\"to replicate the experiment , the procedure would require <bg>more detail . one</bg> piece of information <bg>that is omitted is the</bg> amount of vinegar used in the experiment . it is also important to know what temperature the experiment was kept at during the hours . finally , the procedure needs to include details about the experiment , for example if the whole sample must be submerged .\\\\\\\"}\\\", \\\"success\\\": true, \\\"grader_id\\\": 3237, \\\"grader_type\\\": \\\"ML\\\", \\\"rubric_scores_complete\\\": true, \\\"rubric_xml\\\": \\\"<rubric><category><description>Response Quality</description><score>3</score><option points='0'>The response is not a satisfactory answer to the question. It either fails to address the question or does so in a limited way, with no evidence of higher-order thinking.</option><option points='1'>The response is a marginal answer to the question. It may contain some elements of a proficient response, but it is inaccurate or incomplete.</option><option points='2'>The response is a proficient answer to the question. It is generally correct, although it may contain minor inaccuracies. There is limited evidence of higher-order thinking.</option><option points='3'>The response is correct, complete, and contains evidence of higher-order thinking.</option></category></rubric>\\\"}\", \"score\": 3}, {\"answer\": \"e the mass of four different samples.\\r\\nPour vinegar in each of four separate, but identical, containers.\\r\\nPlace a sample of one material into one container and label. Repeat with remaining samples, placing a single sample into a single container.\\r\\nAfter 24 hours, remove the samples from the containers and rinse each sample with distilled water.\\r\\nAllow the samples to sit and dry for 30 minutes.\\r\\nDetermine the mass of each sample.\\r\\nThe students\\u2019 data are recorded in the table below.\\r\\n\", \"post_assessment\": \"{\\\"submission_id\\\": 3100, \\\"score\\\": 0, \\\"feedback\\\": \\\"{\\\\\\\"spelling\\\\\\\": \\\\\\\"Spelling: Ok.\\\\\\\", \\\\\\\"grammar\\\\\\\": \\\\\\\"Grammar: Ok.\\\\\\\", \\\\\\\"markup-text\\\\\\\": \\\\\\\"e the mass of four different samples . pour vinegar in <bg>each of four separate</bg> , but identical , containers . place a sample of one material into one container and label . repeat with remaining samples , placing a single sample into a single container . after hours , remove the samples from the containers and rinse each sample with distilled water . allow the samples to sit and dry for minutes . determine the mass of each sample . the students data are recorded in the table below . \\\\\\\"}\\\", \\\"success\\\": true, \\\"grader_id\\\": 3239, \\\"grader_type\\\": \\\"ML\\\", \\\"rubric_scores_complete\\\": true, \\\"rubric_xml\\\": \\\"<rubric><category><description>Response Quality</description><score>0</score><option points='0'>The response is not a satisfactory answer to the question. It either fails to address the question or does so in a limited way, with no evidence of higher-order thinking.</option><option points='1'>The response is a marginal answer to the question. It may contain some elements of a proficient response, but it is inaccurate or incomplete.</option><option points='2'>The response is a proficient answer to the question. It is generally correct, although it may contain minor inaccuracies. There is limited evidence of higher-order thinking.</option><option points='3'>The response is correct, complete, and contains evidence of higher-order thinking.</option></category></rubric>\\\"}\", \"score\": 0}, {\"answer\": \"\", \"post_assessment\": \"{\\\"submission_id\\\": 3101, \\\"score\\\": 0, \\\"feedback\\\": \\\"{\\\\\\\"spelling\\\\\\\": \\\\\\\"Spelling: Ok.\\\\\\\", \\\\\\\"grammar\\\\\\\": \\\\\\\"Grammar: Ok.\\\\\\\", \\\\\\\"markup-text\\\\\\\": \\\\\\\"invalid essay .\\\\\\\"}\\\", \\\"success\\\": true, \\\"grader_id\\\": 3241, \\\"grader_type\\\": \\\"ML\\\", \\\"rubric_scores_complete\\\": true, \\\"rubric_xml\\\": \\\"<rubric><category><description>Response Quality</description><score>0</score><option points='0'>The response is not a satisfactory answer to the question. It either fails to address the question or does so in a limited way, with no evidence of higher-order thinking.</option><option points='1'>The response is a marginal answer to the question. It may contain some elements of a proficient response, but it is inaccurate or incomplete.</option><option points='2'>The response is a proficient answer to the question. It is generally correct, although it may contain minor inaccuracies. There is limited evidence of higher-order thinking.</option><option points='3'>The response is correct, complete, and contains evidence of higher-order thinking.</option></category></rubric>\\\"}\", \"score\": 0}], \"max_score\": 3, \"child_state\": \"done\"}"], "attempts": "10000", "student_attempts": 0, "due": null, "state": "done", "accept_file_upload": false, "display_name": "Science Question -- Machine Assessed"}"""
instance_state = json.loads(instance_state)
......@@ -505,6 +516,11 @@ class CombinedOpenEndedModuleTest(unittest.TestCase):
self.assertEqual(score_dict['total'], 15.0)
class OpenEndedModuleXmlTest(unittest.TestCase):
problem_location = Location(["i4x", "edX", "oe_test", "combinedopenended", "SampleQuestion"])
answer = "blah blah"
assessment = [0,1]
hint = "blah"
test_server = create_server("127.0.0.1", 3034)
def setUp(self):
self.test_system = test_system()
......@@ -523,10 +539,59 @@ class OpenEndedModuleXmlTest(unittest.TestCase):
self.assertEquals(len(courses), 1)
return courses[0]
def test_open_ended_load(self):
def get_module_from_location(self, location):
course = self.get_course('open_ended')
log.info(course.id)
if not isinstance(location, Location):
location = Location(location)
descriptor = self.modulestore.get_instance(course.id, location, depth=None)
return descriptor.xmodule(self.test_system)
def test_open_ended_load_and_save(self):
module = self.get_module_from_location(self.problem_location)
module.handle_ajax("save_answer", {"student_answer" : self.answer})
task_one_json = json.loads(module.task_states[0])
self.assertEqual(task_one_json['child_history'][0]['answer'], self.answer)
def test_open_ended_flow_reset(self):
assessment = [0,1]
module = self.get_module_from_location(self.problem_location)
#Simulate a student saving an answer
module.handle_ajax("save_answer", {"student_answer" : self.answer})
status = module.handle_ajax("get_status", {})
#Mock a student submitting an assessment
assessment_dict = MockQueryDict()
assessment_dict.update({'assessment' : sum(assessment), 'score_list[]' : assessment})
module.handle_ajax("save_assessment", assessment_dict)
task_one_json = json.loads(module.task_states[0])
self.assertEqual(json.loads(task_one_json['child_history'][0]['post_assessment']), assessment)
module.handle_ajax("get_status", {})
#Move to the next step in the problem
module.handle_ajax("next_problem", {})
module.get_html()
module.handle_ajax("get_combined_rubric", {})
module.handle_ajax("reset", {})
def test_open_ended_flow_correct(self):
assessment = [1,1]
module = self.get_module_from_location(self.problem_location)
#Simulate a student saving an answer
module.handle_ajax("save_answer", {"student_answer" : self.answer})
status = module.handle_ajax("get_status", {})
#Mock a student submitting an assessment
assessment_dict = MockQueryDict()
assessment_dict.update({'assessment' : sum(assessment), 'score_list[]' : assessment})
module.handle_ajax("save_assessment", assessment_dict)
task_one_json = json.loads(module.task_states[0])
self.assertEqual(json.loads(task_one_json['child_history'][0]['post_assessment']), assessment)
module.handle_ajax("get_status", {})
#Move to the next step in the problem
module.handle_ajax("next_problem", {})
module.get_html()
module.handle_ajax("get_combined_rubric", {})
This is a very very simple course, useful for debugging self assessment code.
This is a very very simple course, useful for debugging open ended grading code.
<combinedopenended attempts="10000" display_name = "Humanities Question -- Machine Assessed">
<rubric>
<rubric>
<category>
<description>Writing Applications</description>
<option> The essay loses focus, has little information or supporting details, and the organization makes it difficult to follow.</option>
<option> The essay presents a mostly unified theme, includes sufficient information to convey the theme, and is generally organized well.</option>
</category>
<category>
<description> Language Conventions </description>
<option> The essay demonstrates a reasonable command of proper spelling and grammar. </option>
<option> The essay demonstrates superior command of proper spelling and grammar.</option>
</category>
</rubric>
</rubric>
<prompt>
<h4>Censorship in the Libraries</h4>
<p>"All of us can think of a book that we hope none of our children or any other children have taken off the shelf. But if I have the right to remove that book from the shelf -- that work I abhor -- then you also have exactly the same right and so does everyone else. And then we have no books left on the shelf for any of us." --Katherine Paterson, Author</p>
<p>Write a persuasive essay to a newspaper reflecting your vies on censorship in libraries. Do you believe that certain materials, such as books, music, movies, magazines, etc., should be removed from the shelves if they are found offensive? Support your position with convincing arguments from your own experience, observations, and/or reading.</p>
</prompt>
<task>
<selfassessment/>
</task>
<task>
<openended min_score_to_attempt="2" max_score_to_attempt="3">
<openendedparam>
<initial_display>Enter essay here.</initial_display>
<answer_display>This is the answer.</answer_display>
<grader_payload>{"grader_settings" : "ml_grading.conf", "problem_id" : "6.002x/Welcome/OETest"}</grader_payload>
</openendedparam>
</openended>
</task>
</combinedopenended>
\ No newline at end of file
<course org="edX" course="sa_test" url_name="2012_Fall"/>
<course org="edX" course="oe_test" url_name="2012_Fall"/>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment