Commit 9f407d25 by Will Daly

Merge pull request #437 from edx/will/ai-invalid-scores

Will/ai invalid scores
parents 52d38881 69aaec0f
......@@ -31,8 +31,7 @@ def get_grading_task_params(grading_workflow_uuid):
dict with keys:
* essay_text (unicode): The text of the essay submission.
* classifier_set (dict): Maps criterion names to serialized classifiers.
* course_id (unicode): The course ID that the training task is associated with.
* item_id (unicode): Identifies the item that the AI will be training to grade.
* valid_scores (dict): Maps criterion names to a list of valid scores for that criterion.
* algorithm_id (unicode): ID of the algorithm used to perform training.
Raises:
......@@ -67,12 +66,19 @@ def get_grading_task_params(grading_workflow_uuid):
raise AIGradingInternalError(msg)
try:
classifiers = list(classifier_set.classifiers.select_related().all())
return {
'essay_text': workflow.essay_text,
'classifier_set': classifier_set.classifiers_dict,
'course_id': workflow.course_id,
'item_id': workflow.item_id,
'classifier_set': {
classifier.criterion.name: classifier.download_classifier_data()
for classifier in classifiers
},
'algorithm_id': workflow.algorithm_id,
'valid_scores': {
classifier.criterion.name: classifier.valid_scores
for classifier in classifiers
}
}
except (DatabaseError, ClassifierSerializeError, IncompleteClassifierSet, ValueError) as ex:
msg = (
......
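For orientation, the params dict returned by get_grading_task_params after this change has the shape sketched below; the criterion names, classifier payloads, and algorithm ID are illustrative placeholders, not values from this commit.

# Illustrative shape only: every value below is a made-up placeholder.
params = {
    'essay_text': u"The learner's essay text...",
    'classifier_set': {
        u"vocabulary": {},          # serialized classifier data for the criterion
        u"grammar": {},
    },
    'algorithm_id': u"example-algorithm-id",
    'valid_scores': {
        u"vocabulary": [0, 1, 2],   # rubric point values, ascending
        u"grammar": [0, 1, 5],      # rubric scores may be non-contiguous
    },
}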
......@@ -7,6 +7,7 @@ import logging
import itertools
from django.conf import settings
from django.core.files.base import ContentFile
from django.core.cache import cache
from django.db import models, transaction, DatabaseError
from django.utils.timezone import now
from django.core.exceptions import ObjectDoesNotExist
......@@ -266,6 +267,23 @@ class AIClassifier(models.Model):
"""
return json.loads(self.classifier_data.read()) # pylint:disable=E1101
@property
def valid_scores(self):
"""
Return a list of valid scores for the rubric criterion associated
with this classifier.
Returns:
list of integer scores, in ascending order.
"""
cache_key = u"openassessment.assessment.ai.classifier.{pk}.valid_scores".format(pk=self.pk)
valid_scores = cache.get(cache_key)
if valid_scores is None:
valid_scores = sorted([option.points for option in self.criterion.options.all()])
cache.set(cache_key, valid_scores)
return valid_scores
class AIWorkflow(models.Model):
"""
......
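The valid_scores property added above is the standard Django low-level cache get-or-set pattern. A minimal standalone sketch of the same flow follows; the function name and cache key format here are placeholders (the real property keys on the classifier's primary key), not code from this commit.

from django.core.cache import cache

def cached_option_points(criterion):
    """Return the criterion's option point values, caching the sorted list."""
    # Hypothetical key format for illustration only.
    cache_key = u"example.valid_scores.{pk}".format(pk=criterion.pk)
    points = cache.get(cache_key)          # returns None on a cache miss
    if points is None:
        # Cache miss: query the options once, then store the sorted result.
        points = sorted(option.points for option in criterion.options.all())
        cache.set(cache_key, points)       # uses the cache backend's default timeout
    return points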
......@@ -243,9 +243,7 @@ class Rubric(models.Model):
# Find the IDs for the options matching the specified point value
option_id_set = set()
for criterion_name, option_points in criterion_points.iteritems():
if (criterion_name in rubric_points_dict and
option_points in rubric_points_dict[criterion_name]
):
if (criterion_name in rubric_points_dict and option_points in rubric_points_dict[criterion_name]):
option_id = rubric_points_dict[criterion_name][option_points]
option_id_set.add(option_id)
else:
......
......@@ -226,11 +226,22 @@ class AIWorkerGradingTest(CacheResetTest):
'essay_text': ANSWER,
'classifier_set': CLASSIFIERS,
'algorithm_id': ALGORITHM_ID,
'course_id': STUDENT_ITEM.get('course_id'),
'item_id': STUDENT_ITEM.get('item_id')
'valid_scores': {
u"vøȼȺƀᵾłȺɍɏ": [0, 1, 2],
u"ﻭɼค๓๓คɼ": [0, 1, 2]
}
}
self.assertItemsEqual(params, expected_params)
def test_get_grading_task_params_num_queries(self):
with self.assertNumQueries(5):
ai_worker_api.get_grading_task_params(self.workflow_uuid)
# The second time through, the valid scores for each classifier
# should come from the cache, so we expect fewer queries
with self.assertNumQueries(3):
ai_worker_api.get_grading_task_params(self.workflow_uuid)
def test_get_grading_task_params_no_workflow(self):
with self.assertRaises(AIGradingRequestError):
ai_worker_api.get_grading_task_params("invalid_uuid")
......
......@@ -3,6 +3,7 @@
Tests for AI worker tasks.
"""
from contextlib import contextmanager
import itertools
import mock
from django.test.utils import override_settings
from submissions import api as sub_api
......@@ -45,15 +46,30 @@ class ErrorStubAIAlgorithm(AIAlgorithm):
raise ScoreError("Test error!")
class InvalidScoreAlgorithm(AIAlgorithm):
"""
Stub implementation that returns a score that isn't in the rubric.
"""
SCORE_CYCLE = itertools.cycle([-100, 0.7, 1.2, 100])
def train_classifier(self, examples):
return {}
def score(self, text, classifier):
return self.SCORE_CYCLE.next()
ALGORITHM_ID = u"test-stub"
ERROR_STUB_ALGORITHM_ID = u"error-stub"
UNDEFINED_CLASS_ALGORITHM_ID = u"undefined_class"
UNDEFINED_MODULE_ALGORITHM_ID = u"undefined_module"
INVALID_SCORE_ALGORITHM_ID = u"invalid_score"
AI_ALGORITHMS = {
ALGORITHM_ID: '{module}.StubAIAlgorithm'.format(module=__name__),
ERROR_STUB_ALGORITHM_ID: '{module}.ErrorStubAIAlgorithm'.format(module=__name__),
UNDEFINED_CLASS_ALGORITHM_ID: '{module}.NotDefinedAIAlgorithm'.format(module=__name__),
UNDEFINED_MODULE_ALGORITHM_ID: 'openassessment.not.valid.NotDefinedAIAlgorithm'
UNDEFINED_MODULE_ALGORITHM_ID: 'openassessment.not.valid.NotDefinedAIAlgorithm',
INVALID_SCORE_ALGORITHM_ID: '{module}.InvalidScoreAlgorithm'.format(module=__name__),
}
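As a quick standalone check (not part of the commit), the stub's shared itertools cycle means successive score() calls return the raw values in order, which is what the invalid-score grading test below relies on.

# Hypothetical sanity check of the stub's cycling behaviour.
algorithm = InvalidScoreAlgorithm()
raw_scores = [algorithm.score(u"any essay text", classifier={}) for _ in range(4)]
assert raw_scores == [-100, 0.7, 1.2, 100]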
......@@ -109,9 +125,7 @@ class AITrainingTaskTest(CeleryTaskTest):
Create a training workflow in the database.
"""
examples = deserialize_training_examples(EXAMPLES, RUBRIC)
workflow = AITrainingWorkflow.start_workflow(examples, self.COURSE_ID, self.ITEM_ID, self.ALGORITHM_ID)
self.workflow_uuid = workflow.uuid
def test_unknown_algorithm(self):
......@@ -252,6 +266,32 @@ class AIGradingTaskTest(CeleryTaskTest):
workflow.classifier_set = classifier_set
workflow.save()
@mock.patch('openassessment.assessment.api.ai_worker.create_assessment')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_algorithm_gives_invalid_score(self, mock_create_assessment):
# If an algorithm provides a score that isn't in the rubric,
# we should choose the closest valid score.
self._set_algorithm_id(INVALID_SCORE_ALGORITHM_ID)
# The first score given by the algorithm should be below the minimum valid score
# The second score will be between two valid scores (0 and 1), rounding up
grade_essay(self.workflow_uuid)
expected_scores = {
u"vøȼȺƀᵾłȺɍɏ": 0,
u"ﻭɼค๓๓คɼ": 1
}
mock_create_assessment.assert_called_with(self.workflow_uuid, expected_scores)
# The third score will be between two valid scores (1 and 2), rounding down
# The final score will be greater than the maximum score
self._reset_workflow()
grade_essay(self.workflow_uuid)
expected_scores = {
u"vøȼȺƀᵾłȺɍɏ": 1,
u"ﻭɼค๓๓คɼ": 2
}
mock_create_assessment.assert_called_with(self.workflow_uuid, expected_scores)
@mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_retrieve_params_error(self, mock_call):
......@@ -277,6 +317,39 @@ class AIGradingTaskTest(CeleryTaskTest):
with self.assert_retry(grade_essay, AIGradingInternalError):
grade_essay(self.workflow_uuid)
@mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_params_missing_criterion_for_valid_scores(self, mock_call):
mock_call.return_value = {
'essay_text': 'test',
'classifier_set': {
u"vøȼȺƀᵾłȺɍɏ": {},
u"ﻭɼค๓๓คɼ": {}
},
'algorithm_id': ALGORITHM_ID,
'valid_scores': {}
}
with self.assert_retry(grade_essay, AIGradingInternalError):
grade_essay(self.workflow_uuid)
@mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
@override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
def test_params_valid_scores_empty_list(self, mock_call):
mock_call.return_value = {
'essay_text': 'test',
'classifier_set': {
u"vøȼȺƀᵾłȺɍɏ": {},
u"ﻭɼค๓๓คɼ": {}
},
'algorithm_id': ALGORITHM_ID,
'valid_scores': {
u"vøȼȺƀᵾłȺɍɏ": [],
u"ﻭɼค๓๓คɼ": [0, 1, 2]
}
}
with self.assert_retry(grade_essay, AIGradingInternalError):
grade_essay(self.workflow_uuid)
def _set_algorithm_id(self, algorithm_id):
"""
Override the default algorithm ID for the grading workflow.
......@@ -291,3 +364,12 @@ class AIGradingTaskTest(CeleryTaskTest):
workflow = AIGradingWorkflow.objects.get(uuid=self.workflow_uuid)
workflow.algorithm_id = algorithm_id
workflow.save()
def _reset_workflow(self):
"""
Reset the workflow so we can re-use it.
"""
workflow = AIGradingWorkflow.objects.get(uuid=self.workflow_uuid)
workflow.completed_at = None
workflow.assessment = None
workflow.save()
......@@ -10,7 +10,7 @@ from celery.utils.log import get_task_logger
from dogapi import dog_stats_api
from openassessment.assessment.api import ai_worker as ai_worker_api
from openassessment.assessment.errors import (
AIError, AIGradingInternalError, AIGradingRequestError, AIReschedulingInternalError, ANTICIPATED_CELERY_ERRORS
AIError, AIGradingInternalError, AIReschedulingInternalError, ANTICIPATED_CELERY_ERRORS
)
from .algorithm import AIAlgorithm, AIAlgorithmError
from openassessment.assessment.models.ai import AIClassifierSet, AIGradingWorkflow
......@@ -54,6 +54,7 @@ def grade_essay(workflow_uuid):
essay_text = params['essay_text']
classifier_set = params['classifier_set']
algorithm_id = params['algorithm_id']
valid_scores = params['valid_scores']
except (AIError, KeyError):
msg = (
u"An error occurred while retrieving the AI grading task "
......@@ -62,6 +63,23 @@ def grade_essay(workflow_uuid):
logger.exception(msg)
raise grade_essay.retry()
# Validate that we have valid scores for each criterion
for criterion_name in classifier_set.keys():
msg = None
if criterion_name not in valid_scores:
msg = (
u"Could not find {criterion} in the list of valid scores "
u"for grading workflow with UUID {uuid}"
).format(criterion=criterion_name, uuid=workflow_uuid)
elif len(valid_scores[criterion_name]) == 0:
msg = (
u"Valid scores for {criterion} is empty for "
u"grading workflow with UUID {uuid}"
).format(criterion=criterion_name, uuid=workflow_uuid)
if msg:
logger.exception(msg)
raise AIGradingInternalError(msg)
# Retrieve the AI algorithm
try:
algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
......@@ -76,7 +94,10 @@ def grade_essay(workflow_uuid):
# Use the algorithm to evaluate the essay for each criterion
try:
scores_by_criterion = {
criterion_name: algorithm.score(essay_text, classifier)
criterion_name: _closest_valid_score(
algorithm.score(essay_text, classifier),
valid_scores[criterion_name]
)
for criterion_name, classifier in classifier_set.iteritems()
}
except AIAlgorithmError:
......@@ -222,6 +243,35 @@ def reschedule_grading_tasks(course_id, item_id):
raise reschedule_grading_tasks.retry()
def _closest_valid_score(score, valid_scores):
"""
Return the closest valid score for a given score.
This is necessary because rubric point values may be non-contiguous,
so simple rounding would not always produce a valid score.
Args:
score (int or float): The score assigned by the algorithm.
valid_scores (list of int): Valid scores for this criterion,
assumed to be sorted in ascending order.
Returns:
int
"""
# If the score is already valid, return it
if score in valid_scores:
return score
# Otherwise, find the closest score in the list.
closest = valid_scores[0]
delta = abs(score - closest)
for valid in valid_scores[1:]:
new_delta = abs(score - valid)
if new_delta < delta:
closest = valid
delta = new_delta
return closest
def _log_start_reschedule_grading(course_id=None, item_id=None):
"""
Sends data about the rescheduling_grading task to datadog
......
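To make the helper's rounding behaviour concrete, here is a standalone copy of its logic exercised with the raw scores produced by the test's InvalidScoreAlgorithm stub, assuming the rubric's valid scores are [0, 1, 2]. The function is renamed here only because the real helper is module-private.

def closest_valid_score(score, valid_scores):
    """Standalone copy of _closest_valid_score's logic, for illustration."""
    if score in valid_scores:
        return score
    closest = valid_scores[0]
    delta = abs(score - closest)
    for valid in valid_scores[1:]:
        new_delta = abs(score - valid)
        if new_delta < delta:
            closest = valid
            delta = new_delta
    return closest

assert closest_valid_score(-100, [0, 1, 2]) == 0   # clamps below the minimum
assert closest_valid_score(0.7, [0, 1, 2]) == 1    # between 0 and 1, rounds up
assert closest_valid_score(1.2, [0, 1, 2]) == 1    # between 1 and 2, rounds down
assert closest_valid_score(100, [0, 1, 2]) == 2    # clamps above the maximum
# On an exact tie (e.g. 0.5), the lower score wins because the comparison is strict.
assert closest_valid_score(0.5, [0, 1, 2]) == 0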