Commit 69aaec0f by Will Daly

Remove unused grading task params

Retrieve valid scores for a classifier
Assign closest score when AI algorithm returns a score not in the rubric.
parent 77db4e16
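
In short: the worker now snaps each raw algorithm score onto the point values the rubric actually defines before creating the assessment. A minimal illustration of that mapping, with made-up criterion names and scores (the committed helper is _closest_valid_score, added in the worker diff below):

    raw_scores = {u"vocabulary": -100, u"grammar": 0.7}               # what the algorithm returned
    valid_scores = {u"vocabulary": [0, 1, 2], u"grammar": [0, 1, 2]}  # point values defined by the rubric

    snapped = {
        name: min(valid_scores[name], key=lambda valid: abs(valid - raw))
        for name, raw in raw_scores.items()
    }
    print(snapped)   # {u'vocabulary': 0, u'grammar': 1}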
@@ -31,8 +31,7 @@ def get_grading_task_params(grading_workflow_uuid):
         dict with keys:
             * essay_text (unicode): The text of the essay submission.
             * classifier_set (dict): Maps criterion names to serialized classifiers.
-            * course_id (unicode): The course ID that the training task is associated with.
-            * item_id (unicode): Identifies the item that the AI will be training to grade.
+            * valid_scores (dict): Maps criterion names to a list of valid scores for that criterion.
             * algorithm_id (unicode): ID of the algorithm used to perform training.

     Raises:
@@ -67,12 +66,19 @@ def get_grading_task_params(grading_workflow_uuid):
         raise AIGradingInternalError(msg)

     try:
+        classifiers = list(classifier_set.classifiers.select_related().all())
         return {
             'essay_text': workflow.essay_text,
-            'classifier_set': classifier_set.classifiers_dict,
-            'course_id': workflow.course_id,
-            'item_id': workflow.item_id,
+            'classifier_set': {
+                classifier.criterion.name: classifier.download_classifier_data()
+                for classifier in classifiers
+            },
             'algorithm_id': workflow.algorithm_id,
+            'valid_scores': {
+                classifier.criterion.name: classifier.valid_scores
+                for classifier in classifiers
+            }
         }
     except (DatabaseError, ClassifierSerializeError, IncompleteClassifierSet, ValueError) as ex:
         msg = (
...
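
With this change, the dict returned by get_grading_task_params() has the shape sketched below; course_id and item_id are gone, and valid_scores is new. Criterion names and values here are illustrative only:

    params = {
        'essay_text': u"Text of the essay submission...",
        'classifier_set': {
            u"vocabulary": {},   # serialized classifier data from download_classifier_data()
            u"grammar": {},
        },
        'algorithm_id': u"example-algorithm-id",
        'valid_scores': {
            u"vocabulary": [0, 1, 2],   # sorted option points for the criterion
            u"grammar": [0, 1, 2],
        },
    }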
@@ -7,6 +7,7 @@ import logging
 import itertools
 from django.conf import settings
 from django.core.files.base import ContentFile
+from django.core.cache import cache
 from django.db import models, transaction, DatabaseError
 from django.utils.timezone import now
 from django.core.exceptions import ObjectDoesNotExist
@@ -266,6 +267,23 @@ class AIClassifier(models.Model):
         """
         return json.loads(self.classifier_data.read())  # pylint:disable=E1101

+    @property
+    def valid_scores(self):
+        """
+        Return a list of valid scores for the rubric criterion associated
+        with this classifier.
+
+        Returns:
+            list of integer scores, in ascending order.
+        """
+        cache_key = u"openassessment.assessment.ai.classifier.{pk}.valid_scores".format(pk=self.pk)
+        valid_scores = cache.get(cache_key)
+        if valid_scores is None:
+            valid_scores = sorted([option.points for option in self.criterion.options.all()])
+            cache.set(cache_key, valid_scores)
+        return valid_scores
+

 class AIWorkflow(models.Model):
     """
...
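
The new valid_scores property is a simple cache-aside: compute the sorted option points once per classifier, then serve them from Django's cache (cache.set is called without a timeout, so the backend's default timeout applies). The same pattern sketched standalone, with a plain dict standing in for django.core.cache.cache and a callable standing in for the rubric-option query:

    _cache = {}   # stand-in for django.core.cache.cache

    def valid_scores_for(classifier_pk, fetch_points):
        # fetch_points is a callable standing in for the criterion.options.all() query
        key = u"openassessment.assessment.ai.classifier.{pk}.valid_scores".format(pk=classifier_pk)
        scores = _cache.get(key)
        if scores is None:
            scores = sorted(fetch_points())   # one database hit on a cache miss
            _cache[key] = scores              # later calls are served from the cache
        return scores

    print(valid_scores_for(1, lambda: [2, 0, 1]))  # -> [0, 1, 2]
    print(valid_scores_for(1, lambda: [9, 9, 9]))  # -> [0, 1, 2]  (cache hit; the query is skipped)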
@@ -243,9 +243,7 @@ class Rubric(models.Model):
         # Find the IDs for the options matching the specified point value
         option_id_set = set()
         for criterion_name, option_points in criterion_points.iteritems():
-            if (criterion_name in rubric_points_dict and
-                option_points in rubric_points_dict[criterion_name]
-            ):
+            if (criterion_name in rubric_points_dict and option_points in rubric_points_dict[criterion_name]):
                 option_id = rubric_points_dict[criterion_name][option_points]
                 option_id_set.add(option_id)
             else:
...
@@ -226,11 +226,22 @@ class AIWorkerGradingTest(CacheResetTest):
             'essay_text': ANSWER,
             'classifier_set': CLASSIFIERS,
             'algorithm_id': ALGORITHM_ID,
-            'course_id': STUDENT_ITEM.get('course_id'),
-            'item_id': STUDENT_ITEM.get('item_id')
+            'valid_scores': {
+                u"vøȼȺƀᵾłȺɍɏ": [0, 1, 2],
+                u"ﻭɼค๓๓คɼ": [0, 1, 2]
+            }
         }
         self.assertItemsEqual(params, expected_params)

+    def test_get_grading_task_params_num_queries(self):
+        with self.assertNumQueries(5):
+            ai_worker_api.get_grading_task_params(self.workflow_uuid)
+
+        # The second time through we should be caching the queries
+        # to determine the valid scores for a classifier
+        with self.assertNumQueries(3):
+            ai_worker_api.get_grading_task_params(self.workflow_uuid)
+
     def test_get_grading_task_params_no_workflow(self):
         with self.assertRaises(AIGradingRequestError):
             ai_worker_api.get_grading_task_params("invalid_uuid")
...
@@ -3,6 +3,7 @@
 Tests for AI worker tasks.
 """
 from contextlib import contextmanager
+import itertools
 import mock
 from django.test.utils import override_settings
 from submissions import api as sub_api
@@ -45,15 +46,30 @@ class ErrorStubAIAlgorithm(AIAlgorithm):
         raise ScoreError("Test error!")


+class InvalidScoreAlgorithm(AIAlgorithm):
+    """
+    Stub implementation that returns a score that isn't in the rubric.
+    """
+    SCORE_CYCLE = itertools.cycle([-100, 0.7, 1.2, 100])
+
+    def train_classifier(self, examples):
+        return {}
+
+    def score(self, text, classifier):
+        return self.SCORE_CYCLE.next()
+
+
 ALGORITHM_ID = u"test-stub"
 ERROR_STUB_ALGORITHM_ID = u"error-stub"
 UNDEFINED_CLASS_ALGORITHM_ID = u"undefined_class"
 UNDEFINED_MODULE_ALGORITHM_ID = u"undefined_module"
+INVALID_SCORE_ALGORITHM_ID = u"invalid_score"

 AI_ALGORITHMS = {
     ALGORITHM_ID: '{module}.StubAIAlgorithm'.format(module=__name__),
     ERROR_STUB_ALGORITHM_ID: '{module}.ErrorStubAIAlgorithm'.format(module=__name__),
     UNDEFINED_CLASS_ALGORITHM_ID: '{module}.NotDefinedAIAlgorithm'.format(module=__name__),
-    UNDEFINED_MODULE_ALGORITHM_ID: 'openassessment.not.valid.NotDefinedAIAlgorithm'
+    UNDEFINED_MODULE_ALGORITHM_ID: 'openassessment.not.valid.NotDefinedAIAlgorithm',
+    INVALID_SCORE_ALGORITHM_ID: '{module}.InvalidScoreAlgorithm'.format(module=__name__),
 }
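
The InvalidScoreAlgorithm stub hands out scores from a fixed rotation, so with two criteria per essay the first grade_essay() call consumes the first two values and the second call the next two; that is what the expected_scores in the test further below are checking. A standalone sketch of the rotation (the stub uses the Python 2 .next() method; the builtin next() used here behaves the same):

    import itertools

    score_cycle = itertools.cycle([-100, 0.7, 1.2, 100])
    first_call = [next(score_cycle), next(score_cycle)]    # [-100, 0.7] -> snapped to 0 and 1
    second_call = [next(score_cycle), next(score_cycle)]   # [1.2, 100]  -> snapped to 1 and 2
    print(first_call, second_call)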
@@ -109,9 +125,7 @@ class AITrainingTaskTest(CeleryTaskTest):
         Create a training workflow in the database.
         """
         examples = deserialize_training_examples(EXAMPLES, RUBRIC)
         workflow = AITrainingWorkflow.start_workflow(examples, self.COURSE_ID, self.ITEM_ID, self.ALGORITHM_ID)
         self.workflow_uuid = workflow.uuid

     def test_unknown_algorithm(self):
@@ -252,6 +266,32 @@ class AIGradingTaskTest(CeleryTaskTest):
         workflow.classifier_set = classifier_set
         workflow.save()

+    @mock.patch('openassessment.assessment.api.ai_worker.create_assessment')
+    @override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
+    def test_algorithm_gives_invalid_score(self, mock_create_assessment):
+        # If an algorithm provides a score that isn't in the rubric,
+        # we should choose the closest valid score.
+        self._set_algorithm_id(INVALID_SCORE_ALGORITHM_ID)
+
+        # The first score given by the algorithm should be below the minimum valid score
+        # The second score will be between two valid scores (0 and 1), rounding up
+        grade_essay(self.workflow_uuid)
+        expected_scores = {
+            u"vøȼȺƀᵾłȺɍɏ": 0,
+            u"ﻭɼค๓๓คɼ": 1
+        }
+        mock_create_assessment.assert_called_with(self.workflow_uuid, expected_scores)
+
+        # The third score will be between two valid scores (1 and 2), rounding down
+        # The final score will be greater than the maximum score
+        self._reset_workflow()
+        grade_essay(self.workflow_uuid)
+        expected_scores = {
+            u"vøȼȺƀᵾłȺɍɏ": 1,
+            u"ﻭɼค๓๓คɼ": 2
+        }
+        mock_create_assessment.assert_called_with(self.workflow_uuid, expected_scores)
+
     @mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
     @override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
     def test_retrieve_params_error(self, mock_call):
@@ -277,6 +317,39 @@
         with self.assert_retry(grade_essay, AIGradingInternalError):
             grade_essay(self.workflow_uuid)

+    @mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
+    @override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
+    def test_params_missing_criterion_for_valid_scores(self, mock_call):
+        mock_call.return_value = {
+            'essay_text': 'test',
+            'classifier_set': {
+                u"vøȼȺƀᵾłȺɍɏ": {},
+                u"ﻭɼค๓๓คɼ": {}
+            },
+            'algorithm_id': ALGORITHM_ID,
+            'valid_scores': {}
+        }
+        with self.assert_retry(grade_essay, AIGradingInternalError):
+            grade_essay(self.workflow_uuid)
+
+    @mock.patch('openassessment.assessment.worker.grading.ai_worker_api.get_grading_task_params')
+    @override_settings(ORA2_AI_ALGORITHMS=AI_ALGORITHMS)
+    def test_params_valid_scores_empty_list(self, mock_call):
+        mock_call.return_value = {
+            'essay_text': 'test',
+            'classifier_set': {
+                u"vøȼȺƀᵾłȺɍɏ": {},
+                u"ﻭɼค๓๓คɼ": {}
+            },
+            'algorithm_id': ALGORITHM_ID,
+            'valid_scores': {
+                u"vøȼȺƀᵾłȺɍɏ": [],
+                u"ﻭɼค๓๓คɼ": [0, 1, 2]
+            }
+        }
+        with self.assert_retry(grade_essay, AIGradingInternalError):
+            grade_essay(self.workflow_uuid)
+
     def _set_algorithm_id(self, algorithm_id):
         """
         Override the default algorithm ID for the grading workflow.
@@ -291,3 +364,12 @@
         workflow = AIGradingWorkflow.objects.get(uuid=self.workflow_uuid)
         workflow.algorithm_id = algorithm_id
         workflow.save()
+
+    def _reset_workflow(self):
+        """
+        Reset the workflow so we can re-use it.
+        """
+        workflow = AIGradingWorkflow.objects.get(uuid=self.workflow_uuid)
+        workflow.completed_at = None
+        workflow.assessment = None
+        workflow.save()
@@ -10,7 +10,7 @@ from celery.utils.log import get_task_logger
 from dogapi import dog_stats_api
 from openassessment.assessment.api import ai_worker as ai_worker_api
 from openassessment.assessment.errors import (
-    AIError, AIGradingInternalError, AIGradingRequestError, AIReschedulingInternalError, ANTICIPATED_CELERY_ERRORS
+    AIError, AIGradingInternalError, AIReschedulingInternalError, ANTICIPATED_CELERY_ERRORS
 )
 from .algorithm import AIAlgorithm, AIAlgorithmError
 from openassessment.assessment.models.ai import AIClassifierSet, AIGradingWorkflow
@@ -54,6 +54,7 @@ def grade_essay(workflow_uuid):
         essay_text = params['essay_text']
         classifier_set = params['classifier_set']
         algorithm_id = params['algorithm_id']
+        valid_scores = params['valid_scores']
     except (AIError, KeyError):
         msg = (
             u"An error occurred while retrieving the AI grading task "
@@ -62,6 +63,23 @@
         logger.exception(msg)
         raise grade_essay.retry()

+    # Validate that we have valid scores for each criterion
+    for criterion_name in classifier_set.keys():
+        msg = None
+        if criterion_name not in valid_scores:
+            msg = (
+                u"Could not find {criterion} in the list of valid scores "
+                u"for grading workflow with UUID {uuid}"
+            ).format(criterion=criterion_name, uuid=workflow_uuid)
+        elif len(valid_scores[criterion_name]) == 0:
+            msg = (
+                u"Valid scores for {criterion} is empty for "
+                u"grading workflow with UUID {uuid}"
+            ).format(criterion=criterion_name, uuid=workflow_uuid)
+        if msg:
+            logger.exception(msg)
+            raise AIGradingInternalError(msg)
+
     # Retrieve the AI algorithm
     try:
         algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
@@ -76,7 +94,10 @@
     # Use the algorithm to evaluate the essay for each criterion
     try:
         scores_by_criterion = {
-            criterion_name: algorithm.score(essay_text, classifier)
+            criterion_name: _closest_valid_score(
+                algorithm.score(essay_text, classifier),
+                valid_scores[criterion_name]
+            )
             for criterion_name, classifier in classifier_set.iteritems()
         }
     except AIAlgorithmError:
@@ -222,6 +243,35 @@ def reschedule_grading_tasks(course_id, item_id):
             raise reschedule_grading_tasks.retry()

+def _closest_valid_score(score, valid_scores):
+    """
+    Return the closest valid score for a given score.
+    This is necessary, since rubric scores may be non-contiguous.
+
+    Args:
+        score (int or float): The score assigned by the algorithm.
+        valid_scores (list of int): Valid scores for this criterion,
+            assumed to be sorted in ascending order.
+
+    Returns:
+        int
+    """
+    # If the score is already valid, return it
+    if score in valid_scores:
+        return score
+
+    # Otherwise, find the closest score in the list.
+    closest = valid_scores[0]
+    delta = abs(score - closest)
+    for valid in valid_scores[1:]:
+        new_delta = abs(score - valid)
+        if new_delta < delta:
+            closest = valid
+            delta = new_delta
+    return closest
+

 def _log_start_reschedule_grading(course_id=None, item_id=None):
     """
     Sends data about the rescheduling_grading task to datadog
...
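
A few sample inputs for the new helper, assuming the _closest_valid_score definition added above. Note the tie case, which the tests do not exercise: the loop only replaces the current best on a strictly smaller delta, so a score exactly halfway between two valid scores keeps the lower one:

    print(_closest_valid_score(1, [0, 1, 2]))     # -> 1   (already valid, returned as-is)
    print(_closest_valid_score(0.7, [0, 1, 2]))   # -> 1
    print(_closest_valid_score(1.2, [0, 1, 2]))   # -> 1
    print(_closest_valid_score(0.5, [0, 1, 2]))   # -> 0   (tie keeps the lower score)
    print(_closest_valid_score(-100, [0, 3]))     # -> 0   (non-contiguous rubric points work too)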