Commit 3ba058d2 by Will Daly

Fall back to classifier sets with the same course/item and rubric structure

parent cfbd98bd
......@@ -544,16 +544,29 @@ class AIGradingWorkflow(AIWorkflow):
# this information here from the submissions models.
student_id = models.CharField(max_length=40, db_index=True)
@transaction.commit_on_success
def assign_most_recent_classifier_set(self):
    """
    Find the most relevant classifier set and associate it with this workflow.

    Candidates are searched in the following line of succession:

        1 -- Classifier sets with the same COURSE, ITEM, RUBRIC *content* hash, and ALGORITHM
            - Newest first.  If none exist...
        2 -- Classifier sets with the same COURSE, ITEM, and RUBRIC *structure* hash, and ALGORITHM.
            - Newest first.  If none exist...
        3 -- The newest classifier set with the same RUBRIC and ALGORITHM
            - Newest first.  If none exist...
        4 -- Do no assignment and return False

    Case #1 is ideal: we get a classifier set trained for the rubric as currently defined.

    Case #2 handles when a course author makes a cosmetic change to a rubric after training.
        We don't want to stop grading students because an author fixed a typo!

    Case #3 handles problems that are duplicated, such as the default problem prompt.
        If we've already trained classifiers for the identical rubric somewhere else,
        then the author can use them to test out the feature immediately.

    Case #4: Someone will need to schedule training; however, we will still accept
        student submissions and grade them once training completes.

    Returns:
        (bool) indicates whether or not classifiers were able to be assigned to the AIGradingWorkflow

    Raises:
        DatabaseError

    """
    # List of the parameters we will search for, in order of decreasing priority.
    # The order matters: the first query that yields a result wins, so a looser
    # match must never be listed ahead of a stricter one.
    search_parameters = [
        # Case #1: same course / item / rubric (exact) / algorithm
        {
            'rubric__content_hash': self.rubric.content_hash,
            'algorithm_id': self.algorithm_id,
            'course_id': self.course_id,
            'item_id': self.item_id
        },

        # Case #2: same course / item / rubric (structure only) / algorithm
        {
            'rubric__structure_hash': self.rubric.structure_hash,  # pylint: disable=E1101
            'algorithm_id': self.algorithm_id,
            'course_id': self.course_id,
            'item_id': self.item_id
        },

        # Case #3: same rubric (exact) / algorithm
        {
            'rubric__content_hash': self.rubric.content_hash,
            'algorithm_id': self.algorithm_id
        }
    ]

    # Perform each query, starting with the highest priority
    for params in search_parameters:

        # Retrieve the most recent classifier set that matches our query
        # (rely on implicit ordering in the model definition)
        classifier_set_candidates = AIClassifierSet.objects.filter(**params)[:1]

        # If we find a classifier set,
        # then associate the most recent classifiers with it and return true
        if len(classifier_set_candidates) > 0:
            self.classifier_set = classifier_set_candidates[0]
            self.save()
            return True

    # If we get to this point, no classifiers exist with this rubric and algorithm.
    return False
@classmethod
@transaction.commit_on_success
def start_workflow(cls, submission_uuid, rubric_dict, algorithm_id):
......
......@@ -2,10 +2,11 @@
"""
Test AI Django models.
"""
import copy
from django.test.utils import override_settings
from openassessment.test_utils import CacheResetTest
from openassessment.assessment.models import (
AIClassifierSet, AIClassifier, AI_CLASSIFIER_STORAGE
AIClassifierSet, AIClassifier, AIGradingWorkflow, AI_CLASSIFIER_STORAGE
)
from openassessment.assessment.serializers import rubric_from_dict
from .constants import RUBRIC
......@@ -48,3 +49,96 @@ class AIClassifierTest(CacheResetTest):
self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID
)
return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
class AIGradingWorkflowTest(CacheResetTest):
    """
    Exercise the classifier-set fallback logic of the AIGradingWorkflow model.
    """

    CLASSIFIERS_DICT = {
        u"vøȼȺƀᵾłȺɍɏ": "test data",
        u"ﻭɼค๓๓คɼ": "more test data"
    }
    COURSE_ID = u"test"
    ITEM_ID = u"test"
    ALGORITHM_ID = "test"

    def setUp(self):
        """
        Build the fixtures: a rubric, a grading workflow that uses it,
        and a second rubric that differs from the first only in its prompt.
        """
        self.rubric = rubric_from_dict(RUBRIC)
        self.workflow = AIGradingWorkflow.objects.create(
            submission_uuid='test',
            essay_text='test',
            rubric=self.rubric,
            algorithm_id=self.ALGORITHM_ID,
            item_id=self.ITEM_ID,
            course_id=self.COURSE_ID,
        )

        # Same rubric definition as above, except for the prompt text
        reworded = copy.deepcopy(RUBRIC)
        reworded['prompt'] = 'Different prompt!'
        self.similar_rubric = rubric_from_dict(reworded)

    def _create_set(self, rubric, algorithm_id, course_id, item_id):
        """
        Create and return a classifier set built from the shared test classifiers.
        """
        return AIClassifierSet.create_classifier_set(
            self.CLASSIFIERS_DICT, rubric, algorithm_id, course_id, item_id
        )

    def _assert_assigned(self, expected_set):
        """
        Re-run the assignment and check that `expected_set` is the one selected.
        """
        was_found = self.workflow.assign_most_recent_classifier_set()
        self.assertTrue(was_found)
        self.assertEqual(expected_set.pk, self.workflow.classifier_set.pk)

    def test_assign_most_recent_classifier_set(self):
        # Nothing has been created yet, so nothing can be assigned
        was_found = self.workflow.assign_most_recent_classifier_set()
        self.assertFalse(was_found)
        self.assertIs(self.workflow.classifier_set, None)

        # Exact rubric, but created for another course
        expected = self._create_set(
            self.rubric, self.ALGORITHM_ID, "different course!", self.ITEM_ID
        )
        self._assert_assigned(expected)

        # Exact rubric, but created for another item
        expected = self._create_set(
            self.rubric, self.ALGORITHM_ID, self.COURSE_ID, "different item!"
        )
        self._assert_assigned(expected)

        # Exact rubric, but another algorithm:
        # the selection must stay put because the algorithm ID doesn't match
        self._create_set(
            self.rubric, "different algorithm!", self.COURSE_ID, self.ITEM_ID
        )
        self._assert_assigned(expected)

        # Matching rubric *structure* only, in another item:
        # the selection must stay put because the rubric isn't an exact match
        self._create_set(
            self.similar_rubric, self.ALGORITHM_ID, self.COURSE_ID, "different item!"
        )
        self._assert_assigned(expected)

        # Matching rubric *structure* in the same course/item:
        # this one takes over from the current selection
        expected = self._create_set(
            self.similar_rubric, self.ALGORITHM_ID, self.COURSE_ID, self.ITEM_ID
        )
        self._assert_assigned(expected)

        # Exact rubric in the same course/item:
        # the ideal match, which must always be preferred
        expected = self._create_set(
            self.rubric, self.ALGORITHM_ID, self.COURSE_ID, self.ITEM_ID
        )
        self._assert_assigned(expected)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment