Commit 3ba058d2 by Will Daly

Fall back to classifier sets with the same course/item and rubric structure

parent cfbd98bd
...@@ -544,16 +544,29 @@ class AIGradingWorkflow(AIWorkflow): ...@@ -544,16 +544,29 @@ class AIGradingWorkflow(AIWorkflow):
# this information here from the submissions models. # this information here from the submissions models.
student_id = models.CharField(max_length=40, db_index=True) student_id = models.CharField(max_length=40, db_index=True)
@transaction.commit_on_success
def assign_most_recent_classifier_set(self): def assign_most_recent_classifier_set(self):
""" """
Finds the most relevant classifier set based on the following line of succession: Finds the most relevant classifier set based on the following line of succession:
1 -- Classifier sets with the same COURSE, ITEM, RUBRIC and ALGORITHM 1 -- Classifier sets with the same COURSE, ITEM, RUBRIC *content* hash, and ALGORITHM
- Newest first. If none exist...
2 -- Classifier sets with the same COURSE, ITEM, and RUBRIC *structure* hash, and ALGORITHM.
- Newest first. If none exist... - Newest first. If none exist...
2 -- The newest classifier set with the same RUBRIC and ALGORITHM 3 -- The newest classifier set with the same RUBRIC and ALGORITHM
- Newest first. If none exist... - Newest first. If none exist...
3 -- Do no assignment and return False 4 -- Do no assignment and return False
Case #1 is ideal: we get a classifier set trained for the rubric as currently defined.
Case #2 handles when a course author makes a cosmetic change to a rubric after training.
We don't want to stop grading students because an author fixed a typo!
Case #3 handles problems that are duplicated, such as the default problem prompt.
If we've already trained classifiers for the identical rubric somewhere else,
then the author can use them to test out the feature immediately.
Case #4: Someone will need to schedule training; however, we will still accept
student submissions and grade them once training completes.
Returns: Returns:
(bool) indicates whether or not classifiers were able to be assigned to the AIGradingWorkflow (bool) indicates whether or not classifiers were able to be assigned to the AIGradingWorkflow
...@@ -561,34 +574,48 @@ class AIGradingWorkflow(AIWorkflow): ...@@ -561,34 +574,48 @@ class AIGradingWorkflow(AIWorkflow):
Raises: Raises:
DatabaseError DatabaseError
""" """
# Retrieve classifier set ideal candidates (Match on all fields) # List of the parameters we will search for, in order of decreasing priority
classifier_set_candidates = AIClassifierSet.objects.filter( search_parameters = [
rubric=self.rubric, algorithm_id=self.algorithm_id, # Case #1: same course / item / rubric (exact) / algorithm
course_id=self.course_id, item_id=self.item_id {
)[:1] 'rubric__content_hash': self.rubric.content_hash,
'algorithm_id': self.algorithm_id,
# If we find classifiers for this rubric/algorithm/course/item 'course_id': self.course_id,
# then associate the most recent classifiers with it and return true 'item_id': self.item_id
if len(classifier_set_candidates) > 0: },
self.classifier_set = classifier_set_candidates[0]
self.save() # Case #2: same course / item / rubric (structure only) / algorithm
return True {
'rubric__structure_hash': self.rubric.structure_hash, # pylint: disable=E1101
# Retrieve classifier set candidates (non-ideal, but good enough) 'algorithm_id': self.algorithm_id,
classifier_set_candidates = AIClassifierSet.objects.filter( 'course_id': self.course_id,
rubric=self.rubric, algorithm_id=self.algorithm_id 'item_id': self.item_id
)[:1] },
# If found, associate non-ideal classifier set with AIGradingWorkflow # Case #3: same rubric (exact) / algorithm
if len(classifier_set_candidates) > 0: {
self.classifier_set = classifier_set_candidates[0] 'rubric__content_hash': self.rubric.content_hash,
self.save() 'algorithm_id': self.algorithm_id
return True }
]
# Perform each query, starting with the highest priority
for params in search_parameters:
# Retrieve the most recent classifier set that matches our query
# (rely on implicit ordering in the model definition)
classifier_set_candidates = AIClassifierSet.objects.filter(**params)[:1]
# If we find a classifier set,
# then associate the most recent classifiers with it and return true
if len(classifier_set_candidates) > 0:
self.classifier_set = classifier_set_candidates[0]
self.save()
return True
# If we get to this point, no classifiers exist with this rubric and algorithm. # If we get to this point, no classifiers exist with this rubric and algorithm.
return False return False
@classmethod @classmethod
@transaction.commit_on_success @transaction.commit_on_success
def start_workflow(cls, submission_uuid, rubric_dict, algorithm_id): def start_workflow(cls, submission_uuid, rubric_dict, algorithm_id):
......
...@@ -2,10 +2,11 @@ ...@@ -2,10 +2,11 @@
""" """
Test AI Django models. Test AI Django models.
""" """
import copy
from django.test.utils import override_settings from django.test.utils import override_settings
from openassessment.test_utils import CacheResetTest from openassessment.test_utils import CacheResetTest
from openassessment.assessment.models import ( from openassessment.assessment.models import (
AIClassifierSet, AIClassifier, AI_CLASSIFIER_STORAGE AIClassifierSet, AIClassifier, AIGradingWorkflow, AI_CLASSIFIER_STORAGE
) )
from openassessment.assessment.serializers import rubric_from_dict from openassessment.assessment.serializers import rubric_from_dict
from .constants import RUBRIC from .constants import RUBRIC
...@@ -48,3 +49,96 @@ class AIClassifierTest(CacheResetTest): ...@@ -48,3 +49,96 @@ class AIClassifierTest(CacheResetTest):
self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID
) )
return AIClassifier.objects.filter(classifier_set=classifier_set)[0] return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
class AIGradingWorkflowTest(CacheResetTest):
    """
    Exercise classifier-set selection on the AIGradingWorkflow model.
    """

    CLASSIFIERS_DICT = {
        u"vøȼȺƀᵾłȺɍɏ": "test data",
        u"ﻭɼค๓๓คɼ": "more test data"
    }
    COURSE_ID = u"test"
    ITEM_ID = u"test"
    ALGORITHM_ID = "test"

    def setUp(self):
        """
        Build the workflow under test and a second rubric that differs
        from the first only in its prompt.
        """
        # NOTE(review): the parent setUp is not invoked here -- confirm that
        # CacheResetTest does not depend on it being called.
        self.rubric = rubric_from_dict(RUBRIC)
        self.workflow = AIGradingWorkflow.objects.create(
            submission_uuid='test',
            essay_text='test',
            rubric=self.rubric,
            algorithm_id=self.ALGORITHM_ID,
            item_id=self.ITEM_ID,
            course_id=self.COURSE_ID
        )

        # Copy the rubric definition and change only the prompt; the test
        # below treats the result as "same structure, different content".
        altered_rubric_dict = copy.deepcopy(RUBRIC)
        altered_rubric_dict['prompt'] = 'Different prompt!'
        self.similar_rubric = rubric_from_dict(altered_rubric_dict)

    def _train(self, rubric, algorithm_id, course_id, item_id):
        """
        Create and return a classifier set built from the shared
        CLASSIFIERS_DICT fixture for the given rubric/algorithm/course/item.
        """
        return AIClassifierSet.create_classifier_set(
            self.CLASSIFIERS_DICT, rubric, algorithm_id, course_id, item_id
        )

    def _assert_assigned(self, expected_set):
        """
        Re-run classifier assignment on the workflow and check that it
        reports success and now points at `expected_set`.
        """
        was_assigned = self.workflow.assign_most_recent_classifier_set()
        self.assertTrue(was_assigned)
        self.assertEqual(expected_set.pk, self.workflow.classifier_set.pk)

    def test_assign_most_recent_classifier_set(self):
        # Before any classifier set exists, assignment must fail
        # and leave the workflow without a classifier set.
        was_assigned = self.workflow.assign_most_recent_classifier_set()
        self.assertFalse(was_assigned)
        self.assertIsNone(self.workflow.classifier_set)

        # Exact rubric, but trained for a different course:
        # expected to be picked up.
        expected = self._train(
            self.rubric, self.ALGORITHM_ID, "different course!", self.ITEM_ID
        )
        self._assert_assigned(expected)

        # Exact rubric, but trained for a different item:
        # created later, so it is expected to take over.
        expected = self._train(
            self.rubric, self.ALGORITHM_ID, self.COURSE_ID, "different item!"
        )
        self._assert_assigned(expected)

        # Exact rubric with a different algorithm id:
        # the algorithm does not match, so the assignment must not change.
        self._train(
            self.rubric, "different algorithm!", self.COURSE_ID, self.ITEM_ID
        )
        self._assert_assigned(expected)

        # Similar rubric (structure only) in a different item:
        # not an exact rubric match, so the assignment must not change.
        self._train(
            self.similar_rubric, self.ALGORITHM_ID, self.COURSE_ID, "different item!"
        )
        self._assert_assigned(expected)

        # Similar rubric (structure only) in the same course/item:
        # this one should replace the current classifier set.
        expected = self._train(
            self.similar_rubric, self.ALGORITHM_ID, self.COURSE_ID, self.ITEM_ID
        )
        self._assert_assigned(expected)

        # Exact rubric in the same course/item:
        # the ideal match, which should always be preferred.
        expected = self._train(
            self.rubric, self.ALGORITHM_ID, self.COURSE_ID, self.ITEM_ID
        )
        self._assert_assigned(expected)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment