Fall back to classifier sets with the same course/item and rubric structure

3ba058d2 · Will Daly · cfbd98bd · 3ba058d2 · 3ba058d2
Commit 3ba058d2 authored Jun 04, 2014 by Will Daly
Hide whitespace changes
Inline Side-by-side

Showing with 150 additions and 29 deletions

apps/openassessment/assessment/models/ai.py
+55 -28

apps/openassessment/assessment/test/test_ai_models.py
+95 -1

No files found.
--- a/apps/openassessment/assessment/models/ai.py
+++ b/apps/openassessment/assessment/models/ai.py
@@ -544,16 +544,29 @@ class AIGradingWorkflow(AIWorkflow):
    # this information here from the submissions models.
    student_id = models.CharField(max_length=40, db_index=True)
-    @transaction.commit_on_success
    def assign_most_recent_classifier_set(self):
        """
        Finds the most relevant classifier set based on the following line of succession:
-            1 -- Classifier sets with the same COURSE, ITEM, RUBRIC and ALGORITHM
+            1 -- Classifier sets with the same COURSE, ITEM, RUBRIC *content* hash, and ALGORITHM
+                - Newest first.  If none exist...
+            2 -- Classifier sets with the same COURSE, ITEM, RUBRIC *structure* hash, and ALGORITHM
                - Newest first.  If none exist...
-            2 -- The newest classifier set with the same RUBRIC and ALGORITHM
+            3 -- Classifier sets with the same RUBRIC *content* hash and ALGORITHM (any COURSE or ITEM)
                - Newest first.  If none exist...
-            3 -- Do no assignment and return False
+            4 -- Do no assignment and return False
+        Case #1 is ideal: we get a classifier set trained for the rubric as currently defined.
+        Case #2 handles when a course author makes a cosmetic change to a rubric after training.
+            We don't want to stop grading students because an author fixed a typo!
+        Case #3 handles problems that are duplicated, such as the default problem prompt.
+            If we've already trained classifiers for the identical rubric somewhere else,
+            then the author can use them to test out the feature immediately.
+        Case #4: Someone will need to schedule training; however, we will still accept
+            student submissions and grade them once training completes.
        Returns:
            (bool) True if a classifier set was assigned to this AIGradingWorkflow, False otherwise
@@ -561,34 +574,48 @@ class AIGradingWorkflow(AIWorkflow):
        Raises:
            DatabaseError
        """
-        # Retrieve classifier set ideal candidates (Match on all fields)
+        # List of the parameters we will search for, in order of decreasing priority
-        classifier_set_candidates = AIClassifierSet.objects.filter(
+        search_parameters = [
-            rubric=self.rubric, algorithm_id=self.algorithm_id,
+            # Case #1: same course / item / rubric (exact) / algorithm
-            course_id=self.course_id, item_id=self.item_id
+            {
-        )[:1]
+                'rubric__content_hash': self.rubric.content_hash,
+                'algorithm_id': self.algorithm_id,
-        # If we find classifiers for this rubric/algorithm/course/item
+                'course_id': self.course_id,
-        # then associate the most recent classifiers with it and return true
+                'item_id': self.item_id
-        if len(classifier_set_candidates) > 0:
+            },
-            self.classifier_set = classifier_set_candidates[0]
-            self.save()
+            # Case #2: same course / item / rubric (structure only) / algorithm
-            return True
+            {
+                'rubric__structure_hash': self.rubric.structure_hash,  # pylint: disable=E1101
-        # Retrieve classifier set candidates (non-ideal, but good enough)
+                'algorithm_id': self.algorithm_id,
-        classifier_set_candidates = AIClassifierSet.objects.filter(
+                'course_id': self.course_id,
-            rubric=self.rubric, algorithm_id=self.algorithm_id
+                'item_id': self.item_id
-        )[:1]
+            },
-        # If found, associate non-ideal classifier set with AIGradingWorkflow
+            # Case #3: same rubric (exact) / algorithm
-        if len(classifier_set_candidates) > 0:
+            {
-            self.classifier_set = classifier_set_candidates[0]
+                'rubric__content_hash': self.rubric.content_hash,
-            self.save()
+                'algorithm_id': self.algorithm_id
-            return True
+            }
+        ]
+        # Perform each query, starting with the highest priority
+        for params in search_parameters:
+            # Retrieve the most recent classifier set that matches our query
+            # (rely on implicit ordering in the model definition)
+            classifier_set_candidates = AIClassifierSet.objects.filter(**params)[:1]
+            # If we find a matching classifier set,
+            # then associate it with this workflow and return True
+            if len(classifier_set_candidates) > 0:
+                self.classifier_set = classifier_set_candidates[0]
+                self.save()
+                return True
        # If we get to this point, no classifiers exist with this rubric and algorithm.
        return False
    @classmethod
    @transaction.commit_on_success
    def start_workflow(cls, submission_uuid, rubric_dict, algorithm_id):

--- a/apps/openassessment/assessment/test/test_ai_models.py
+++ b/apps/openassessment/assessment/test/test_ai_models.py
@@ -2,10 +2,11 @@
 """
 Test AI Django models.
 """
+import copy
 from django.test.utils import override_settings
 from openassessment.test_utils import CacheResetTest
 from openassessment.assessment.models import (
-    AIClassifierSet, AIClassifier, AI_CLASSIFIER_STORAGE
+    AIClassifierSet, AIClassifier, AIGradingWorkflow, AI_CLASSIFIER_STORAGE
 )
 from openassessment.assessment.serializers import rubric_from_dict
 from .constants import RUBRIC
@@ -48,3 +49,96 @@ class AIClassifierTest(CacheResetTest):
            self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID
        )
        return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
+class AIGradingWorkflowTest(CacheResetTest):
+    """
+    Tests for the AIGradingWorkflow model.
+    """
+    CLASSIFIERS_DICT = {
+        u"vøȼȺƀᵾłȺɍɏ": "test data",
+        u"ﻭɼค๓๓คɼ": "more test data"
+    }
+    COURSE_ID = u"test"
+    ITEM_ID = u"test"
+    ALGORITHM_ID = "test"
+    def setUp(self):
+        """
+        Create a new grading workflow.
+        """
+        self.rubric = rubric_from_dict(RUBRIC)
+        self.workflow = AIGradingWorkflow.objects.create(
+            submission_uuid='test', essay_text='test',
+            rubric=self.rubric, algorithm_id=self.ALGORITHM_ID,
+            item_id=self.ITEM_ID, course_id=self.COURSE_ID
+        )
+        # Create a rubric with a similar structure, but different prompt
+        similar_rubric_dict = copy.deepcopy(RUBRIC)
+        similar_rubric_dict['prompt'] = 'Different prompt!'
+        self.similar_rubric = rubric_from_dict(similar_rubric_dict)
+    def test_assign_most_recent_classifier_set(self):
+        # No classifier sets are available
+        found = self.workflow.assign_most_recent_classifier_set()
+        self.assertFalse(found)
+        self.assertIs(self.workflow.classifier_set, None)
+        # Same rubric (exact), but different course id
+        classifier_set = AIClassifierSet.create_classifier_set(
+            self.CLASSIFIERS_DICT, self.rubric, self.ALGORITHM_ID,
+            "different course!", self.ITEM_ID
+        )
+        found = self.workflow.assign_most_recent_classifier_set()
+        self.assertTrue(found)
+        self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
+        # Same rubric (exact) but different item id
+        classifier_set = AIClassifierSet.create_classifier_set(
+            self.CLASSIFIERS_DICT, self.rubric, self.ALGORITHM_ID,
+            self.COURSE_ID, "different item!"
+        )
+        found = self.workflow.assign_most_recent_classifier_set()
+        self.assertTrue(found)
+        self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
+        # Same rubric (exact), but different algorithm id
+        # Shouldn't change, since the algorithm ID doesn't match
+        AIClassifierSet.create_classifier_set(
+            self.CLASSIFIERS_DICT, self.rubric, "different algorithm!",
+            self.COURSE_ID, self.ITEM_ID
+        )
+        found = self.workflow.assign_most_recent_classifier_set()
+        self.assertTrue(found)
+        self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
+        # Same rubric *structure*, but in a different item
+        # Shouldn't change, since the rubric isn't an exact match.
+        AIClassifierSet.create_classifier_set(
+            self.CLASSIFIERS_DICT, self.similar_rubric, self.ALGORITHM_ID,
+            self.COURSE_ID, "different item!"
+        )
+        found = self.workflow.assign_most_recent_classifier_set()
+        self.assertTrue(found)
+        self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
+        # Same rubric *structure* AND in the same course/item
+        # This should replace our current classifier set
+        classifier_set = AIClassifierSet.create_classifier_set(
+            self.CLASSIFIERS_DICT, self.similar_rubric, self.ALGORITHM_ID,
+            self.COURSE_ID, self.ITEM_ID
+        )
+        found = self.workflow.assign_most_recent_classifier_set()
+        self.assertTrue(found)
+        self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)
+        # Same rubric and same course/item
+        # This is the ideal, so we should always prefer it
+        classifier_set = AIClassifierSet.create_classifier_set(
+            self.CLASSIFIERS_DICT, self.rubric, self.ALGORITHM_ID,
+            self.COURSE_ID, self.ITEM_ID
+        )
+        found = self.workflow.assign_most_recent_classifier_set()
+        self.assertTrue(found)
+        self.assertEqual(classifier_set.pk, self.workflow.classifier_set.pk)