Expanded logic around classifier assignment to AIGradingWorkflows

325f4f19 · gradyward · 3cbd6b7b · 325f4f19 · 325f4f19 · 325f4f19
Commit 325f4f19 authored Jun 04, 2014 by gradyward
8 changed files
--- a/apps/openassessment/assessment/migrations/0016_auto__add_field_aiclassifierset_course_id__add_field_aiclassifierset_i.py
+++ b/apps/openassessment/assessment/migrations/0016_auto__add_field_aiclassifierset_course_id__add_field_aiclassifierset_i.py
--- a/apps/openassessment/assessment/models/ai.py
+++ b/apps/openassessment/assessment/models/ai.py
@@ -92,9 +92,16 @@ class AIClassifierSet(models.Model):
    # The ID of the algorithm that was used to train classifiers in this set.
    algorithm_id = models.CharField(max_length=128, db_index=True)

+    # Course Entity and Item Discriminator
+    # Though these items are duplicated in the database tables for the AITrainingWorkflow,
+    # this is okay because it will drastically speed up the operation of assigning classifiers
+    # to AIGradingWorkflows
+    course_id = models.CharField(max_length=40, db_index=True)
+    item_id = models.CharField(max_length=128, db_index=True)
+
    @classmethod
    @transaction.commit_on_success
-    def create_classifier_set(cls, classifiers_dict, rubric, algorithm_id):
+    def create_classifier_set(cls, classifiers_dict, rubric, algorithm_id, course_id, item_id):
        """
        Create a set of classifiers.

@@ -103,6 +110,8 @@ class AIClassifierSet(models.Model):
                JSON-serializable classifiers.
            rubric (Rubric): The rubric model.
            algorithm_id (unicode): The ID of the algorithm used to train the classifiers.
+            course_id (unicode): The ID of the course that the classifier is going to be grading
+            item_id (unicode): The item within the course that the classifier is trained to grade.

        Returns:
            AIClassifierSet
@@ -114,7 +123,9 @@ class AIClassifierSet(models.Model):

        """
        # Create the classifier set
-        classifier_set = cls.objects.create(rubric=rubric, algorithm_id=algorithm_id)
+        classifier_set = cls.objects.create(
+            rubric=rubric, algorithm_id=algorithm_id, item_id=item_id, course_id=course_id
+        )

        # Retrieve the criteria for this rubric,
        # then organize them by criterion name
@@ -490,7 +501,7 @@ class AITrainingWorkflow(AIWorkflow):
            DatabaseError
        """
        self.classifier_set = AIClassifierSet.create_classifier_set(
-            classifier_set, self.rubric, self.algorithm_id
+            classifier_set, self.rubric, self.algorithm_id, self.course_id, self.item_id
        )
        self.mark_complete_and_save()

@@ -533,6 +544,51 @@ class AIGradingWorkflow(AIWorkflow):
    # this information here from the submissions models.
    student_id = models.CharField(max_length=40, db_index=True)

+    @transaction.commit_on_success
+    def assign_most_recent_classifier_set(self):
+        """
+        Assign a classifier set to this workflow, trying matches in the following order:
+
+            1 -- A classifier set with the same COURSE, ITEM, RUBRIC and ALGORITHM
+                - The first one the query returns.  If none exist...
+            2 -- A classifier set with the same RUBRIC and ALGORITHM
+                - The first one the query returns.  If none exist...
+            3 -- Do no assignment and return False
+
+        Returns:
+            bool: True if a classifier set was assigned and the workflow saved, False otherwise.
+
+        Raises:
+            DatabaseError
+        """
+        # Ideal candidates match on every field; the [:1] slice keeps at most one row.
+        classifier_set_candidates = AIClassifierSet.objects.filter(
+            rubric=self.rubric, algorithm_id=self.algorithm_id,
+            course_id=self.course_id, item_id=self.item_id
+        )[:1]
+
+        # NOTE(review): no order_by() is applied, so "most recent" depends on the model's
+        # default ordering (not visible here) -- confirm. On a match: assign, save, return True.
+        if len(classifier_set_candidates) > 0:
+            self.classifier_set = classifier_set_candidates[0]
+            self.save()
+            return True
+
+        # Fallback candidates: classifier sets sharing only the rubric and algorithm.
+        classifier_set_candidates = AIClassifierSet.objects.filter(
+            rubric=self.rubric, algorithm_id=self.algorithm_id
+        )[:1]
+
+        # If found, associate the fallback classifier set with this workflow and save.
+        if len(classifier_set_candidates) > 0:
+            self.classifier_set = classifier_set_candidates[0]
+            self.save()
+            return True
+
+        # No classifier set exists for this rubric and algorithm; nothing was assigned.
+        return False
+
+
    @classmethod
    @transaction.commit_on_success
    def start_workflow(cls, submission_uuid, rubric_dict, algorithm_id):
@@ -582,19 +638,8 @@ class AIGradingWorkflow(AIWorkflow):
            rubric=rubric
        )

-        # Retrieve classifier set candidates
-        classifier_set_candidates = AIClassifierSet.objects.filter(
-            rubric=rubric, algorithm_id=algorithm_id
-        )[:1]
-
-        # If we find classifiers for this rubric/algorithm
-        # then associate the classifiers with the workflow
-        # and schedule a grading task.
-        # Otherwise, the task will need to be scheduled later,
-        # once the classifiers have been trained.
-        if len(classifier_set_candidates) > 0:
-            workflow.classifier_set = classifier_set_candidates[0]
-            workflow.save()
+        # Retrieve and assign classifier set candidates
+        workflow.assign_most_recent_classifier_set()

        workflow._log_start_workflow()


--- a/apps/openassessment/assessment/test/test_ai.py
+++ b/apps/openassessment/assessment/test/test_ai.py
@@ -81,7 +81,7 @@ def train_classifiers(rubric_dict, classifier_score_overrides):
    """
    rubric = rubric_from_dict(rubric_dict)
    AIClassifierSet.create_classifier_set(
-        classifier_score_overrides, rubric, ALGORITHM_ID
+        classifier_score_overrides, rubric, ALGORITHM_ID, COURSE_ID, ITEM_ID
    )



--- a/apps/openassessment/assessment/test/test_ai_models.py
+++ b/apps/openassessment/assessment/test/test_ai_models.py
@@ -19,6 +19,8 @@ class AIClassifierTest(CacheResetTest):
        u"vøȼȺƀᵾłȺɍɏ": "test data",
        u"ﻭɼค๓๓คɼ": "more test data"
    }
+    COURSE_ID = u"†3ß† çøU®ß3"
+    ITEM_ID = u"fake_item_id"

    def test_upload_to_path_default(self):
        # No path prefix provided in the settings
@@ -43,6 +45,6 @@ class AIClassifierTest(CacheResetTest):
        """
        rubric = rubric_from_dict(RUBRIC)
        classifier_set = AIClassifierSet.create_classifier_set(
-            self.CLASSIFIERS_DICT, rubric, "test_algorithm"
+            self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID
        )
        return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
--- a/apps/openassessment/assessment/test/test_ai_worker.py
+++ b/apps/openassessment/assessment/test/test_ai_worker.py
@@ -215,7 +215,7 @@ class AIWorkerGradingTest(CacheResetTest):
        # Associate the workflow with classifiers
        rubric = rubric_from_dict(RUBRIC)
        classifier_set = AIClassifierSet.create_classifier_set(
-            CLASSIFIERS, rubric, ALGORITHM_ID
+            CLASSIFIERS, rubric, ALGORITHM_ID, STUDENT_ITEM.get('course_id'), STUDENT_ITEM.get('item_id')
        )
        workflow.classifier_set = classifier_set
        workflow.save()

--- a/apps/openassessment/assessment/test/test_worker.py
+++ b/apps/openassessment/assessment/test/test_worker.py
@@ -247,7 +247,7 @@ class AIGradingTaskTest(CeleryTaskTest):
        # Associate the workflow with classifiers
        rubric = rubric_from_dict(RUBRIC)
        classifier_set = AIClassifierSet.create_classifier_set(
-            self.CLASSIFIERS, rubric, ALGORITHM_ID
+            self.CLASSIFIERS, rubric, ALGORITHM_ID, STUDENT_ITEM.get('course_id'), STUDENT_ITEM.get('item_id')
        )
        workflow.classifier_set = classifier_set
        workflow.save()

--- a/apps/openassessment/assessment/worker/grading.py
+++ b/apps/openassessment/assessment/worker/grading.py
@@ -147,37 +147,34 @@ def reschedule_grading_tasks(course_id, item_id):
        # are called in rapid succession. This is part of the reason this button is in the admin view.

        # Tries to find a set of classifiers that are already defined in our maintained_classifiers based on a
-        # description of the workflow in the form of a tuple (rubric, algorithm_id)
-        workflow_description = (workflow.rubric, workflow.algorithm_id)
+        # description of the workflow in the form of a tuple (rubric, course_id, item_id, algorithm_id)
+        workflow_description = (workflow.rubric, course_id, item_id, workflow.algorithm_id)
        found_classifiers = maintained_classifiers.get(workflow_description)

        # If no set of classifiers is found, we perform the query to try to find them. We take the most recent
        # and add it to our dictionary of maintained classifiers for future reference.
        if found_classifiers is None:
            try:
-                classifier_set_candidates = AIClassifierSet.objects.filter(
-                    rubric=workflow.rubric, algorithm_id=workflow.algorithm_id
-                ).order_by('-created_at')[:1]
-                found_classifiers = classifier_set_candidates[0]
-                maintained_classifiers[workflow_description] = found_classifiers
-            except IndexError:
-                msg = u"No classifiers yet exist for essay with uuid='{}'".format(workflow.uuid)
-                logger.log(msg)
+                found = workflow.assign_most_recent_classifier_set()
+                if found:
+                    found_classifiers = workflow.classifier_set
+                    maintained_classifiers[workflow_description] = found_classifiers
+                else:
+                    msg = u"No applicable classifiers yet exist for essay with uuid='{}'".format(workflow.uuid)
+                    logger.log(msg)
            except DatabaseError as ex:
                msg = (
                    u"A Database error occurred while trying to assign classifiers to an essay with uuid='{id}'"
                ).format(id=workflow.uuid)
                logger.exception(msg)

-        if found_classifiers is not None:
-
+        # If we found classifiers in our memoized lookup dictionary, we assign them and save.
+        else:
            workflow.classifier_set = found_classifiers
            try:
                workflow.save()
                logger.info(
-                    (
-                        u"Classifiers were successfully assigned to grading workflow with uuid={}"
-                    ).format(workflow.uuid)
+                    u"Classifiers were successfully assigned to grading workflow with uuid={}".format(workflow.uuid)
                )
            except DatabaseError as ex:
                msg = (
@@ -185,6 +182,8 @@ def reschedule_grading_tasks(course_id, item_id):
                ).format(id=workflow.uuid)
                logger.exception(msg)

+        if found_classifiers is not None:
+
            # Now we should (unless we had an exception above) have a classifier set.
            # Try to schedule the grading
            try:

--- a/apps/openassessment/management/commands/simulate_ai_grading_error.py
+++ b/apps/openassessment/management/commands/simulate_ai_grading_error.py
@@ -130,7 +130,7 @@ class Command(BaseCommand):

        # Create the classifier set
        classifier_set = AIClassifierSet.create_classifier_set(
-            classifier_data, rubric, algorithm_id
+            classifier_data, rubric, algorithm_id, course_id, item_id
        )
        print u"Successfully created classifier set with id {}".format(classifier_set.pk)