Commit 325f4f19 by gradyward

Expanded logic around classifier assignment to AIGradingWorkflows

parent 3cbd6b7b
......@@ -92,9 +92,16 @@ class AIClassifierSet(models.Model):
# The ID of the algorithm that was used to train classifiers in this set.
algorithm_id = models.CharField(max_length=128, db_index=True)
# Course Entity and Item Discriminator
# Though these items are duplicated in the database tables for the AITrainingWorkflow,
# this is okay because it will drastically speed up the operation of assigning classifiers
# to AIGradingWorkflows
course_id = models.CharField(max_length=40, db_index=True)
item_id = models.CharField(max_length=128, db_index=True)
@classmethod
@transaction.commit_on_success
def create_classifier_set(cls, classifiers_dict, rubric, algorithm_id):
def create_classifier_set(cls, classifiers_dict, rubric, algorithm_id, course_id, item_id):
"""
Create a set of classifiers.
......@@ -103,6 +110,8 @@ class AIClassifierSet(models.Model):
JSON-serializable classifiers.
rubric (Rubric): The rubric model.
algorithm_id (unicode): The ID of the algorithm used to train the classifiers.
course_id (unicode): The ID of the course that the classifier is going to be grading
item_id (unicode): The item within the course that the classifier is trained to grade.
Returns:
AIClassifierSet
......@@ -114,7 +123,9 @@ class AIClassifierSet(models.Model):
"""
# Create the classifier set
classifier_set = cls.objects.create(rubric=rubric, algorithm_id=algorithm_id)
classifier_set = cls.objects.create(
rubric=rubric, algorithm_id=algorithm_id, item_id=item_id, course_id=course_id
)
# Retrieve the criteria for this rubric,
# then organize them by criterion name
......@@ -490,7 +501,7 @@ class AITrainingWorkflow(AIWorkflow):
DatabaseError
"""
self.classifier_set = AIClassifierSet.create_classifier_set(
classifier_set, self.rubric, self.algorithm_id
classifier_set, self.rubric, self.algorithm_id, self.course_id, self.item_id
)
self.mark_complete_and_save()
......@@ -533,6 +544,51 @@ class AIGradingWorkflow(AIWorkflow):
# this information here from the submissions models.
student_id = models.CharField(max_length=40, db_index=True)
@transaction.commit_on_success
def assign_most_recent_classifier_set(self):
    """
    Assign the most relevant, most recently created classifier set to this workflow.

    Candidates are tried in the following line of succession:
        1 -- Classifier sets with the same COURSE, ITEM, RUBRIC and ALGORITHM
        2 -- Classifier sets with the same RUBRIC and ALGORITHM
    Within each tier the newest classifier set wins. If neither tier
    yields a classifier set, nothing is assigned and nothing is saved.

    Returns:
        (bool) indicates whether or not classifiers were able to be assigned to the AIGradingWorkflow

    Raises:
        DatabaseError

    """
    # The fallback tier matches on rubric/algorithm only; the ideal tier
    # additionally requires the same course and item.
    fallback_match = {'rubric': self.rubric, 'algorithm_id': self.algorithm_id}
    ideal_match = dict(fallback_match, course_id=self.course_id, item_id=self.item_id)

    for criteria in (ideal_match, fallback_match):
        # Order explicitly so that "newest first" does not silently depend on
        # whatever default ordering the model happens to declare; the primary
        # key breaks ties between sets created at the same instant.
        candidates = list(
            AIClassifierSet.objects.filter(**criteria).order_by('-created_at', '-pk')[:1]
        )
        if candidates:
            self.classifier_set = candidates[0]
            self.save()
            return True

    # If we get to this point, no classifiers exist with this rubric and algorithm.
    return False
@classmethod
@transaction.commit_on_success
def start_workflow(cls, submission_uuid, rubric_dict, algorithm_id):
......@@ -582,19 +638,8 @@ class AIGradingWorkflow(AIWorkflow):
rubric=rubric
)
# Retrieve classifier set candidates
classifier_set_candidates = AIClassifierSet.objects.filter(
rubric=rubric, algorithm_id=algorithm_id
)[:1]
# If we find classifiers for this rubric/algorithm
# then associate the classifiers with the workflow
# and schedule a grading task.
# Otherwise, the task will need to be scheduled later,
# once the classifiers have been trained.
if len(classifier_set_candidates) > 0:
workflow.classifier_set = classifier_set_candidates[0]
workflow.save()
# Retrieve and assign classifier set candidates
workflow.assign_most_recent_classifier_set()
workflow._log_start_workflow()
......
......@@ -81,7 +81,7 @@ def train_classifiers(rubric_dict, classifier_score_overrides):
"""
rubric = rubric_from_dict(rubric_dict)
AIClassifierSet.create_classifier_set(
classifier_score_overrides, rubric, ALGORITHM_ID
classifier_score_overrides, rubric, ALGORITHM_ID, COURSE_ID, ITEM_ID
)
......
......@@ -19,6 +19,8 @@ class AIClassifierTest(CacheResetTest):
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
def test_upload_to_path_default(self):
# No path prefix provided in the settings
......@@ -43,6 +45,6 @@ class AIClassifierTest(CacheResetTest):
"""
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, rubric, "test_algorithm"
self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID
)
return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
......@@ -215,7 +215,7 @@ class AIWorkerGradingTest(CacheResetTest):
# Associate the workflow with classifiers
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
CLASSIFIERS, rubric, ALGORITHM_ID
CLASSIFIERS, rubric, ALGORITHM_ID, STUDENT_ITEM.get('course_id'), STUDENT_ITEM.get('item_id')
)
workflow.classifier_set = classifier_set
workflow.save()
......
......@@ -247,7 +247,7 @@ class AIGradingTaskTest(CeleryTaskTest):
# Associate the workflow with classifiers
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS, rubric, ALGORITHM_ID
self.CLASSIFIERS, rubric, ALGORITHM_ID, STUDENT_ITEM.get('course_id'), STUDENT_ITEM.get('item_id')
)
workflow.classifier_set = classifier_set
workflow.save()
......
......@@ -147,37 +147,34 @@ def reschedule_grading_tasks(course_id, item_id):
# are called in rapid succession. This is part of the reason this button is in the admin view.
# Tries to find a set of classifiers that are already defined in our maintained_classifiers based on a
# description of the workflow in the form of a tuple (rubric, algorithm_id)
workflow_description = (workflow.rubric, workflow.algorithm_id)
# description of the workflow in the form of a tuple (rubric, course_id, item_id, algorithm_id)
workflow_description = (workflow.rubric, course_id, item_id, workflow.algorithm_id)
found_classifiers = maintained_classifiers.get(workflow_description)
# If no set of classifiers is found, we perform the query to try to find them. We take the most recent
# and add it to our dictionary of maintained classifiers for future reference.
if found_classifiers is None:
try:
classifier_set_candidates = AIClassifierSet.objects.filter(
rubric=workflow.rubric, algorithm_id=workflow.algorithm_id
).order_by('-created_at')[:1]
found_classifiers = classifier_set_candidates[0]
maintained_classifiers[workflow_description] = found_classifiers
except IndexError:
msg = u"No classifiers yet exist for essay with uuid='{}'".format(workflow.uuid)
logger.log(msg)
found = workflow.assign_most_recent_classifier_set()
if found:
found_classifiers = workflow.classifier_set
maintained_classifiers[workflow_description] = found_classifiers
else:
msg = u"No applicable classifiers yet exist for essay with uuid='{}'".format(workflow.uuid)
logger.log(msg)
except DatabaseError as ex:
msg = (
u"A Database error occurred while trying to assign classifiers to an essay with uuid='{id}'"
).format(id=workflow.uuid)
logger.exception(msg)
if found_classifiers is not None:
# If we found classifiers in our memoized lookup dictionary, we assign them and save.
else:
workflow.classifier_set = found_classifiers
try:
workflow.save()
logger.info(
(
u"Classifiers were successfully assigned to grading workflow with uuid={}"
).format(workflow.uuid)
u"Classifiers were successfully assigned to grading workflow with uuid={}".format(workflow.uuid)
)
except DatabaseError as ex:
msg = (
......@@ -185,6 +182,8 @@ def reschedule_grading_tasks(course_id, item_id):
).format(id=workflow.uuid)
logger.exception(msg)
if found_classifiers is not None:
# Now we should (unless we had an exception above) have a classifier set.
# Try to schedule the grading
try:
......
......@@ -130,7 +130,7 @@ class Command(BaseCommand):
# Create the classifier set
classifier_set = AIClassifierSet.create_classifier_set(
classifier_data, rubric, algorithm_id
classifier_data, rubric, algorithm_id, course_id, item_id
)
print u"Successfully created classifier set with id {}".format(classifier_set.pk)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment