Commit 9a4de3a9 by gradyward

Merge pull request #404 from edx/grady/fix-assign-classifiers

Expanded logic of how classifiers are assigned to AIGradingWorkflows.
parents 3cbd6b7b 325f4f19
# -*- coding: utf-8 -*-
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
class Migration(SchemaMigration):
    """South schema migration adding ``course_id`` and ``item_id`` to AIClassifierSet."""

    def forwards(self, orm):
        """Add the indexed ``course_id`` and ``item_id`` columns to the classifier set table."""
        # (column name, max_length) for each new CharField, in creation order.
        new_columns = (
            ('course_id', 40),   # Adding field 'AIClassifierSet.course_id'
            ('item_id', 128),    # Adding field 'AIClassifierSet.item_id'
        )
        for column_name, max_length in new_columns:
            # Existing rows are populated with the empty-string default; the
            # default itself is not kept on the column (keep_default=False).
            db.add_column('assessment_aiclassifierset', column_name,
                          self.gf('django.db.models.fields.CharField')(default='', max_length=max_length, db_index=True),
                          keep_default=False)

    def backwards(self, orm):
        """Drop the ``course_id`` and ``item_id`` columns added by ``forwards``."""
        # Deleting field 'AIClassifierSet.course_id', then 'AIClassifierSet.item_id'
        for column_name in ('course_id', 'item_id'):
            db.delete_column('assessment_aiclassifierset', column_name)

    # Snapshot of the ``assessment`` app's model definitions as of this migration.
    models = {
        'assessment.aiclassifier': {
            'Meta': {'object_name': 'AIClassifier'},
            'classifier_data': ('django.db.models.fields.files.FileField', [], {'max_length': '100'}),
            'classifier_set': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'classifiers'", 'to': "orm['assessment.AIClassifierSet']"}),
            'criterion': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'+'", 'to': "orm['assessment.Criterion']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
        },
        'assessment.aiclassifierset': {
            'Meta': {'ordering': "['-created_at', '-id']", 'object_name': 'AIClassifierSet'},
            'algorithm_id': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
            'course_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'item_id': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
            'rubric': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'+'", 'to': "orm['assessment.Rubric']"})
        },
        'assessment.aigradingworkflow': {
            'Meta': {'object_name': 'AIGradingWorkflow'},
            'algorithm_id': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
            'assessment': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'related_name': "'+'", 'null': 'True', 'to': "orm['assessment.Assessment']"}),
            'classifier_set': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'related_name': "'+'", 'null': 'True', 'to': "orm['assessment.AIClassifierSet']"}),
            'completed_at': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'db_index': 'True'}),
            'course_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'essay_text': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'item_id': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
            'rubric': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'+'", 'to': "orm['assessment.Rubric']"}),
            'scheduled_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'student_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'submission_uuid': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
            'uuid': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'unique': 'True', 'max_length': '36', 'blank': 'True'})
        },
        'assessment.aitrainingworkflow': {
            'Meta': {'object_name': 'AITrainingWorkflow'},
            'algorithm_id': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
            'classifier_set': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'related_name': "'+'", 'null': 'True', 'to': "orm['assessment.AIClassifierSet']"}),
            'completed_at': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'db_index': 'True'}),
            'course_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'item_id': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
            'scheduled_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'training_examples': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'+'", 'symmetrical': 'False', 'to': "orm['assessment.TrainingExample']"}),
            'uuid': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'unique': 'True', 'max_length': '36', 'blank': 'True'})
        },
        'assessment.assessment': {
            'Meta': {'ordering': "['-scored_at', '-id']", 'object_name': 'Assessment'},
            'feedback': ('django.db.models.fields.TextField', [], {'default': "''", 'max_length': '10000', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'rubric': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['assessment.Rubric']"}),
            'score_type': ('django.db.models.fields.CharField', [], {'max_length': '2'}),
            'scored_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'scorer_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'submission_uuid': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'})
        },
        'assessment.assessmentfeedback': {
            'Meta': {'object_name': 'AssessmentFeedback'},
            'assessments': ('django.db.models.fields.related.ManyToManyField', [], {'default': 'None', 'related_name': "'assessment_feedback'", 'symmetrical': 'False', 'to': "orm['assessment.Assessment']"}),
            'feedback_text': ('django.db.models.fields.TextField', [], {'default': "''", 'max_length': '10000'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'options': ('django.db.models.fields.related.ManyToManyField', [], {'default': 'None', 'related_name': "'assessment_feedback'", 'symmetrical': 'False', 'to': "orm['assessment.AssessmentFeedbackOption']"}),
            'submission_uuid': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '128', 'db_index': 'True'})
        },
        'assessment.assessmentfeedbackoption': {
            'Meta': {'object_name': 'AssessmentFeedbackOption'},
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'text': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'})
        },
        'assessment.assessmentpart': {
            'Meta': {'object_name': 'AssessmentPart'},
            'assessment': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'parts'", 'to': "orm['assessment.Assessment']"}),
            'feedback': ('django.db.models.fields.TextField', [], {'default': "''", 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'option': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'+'", 'to': "orm['assessment.CriterionOption']"})
        },
        'assessment.criterion': {
            'Meta': {'ordering': "['rubric', 'order_num']", 'object_name': 'Criterion'},
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'order_num': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'prompt': ('django.db.models.fields.TextField', [], {'max_length': '10000'}),
            'rubric': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'criteria'", 'to': "orm['assessment.Rubric']"})
        },
        'assessment.criterionoption': {
            'Meta': {'ordering': "['criterion', 'order_num']", 'object_name': 'CriterionOption'},
            'criterion': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'options'", 'to': "orm['assessment.Criterion']"}),
            'explanation': ('django.db.models.fields.TextField', [], {'max_length': '10000', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'order_num': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'points': ('django.db.models.fields.PositiveIntegerField', [], {})
        },
        'assessment.peerworkflow': {
            'Meta': {'ordering': "['created_at', 'id']", 'object_name': 'PeerWorkflow'},
            'completed_at': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'db_index': 'True'}),
            'course_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'grading_completed_at': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'item_id': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
            'student_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'submission_uuid': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '128', 'db_index': 'True'})
        },
        'assessment.peerworkflowitem': {
            'Meta': {'ordering': "['started_at', 'id']", 'object_name': 'PeerWorkflowItem'},
            'assessment': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['assessment.Assessment']", 'null': 'True'}),
            'author': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'graded_by'", 'to': "orm['assessment.PeerWorkflow']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'scored': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'scorer': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'graded'", 'to': "orm['assessment.PeerWorkflow']"}),
            'started_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'submission_uuid': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'})
        },
        'assessment.rubric': {
            'Meta': {'object_name': 'Rubric'},
            'content_hash': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '40', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
        },
        'assessment.studenttrainingworkflow': {
            'Meta': {'object_name': 'StudentTrainingWorkflow'},
            'course_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'item_id': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
            'student_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'submission_uuid': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '128', 'db_index': 'True'})
        },
        'assessment.studenttrainingworkflowitem': {
            'Meta': {'ordering': "['workflow', 'order_num']", 'unique_together': "(('workflow', 'order_num'),)", 'object_name': 'StudentTrainingWorkflowItem'},
            'completed_at': ('django.db.models.fields.DateTimeField', [], {'default': 'None', 'null': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'order_num': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'started_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'training_example': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['assessment.TrainingExample']"}),
            'workflow': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'items'", 'to': "orm['assessment.StudentTrainingWorkflow']"})
        },
        'assessment.trainingexample': {
            'Meta': {'object_name': 'TrainingExample'},
            'content_hash': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '40', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'options_selected': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['assessment.CriterionOption']", 'symmetrical': 'False'}),
            'raw_answer': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
            'rubric': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['assessment.Rubric']"})
        }
    }

    complete_apps = ['assessment']
\ No newline at end of file
......@@ -92,9 +92,16 @@ class AIClassifierSet(models.Model):
# The ID of the algorithm that was used to train classifiers in this set.
algorithm_id = models.CharField(max_length=128, db_index=True)
# Course Entity and Item Discriminator
# Though these items are duplicated in the database tables for the AITrainingWorkflow,
# this is okay because it will drastically speed up the operation of assigning classifiers
# to AIGradingWorkflows
course_id = models.CharField(max_length=40, db_index=True)
item_id = models.CharField(max_length=128, db_index=True)
@classmethod
@transaction.commit_on_success
def create_classifier_set(cls, classifiers_dict, rubric, algorithm_id):
def create_classifier_set(cls, classifiers_dict, rubric, algorithm_id, course_id, item_id):
"""
Create a set of classifiers.
......@@ -103,6 +110,8 @@ class AIClassifierSet(models.Model):
JSON-serializable classifiers.
rubric (Rubric): The rubric model.
algorithm_id (unicode): The ID of the algorithm used to train the classifiers.
course_id (unicode): The ID of the course that the classifier is going to be grading
item_id (unicode): The item within the course that the classifier is trained to grade.
Returns:
AIClassifierSet
......@@ -114,7 +123,9 @@ class AIClassifierSet(models.Model):
"""
# Create the classifier set
classifier_set = cls.objects.create(rubric=rubric, algorithm_id=algorithm_id)
classifier_set = cls.objects.create(
rubric=rubric, algorithm_id=algorithm_id, item_id=item_id, course_id=course_id
)
# Retrieve the criteria for this rubric,
# then organize them by criterion name
......@@ -490,7 +501,7 @@ class AITrainingWorkflow(AIWorkflow):
DatabaseError
"""
self.classifier_set = AIClassifierSet.create_classifier_set(
classifier_set, self.rubric, self.algorithm_id
classifier_set, self.rubric, self.algorithm_id, self.course_id, self.item_id
)
self.mark_complete_and_save()
......@@ -533,6 +544,51 @@ class AIGradingWorkflow(AIWorkflow):
# this information here from the submissions models.
student_id = models.CharField(max_length=40, db_index=True)
@transaction.commit_on_success
def assign_most_recent_classifier_set(self):
    """
    Attach the best available classifier set to this grading workflow.

    Candidate sets are tried in order of preference:
        1. A set matching this workflow's rubric, algorithm, course and item.
        2. A set matching only this workflow's rubric and algorithm.

    Within each tier the first row returned by the query is used. No explicit
    ordering is applied here, so "most recent" depends on the default ordering
    declared on AIClassifierSet.

    When a set is found it is stored on ``self.classifier_set`` and the
    workflow is saved. When neither tier matches, nothing is modified.

    Returns:
        bool: True if a classifier set was assigned and saved, False otherwise.

    Raises:
        DatabaseError

    """
    # Fallback criteria (non-ideal, but good enough): same rubric and algorithm.
    fallback_criteria = {
        'rubric': self.rubric,
        'algorithm_id': self.algorithm_id,
    }
    # Ideal criteria: additionally trained for this exact course and item.
    ideal_criteria = dict(
        fallback_criteria, course_id=self.course_id, item_id=self.item_id
    )

    for criteria in (ideal_criteria, fallback_criteria):
        # Fetch at most one row; the fallback query only runs if the ideal one is empty.
        matches = list(AIClassifierSet.objects.filter(**criteria)[:1])
        if matches:
            self.classifier_set = matches[0]
            self.save()
            return True

    # If we get to this point, no classifiers exist with this rubric and algorithm.
    return False
@classmethod
@transaction.commit_on_success
def start_workflow(cls, submission_uuid, rubric_dict, algorithm_id):
......@@ -582,19 +638,8 @@ class AIGradingWorkflow(AIWorkflow):
rubric=rubric
)
# Retrieve classifier set candidates
classifier_set_candidates = AIClassifierSet.objects.filter(
rubric=rubric, algorithm_id=algorithm_id
)[:1]
# If we find classifiers for this rubric/algorithm
# then associate the classifiers with the workflow
# and schedule a grading task.
# Otherwise, the task will need to be scheduled later,
# once the classifiers have been trained.
if len(classifier_set_candidates) > 0:
workflow.classifier_set = classifier_set_candidates[0]
workflow.save()
# Retrieve and assign classifier set candidates
workflow.assign_most_recent_classifier_set()
workflow._log_start_workflow()
......
......@@ -81,7 +81,7 @@ def train_classifiers(rubric_dict, classifier_score_overrides):
"""
rubric = rubric_from_dict(rubric_dict)
AIClassifierSet.create_classifier_set(
classifier_score_overrides, rubric, ALGORITHM_ID
classifier_score_overrides, rubric, ALGORITHM_ID, COURSE_ID, ITEM_ID
)
......
......@@ -19,6 +19,8 @@ class AIClassifierTest(CacheResetTest):
u"vøȼȺƀᵾłȺɍɏ": "test data",
u"ﻭɼค๓๓คɼ": "more test data"
}
COURSE_ID = u"†3߆ çøU®ß3"
ITEM_ID = u"fake_item_id"
def test_upload_to_path_default(self):
# No path prefix provided in the settings
......@@ -43,6 +45,6 @@ class AIClassifierTest(CacheResetTest):
"""
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS_DICT, rubric, "test_algorithm"
self.CLASSIFIERS_DICT, rubric, "test_algorithm", self.COURSE_ID, self.ITEM_ID
)
return AIClassifier.objects.filter(classifier_set=classifier_set)[0]
......@@ -215,7 +215,7 @@ class AIWorkerGradingTest(CacheResetTest):
# Associate the workflow with classifiers
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
CLASSIFIERS, rubric, ALGORITHM_ID
CLASSIFIERS, rubric, ALGORITHM_ID, STUDENT_ITEM.get('course_id'), STUDENT_ITEM.get('item_id')
)
workflow.classifier_set = classifier_set
workflow.save()
......
......@@ -247,7 +247,7 @@ class AIGradingTaskTest(CeleryTaskTest):
# Associate the workflow with classifiers
rubric = rubric_from_dict(RUBRIC)
classifier_set = AIClassifierSet.create_classifier_set(
self.CLASSIFIERS, rubric, ALGORITHM_ID
self.CLASSIFIERS, rubric, ALGORITHM_ID, STUDENT_ITEM.get('course_id'), STUDENT_ITEM.get('item_id')
)
workflow.classifier_set = classifier_set
workflow.save()
......
......@@ -147,37 +147,34 @@ def reschedule_grading_tasks(course_id, item_id):
# are called in rapid succession. This is part of the reason this button is in the admin view.
# Tries to find a set of classifiers that are already defined in our maintained_classifiers based on a
# description of the workflow in the form of a tuple (rubric, algorithm_id)
workflow_description = (workflow.rubric, workflow.algorithm_id)
# description of the workflow in the form of a tuple (rubric, course_id, item_id, algorithm_id)
workflow_description = (workflow.rubric, course_id, item_id, workflow.algorithm_id)
found_classifiers = maintained_classifiers.get(workflow_description)
# If no set of classifiers is found, we perform the query to try to find them. We take the most recent
# and add it to our dictionary of maintained classifiers for future reference.
if found_classifiers is None:
try:
classifier_set_candidates = AIClassifierSet.objects.filter(
rubric=workflow.rubric, algorithm_id=workflow.algorithm_id
).order_by('-created_at')[:1]
found_classifiers = classifier_set_candidates[0]
maintained_classifiers[workflow_description] = found_classifiers
except IndexError:
msg = u"No classifiers yet exist for essay with uuid='{}'".format(workflow.uuid)
logger.log(msg)
found = workflow.assign_most_recent_classifier_set()
if found:
found_classifiers = workflow.classifier_set
maintained_classifiers[workflow_description] = found_classifiers
else:
msg = u"No applicable classifiers yet exist for essay with uuid='{}'".format(workflow.uuid)
logger.log(msg)
except DatabaseError as ex:
msg = (
u"A Database error occurred while trying to assign classifiers to an essay with uuid='{id}'"
).format(id=workflow.uuid)
logger.exception(msg)
if found_classifiers is not None:
# If we found classifiers in our memoized lookup dictionary, we assign them and save.
else:
workflow.classifier_set = found_classifiers
try:
workflow.save()
logger.info(
(
u"Classifiers were successfully assigned to grading workflow with uuid={}"
).format(workflow.uuid)
u"Classifiers were successfully assigned to grading workflow with uuid={}".format(workflow.uuid)
)
except DatabaseError as ex:
msg = (
......@@ -185,6 +182,8 @@ def reschedule_grading_tasks(course_id, item_id):
).format(id=workflow.uuid)
logger.exception(msg)
if found_classifiers is not None:
# Now we should (unless we had an exception above) have a classifier set.
# Try to schedule the grading
try:
......
......@@ -130,7 +130,7 @@ class Command(BaseCommand):
# Create the classifier set
classifier_set = AIClassifierSet.create_classifier_set(
classifier_data, rubric, algorithm_id
classifier_data, rubric, algorithm_id, course_id, item_id
)
print u"Successfully created classifier set with id {}".format(classifier_set.pk)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment