Commit cdf68aca by Stephen Sanchez

Merge pull request #208 from edx/sanchez/peer_query_optimization

WIP: Updating the query to be simpler
parents 1f029cf8 d0cc6fb6
# -*- coding: utf-8 -*-
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
class Migration(SchemaMigration):
    """Add ``PeerWorkflow.graded_count`` and backfill it from existing data.

    The new denormalized counter lets the peer-matching query filter on
    ``graded_count`` directly instead of joining/aggregating over
    ``PeerWorkflowItem`` rows on every request.
    """

    def forwards(self, orm):
        """Add the column, then backfill it for every existing workflow.

        The backfill counts only items whose ``assessment`` FK is set
        (i.e. assessments that were actually completed).
        """
        # Adding field 'PeerWorkflow.graded_count'
        db.add_column('assessment_peerworkflow', 'graded_count',
                      self.gf('django.db.models.fields.PositiveIntegerField')(default=0, db_index=True),
                      keep_default=False)

        # Data migrations must not run during a dry run: the column above
        # has not really been added yet, so any save() would fail.
        if not db.dry_run:
            for workflow in orm.PeerWorkflow.objects.all():
                # Only touch rows that have at least one grading item;
                # everything else keeps the column default of 0.
                if workflow.graded_by.exists():
                    # BUG FIX: the original used the invalid lookup
                    # 'assessment__null', which raises FieldError.
                    # The correct Django lookup is 'assessment__isnull'.
                    workflow.graded_count = workflow.graded_by.filter(
                        assessment__isnull=False
                    ).count()
                    workflow.save()

    def backwards(self, orm):
        """Remove the denormalized counter (data loss is acceptable: it
        can always be recomputed from ``PeerWorkflowItem`` rows)."""
        # Deleting field 'PeerWorkflow.graded_count'
        db.delete_column('assessment_peerworkflow', 'graded_count')

    # South "frozen ORM": a snapshot of the model definitions at the time
    # this migration was written. Used to build the ``orm`` object above.
    models = {
        'assessment.assessment': {
            'Meta': {'ordering': "['-scored_at', '-id']", 'object_name': 'Assessment'},
            'feedback': ('django.db.models.fields.TextField', [], {'default': "''", 'max_length': '10000', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'rubric': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['assessment.Rubric']"}),
            'score_type': ('django.db.models.fields.CharField', [], {'max_length': '2'}),
            'scored_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'scorer_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'submission_uuid': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'})
        },
        'assessment.assessmentfeedback': {
            'Meta': {'object_name': 'AssessmentFeedback'},
            'assessments': ('django.db.models.fields.related.ManyToManyField', [], {'default': 'None', 'related_name': "'assessment_feedback'", 'symmetrical': 'False', 'to': "orm['assessment.Assessment']"}),
            'feedback_text': ('django.db.models.fields.TextField', [], {'default': "''", 'max_length': '10000'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'options': ('django.db.models.fields.related.ManyToManyField', [], {'default': 'None', 'related_name': "'assessment_feedback'", 'symmetrical': 'False', 'to': "orm['assessment.AssessmentFeedbackOption']"}),
            'submission_uuid': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '128', 'db_index': 'True'})
        },
        'assessment.assessmentfeedbackoption': {
            'Meta': {'object_name': 'AssessmentFeedbackOption'},
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'text': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'})
        },
        'assessment.assessmentpart': {
            'Meta': {'object_name': 'AssessmentPart'},
            'assessment': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'parts'", 'to': "orm['assessment.Assessment']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'option': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['assessment.CriterionOption']"})
        },
        'assessment.criterion': {
            'Meta': {'ordering': "['rubric', 'order_num']", 'object_name': 'Criterion'},
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'order_num': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'prompt': ('django.db.models.fields.TextField', [], {'max_length': '10000'}),
            'rubric': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'criteria'", 'to': "orm['assessment.Rubric']"})
        },
        'assessment.criterionoption': {
            'Meta': {'ordering': "['criterion', 'order_num']", 'object_name': 'CriterionOption'},
            'criterion': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'options'", 'to': "orm['assessment.Criterion']"}),
            'explanation': ('django.db.models.fields.TextField', [], {'max_length': '10000', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'order_num': ('django.db.models.fields.PositiveIntegerField', [], {}),
            'points': ('django.db.models.fields.PositiveIntegerField', [], {})
        },
        'assessment.peerworkflow': {
            'Meta': {'ordering': "['created_at', 'id']", 'object_name': 'PeerWorkflow'},
            'completed_at': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'db_index': 'True'}),
            'course_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'graded_count': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'item_id': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
            'student_id': ('django.db.models.fields.CharField', [], {'max_length': '40', 'db_index': 'True'}),
            'submission_uuid': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '128', 'db_index': 'True'})
        },
        'assessment.peerworkflowitem': {
            'Meta': {'ordering': "['started_at', 'id']", 'object_name': 'PeerWorkflowItem'},
            'assessment': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['assessment.Assessment']", 'null': 'True'}),
            'author': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'graded_by'", 'to': "orm['assessment.PeerWorkflow']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'scored': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'scorer': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'graded'", 'to': "orm['assessment.PeerWorkflow']"}),
            'started_at': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'submission_uuid': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'})
        },
        'assessment.rubric': {
            'Meta': {'object_name': 'Rubric'},
            'content_hash': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '40', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
        }
    }

    complete_apps = ['assessment']
\ No newline at end of file
...@@ -484,6 +484,7 @@ class PeerWorkflow(models.Model): ...@@ -484,6 +484,7 @@ class PeerWorkflow(models.Model):
submission_uuid = models.CharField(max_length=128, db_index=True, unique=True) submission_uuid = models.CharField(max_length=128, db_index=True, unique=True)
created_at = models.DateTimeField(default=now, db_index=True) created_at = models.DateTimeField(default=now, db_index=True)
completed_at = models.DateTimeField(null=True, db_index=True) completed_at = models.DateTimeField(null=True, db_index=True)
graded_count = models.PositiveIntegerField(default=0, db_index=True)
class Meta: class Meta:
ordering = ["created_at", "id"] ordering = ["created_at", "id"]
......
...@@ -759,19 +759,43 @@ def _get_submission_for_review(workflow, graded_by, over_grading=False): ...@@ -759,19 +759,43 @@ def _get_submission_for_review(workflow, graded_by, over_grading=False):
"1" "1"
""" """
order = " having count(pwi.id) < %s order by pw.created_at, pw.id "
timeout = (timezone.now() - TIME_LIMIT).strftime("%Y-%m-%d %H:%M:%S") timeout = (timezone.now() - TIME_LIMIT).strftime("%Y-%m-%d %H:%M:%S")
sub = _get_next_submission( try:
order, peer_workflows = list(PeerWorkflow.objects.raw(
workflow, "select pw.id, pw.submission_uuid "
workflow.item_id, "from assessment_peerworkflow pw "
workflow.course_id, "left join assessment_peerworkflowitem pwi "
workflow.student_id, "on pw.id=pwi.author_id "
workflow.id, "where pw.item_id=%s "
timeout, "and pw.course_id=%s "
graded_by "and pw.student_id<>%s "
) "and pw.graded_count < %s "
return sub "and pw.id not in (select pwi.author_id from assessment_peerworkflowitem pwi where pwi.scorer_id=%s) "
"and (pwi.scorer_id is NULL or pwi.assessment_id is not NULL or pwi.started_at > %s) "
"group by pw.id "
"having count(pwi.id) < %s "
"limit 1; ",
[
workflow.item_id,
workflow.course_id,
workflow.student_id,
graded_by,
workflow.id,
timeout,
graded_by
]
))
if not peer_workflows:
return None
return peer_workflows[0].submission_uuid
except DatabaseError:
error_message = _(
u"An internal error occurred while retrieving a peer submission "
u"for student {}".format(workflow)
)
logger.exception(error_message)
raise PeerAssessmentInternalError(error_message)
def _get_submission_for_over_grading(workflow): def _get_submission_for_over_grading(workflow):
...@@ -789,17 +813,37 @@ def _get_submission_for_over_grading(workflow): ...@@ -789,17 +813,37 @@ def _get_submission_for_over_grading(workflow):
6) Returns the workflow with the fewest assessments. 6) Returns the workflow with the fewest assessments.
""" """
order = " order by c, pw.created_at, pw.id " try:
timeout = (timezone.now() - TIME_LIMIT).strftime("%Y-%m-%d %H:%M:%S") peer_workflows = list(PeerWorkflow.objects.raw(
return _get_next_submission( "select pw.id, pw.submission_uuid "
order, "from assessment_peerworkflow pw "
workflow, "left join assessment_peerworkflowitem pwi "
workflow.item_id, "on pw.id=pwi.author_id "
workflow.course_id, "where pw.item_id=%s "
workflow.student_id, "and pw.course_id=%s "
workflow.id, "and pw.student_id<>%s "
timeout "and pw.id not in (select pwi.author_id from assessment_peerworkflowitem pwi where pwi.scorer_id=%s) "
) "group by pw.id "
"order by count(pwi.id), pw.created_at, pw.id "
"limit 1; ",
[
workflow.item_id,
workflow.course_id,
workflow.student_id,
workflow.id
]
))
if not peer_workflows:
return None
return peer_workflows[0].submission_uuid
except DatabaseError:
error_message = _(
u"An internal error occurred while retrieving a peer submission "
u"for student {}".format(workflow)
)
logger.exception(error_message)
raise PeerAssessmentInternalError(error_message)
def _get_next_submission(order, workflow, *args): def _get_next_submission(order, workflow, *args):
...@@ -812,17 +856,15 @@ def _get_next_submission(order, workflow, *args): ...@@ -812,17 +856,15 @@ def _get_next_submission(order, workflow, *args):
For example, for a general peer assessment query, the following would be For example, for a general peer assessment query, the following would be
the generated SQL query: the generated SQL query:
select pw.id, pw.submission_uuid , pw.student_id, count(pwi.id) as c select pw.id, pw.submission_uuid , pw.student_id
from assessment_peerworkflow pw from assessment_peerworkflow pw
left join assessment_peerworkflowitem pwi
on pw.id=pwi.author_id
where pw.completed_at is NULL where pw.completed_at is NULL
and pw.item_id='item_one' and pw.item_id='item_one'
and pw.course_id='Demo_Course' and pw.course_id='Demo_Course'
and pw.student_id<>'Tim' and pw.student_id<>'Tim'
and pw.id not in (select pwi.author_id from assessment_peerworkflowitem pwi where pwi.scorer_id=3159) and pw.id not in (select pwi.author_id from assessment_peerworkflowitem pwi where pwi.scorer_id=3159)
and (pwi.scorer_id is NULL or pwi.assessment_id is not NULL or pwi.started_at > '2014-03-04 20:09:04') and (select count(pwi.id) from assessment_peerworkflowitem pwi where pwi.scorer_id=3159)
group by pw.id having count(pwi.id) < 3 and (pw.started_at is NULL or pw.graded_count < 3 or (pw.graded_count = 3 and pw.started_at < '2014-03-14 20:09:04'))
order by pw.created_at, pw.id order by pw.created_at, pw.id
limit 1; limit 1;
...@@ -838,36 +880,7 @@ def _get_next_submission(order, workflow, *args): ...@@ -838,36 +880,7 @@ def _get_next_submission(order, workflow, *args):
A submission uuid for the submission that should be peer assessed. A submission uuid for the submission that should be peer assessed.
""" """
try:
raw_query = (
"select pw.id, pw.submission_uuid, count(pwi.id) as c "
"from assessment_peerworkflow pw "
"left join assessment_peerworkflowitem pwi "
"on pw.id=pwi.author_id "
"where pw.completed_at is NULL "
"and pw.item_id=%s "
"and pw.course_id=%s "
"and pw.student_id<>%s "
"and pw.id not in (select pwi.author_id from assessment_peerworkflowitem pwi where pwi.scorer_id=%s) "
"and (pwi.scorer_id is NULL or pwi.assessment_id is not NULL or pwi.started_at > %s) "
"group by pw.id "
"{} "
"limit 1; "
)
query = raw_query.format(order)
peer_workflows = list(PeerWorkflow.objects.raw(query, args))
if not peer_workflows:
return None
return peer_workflows[0].submission_uuid
except DatabaseError:
error_message = _(
u"An internal error occurred while retrieving a peer submission "
u"for student {}".format(workflow)
)
logger.exception(error_message)
raise PeerAssessmentInternalError(error_message)
def _close_active_assessment(workflow, submission_uuid, assessment): def _close_active_assessment(workflow, submission_uuid, assessment):
...@@ -897,6 +910,7 @@ def _close_active_assessment(workflow, submission_uuid, assessment): ...@@ -897,6 +910,7 @@ def _close_active_assessment(workflow, submission_uuid, assessment):
try: try:
item = workflow.graded.get(submission_uuid=submission_uuid) item = workflow.graded.get(submission_uuid=submission_uuid)
item.assessment = assessment item.assessment = assessment
item.scorer.graded_count += 1
item.save() item.save()
except (DatabaseError, PeerWorkflowItem.DoesNotExist): except (DatabaseError, PeerWorkflowItem.DoesNotExist):
error_message = _( error_message = _(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment