Commit 4b6cff6c by David Ormsbee

Merge pull request #182 from edx/ormsbee/kill_queries

Reducing query count
parents c1dbf128 428824c3
......@@ -15,6 +15,7 @@ from copy import deepcopy
 from hashlib import sha1
 import json
 
+from django.core.cache import cache
 from django.db import models
 from django.utils.timezone import now
 from django.utils.translation import ugettext as _
......@@ -105,17 +106,35 @@ class Rubric(models.Model):
            InvalidOptionSelection: the selected options do not match the rubric.
        """
-        # Select all criteria and options for this rubric
-        # We use `select_related()` to minimize the number of database queries
-        rubric_options = CriterionOption.objects.filter(criterion__rubric=self).select_related()
+        # Cache based on the content_hash, not the id. It's slightly safer, and
+        # we don't have to worry about invalidation of the cache while running
+        # tests.
+        rubric_criteria_dict_cache_key = (
+            "assessment.rubric_criteria_dict.{}".format(self.content_hash)
+        )
+
        # Create a dict of dicts that maps:
        # criterion names --> option names --> option ids
-        rubric_criteria_dict = defaultdict(dict)
+        #
+        # If we've already generated one of these for this rubric, grab it from
+        # the cache instead of hitting the database again.
+        rubric_criteria_dict = cache.get(rubric_criteria_dict_cache_key)
+
+        if not rubric_criteria_dict:
+            rubric_criteria_dict = defaultdict(dict)
+
+            # Select all criteria and options for this rubric
+            # We use `select_related()` to minimize the number of database queries
+            rubric_options = CriterionOption.objects.filter(
+                criterion__rubric=self
+            ).select_related()
 
-        # Construct dictionaries for each option in the rubric
-        for option in rubric_options:
-            rubric_criteria_dict[option.criterion.name][option.name] = option.id
+            # Construct dictionaries for each option in the rubric
+            for option in rubric_options:
+                rubric_criteria_dict[option.criterion.name][option.name] = option.id
+
+            # Save it in our cache
+            cache.set(rubric_criteria_dict_cache_key, rubric_criteria_dict)
 
        # Validate: are options selected for each criterion in the rubric?
        if len(options_selected) != len(rubric_criteria_dict):
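
The change above is a cache-aside read keyed on `content_hash` rather than the row id: identical rubric content shares one cache entry, and because the key is derived from the content itself, a changed rubric gets a new key and there is nothing to invalidate. A minimal standalone sketch of the same pattern (the function name and key prefix are illustrative, not from this codebase):

    import json
    from hashlib import sha1

    from django.core.cache import cache

    def cached_by_content(content, compute, prefix="demo.by_content"):
        """Cache-aside keyed on a hash of the content itself."""
        content_hash = sha1(
            json.dumps(content, sort_keys=True).encode("utf-8")
        ).hexdigest()
        key = "{}.{}".format(prefix, content_hash)
        result = cache.get(key)
        if result is None:
            result = compute(content)  # the expensive DB/CPU work runs once
            cache.set(key, result)
        return result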
......@@ -329,11 +348,26 @@ class Assessment(models.Model):
                "bar": [6, 7, 8]
            }
        """
+        assessments = list(assessments)  # Force us to read it all
+        if not assessments:
+            return []
+
+        # Generate a cache key that represents all the assessments we're being
+        # asked to grab scores from (comma separated list of assessment IDs)
+        cache_key = "assessments.scores_by_criterion.{}".format(
+            ",".join(str(assessment.id) for assessment in assessments)
+        )
+
+        scores = cache.get(cache_key)
+        if scores:
+            return scores
+
        scores = defaultdict(list)
        for assessment in assessments:
-            for part in assessment.parts.all():
+            for part in assessment.parts.all().select_related("option__criterion"):
                criterion_name = part.option.criterion.name
                scores[criterion_name].append(part.option.points)
+
+        cache.set(cache_key, scores)
+
        return scores
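
The cache key above encodes the exact set of assessment IDs, so each distinct combination gets its own entry; this is safe as long as a scored assessment's parts never change after creation. A toy illustration of the key format (the IDs are made up):

    assessment_ids = [12, 45, 46]
    cache_key = "assessments.scores_by_criterion.{}".format(
        ",".join(str(_id) for _id in assessment_ids)
    )
    # cache_key == "assessments.scores_by_criterion.12,45,46"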
......@@ -362,6 +396,14 @@ class AssessmentPart(models.Model):
     def points_possible(self):
         return self.option.criterion.points_possible
 
+    @classmethod
+    def add_to_assessment(cls, assessment, option_ids):
+        """Creates AssessmentParts and adds them to `assessment`."""
+        cls.objects.bulk_create([
+            cls(assessment=assessment, option_id=option_id)
+            for option_id in option_ids
+        ])
+
 
 class AssessmentFeedback(models.Model):
     """A response to a submission's feedback, judging accuracy or helpfulness."""
......
......@@ -14,12 +14,14 @@ from django.db import DatabaseError
 from django.db.models import Q
 
 from openassessment.assessment.models import (
-    Assessment, InvalidOptionSelection, PeerWorkflow, PeerWorkflowItem,
-    AssessmentFeedback
+    Assessment, AssessmentFeedback, AssessmentPart,
+    InvalidOptionSelection, PeerWorkflow, PeerWorkflowItem,
 )
 from openassessment.assessment.serializers import (
-    AssessmentSerializer, rubric_from_dict, AssessmentFeedbackSerializer,
-    full_assessment_dict)
+    AssessmentSerializer, AssessmentFeedbackSerializer, RubricSerializer,
+    full_assessment_dict, rubric_from_dict, serialize_assessments,
+)
 from submissions import api as sub_api
 from submissions.api import get_submission_and_student
 from submissions.models import Submission, StudentItem
 from submissions.serializers import SubmissionSerializer, StudentItemSerializer
......@@ -78,7 +80,7 @@ def is_complete(submission_uuid, requirements):
        workflow = PeerWorkflow.objects.get(submission_uuid=submission_uuid)
    except PeerWorkflow.DoesNotExist:
        return False
-    return _check_student_done_grading(workflow, requirements["must_grade"])
+    return _num_peers_graded(workflow) >= requirements["must_grade"]
 
 
def get_score(submission_uuid, requirements):
......@@ -182,7 +184,6 @@ def create_assessment(
            "submission_uuid": submission.uuid,
            "score_type": PEER_TYPE,
            "feedback": feedback,
-            "parts": [{"option": option_id} for option_id in option_ids]
        }
 
        if scored_at is not None:
......@@ -192,8 +193,14 @@ def create_assessment(
        if not peer_serializer.is_valid():
            raise PeerAssessmentRequestError(peer_serializer.errors)
 
        assessment = peer_serializer.save()
+
+        # We do this to do an end-run around django-rest-framework serializer
+        # validation, which would otherwise require two DB queries per
+        # option to do validation. We already validated these options above.
+        AssessmentPart.add_to_assessment(assessment, option_ids)
+
        student_item = submission.student_item
        student_item_dict = StudentItemSerializer(student_item).data
......@@ -223,7 +230,7 @@ def create_assessment(
        # Close the active assessment
        _close_active_assessment(scorer_workflow, submission_uuid, assessment)
 
-        return peer_serializer.data
+        return full_assessment_dict(assessment)
    except DatabaseError:
        error_message = _(
            u"An error occurred while creating assessment {} for submission: "
......@@ -250,12 +257,20 @@ def get_rubric_max_scores(submission_uuid):
        the submission, or its associated rubric.
    """
    try:
-        assessments = Assessment.objects.filter(submission_uuid=submission_uuid).order_by("-scored_at", "-id")
-        if assessments:
-            return {
-                criterion.name: criterion.points_possible
-                for criterion in assessments[0].rubric.criteria.all()
-            }
+        assessments = list(
+            Assessment.objects.filter(
+                submission_uuid=submission_uuid
+            ).order_by("-scored_at", "-id").select_related("rubric")[:1]
+        )
+        if not assessments:
+            return None
+
+        assessment = assessments[0]
+        rubric_dict = RubricSerializer.serialized_from_cache(assessment.rubric)
+
+        return {
+            criterion["name"]: criterion["points_possible"]
+            for criterion in rubric_dict["criteria"]
+        }
    except Submission.DoesNotExist:
        return None
    except DatabaseError:
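
Two details above do the heavy lifting: slicing with `[:1]` compiles to `LIMIT 1` in the SQL, and `select_related("rubric")` pulls the rubric in with a JOIN, so wrapping it all in `list()` executes exactly one query. The old version cost one query for the assessment plus a lazy one the first time `assessments[0].rubric` was touched:

    # One query total: SELECT ... FROM assessment JOIN rubric ...
    #                  ORDER BY scored_at DESC, id DESC LIMIT 1
    assessments = list(
        Assessment.objects.filter(submission_uuid=submission_uuid)
        .order_by("-scored_at", "-id")
        .select_related("rubric")[:1]
    )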
......@@ -341,11 +356,11 @@ def has_finished_required_evaluating(student_item_dict, required_assessments):
    """
    workflow = _get_latest_workflow(student_item_dict)
    done = False
-    count = 0
+    peers_graded = 0
    if workflow:
-        done = _check_student_done_grading(workflow, required_assessments)
-        count = workflow.items.all().exclude(assessment=-1).count()
-    return done, count
+        peers_graded = _num_peers_graded(workflow)
+        done = (peers_graded >= required_assessments)
+    return done, peers_graded
 
 
def get_assessments(submission_uuid, scored_only=True, limit=None):
......@@ -398,13 +413,13 @@ def get_assessments(submission_uuid, scored_only=True, limit=None):
        if scored_only:
            assessments = PeerWorkflowItem.get_scored_assessments(
                submission_uuid
-            )
+            )[:limit]
        else:
            assessments = Assessment.objects.filter(
                submission_uuid=submission_uuid,
                score_type=PEER_TYPE
-            )
-        return [full_assessment_dict(assessment) for assessment in assessments[:limit]]
+            )[:limit]
+        return serialize_assessments(assessments)
    except DatabaseError:
        error_message = _(
            u"Error getting assessments for submission {}".format(submission_uuid)
......@@ -486,10 +501,10 @@ def get_submission_to_assess(
        submission_uuid = _get_submission_for_over_grading(workflow)
    if submission_uuid:
        try:
-            submission = Submission.objects.get(uuid=submission_uuid)
+            submission_data = sub_api.get_submission(submission_uuid)
            _create_peer_workflow_item(workflow, submission_uuid)
-            return SubmissionSerializer(submission).data
-        except Submission.DoesNotExist:
+            return submission_data
+        except sub_api.SubmissionDoesNotExist:
            error_message = _(
                u"Could not find a submission with the uuid {} for student {} "
                u"in the peer workflow."
......@@ -890,16 +905,14 @@ def _close_active_assessment(workflow, submission_uuid, assessment):
        raise PeerAssessmentWorkflowError(error_message)
 
 
-def _check_student_done_grading(workflow, must_grade):
-    """Checks if the student has graded enough peers.
-
-    Determines if the student has graded enough peers.
+def _num_peers_graded(workflow):
+    """Returns the number of peers the student owning the workflow has graded.
 
    Args:
        workflow (PeerWorkflow): The workflow associated with the current
            student.
-        must_grade (int): The number of submissions the student has to peer
-            assess before they are finished.
 
    Returns:
-        True if the student is done peer assessing, False if not.
+        int: The number of peers the student has graded.
......@@ -912,10 +925,10 @@ def _check_student_done_grading(workflow, must_grade):
        >>>     student_id="Bob",
        >>> )
        >>> workflow = _get_latest_workflow(student_item_dict)
-        >>> _check_student_done_grading(workflow, 3)
-        True
+        >>> _num_peers_graded(workflow)
+        3
 
    """
-    return workflow.items.all().exclude(assessment=-1).count() >= must_grade
+    return workflow.items.all().exclude(assessment=-1).count()
 
 
def get_assessment_feedback(submission_uuid):
......
"""
Public interface for self-assessment.
"""
from django.core.cache import cache
from django.utils.translation import ugettext as _
from submissions.api import (
get_submission_and_student, get_submission,
SubmissionNotFoundError, SubmissionRequestError
)
from openassessment.assessment.serializers import (
rubric_from_dict, AssessmentSerializer, full_assessment_dict, InvalidRubric
AssessmentSerializer, InvalidRubric, RubricSerializer,
full_assessment_dict, rubric_from_dict, serialize_assessments
)
from openassessment.assessment.models import (
Assessment, AssessmentPart, InvalidOptionSelection
)
from openassessment.assessment.models import Assessment, InvalidOptionSelection
# Assessments are tagged as "self-evaluation"
......@@ -74,7 +78,6 @@ def create_assessment(submission_uuid, user_id, options_selected, rubric_dict, s
        "submission_uuid": submission_uuid,
        "score_type": SELF_TYPE,
        "feedback": u"",
-        "parts": [{"option": option_id} for option_id in option_ids],
    }
 
    if scored_at is not None:
......@@ -86,10 +89,15 @@ def create_assessment(submission_uuid, user_id, options_selected, rubric_dict, s
        msg = _("Could not create self assessment: {errors}").format(errors=serializer.errors)
        raise SelfAssessmentRequestError(msg)
 
-    serializer.save()
+    assessment = serializer.save()
+
+    # We do this to do an end-run around django-rest-framework serializer
+    # validation, which would otherwise require two DB queries per
+    # option to do validation. We already validated these options above.
+    AssessmentPart.add_to_assessment(assessment, option_ids)
 
    # Return the serialized assessment
-    return serializer.data
+    return full_assessment_dict(assessment)
 
 
def get_assessment(submission_uuid):
......@@ -112,14 +120,11 @@ def get_assessment(submission_uuid):
    # but not at the database level. Someone could take advantage of the race condition
    # between checking the number of self-assessments and creating a new self-assessment.
    # To be safe, we retrieve just the most recent submission.
-    assessments = Assessment.objects.filter(
+    serialized_assessments = serialize_assessments(Assessment.objects.filter(
        score_type=SELF_TYPE, submission_uuid=submission_uuid
-    ).order_by('-scored_at')
+    ).order_by('-scored_at')[:1])
 
-    if assessments.exists():
-        assessment_dict = full_assessment_dict(assessments[0])
-        return assessment_dict
-    return None
+    return serialized_assessments[0] if serialized_assessments else None
 
 
def is_complete(submission_uuid):
......
......@@ -4,7 +4,9 @@ Serializers are created to ensure models do not have to be accessed outside the
 scope of the Tim APIs.
 """
 from copy import deepcopy
+import logging
 
+from django.core.cache import cache
 from django.utils.translation import ugettext as _
 from rest_framework import serializers
 from openassessment.assessment.models import (
......@@ -12,6 +14,9 @@ from openassessment.assessment.models import (
     PeerWorkflowItem, PeerWorkflow)
 
+
+logger = logging.getLogger(__name__)
+
 
 class InvalidRubric(Exception):
     """This can be raised during the deserialization process."""
     def __init__(self, errors):
......@@ -66,10 +71,11 @@ class CriterionOptionSerializer(NestedModelSerializer):
 class CriterionSerializer(NestedModelSerializer):
     """Serializer for :class:`Criterion`"""
     options = CriterionOptionSerializer(required=True, many=True)
+    points_possible = serializers.Field(source='points_possible')
 
     class Meta:
         model = Criterion
-        fields = ('order_num', 'name', 'prompt', 'options')
+        fields = ('order_num', 'name', 'prompt', 'options', 'points_possible')
 
     def validate_options(self, attrs, source):
         """Make sure we have at least one CriterionOption in a Criterion."""
......@@ -97,6 +103,49 @@ class RubricSerializer(NestedModelSerializer):
             raise serializers.ValidationError("Must have at least one criterion")
         return attrs
 
+    @classmethod
+    def serialized_from_cache(cls, rubric, local_cache=None):
+        """For a given `Rubric` model object, return a serialized version.
+
+        This method will attempt to use the cache if possible, first looking at
+        the `local_cache` dict you can pass in, and then looking at whatever
+        Django cache is configured.
+
+        Args:
+            rubric (Rubric): The Rubric model to get the serialized form of.
+            local_cache (dict): Mapping of `rubric.content_hash` to serialized
+                rubric dictionary. We include this so that we can call this
+                method in a loop.
+
+        Returns:
+            dict: `Rubric` fields as a dictionary, with `criteria` and `options`
+                relations followed.
+        """
+        # Optional local cache you can send in (for when you're calling this
+        # in a loop).
+        local_cache = local_cache or {}
+
+        # Check our in-memory cache...
+        if rubric.content_hash in local_cache:
+            return local_cache[rubric.content_hash]
+
+        # Check the external cache (e.g. memcached)
+        rubric_dict_cache_key = (
+            "RubricSerializer.serialized_from_cache.{}"
+            .format(rubric.content_hash)
+        )
+        rubric_dict = cache.get(rubric_dict_cache_key)
+        if rubric_dict:
+            local_cache[rubric.content_hash] = rubric_dict
+            return rubric_dict
+
+        # Grab it from the database
+        rubric_dict = RubricSerializer(rubric).data
+        cache.set(rubric_dict_cache_key, rubric_dict)
+        local_cache[rubric.content_hash] = rubric_dict
+
+        return rubric_dict
+
 
 class AssessmentPartSerializer(serializers.ModelSerializer):
     """Serializer for :class:`AssessmentPart`."""
......@@ -107,11 +156,7 @@ class AssessmentPartSerializer(serializers.ModelSerializer):
 
 class AssessmentSerializer(serializers.ModelSerializer):
-    """Serializer for :class:`Assessment`."""
-    parts = AssessmentPartSerializer(required=True, many=True)
-
-    points_earned = serializers.Field(source='points_earned')
-    points_possible = serializers.Field(source='points_possible')
+    """Simplified serializer for :class:`Assessment` that's lighter on the DB."""
 
     class Meta:
         model = Assessment
......@@ -122,20 +167,32 @@ class AssessmentSerializer(serializers.ModelSerializer):
             'scorer_id',
             'score_type',
             'feedback',
-
-            # Foreign Key
-            'parts',
-
-            # Computed, not part of the model
-            'points_earned',
-            'points_possible',
         )
 
 
+def serialize_assessments(assessments_qset):
+    assessments = list(assessments_qset.select_related("rubric"))
+    rubric_cache = {}
+
+    return [
+        full_assessment_dict(
+            assessment,
+            RubricSerializer.serialized_from_cache(
+                assessment.rubric, rubric_cache
+            )
+        )
+        for assessment in assessments
+    ]
 
 
-def full_assessment_dict(assessment):
+def full_assessment_dict(assessment, rubric_dict=None):
     """
-    Return a dict representation of the Assessment model,
-    including nested assessment parts.
+    Return a dict representation of the Assessment model, including nested
+    assessment parts. We do some of the serialization ourselves here instead
+    of relying on the Django REST Framework serializers. This is for performance
+    reasons -- we have a cached rubric easily available, and we don't want to
+    follow all the DB relations from assessment -> assessment part -> option ->
+    criterion.
 
     Args:
         assessment (Assessment): The Assessment model to serialize
......@@ -143,18 +200,45 @@ def full_assessment_dict(assessment):
     Returns:
         dict with keys 'rubric' (serialized Rubric model) and 'parts' (serialized assessment parts)
     """
+    assessment_cache_key = "assessment.full_assessment_dict.{}.{}.{}".format(
+        assessment.id, assessment.submission_uuid, assessment.scored_at.isoformat()
+    )
+    assessment_dict = cache.get(assessment_cache_key)
+    if assessment_dict:
+        return assessment_dict
+
     assessment_dict = AssessmentSerializer(assessment).data
-    rubric_dict = RubricSerializer(assessment.rubric).data
+    if not rubric_dict:
+        rubric_dict = RubricSerializer.serialized_from_cache(assessment.rubric)
     assessment_dict["rubric"] = rubric_dict
 
+    # This part looks a little goofy, but it's in the name of saving dozens of
+    # SQL lookups. The rubric_dict has the entire serialized output of the
+    # `Rubric`, its child `Criterion` and grandchild `CriterionOption`. This
+    # includes calculated things like `points_possible` which aren't actually in
+    # the DB model. Instead of invoking the serializers for `Criterion` and
+    # `CriterionOption` again, we simply index into the places we expect them to
+    # be from the big, saved `Rubric` serialization.
     parts = []
-    for part in assessment.parts.all():
-        part_dict = AssessmentPartSerializer(part).data
-        options_dict = CriterionOptionSerializer(part.option).data
-        criterion_dict = CriterionSerializer(part.option.criterion).data
+    for part in assessment.parts.all().select_related("option__criterion"):
+        criterion_dict = rubric_dict["criteria"][part.option.criterion.order_num]
+        options_dict = criterion_dict["options"][part.option.order_num]
         options_dict["criterion"] = criterion_dict
-        part_dict["option"] = options_dict
-        parts.append(part_dict)
+        parts.append({
+            "option": options_dict
+        })
 
+    # Now manually build up the dynamically calculated values on the
+    # `Assessment` so we can again avoid DB calls.
     assessment_dict["parts"] = parts
+    assessment_dict["points_earned"] = sum(
+        part_dict["option"]["points"] for part_dict in parts
+    )
+    assessment_dict["points_possible"] = rubric_dict["points_possible"]
+
+    cache.set(assessment_cache_key, assessment_dict)
+
     return assessment_dict
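
The indexing trick assumes the serializers emit `criteria` and `options` sorted by `order_num` (presumably enforced by the models' default ordering), so a part can find its serialized option without any SQL. Assuming a serialized rubric shaped roughly like this (illustrative data, not from the repo):

    rubric_dict = {
        "points_possible": 10,
        "criteria": [
            {"name": "clarity",  "points_possible": 5,   # order_num == 0
             "options": [{"name": "poor", "points": 0},  # order_num == 0
                         {"name": "good", "points": 5}]},
            {"name": "accuracy", "points_possible": 5,   # order_num == 1
             "options": [{"name": "poor", "points": 0},
                         {"name": "good", "points": 5}]},
        ],
    }

    # A part pointing at criterion 1, option 0 resolves in pure Python:
    option_dict = rubric_dict["criteria"][1]["options"][0]
    assert option_dict["points"] == 0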
......
......@@ -381,14 +381,11 @@ def get_scores(course_id, student_id):
 
 def get_latest_score_for_submission(submission_uuid):
     try:
-        submission = Submission.objects.get(uuid=submission_uuid)
-        score = Score.objects.filter(submission=submission).order_by("-id")[0]
+        score = Score.objects.filter(
+            submission__uuid=submission_uuid
+        ).order_by("-id").select_related("submission")[0]
     except IndexError:
         return None
-    except Submission.DoesNotExist:
-        raise SubmissionNotFoundError(
-            u"No submission matching uuid {}".format(submission_uuid)
-        )
 
     return ScoreSerializer(score).data
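
Besides saving a query, this changes the failure mode: a nonexistent submission no longer raises `SubmissionNotFoundError`; it simply matches no `Score` rows, so the `IndexError` branch returns `None`. Callers that depended on the exception now need to treat `None` as "not found":

    # Hypothetical caller: the uuid does not exist, or exists with no score yet.
    latest = get_latest_score_for_submission("no-such-uuid")
    if latest is None:
        pass  # both cases now look the same to the caller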
......
......@@ -18,3 +18,6 @@ sphinxcontrib-napoleon==0.2.3
 
 # runserver_plus
 Werkzeug==0.9.4
+
+# caching
+python-memcached==1.53
......@@ -21,3 +21,11 @@ MIDDLEWARE_CLASSES += (
 DEBUG_TOOLBAR_PATCH_SETTINGS = False
 INTERNAL_IPS = ('127.0.0.1',)
 
+CACHES = {
+    'default': {
+        'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache',
+        'LOCATION': '127.0.0.1:11211',
+        'TIMEOUT': 60 * 60 * 8
+    }
+}
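
With this dev-settings block, a memcached daemon on the default local port backs every `cache.get`/`cache.set` call introduced above, with entries expiring after eight hours (`60 * 60 * 8` seconds); `python-memcached` from the requirements change is the client library that Django's `MemcachedCache` backend drives. A quick wiring check from `./manage.py shell` (the key name is arbitrary):

    >>> from django.core.cache import cache
    >>> cache.set("smoke-test", 42)
    >>> cache.get("smoke-test")
    42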