edx / edx-ora2 — commit 4b6cff6c
Authored Mar 21, 2014 by David Ormsbee

Merge pull request #182 from edx/ormsbee/kill_queries

Reducing query count
Parents: c1dbf128, 428824c3

Showing 7 changed files with 227 additions and 75 deletions:
apps/openassessment/assessment/models.py       +50  -8
apps/openassessment/assessment/peer_api.py     +42  -29
apps/openassessment/assessment/self_api.py     +16  -11
apps/openassessment/assessment/serializers.py  +105 -21
apps/submissions/api.py                        +3   -6
requirements/dev.txt                           +3   -0
settings/dev.py                                +8   -0
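The changes share one theme: replace per-row lazy loading with JOINed queries (`select_related`), batch INSERTs (`bulk_create`), and cache what doesn't change. As an illustrative sketch only (not code from this commit), the query-count difference on a relation chain like assessment → part → option → criterion looks like this:

# Illustrative sketch, not code from this commit: the N+1 pattern that
# select_related() removes on a chain like part -> option -> criterion.

def criterion_names_naive(assessment):
    # 1 query for the parts, then up to 2 more queries per part as each
    # foreign key (option, then criterion) is lazily loaded on access.
    return [part.option.criterion.name for part in assessment.parts.all()]

def criterion_names_joined(assessment):
    # A single JOINed query fetches parts together with their options and
    # criteria; the attribute accesses below hit no additional queries.
    parts = assessment.parts.all().select_related("option__criterion")
    return [part.option.criterion.name for part in parts]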
apps/openassessment/assessment/models.py

...
@@ -15,6 +15,7 @@ from copy import deepcopy
 from hashlib import sha1
 import json

+from django.core.cache import cache
 from django.db import models
 from django.utils.timezone import now
 from django.utils.translation import ugettext as _
...
@@ -105,17 +106,35 @@ class Rubric(models.Model):
             InvalidOptionSelection: the selected options do not match the rubric.
         """
-        # Select all criteria and options for this rubric
-        # We use `select_related()` to minimize the number of database queries
-        rubric_options = CriterionOption.objects.filter(
-            criterion__rubric=self
-        ).select_related()
-
-        # Create a dict of dicts that maps:
-        # criterion names --> option names --> option ids
-        rubric_criteria_dict = defaultdict(dict)
-
-        # Construct dictionaries for each option in the rubric
-        for option in rubric_options:
-            rubric_criteria_dict[option.criterion.name][option.name] = option.id
+        # Cache based on the content_hash, not the id. It's slightly safer, and
+        # we don't have to worry about invalidation of the cache while running
+        # tests.
+        rubric_criteria_dict_cache_key = (
+            "assessment.rubric_criteria_dict.{}".format(self.content_hash)
+        )
+
+        # If we've already generated one of these for this rubric, grab it from
+        # the cache instead of hitting the database again.
+        rubric_criteria_dict = cache.get(rubric_criteria_dict_cache_key)
+        if not rubric_criteria_dict:
+            rubric_criteria_dict = defaultdict(dict)
+
+            # Select all criteria and options for this rubric
+            # We use `select_related()` to minimize the number of database queries
+            rubric_options = CriterionOption.objects.filter(
+                criterion__rubric=self
+            ).select_related()
+
+            # Construct dictionaries for each option in the rubric
+            for option in rubric_options:
+                rubric_criteria_dict[option.criterion.name][option.name] = option.id
+
+            # Save it in our cache
+            cache.set(rubric_criteria_dict_cache_key, rubric_criteria_dict)

         # Validate: are options selected for each criterion in the rubric?
         if len(options_selected) != len(rubric_criteria_dict):
...
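Keying the cache on `content_hash` rather than the row id means entries never need explicit invalidation: a changed rubric hashes differently and simply misses. A minimal sketch of the same get-or-compute pattern, using the standard `django.core.cache` API (the helper name and key prefix here are hypothetical):

from django.core.cache import cache

def cached_by_content_hash(content_hash, compute):
    """Get-or-compute keyed on a content hash (hypothetical helper)."""
    key = "example.by_content_hash.{}".format(content_hash)
    value = cache.get(key)      # returns None on a cache miss
    if value is None:
        value = compute()       # hit the database only on a miss
        cache.set(key, value)   # stored under the configured default TIMEOUT
    return value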
@@ -329,11 +348,26 @@ class Assessment(models.Model):
                 "bar": [6, 7, 8]
             }
         """
+        assessments = list(assessments)  # Force us to read it all
+        if not assessments:
+            return []
+
+        # Generate a cache key that represents all the assessments we're being
+        # asked to grab scores from (comma separated list of assessment IDs)
+        cache_key = "assessments.scores_by_criterion.{}".format(
+            ",".join(str(assessment.id) for assessment in assessments)
+        )
+        scores = cache.get(cache_key)
+        if scores:
+            return scores
+
         scores = defaultdict(list)
         for assessment in assessments:
-            for part in assessment.parts.all():
+            for part in assessment.parts.all().select_related("option__criterion"):
                 criterion_name = part.option.criterion.name
                 scores[criterion_name].append(part.option.points)
+
+        cache.set(cache_key, scores)

         return scores
...
...
@@ -362,6 +396,14 @@ class AssessmentPart(models.Model):
     def points_possible(self):
         return self.option.criterion.points_possible

+    @classmethod
+    def add_to_assessment(cls, assessment, option_ids):
+        """Creates AssessmentParts and adds them to `assessment`."""
+        cls.objects.bulk_create([
+            cls(assessment=assessment, option_id=option_id)
+            for option_id in option_ids
+        ])
+

 class AssessmentFeedback(models.Model):
     """A response to a submission's feedback, judging accuracy or helpfulness."""
...
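`add_to_assessment` leans on Django's `bulk_create`, which issues one INSERT for the whole list instead of one `save()` (and one query) per part. Note that `bulk_create` skips per-model `save()` logic and signals, which is why the callers below validate the option ids before invoking it. A hedged usage sketch (the ids are made up for illustration):

# Hypothetical usage; the option ids are invented for illustration.
option_ids = [3, 17, 42]
AssessmentPart.add_to_assessment(assessment, option_ids)

# Roughly equivalent to the single-statement expansion bulk_create performs:
AssessmentPart.objects.bulk_create([
    AssessmentPart(assessment=assessment, option_id=option_id)
    for option_id in option_ids
])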
apps/openassessment/assessment/peer_api.py

...
@@ -14,12 +14,14 @@ from django.db import DatabaseError
 from django.db.models import Q
 from openassessment.assessment.models import (
-    Assessment, InvalidOptionSelection, PeerWorkflow, PeerWorkflowItem,
-    AssessmentFeedback
+    Assessment, AssessmentFeedback, AssessmentPart, InvalidOptionSelection,
+    PeerWorkflow, PeerWorkflowItem,
 )
 from openassessment.assessment.serializers import (
-    AssessmentSerializer, rubric_from_dict,
-    AssessmentFeedbackSerializer, full_assessment_dict
+    AssessmentSerializer, AssessmentFeedbackSerializer, RubricSerializer,
+    full_assessment_dict, rubric_from_dict, serialize_assessments,
 )
+from submissions import api as sub_api
 from submissions.api import get_submission_and_student
 from submissions.models import Submission, StudentItem
 from submissions.serializers import SubmissionSerializer, StudentItemSerializer
...
@@ -78,7 +80,7 @@ def is_complete(submission_uuid, requirements):
         workflow = PeerWorkflow.objects.get(submission_uuid=submission_uuid)
     except PeerWorkflow.DoesNotExist:
         return False
-    return _check_student_done_grading(workflow, requirements["must_grade"])
+    return _num_peers_graded(workflow) >= requirements["must_grade"]


 def get_score(submission_uuid, requirements):
...
@@ -182,7 +184,6 @@ def create_assessment(
             "submission_uuid": submission.uuid,
             "score_type": PEER_TYPE,
             "feedback": feedback,
-            "parts": [{"option": option_id} for option_id in option_ids]
         }

         if scored_at is not None:
...
@@ -192,8 +193,14 @@ def create_assessment(
         if not peer_serializer.is_valid():
             raise PeerAssessmentRequestError(peer_serializer.errors)
+
         assessment = peer_serializer.save()
+
+        # We do this to do a run around django-rest-framework serializer
+        # validation, which would otherwise require two DB queries per
+        # option to do validation. We already validated these options above.
+        AssessmentPart.add_to_assessment(assessment, option_ids)
+
         student_item = submission.student_item
         student_item_dict = StudentItemSerializer(student_item).data
...
@@ -223,7 +230,7 @@ def create_assessment(
         # Close the active assessment
         _close_active_assessment(scorer_workflow, submission_uuid, assessment)

-        return peer_serializer.data
+        return full_assessment_dict(assessment)
     except DatabaseError:
         error_message = _(
             u"An error occurred while creating assessment {} for submission: "
...
@@ -250,12 +257,20 @@ def get_rubric_max_scores(submission_uuid):
         the submission, or its associated rubric.
     """
     try:
-        assessments = Assessment.objects.filter(
-            submission_uuid=submission_uuid
-        ).order_by("-scored_at", "-id")
-        if assessments:
-            return {
-                criterion.name: criterion.points_possible
-                for criterion in assessments[0].rubric.criteria.all()
-            }
+        assessments = list(
+            Assessment.objects.filter(submission_uuid=submission_uuid)
+            .order_by("-scored_at", "-id")
+            .select_related("rubric")[:1]
+        )
+        if not assessments:
+            return None
+
+        assessment = assessments[0]
+        rubric_dict = RubricSerializer.serialized_from_cache(assessment.rubric)
+        return {
+            criterion["name"]: criterion["points_possible"]
+            for criterion in rubric_dict["criteria"]
+        }
     except Submission.DoesNotExist:
         return None
     except DatabaseError:
...
@@ -341,11 +356,11 @@ def has_finished_required_evaluating(student_item_dict, required_assessments):
     """
     workflow = _get_latest_workflow(student_item_dict)
     done = False
-    count = 0
+    peers_graded = 0
     if workflow:
-        done = _check_student_done_grading(workflow, required_assessments)
-        count = workflow.items.all().exclude(assessment=-1).count()
-    return done, count
+        peers_graded = _num_peers_graded(workflow)
+        done = (peers_graded >= required_assessments)
+    return done, peers_graded


 def get_assessments(submission_uuid, scored_only=True, limit=None):
...
@@ -398,13 +413,13 @@ def get_assessments(submission_uuid, scored_only=True, limit=None):
         if scored_only:
             assessments = PeerWorkflowItem.get_scored_assessments(
                 submission_uuid
-            )
+            )[:limit]
         else:
             assessments = Assessment.objects.filter(
                 submission_uuid=submission_uuid,
                 score_type=PEER_TYPE
-            )
-        return [full_assessment_dict(assessment) for assessment in assessments[:limit]]
+            )[:limit]
+        return serialize_assessments(assessments)
     except DatabaseError:
         error_message = _(
             u"Error getting assessments for submission {}".format(submission_uuid)
...
@@ -486,10 +501,10 @@ def get_submission_to_assess(
         submission_uuid = _get_submission_for_over_grading(workflow)
     if submission_uuid:
         try:
-            submission = Submission.objects.get(uuid=submission_uuid)
+            submission_data = sub_api.get_submission(submission_uuid)
             _create_peer_workflow_item(workflow, submission_uuid)
-            return SubmissionSerializer(submission).data
-        except Submission.DoesNotExist:
+            return submission_data
+        except sub_api.SubmissionDoesNotExist:
             error_message = _(
                 u"Could not find a submission with the uuid {} for student {} "
                 u"in the peer workflow."
...
@@ -890,16 +905,14 @@ def _close_active_assessment(workflow, submission_uuid, assessment):
         raise PeerAssessmentWorkflowError(error_message)


-def _check_student_done_grading(workflow, must_grade):
+def _num_peers_graded(workflow):
     """
-    Checks if the student has graded enough peers.
-
-    Determines if the student has graded enough peers.
+    Returns the number of peers the student owning the workflow has graded.

     Args:
         workflow (PeerWorkflow): The workflow associated with the current
             student.
-        must_grade (int): The number of submissions the student has to peer
-            assess before they are finished.

     Returns:
         True if the student is done peer assessing, False if not.
...
@@ -912,10 +925,10 @@ def _check_student_done_grading(workflow, must_grade):
         >>> student_id="Bob",
         >>> )
         >>> workflow = _get_latest_workflow(student_item_dict)
-        >>> _check_student_done_grading(workflow, 3)
+        >>> _num_peers_graded(workflow, 3)
         True

     """
-    return workflow.items.all().exclude(assessment=-1).count() >= must_grade
+    return workflow.items.all().exclude(assessment=-1).count()


 def get_assessment_feedback(submission_uuid):
...
apps/openassessment/assessment/self_api.py

"""
Public interface for self-assessment.
"""
from
django.core.cache
import
cache
from
django.utils.translation
import
ugettext
as
_
from
submissions.api
import
(
get_submission_and_student
,
get_submission
,
SubmissionNotFoundError
,
SubmissionRequestError
)
from
openassessment.assessment.serializers
import
(
rubric_from_dict
,
AssessmentSerializer
,
full_assessment_dict
,
InvalidRubric
AssessmentSerializer
,
InvalidRubric
,
RubricSerializer
,
full_assessment_dict
,
rubric_from_dict
,
serialize_assessments
)
from
openassessment.assessment.models
import
(
Assessment
,
AssessmentPart
,
InvalidOptionSelection
)
from
openassessment.assessment.models
import
Assessment
,
InvalidOptionSelection
# Assessments are tagged as "self-evaluation"
...
...
@@ -74,7 +78,6 @@ def create_assessment(submission_uuid, user_id, options_selected, rubric_dict, s
         "submission_uuid": submission_uuid,
         "score_type": SELF_TYPE,
         "feedback": u"",
-        "parts": [{"option": option_id} for option_id in option_ids],
     }

     if scored_at is not None:
...
@@ -86,10 +89,15 @@ def create_assessment(submission_uuid, user_id, options_selected, rubric_dict, s
         msg = _("Could not create self assessment: {errors}").format(errors=serializer.errors)
         raise SelfAssessmentRequestError(msg)

-    serializer.save()
+    assessment = serializer.save()
+
+    # We do this to do a run around django-rest-framework serializer
+    # validation, which would otherwise require two DB queries per
+    # option to do validation. We already validated these options above.
+    AssessmentPart.add_to_assessment(assessment, option_ids)

     # Return the serialized assessment
-    return serializer.data
+    return full_assessment_dict(assessment)


 def get_assessment(submission_uuid):
...
@@ -112,14 +120,11 @@ def get_assessment(submission_uuid):
     # but not at the database level. Someone could take advantage of the race condition
     # between checking the number of self-assessments and creating a new self-assessment.
     # To be safe, we retrieve just the most recent submission.
-    assessments = Assessment.objects.filter(
-        score_type=SELF_TYPE, submission_uuid=submission_uuid
-    ).order_by('-scored_at')
-
-    if assessments.exists():
-        assessment_dict = full_assessment_dict(assessments[0])
-        return assessment_dict
-
-    return None
+    serialized_assessments = serialize_assessments(Assessment.objects.filter(
+        score_type=SELF_TYPE, submission_uuid=submission_uuid
+    ).order_by('-scored_at')[:1])
+
+    return serialized_assessments[0] if serialized_assessments else None


 def is_complete(submission_uuid):
...
apps/openassessment/assessment/serializers.py

...
@@ -4,7 +4,9 @@ Serializers are created to ensure models do not have to be accessed outside the
 scope of the Tim APIs.
 """
 from copy import deepcopy
+import logging

+from django.core.cache import cache
 from django.utils.translation import ugettext as _
 from rest_framework import serializers
 from openassessment.assessment.models import (
...
@@ -12,6 +14,9 @@ from openassessment.assessment.models import (
     PeerWorkflowItem, PeerWorkflow
 )

+logger = logging.getLogger(__name__)
+
+
 class InvalidRubric(Exception):
     """This can be raised during the deserialization process."""
     def __init__(self, errors):
...
@@ -66,10 +71,11 @@ class CriterionOptionSerializer(NestedModelSerializer):
 class CriterionSerializer(NestedModelSerializer):
     """Serializer for :class:`Criterion`"""
     options = CriterionOptionSerializer(required=True, many=True)
+    points_possible = serializers.Field(source='points_possible')

     class Meta:
         model = Criterion
-        fields = ('order_num', 'name', 'prompt', 'options')
+        fields = ('order_num', 'name', 'prompt', 'options', 'points_possible')

     def validate_options(self, attrs, source):
         """Make sure we have at least one CriterionOption in a Criterion."""
...
@@ -97,6 +103,49 @@ class RubricSerializer(NestedModelSerializer):
             raise serializers.ValidationError("Must have at least one criterion")
         return attrs

+    @classmethod
+    def serialized_from_cache(cls, rubric, local_cache=None):
+        """For a given `Rubric` model object, return a serialized version.
+
+        This method will attempt to use the cache if possible, first looking at
+        the `local_cache` dict you can pass in, and then looking at whatever
+        Django cache is configured.
+
+        Args:
+            rubric (Rubric): The Rubric model to get the serialized form of.
+            local_cache (dict): Mapping of `rubric.content_hash` to serialized
+                rubric dictionary. We include this so that we can call this
+                method in a loop.
+
+        Returns:
+            dict: `Rubric` fields as a dictionary, with `criteria` and `options`
+                relations followed.
+        """
+        # Optional local cache you can send in (for when you're calling this
+        # in a loop).
+        local_cache = local_cache or {}
+
+        # Check our in-memory cache...
+        if rubric.content_hash in local_cache:
+            return local_cache[rubric.content_hash]
+
+        # Check the external cache (e.g. memcached)
+        rubric_dict_cache_key = (
+            "RubricSerializer.serialized_from_cache.{}".format(rubric.content_hash)
+        )
+        rubric_dict = cache.get(rubric_dict_cache_key)
+        if rubric_dict:
+            local_cache[rubric.content_hash] = rubric_dict
+            return rubric_dict
+
+        # Grab it from the database
+        rubric_dict = RubricSerializer(rubric).data
+        cache.set(rubric_dict_cache_key, rubric_dict)
+        local_cache[rubric.content_hash] = rubric_dict
+
+        return rubric_dict
+

 class AssessmentPartSerializer(serializers.ModelSerializer):
     """Serializer for :class:`AssessmentPart`."""
...
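The `local_cache` dict pays off when many assessments share one rubric: within a single call, repeat rubrics resolve from the in-memory dict without even a cache-server round trip, and only true misses deserialize from the database. Roughly how a caller threads it through a loop (a sketch; `serialize_assessments` below does exactly this):

# Sketch of looping with a shared local_cache (hypothetical helper).
def serialized_rubrics(assessments):
    local_cache = {}  # content_hash -> serialized rubric, shared across the loop
    return [
        RubricSerializer.serialized_from_cache(assessment.rubric, local_cache)
        for assessment in assessments
    ]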
@@ -107,11 +156,7 @@ class AssessmentPartSerializer(serializers.ModelSerializer):
 class AssessmentSerializer(serializers.ModelSerializer):
-    """Serializer for :class:`Assessment`."""
-    parts = AssessmentPartSerializer(required=True, many=True)
-    points_earned = serializers.Field(source='points_earned')
-    points_possible = serializers.Field(source='points_possible')
+    """Simplified serializer for :class:`Assessment` that's lighter on the DB."""

     class Meta:
         model = Assessment
...
@@ -122,20 +167,32 @@ class AssessmentSerializer(serializers.ModelSerializer):
             'scorer_id',
             'score_type',
             'feedback',
-
-            # Foreign Key
-            'parts',
-
-            # Computed, not part of the model
-            'points_earned',
-            'points_possible',
         )


+def serialize_assessments(assessments_qset):
+    assessments = list(assessments_qset.select_related("rubric"))
+    rubric_cache = {}
+    return [
+        full_assessment_dict(
+            assessment,
+            RubricSerializer.serialized_from_cache(assessment.rubric, rubric_cache)
+        )
+        for assessment in assessments
+    ]
+
+
-def full_assessment_dict(assessment):
+def full_assessment_dict(assessment, rubric_dict=None):
     """
-    Return a dict representation of the Assessment model,
-    including nested assessment parts.
+    Return a dict representation of the Assessment model, including nested
+    assessment parts. We do some of the serialization ourselves here instead
+    of relying on the Django REST Framework serializers. This is for performance
+    reasons -- we have a cached rubric easily available, and we don't want to
+    follow all the DB relations from assessment -> assessment part -> option ->
+    criterion.

     Args:
         assessment (Assessment): The Assessment model to serialize
...
@@ -143,18 +200,45 @@ def full_assessment_dict(assessment):
     Returns:
         dict with keys 'rubric' (serialized Rubric model) and 'parts' (serialized assessment parts)
     """
+    assessment_cache_key = "assessment.full_assessment_dict.{}.{}.{}".format(
+        assessment.id, assessment.submission_uuid, assessment.scored_at.isoformat()
+    )
+    assessment_dict = cache.get(assessment_cache_key)
+    if assessment_dict:
+        return assessment_dict
+
     assessment_dict = AssessmentSerializer(assessment).data
-    rubric_dict = RubricSerializer(assessment.rubric).data
+    if not rubric_dict:
+        rubric_dict = RubricSerializer.serialized_from_cache(assessment.rubric)
     assessment_dict["rubric"] = rubric_dict

+    # This part looks a little goofy, but it's in the name of saving dozens of
+    # SQL lookups. The rubric_dict has the entire serialized output of the
+    # `Rubric`, its child `Criterion` and grandchild `CriterionOption`. This
+    # includes calculated things like `points_possible` which aren't actually in
+    # the DB model. Instead of invoking the serializers for `Criterion` and
+    # `CriterionOption` again, we simply index into the places we expect them to
+    # be from the big, saved `Rubric` serialization.
     parts = []
-    for part in assessment.parts.all():
-        part_dict = AssessmentPartSerializer(part).data
-        options_dict = CriterionOptionSerializer(part.option).data
-        criterion_dict = CriterionSerializer(part.option.criterion).data
+    for part in assessment.parts.all().select_related("option__criterion"):
+        criterion_dict = rubric_dict["criteria"][part.option.criterion.order_num]
+        options_dict = criterion_dict["options"][part.option.order_num]
         options_dict["criterion"] = criterion_dict
-        part_dict["option"] = options_dict
-        parts.append(part_dict)
+        parts.append({"option": options_dict})

+    # Now manually build up the dynamically calculated values on the
+    # `Assessment` so we can again avoid DB calls.
     assessment_dict["parts"] = parts
+    assessment_dict["points_earned"] = sum(
+        part_dict["option"]["points"] for part_dict in parts
+    )
+    assessment_dict["points_possible"] = rubric_dict["points_possible"]
+
+    cache.set(assessment_cache_key, assessment_dict)
+
     return assessment_dict
...
apps/submissions/api.py

...
@@ -381,14 +381,11 @@ def get_scores(course_id, student_id):
 def get_latest_score_for_submission(submission_uuid):
     try:
-        submission = Submission.objects.get(uuid=submission_uuid)
-        score = Score.objects.filter(submission=submission).order_by("-id")[0]
+        score = Score.objects.filter(
+            submission__uuid=submission_uuid
+        ).order_by("-id").select_related("submission")[0]
     except IndexError:
         return None
-    except Submission.DoesNotExist:
-        raise SubmissionNotFoundError(
-            u"No submission matching uuid {}".format(submission_uuid)
-        )

     return ScoreSerializer(score).data
...
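One behavioral consequence of collapsing the two lookups into a single queryset: a UUID with no matching submission now falls through to the `IndexError` branch and returns `None`, where the old code raised `SubmissionNotFoundError` from the `Submission.DoesNotExist` handler.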
requirements/dev.txt

...
@@ -18,3 +18,6 @@ sphinxcontrib-napoleon==0.2.3
 # runserver_plus
 Werkzeug==0.9.4
+
+# caching
+python-memcached==1.53
settings/dev.py

...
@@ -21,3 +21,11 @@ MIDDLEWARE_CLASSES += (
 DEBUG_TOOLBAR_PATCH_SETTINGS = False
 INTERNAL_IPS = ('127.0.0.1',)
+
+CACHES = {
+    'default': {
+        'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache',
+        'LOCATION': '127.0.0.1:11211',
+        'TIMEOUT': 60 * 60 * 8
+    }
+}
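With `python-memcached` installed (see requirements/dev.txt above) and a local memcached running, the binding can be sanity-checked from a Django shell. A quick manual check, not part of the commit:

# Run inside a Django shell with memcached listening on 127.0.0.1:11211.
from django.core.cache import cache

cache.set("smoke-test", "ok")
assert cache.get("smoke-test") == "ok"   # fails if the cache isn't reachable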