Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-submissions
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-submissions
Commits
51bb105c
Commit
51bb105c
authored
Aug 04, 2014
by
Will Daly
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #8 from edx/will/optimize-top-scores
Use read replica and cache for top scores
parents
01e67a25
b058adcf
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
163 additions
and
43 deletions
+163
-43
submissions/api.py
+53
-18
submissions/tests/test_api.py
+78
-25
submissions/tests/test_read_replica.py
+32
-0
No files found.
submissions/api.py
View file @
51bb105c
...
...
@@ -366,13 +366,17 @@ def get_submissions(student_item_dict, limit=None):
return
SubmissionSerializer
(
submission_models
,
many
=
True
)
.
data
def
get_top_submissions
(
course_id
,
item_id
,
item_type
,
number_of_top_scores
):
def
get_top_submissions
(
course_id
,
item_id
,
item_type
,
number_of_top_scores
,
use_cache
=
True
,
read_replica
=
True
):
"""Get a number of top scores for an assessment based on a particular student item
This function will return top scores for the piece of assessment.
A score is only calculated for a student item if it has completed the workflow for
a particular assessment module.
In general, users of top submissions can tolerate some latency
in the search results, so by default this call uses
a cache and the read replica (if available).
Args:
course_id (str): The course to retrieve for the top scores
item_id (str): The item within the course to retrieve for the top scores
...
...
@@ -380,6 +384,11 @@ def get_top_submissions(course_id, item_id, item_type, number_of_top_scores):
number_of_top_scores (int): The number of scores to return, greater than 0 and no
more than 100.
Kwargs:
use_cache (bool): If true, check the cache before retrieving querying the database.
read_replica (bool): If true, attempt to use the read replica database.
If no read replica is available, use the default database.
Returns:
topscores (dict): The top scores for the assessment for the student item.
An empty array if there are no scores.
...
...
@@ -412,23 +421,49 @@ def get_top_submissions(course_id, item_id, item_type, number_of_top_scores):
)
logger
.
exception
(
error_msg
)
raise
SubmissionRequestError
(
msg
=
error_msg
)
try
:
scores
=
Score
.
objects
.
filter
(
student_item__course_id
=
course_id
,
student_item__item_id
=
item_id
,
student_item__item_type
=
item_type
,
)
.
select_related
(
"submission"
)
.
order_by
(
"-points_earned"
)[:
number_of_top_scores
]
except
DatabaseError
:
msg
=
u"Could not fetch top scores for course {}, item {} of type {}"
.
format
(
course_id
,
item_id
,
item_type
)
logger
.
exception
(
msg
)
raise
SubmissionInternalError
(
msg
)
topsubmissions
=
[]
for
score
in
scores
:
answer
=
SubmissionSerializer
(
score
.
submission
)
.
data
[
'answer'
]
topsubmissions
.
append
({
'score'
:
score
.
points_earned
,
'content'
:
answer
})
return
topsubmissions
# First check the cache (unless caching is disabled)
cache_key
=
"submissions.top_submissions.{course}.{item}.{type}.{number}"
.
format
(
course
=
course_id
,
item
=
item_id
,
type
=
item_type
,
number
=
number_of_top_scores
)
top_submissions
=
cache
.
get
(
cache_key
)
if
use_cache
else
None
# If we can't find it in the cache (or caching is disabled), check the database
# By default, prefer the read-replica.
if
top_submissions
is
None
:
try
:
query
=
Score
.
objects
.
filter
(
student_item__course_id
=
course_id
,
student_item__item_id
=
item_id
,
student_item__item_type
=
item_type
,
)
.
select_related
(
"submission"
)
.
order_by
(
"-points_earned"
)
if
read_replica
:
query
=
_use_read_replica
(
query
)
scores
=
query
[:
number_of_top_scores
]
except
DatabaseError
:
msg
=
u"Could not fetch top scores for course {}, item {} of type {}"
.
format
(
course_id
,
item_id
,
item_type
)
logger
.
exception
(
msg
)
raise
SubmissionInternalError
(
msg
)
# Retrieve the submission content for each top score
top_submissions
=
[
{
"score"
:
score
.
points_earned
,
"content"
:
SubmissionSerializer
(
score
.
submission
)
.
data
[
'answer'
]
}
for
score
in
scores
]
# Always store the retrieved list in the cache
cache
.
set
(
cache_key
,
top_submissions
)
return
top_submissions
def
get_score
(
student_item
):
...
...
submissions/tests/test_api.py
View file @
51bb105c
...
...
@@ -284,9 +284,7 @@ class TestSubmissionsApi(TestCase):
def
test_get_top_submissions
(
self
):
student_item
=
copy
.
deepcopy
(
STUDENT_ITEM
)
student_item
[
"course_id"
]
=
"get_scores_course"
student_item
[
"item_id"
]
=
"i4x://a/b/c/s1"
student_1
=
api
.
create_submission
(
student_item
,
"Hello World"
)
...
...
@@ -297,9 +295,15 @@ class TestSubmissionsApi(TestCase):
api
.
set_score
(
student_2
[
'uuid'
],
4
,
10
)
api
.
set_score
(
student_3
[
'uuid'
],
2
,
10
)
#Get top scores works correctly
#
Get top scores works correctly
with
self
.
assertNumQueries
(
1
):
top_scores
=
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
3
)
top_scores
=
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
3
,
use_cache
=
False
,
read_replica
=
False
,
)
self
.
assertEqual
(
top_scores
,
[
...
...
@@ -318,8 +322,14 @@ class TestSubmissionsApi(TestCase):
]
)
#Fewer top scores available than the number requested.
top_scores
=
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
10
)
# Fewer top scores available than the number requested.
top_scores
=
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
10
,
use_cache
=
False
,
read_replica
=
False
)
self
.
assertEqual
(
top_scores
,
[
...
...
@@ -338,8 +348,14 @@ class TestSubmissionsApi(TestCase):
]
)
#More top scores available than the number requested.
top_scores
=
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
2
)
# More top scores available than the number requested.
top_scores
=
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
2
,
use_cache
=
False
,
read_replica
=
False
)
self
.
assertEqual
(
top_scores
,
[
...
...
@@ -354,35 +370,76 @@ class TestSubmissionsApi(TestCase):
]
)
def
test_get_top_submissions_from_cache
(
self
):
student_1
=
api
.
create_submission
(
STUDENT_ITEM
,
"Hello World"
)
student_2
=
api
.
create_submission
(
STUDENT_ITEM
,
"Hello World"
)
student_3
=
api
.
create_submission
(
STUDENT_ITEM
,
"Hello World"
)
api
.
set_score
(
student_1
[
'uuid'
],
8
,
10
)
api
.
set_score
(
student_2
[
'uuid'
],
4
,
10
)
api
.
set_score
(
student_3
[
'uuid'
],
2
,
10
)
# The first call should hit the database
with
self
.
assertNumQueries
(
1
):
scores
=
api
.
get_top_submissions
(
STUDENT_ITEM
[
"course_id"
],
STUDENT_ITEM
[
"item_id"
],
STUDENT_ITEM
[
"item_type"
],
2
,
use_cache
=
True
,
read_replica
=
False
)
self
.
assertEqual
(
scores
,
[
{
"content"
:
"Hello World"
,
"score"
:
8
},
{
"content"
:
"Hello World"
,
"score"
:
4
},
])
# The second call should use the cache
with
self
.
assertNumQueries
(
0
):
cached_scores
=
api
.
get_top_submissions
(
STUDENT_ITEM
[
"course_id"
],
STUDENT_ITEM
[
"item_id"
],
STUDENT_ITEM
[
"item_type"
],
2
,
use_cache
=
True
,
read_replica
=
False
)
self
.
assertEqual
(
cached_scores
,
scores
)
@raises
(
api
.
SubmissionRequestError
)
def
test_error_on_get_top_submissions_too_few
(
self
):
student_item
=
copy
.
deepcopy
(
STUDENT_ITEM
)
student_item
[
"course_id"
]
=
"get_scores_course"
student_item
[
"item_id"
]
=
"i4x://a/b/c/s1"
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
-
1
)
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
0
,
read_replica
=
False
)
@raises
(
api
.
SubmissionRequestError
)
def
test_error_on_get_top_submissions_too_many
(
self
):
student_item
=
copy
.
deepcopy
(
STUDENT_ITEM
)
student_item
[
"course_id"
]
=
"get_scores_course"
student_item
[
"item_id"
]
=
"i4x://a/b/c/s1"
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
api
.
MAX_TOP_SUBMISSIONS
+
1
)
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
api
.
MAX_TOP_SUBMISSIONS
+
1
,
read_replica
=
False
)
@patch.object
(
Score
.
objects
,
'filter'
)
@raises
(
api
.
SubmissionInternalError
)
def
test_error_on_get_top_submissions_db_error
(
self
,
mock_filter
):
mock_filter
.
side_effect
=
DatabaseError
(
"Bad things happened"
)
student_item
=
copy
.
deepcopy
(
STUDENT_ITEM
)
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
1
)
api
.
get_top_submissions
(
student_item
[
"course_id"
],
student_item
[
"item_id"
],
"Peer_Submission"
,
1
,
read_replica
=
False
)
@patch.object
(
ScoreSummary
.
objects
,
'filter'
)
@raises
(
api
.
SubmissionInternalError
)
...
...
@@ -390,11 +447,7 @@ class TestSubmissionsApi(TestCase):
mock_filter
.
side_effect
=
DatabaseError
(
"Bad things happened"
)
api
.
get_scores
(
"some_course"
,
"some_student"
)
def
_assert_score
(
self
,
score
,
expected_points_earned
,
expected_points_possible
):
def
_assert_score
(
self
,
score
,
expected_points_earned
,
expected_points_possible
):
self
.
assertIsNotNone
(
score
)
self
.
assertEqual
(
score
[
"points_earned"
],
expected_points_earned
)
self
.
assertEqual
(
score
[
"points_possible"
],
expected_points_possible
)
submissions/tests/test_read_replica.py
View file @
51bb105c
...
...
@@ -40,3 +40,34 @@ class ReadReplicaTest(TransactionTestCase):
retrieved
=
sub_api
.
get_latest_score_for_submission
(
self
.
submission
[
'uuid'
],
read_replica
=
True
)
self
.
assertEqual
(
retrieved
[
'points_possible'
],
self
.
SCORE
[
'points_possible'
])
self
.
assertEqual
(
retrieved
[
'points_earned'
],
self
.
SCORE
[
'points_earned'
])
def
test_get_top_submissions
(
self
):
student_1
=
sub_api
.
create_submission
(
self
.
STUDENT_ITEM
,
"Hello World"
)
student_2
=
sub_api
.
create_submission
(
self
.
STUDENT_ITEM
,
"Hello World"
)
student_3
=
sub_api
.
create_submission
(
self
.
STUDENT_ITEM
,
"Hello World"
)
sub_api
.
set_score
(
student_1
[
'uuid'
],
8
,
10
)
sub_api
.
set_score
(
student_2
[
'uuid'
],
4
,
10
)
sub_api
.
set_score
(
student_3
[
'uuid'
],
2
,
10
)
# Use the read-replica
with
self
.
assertNumQueries
(
0
):
top_scores
=
sub_api
.
get_top_submissions
(
self
.
STUDENT_ITEM
[
'course_id'
],
self
.
STUDENT_ITEM
[
'item_id'
],
self
.
STUDENT_ITEM
[
'item_type'
],
2
,
read_replica
=
True
)
self
.
assertEqual
(
top_scores
,
[
{
'content'
:
"Hello World"
,
'score'
:
8
},
{
'content'
:
"Hello World"
,
'score'
:
4
},
]
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment