Commit 51bb105c by Will Daly

Merge pull request #8 from edx/will/optimize-top-scores

Use read replica and cache for top scores
parents 01e67a25 b058adcf
...@@ -366,13 +366,17 @@ def get_submissions(student_item_dict, limit=None): ...@@ -366,13 +366,17 @@ def get_submissions(student_item_dict, limit=None):
return SubmissionSerializer(submission_models, many=True).data return SubmissionSerializer(submission_models, many=True).data
def get_top_submissions(course_id, item_id, item_type, number_of_top_scores): def get_top_submissions(course_id, item_id, item_type, number_of_top_scores, use_cache=True, read_replica=True):
"""Get a number of top scores for an assessment based on a particular student item """Get a number of top scores for an assessment based on a particular student item
This function will return top scores for the piece of assessment. This function will return top scores for the piece of assessment.
A score is only calculated for a student item if it has completed the workflow for A score is only calculated for a student item if it has completed the workflow for
a particular assessment module. a particular assessment module.
In general, users of top submissions can tolerate some latency
in the search results, so by default this call uses
a cache and the read replica (if available).
Args: Args:
course_id (str): The course to retrieve for the top scores course_id (str): The course to retrieve for the top scores
item_id (str): The item within the course to retrieve for the top scores item_id (str): The item within the course to retrieve for the top scores
...@@ -380,6 +384,11 @@ def get_top_submissions(course_id, item_id, item_type, number_of_top_scores): ...@@ -380,6 +384,11 @@ def get_top_submissions(course_id, item_id, item_type, number_of_top_scores):
number_of_top_scores (int): The number of scores to return, greater than 0 and no number_of_top_scores (int): The number of scores to return, greater than 0 and no
more than 100. more than 100.
Kwargs:
use_cache (bool): If true, check the cache before retrieving querying the database.
read_replica (bool): If true, attempt to use the read replica database.
If no read replica is available, use the default database.
Returns: Returns:
topscores (dict): The top scores for the assessment for the student item. topscores (dict): The top scores for the assessment for the student item.
An empty array if there are no scores. An empty array if there are no scores.
...@@ -412,23 +421,49 @@ def get_top_submissions(course_id, item_id, item_type, number_of_top_scores): ...@@ -412,23 +421,49 @@ def get_top_submissions(course_id, item_id, item_type, number_of_top_scores):
) )
logger.exception(error_msg) logger.exception(error_msg)
raise SubmissionRequestError(msg=error_msg) raise SubmissionRequestError(msg=error_msg)
try:
scores = Score.objects.filter( # First check the cache (unless caching is disabled)
student_item__course_id=course_id, cache_key = "submissions.top_submissions.{course}.{item}.{type}.{number}".format(
student_item__item_id=item_id, course=course_id,
student_item__item_type=item_type, item=item_id,
).select_related("submission").order_by("-points_earned")[:number_of_top_scores] type=item_type,
except DatabaseError: number=number_of_top_scores
msg = u"Could not fetch top scores for course {}, item {} of type {}".format( )
course_id, item_id, item_type top_submissions = cache.get(cache_key) if use_cache else None
)
logger.exception(msg) # If we can't find it in the cache (or caching is disabled), check the database
raise SubmissionInternalError(msg) # By default, prefer the read-replica.
topsubmissions = [] if top_submissions is None:
for score in scores: try:
answer = SubmissionSerializer(score.submission).data['answer'] query = Score.objects.filter(
topsubmissions.append({'score': score.points_earned, 'content': answer}) student_item__course_id=course_id,
return topsubmissions student_item__item_id=item_id,
student_item__item_type=item_type,
).select_related("submission").order_by("-points_earned")
if read_replica:
query = _use_read_replica(query)
scores = query[:number_of_top_scores]
except DatabaseError:
msg = u"Could not fetch top scores for course {}, item {} of type {}".format(
course_id, item_id, item_type
)
logger.exception(msg)
raise SubmissionInternalError(msg)
# Retrieve the submission content for each top score
top_submissions = [
{
"score": score.points_earned,
"content": SubmissionSerializer(score.submission).data['answer']
}
for score in scores
]
# Always store the retrieved list in the cache
cache.set(cache_key, top_submissions)
return top_submissions
def get_score(student_item): def get_score(student_item):
......
...@@ -284,9 +284,7 @@ class TestSubmissionsApi(TestCase): ...@@ -284,9 +284,7 @@ class TestSubmissionsApi(TestCase):
def test_get_top_submissions(self): def test_get_top_submissions(self):
student_item = copy.deepcopy(STUDENT_ITEM) student_item = copy.deepcopy(STUDENT_ITEM)
student_item["course_id"] = "get_scores_course" student_item["course_id"] = "get_scores_course"
student_item["item_id"] = "i4x://a/b/c/s1" student_item["item_id"] = "i4x://a/b/c/s1"
student_1 = api.create_submission(student_item, "Hello World") student_1 = api.create_submission(student_item, "Hello World")
...@@ -297,9 +295,15 @@ class TestSubmissionsApi(TestCase): ...@@ -297,9 +295,15 @@ class TestSubmissionsApi(TestCase):
api.set_score(student_2['uuid'], 4, 10) api.set_score(student_2['uuid'], 4, 10)
api.set_score(student_3['uuid'], 2, 10) api.set_score(student_3['uuid'], 2, 10)
#Get top scores works correctly # Get top scores works correctly
with self.assertNumQueries(1): with self.assertNumQueries(1):
top_scores = api.get_top_submissions(student_item["course_id"], student_item["item_id"], "Peer_Submission", 3) top_scores = api.get_top_submissions(
student_item["course_id"],
student_item["item_id"],
"Peer_Submission", 3,
use_cache=False,
read_replica=False,
)
self.assertEqual( self.assertEqual(
top_scores, top_scores,
[ [
...@@ -318,8 +322,14 @@ class TestSubmissionsApi(TestCase): ...@@ -318,8 +322,14 @@ class TestSubmissionsApi(TestCase):
] ]
) )
#Fewer top scores available than the number requested. # Fewer top scores available than the number requested.
top_scores = api.get_top_submissions(student_item["course_id"], student_item["item_id"], "Peer_Submission", 10) top_scores = api.get_top_submissions(
student_item["course_id"],
student_item["item_id"],
"Peer_Submission", 10,
use_cache=False,
read_replica=False
)
self.assertEqual( self.assertEqual(
top_scores, top_scores,
[ [
...@@ -338,8 +348,14 @@ class TestSubmissionsApi(TestCase): ...@@ -338,8 +348,14 @@ class TestSubmissionsApi(TestCase):
] ]
) )
#More top scores available than the number requested. # More top scores available than the number requested.
top_scores = api.get_top_submissions(student_item["course_id"], student_item["item_id"], "Peer_Submission", 2) top_scores = api.get_top_submissions(
student_item["course_id"],
student_item["item_id"],
"Peer_Submission", 2,
use_cache=False,
read_replica=False
)
self.assertEqual( self.assertEqual(
top_scores, top_scores,
[ [
...@@ -354,35 +370,76 @@ class TestSubmissionsApi(TestCase): ...@@ -354,35 +370,76 @@ class TestSubmissionsApi(TestCase):
] ]
) )
def test_get_top_submissions_from_cache(self):
student_1 = api.create_submission(STUDENT_ITEM, "Hello World")
student_2 = api.create_submission(STUDENT_ITEM, "Hello World")
student_3 = api.create_submission(STUDENT_ITEM, "Hello World")
api.set_score(student_1['uuid'], 8, 10)
api.set_score(student_2['uuid'], 4, 10)
api.set_score(student_3['uuid'], 2, 10)
# The first call should hit the database
with self.assertNumQueries(1):
scores = api.get_top_submissions(
STUDENT_ITEM["course_id"],
STUDENT_ITEM["item_id"],
STUDENT_ITEM["item_type"], 2,
use_cache=True,
read_replica=False
)
self.assertEqual(scores, [
{ "content": "Hello World", "score": 8 },
{ "content": "Hello World", "score": 4 },
])
# The second call should use the cache
with self.assertNumQueries(0):
cached_scores = api.get_top_submissions(
STUDENT_ITEM["course_id"],
STUDENT_ITEM["item_id"],
STUDENT_ITEM["item_type"], 2,
use_cache=True,
read_replica=False
)
self.assertEqual(cached_scores, scores)
@raises(api.SubmissionRequestError) @raises(api.SubmissionRequestError)
def test_error_on_get_top_submissions_too_few(self): def test_error_on_get_top_submissions_too_few(self):
student_item = copy.deepcopy(STUDENT_ITEM) student_item = copy.deepcopy(STUDENT_ITEM)
student_item["course_id"] = "get_scores_course" student_item["course_id"] = "get_scores_course"
student_item["item_id"] = "i4x://a/b/c/s1" student_item["item_id"] = "i4x://a/b/c/s1"
api.get_top_submissions(
api.get_top_submissions(student_item["course_id"], student_item["item_id"], "Peer_Submission", -1) student_item["course_id"],
student_item["item_id"],
"Peer_Submission", 0,
read_replica=False
)
@raises(api.SubmissionRequestError) @raises(api.SubmissionRequestError)
def test_error_on_get_top_submissions_too_many(self): def test_error_on_get_top_submissions_too_many(self):
student_item = copy.deepcopy(STUDENT_ITEM) student_item = copy.deepcopy(STUDENT_ITEM)
student_item["course_id"] = "get_scores_course" student_item["course_id"] = "get_scores_course"
student_item["item_id"] = "i4x://a/b/c/s1" student_item["item_id"] = "i4x://a/b/c/s1"
api.get_top_submissions(
api.get_top_submissions(student_item["course_id"], student_item["item_id"], "Peer_Submission", api.MAX_TOP_SUBMISSIONS+1) student_item["course_id"],
student_item["item_id"],
"Peer_Submission",
api.MAX_TOP_SUBMISSIONS + 1,
read_replica=False
)
@patch.object(Score.objects, 'filter') @patch.object(Score.objects, 'filter')
@raises(api.SubmissionInternalError) @raises(api.SubmissionInternalError)
def test_error_on_get_top_submissions_db_error(self, mock_filter): def test_error_on_get_top_submissions_db_error(self, mock_filter):
mock_filter.side_effect = DatabaseError("Bad things happened") mock_filter.side_effect = DatabaseError("Bad things happened")
student_item = copy.deepcopy(STUDENT_ITEM) student_item = copy.deepcopy(STUDENT_ITEM)
api.get_top_submissions(student_item["course_id"], student_item["item_id"], "Peer_Submission", 1) api.get_top_submissions(
student_item["course_id"],
student_item["item_id"],
"Peer_Submission", 1,
read_replica=False
)
@patch.object(ScoreSummary.objects, 'filter') @patch.object(ScoreSummary.objects, 'filter')
@raises(api.SubmissionInternalError) @raises(api.SubmissionInternalError)
...@@ -390,11 +447,7 @@ class TestSubmissionsApi(TestCase): ...@@ -390,11 +447,7 @@ class TestSubmissionsApi(TestCase):
mock_filter.side_effect = DatabaseError("Bad things happened") mock_filter.side_effect = DatabaseError("Bad things happened")
api.get_scores("some_course", "some_student") api.get_scores("some_course", "some_student")
def _assert_score( def _assert_score(self, score, expected_points_earned, expected_points_possible):
self,
score,
expected_points_earned,
expected_points_possible):
self.assertIsNotNone(score) self.assertIsNotNone(score)
self.assertEqual(score["points_earned"], expected_points_earned) self.assertEqual(score["points_earned"], expected_points_earned)
self.assertEqual(score["points_possible"], expected_points_possible) self.assertEqual(score["points_possible"], expected_points_possible)
...@@ -40,3 +40,34 @@ class ReadReplicaTest(TransactionTestCase): ...@@ -40,3 +40,34 @@ class ReadReplicaTest(TransactionTestCase):
retrieved = sub_api.get_latest_score_for_submission(self.submission['uuid'], read_replica=True) retrieved = sub_api.get_latest_score_for_submission(self.submission['uuid'], read_replica=True)
self.assertEqual(retrieved['points_possible'], self.SCORE['points_possible']) self.assertEqual(retrieved['points_possible'], self.SCORE['points_possible'])
self.assertEqual(retrieved['points_earned'], self.SCORE['points_earned']) self.assertEqual(retrieved['points_earned'], self.SCORE['points_earned'])
def test_get_top_submissions(self):
student_1 = sub_api.create_submission(self.STUDENT_ITEM, "Hello World")
student_2 = sub_api.create_submission(self.STUDENT_ITEM, "Hello World")
student_3 = sub_api.create_submission(self.STUDENT_ITEM, "Hello World")
sub_api.set_score(student_1['uuid'], 8, 10)
sub_api.set_score(student_2['uuid'], 4, 10)
sub_api.set_score(student_3['uuid'], 2, 10)
# Use the read-replica
with self.assertNumQueries(0):
top_scores = sub_api.get_top_submissions(
self.STUDENT_ITEM['course_id'],
self.STUDENT_ITEM['item_id'],
self.STUDENT_ITEM['item_type'], 2,
read_replica=True
)
self.assertEqual(
top_scores,
[
{
'content': "Hello World",
'score': 8
},
{
'content': "Hello World",
'score': 4
},
]
)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment