Commit b32d34f7 by Sven Marnach

Refactor get_all_submissions implementation and add a test.

parent 29a96a4b
...@@ -3,10 +3,11 @@ Public interface for the submissions app. ...@@ -3,10 +3,11 @@ Public interface for the submissions app.
""" """
import copy import copy
import itertools
import logging import logging
import operator
import json import json
from collections import namedtuple
from django.conf import settings from django.conf import settings
from django.core.cache import cache from django.core.cache import cache
from django.db import IntegrityError, DatabaseError from django.db import IntegrityError, DatabaseError
...@@ -90,15 +91,6 @@ class SubmissionRequestError(SubmissionError): ...@@ -90,15 +91,6 @@ class SubmissionRequestError(SubmissionError):
) )
# Large result sets are handed back as namedtuples: they cost no more memory
# than plain tuples, yet callers read fields by name, which keeps the API
# stable if the set of fields has to change later.
LatestSubmission = namedtuple(
    "LatestSubmission",
    field_names=[
        "student_id",
        "attempt_number",
        "submitted_at",
        "created_at",
        "answer",
    ],
)
def create_submission(student_item_dict, answer, submitted_at=None, attempt_number=None): def create_submission(student_item_dict, answer, submitted_at=None, attempt_number=None):
"""Creates a submission for assessment. """Creates a submission for assessment.
...@@ -384,37 +376,44 @@ def get_submissions(student_item_dict, limit=None): ...@@ -384,37 +376,44 @@ def get_submissions(student_item_dict, limit=None):
def get_all_submissions(course_id, item_id, item_type, read_replica=True):
    """For the given item, get the most recent submission for every student who has submitted.

    This may return a very large result set! It is implemented as a generator for efficiency,
    so the query is only executed (and can only fail) once the result is iterated.

    Args:
        course_id, item_id, item_type (string): The values of the respective student_item fields
            to filter the submissions by.
        read_replica (bool): If true, attempt to use the read replica database.
            If no read replica is available, use the default database.

    Yields:
        Dicts representing the submissions with the following fields:
            student_item
            student_id
            attempt_number
            submitted_at
            created_at
            answer

    Raises:
        Nothing is raised deliberately; database errors from the underlying query
        propagate to the caller. The result may be an empty iterable.

    """
    submission_qs = Submission.objects
    if read_replica:
        submission_qs = _use_read_replica(submission_qs)

    # select_related() pulls the student item in with the same query, so reading
    # submission.student_item below does not trigger a query per row.
    # '-pk' breaks ties between rows of one student that share a created_at value,
    # making the selected row deterministic instead of database-dependent.
    # NOTE(review): presumes primary keys grow with insertion order -- confirm for this model.
    query = submission_qs.select_related('student_item').filter(
        student_item__course_id=course_id,
        student_item__item_id=item_id,
        student_item__item_type=item_type,
    ).order_by('student_item__student_id', '-created_at', '-pk').iterator()

    # We cannot use SELECT DISTINCT ON because it's PostgreSQL only, so unfortunately
    # our results will contain every entry of each student, not just the most recent.
    # The rows are sorted by student id, so groupby() sees each student as one
    # contiguous run, and the first row of a run is that student's newest submission.
    student_id_of = operator.attrgetter('student_item.student_id')
    for _, student_rows in itertools.groupby(query, student_id_of):
        submission = next(student_rows)
        data = SubmissionSerializer(submission).data
        data['student_id'] = submission.student_item.student_id
        yield data
def get_top_submissions(course_id, item_id, item_type, number_of_top_scores, use_cache=True, read_replica=True): def get_top_submissions(course_id, item_id, item_type, number_of_top_scores, use_cache=True, read_replica=True):
......
...@@ -74,6 +74,28 @@ class TestSubmissionsApi(TestCase): ...@@ -74,6 +74,28 @@ class TestSubmissionsApi(TestCase):
self._assert_submission(submissions[1], ANSWER_ONE, student_item.pk, 1) self._assert_submission(submissions[1], ANSWER_ONE, student_item.pk, 1)
self._assert_submission(submissions[0], ANSWER_TWO, student_item.pk, 2) self._assert_submission(submissions[0], ANSWER_TWO, student_item.pk, 2)
def test_get_all_submissions(self):
    """get_all_submissions() yields only the latest submission of each student, using one query."""
    # Interleave the two students so that the latest submission of each one is
    # neither the first nor consistently the last row created.
    api.create_submission(SECOND_STUDENT_ITEM, ANSWER_TWO)
    api.create_submission(STUDENT_ITEM, ANSWER_ONE)
    api.create_submission(STUDENT_ITEM, ANSWER_TWO)
    api.create_submission(SECOND_STUDENT_ITEM, ANSWER_ONE)

    # Materialise the generator inside the context manager so that the query
    # it runs is counted.
    with self.assertNumQueries(1):
        submissions = list(api.get_all_submissions(
            STUDENT_ITEM['course_id'],
            STUDENT_ITEM['item_id'],
            STUDENT_ITEM['item_type'],
            read_replica=False,
        ))

    student_item = self._get_student_item(STUDENT_ITEM)
    second_student_item = self._get_student_item(SECOND_STUDENT_ITEM)

    # The order of the results is not part of the specification of
    # get_all_submissions(), so look the entries up by student id instead of
    # relying on their position.
    self.assertEqual(len(submissions), 2)
    by_student_id = {submission['student_id']: submission for submission in submissions}
    self.assertEqual(
        set(by_student_id),
        {STUDENT_ITEM['student_id'], SECOND_STUDENT_ITEM['student_id']},
    )
    self._assert_submission(
        by_student_id[SECOND_STUDENT_ITEM['student_id']], ANSWER_ONE, second_student_item.pk, 2
    )
    self._assert_submission(
        by_student_id[STUDENT_ITEM['student_id']], ANSWER_TWO, student_item.pk, 2
    )
def test_get_submission(self): def test_get_submission(self):
# Test base case that we can create a submission and get it back # Test base case that we can create a submission and get it back
sub_dict1 = api.create_submission(STUDENT_ITEM, ANSWER_ONE) sub_dict1 = api.create_submission(STUDENT_ITEM, ANSWER_ONE)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment