Commit b32d34f7 by Sven Marnach

Refactor get_all_submissions implementation and add a test.

parent 29a96a4b
......@@ -3,10 +3,11 @@ Public interface for the submissions app.
"""
import copy
import itertools
import logging
import operator
import json
from collections import namedtuple
from django.conf import settings
from django.core.cache import cache
from django.db import IntegrityError, DatabaseError
......@@ -90,15 +91,6 @@ class SubmissionRequestError(SubmissionError):
)
# Large result sets are handed back as namedtuples: they need no more memory than
# plain tuples, yet every field can be read by name, which keeps the API stable
# should the set of fields have to change later on.
LatestSubmission = namedtuple(
    "LatestSubmission",
    [
        "student_id",
        "attempt_number",
        "submitted_at",
        "created_at",
        "answer",
    ],
)
def create_submission(student_item_dict, answer, submitted_at=None, attempt_number=None):
"""Creates a submission for assessment.
......@@ -384,37 +376,44 @@ def get_submissions(student_item_dict, limit=None):
def get_all_submissions(course_id, item_id, item_type, read_replica=True):
    """For the given item, get the most recent submission for every student who has submitted.

    This may return a very large result set! It is implemented as a generator for efficiency.

    Args:
        course_id, item_id, item_type (string): The values of the respective student_item fields
            to filter the submissions by.
        read_replica (bool): If true, attempt to use the read replica database.
            If no read replica is available, use the default database.

    Yields:
        Dicts representing the submissions with the following fields:
            student_item
            student_id
            attempt_number
            submitted_at
            created_at
            answer

    Raises:
        Nothing of its own. Because this is a generator, database errors propagate
        to the caller while the result is being iterated. The result may be an
        empty iterable.
    """
    submission_qs = Submission.objects
    if read_replica:
        submission_qs = _use_read_replica(submission_qs)
    # select_related() pulls the student item in with the same query, so reading
    # submission.student_item below does not trigger a lookup per row.
    query = submission_qs.select_related('student_item').filter(
        student_item__course_id=course_id,
        student_item__item_id=item_id,
        student_item__item_type=item_type,
    ).order_by('student_item__student_id', '-created_at').iterator()

    # We cannot use SELECT DISTINCT ON because it's PostgreSQL only, so unfortunately
    # our results will contain every entry of each student, not just the most recent.
    # The rows are ordered by student and then newest first, so the first row of each
    # group is that student's latest submission; the rest of the group is skipped.
    by_student_id = operator.attrgetter('student_item.student_id')
    for _student_id, row_iter in itertools.groupby(query, by_student_id):
        submission = next(row_iter)
        data = SubmissionSerializer(submission).data
        data['student_id'] = submission.student_item.student_id
        yield data
def get_top_submissions(course_id, item_id, item_type, number_of_top_scores, use_cache=True, read_replica=True):
......
......@@ -74,6 +74,28 @@ class TestSubmissionsApi(TestCase):
self._assert_submission(submissions[1], ANSWER_ONE, student_item.pk, 1)
self._assert_submission(submissions[0], ANSWER_TWO, student_item.pk, 2)
def test_get_all_submissions(self):
    """Exercise get_all_submissions() with two students who each submitted twice."""
    # Create the submissions in interleaved order so the API has to group them itself.
    creation_order = (
        (SECOND_STUDENT_ITEM, ANSWER_TWO),
        (STUDENT_ITEM, ANSWER_ONE),
        (STUDENT_ITEM, ANSWER_TWO),
        (SECOND_STUDENT_ITEM, ANSWER_ONE),
    )
    for item, answer in creation_order:
        api.create_submission(item, answer)

    # Consuming the whole generator must cost exactly one database query.
    with self.assertNumQueries(1):
        result_iter = api.get_all_submissions(
            STUDENT_ITEM['course_id'],
            STUDENT_ITEM['item_id'],
            STUDENT_ITEM['item_type'],
            read_replica=False,
        )
        latest = list(result_iter)

    first_item = self._get_student_item(STUDENT_ITEM)
    second_item = self._get_student_item(SECOND_STUDENT_ITEM)

    # The result is assumed to be sorted by student_id, which is not part of the specification
    # of get_all_submissions(), but it is what it currently does.
    expected = (
        (ANSWER_ONE, second_item.pk, SECOND_STUDENT_ITEM['student_id']),
        (ANSWER_TWO, first_item.pk, STUDENT_ITEM['student_id']),
    )
    for index, (answer, item_pk, student_id) in enumerate(expected):
        entry = latest[index]
        self._assert_submission(entry, answer, item_pk, 2)
        self.assertEqual(entry['student_id'], student_id)
def test_get_submission(self):
# Test base case that we can create a submission and get it back
sub_dict1 = api.create_submission(STUDENT_ITEM, ANSWER_ONE)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment