Commit b32d34f7 by Sven Marnach

Refactor get_all_submissions implementation and add a test.

parent 29a96a4b
...@@ -3,10 +3,11 @@ Public interface for the submissions app. ...@@ -3,10 +3,11 @@ Public interface for the submissions app.
""" """
import copy import copy
import itertools
import logging import logging
import operator
import json import json
from collections import namedtuple
from django.conf import settings from django.conf import settings
from django.core.cache import cache from django.core.cache import cache
from django.db import IntegrityError, DatabaseError from django.db import IntegrityError, DatabaseError
...@@ -90,15 +91,6 @@ class SubmissionRequestError(SubmissionError): ...@@ -90,15 +91,6 @@ class SubmissionRequestError(SubmissionError):
) )
# For API stability and low memory usage, large result sets are returned using namedtuples.
# They take up only as much memory as tuples, but fields are accessible by name,
# so we can change the fields in the future if needed
LatestSubmission = namedtuple(
"LatestSubmission",
("student_id", "attempt_number", "submitted_at", "created_at", "answer"),
)
def create_submission(student_item_dict, answer, submitted_at=None, attempt_number=None): def create_submission(student_item_dict, answer, submitted_at=None, attempt_number=None):
"""Creates a submission for assessment. """Creates a submission for assessment.
...@@ -384,37 +376,44 @@ def get_submissions(student_item_dict, limit=None): ...@@ -384,37 +376,44 @@ def get_submissions(student_item_dict, limit=None):
def get_all_submissions(course_id, item_id, item_type, read_replica=True): def get_all_submissions(course_id, item_id, item_type, read_replica=True):
""" """For the given item, get the most recent submission for every student who has submitted.
For the given item, get the most recent submission for every student who has submitted.
This may return a very large result set! It is implemented as a generator for efficiency. This may return a very large result set! It is implemented as a generator for efficiency.
It yields namedtuples with the following fields:
student_id Args:
attempt_number course_id, item_id, item_type (string): The values of the respective student_item fields
submitted_at to filter the submissions by.
created_at read_replica (bool): If true, attempt to use the read replica database.
answer If no read replica is available, use the default database.
Cannot fail unless there's a database error, but may return an empty list. Yields:
Dicts representing the submissions with the following fields:
student_item
student_id
attempt_number
submitted_at
created_at
answer
Raises:
Cannot fail unless there's a database error, but may return an empty iterable.
""" """
query = Submission.objects.filter( submission_qs = Submission.objects
if read_replica:
submission_qs = _use_read_replica(submission_qs)
query = submission_qs.select_related('student_item').filter(
student_item__course_id=course_id, student_item__course_id=course_id,
student_item__item_id=item_id, student_item__item_id=item_id,
student_item__item_type=item_type, student_item__item_type=item_type,
).values_list( ).order_by('student_item__student_id', '-created_at').iterator()
'student_item__student_id', 'attempt_number', 'submitted_at', 'created_at', 'raw_answer'
).order_by('student_item__student_id', '-created_at') # We cannot use SELECT DISTINCT ON because it's PostgreSQL only, so unfortunately
# our results will contain every entry of each student, not just the most recent.
last_student_id = None for unused_student_id, row_iter in itertools.groupby(query, operator.attrgetter('student_item.student_id')):
for row in query.all(): submission = next(row_iter)
# We cannot use SELECT DISTINCT ON because it's PostgreSQL only, so unfortunately data = SubmissionSerializer(submission).data
# our results may contain every entry of each student, not just the most recent. data['student_id'] = submission.student_item.student_id
if row[0] == last_student_id: yield data
continue # Skip this row; it's an old submission for a student we've already included
last_student_id = row[0]
# Parse 'raw_answer' as JSON:
answer = json.loads(row[4])
yield LatestSubmission(row[0], row[1], row[2], row[3], answer)
def get_top_submissions(course_id, item_id, item_type, number_of_top_scores, use_cache=True, read_replica=True): def get_top_submissions(course_id, item_id, item_type, number_of_top_scores, use_cache=True, read_replica=True):
......
...@@ -74,6 +74,28 @@ class TestSubmissionsApi(TestCase): ...@@ -74,6 +74,28 @@ class TestSubmissionsApi(TestCase):
self._assert_submission(submissions[1], ANSWER_ONE, student_item.pk, 1) self._assert_submission(submissions[1], ANSWER_ONE, student_item.pk, 1)
self._assert_submission(submissions[0], ANSWER_TWO, student_item.pk, 2) self._assert_submission(submissions[0], ANSWER_TWO, student_item.pk, 2)
def test_get_all_submissions(self):
api.create_submission(SECOND_STUDENT_ITEM, ANSWER_TWO)
api.create_submission(STUDENT_ITEM, ANSWER_ONE)
api.create_submission(STUDENT_ITEM, ANSWER_TWO)
api.create_submission(SECOND_STUDENT_ITEM, ANSWER_ONE)
with self.assertNumQueries(1):
submissions = list(api.get_all_submissions(
STUDENT_ITEM['course_id'],
STUDENT_ITEM['item_id'],
STUDENT_ITEM['item_type'],
read_replica=False,
))
student_item = self._get_student_item(STUDENT_ITEM)
second_student_item = self._get_student_item(SECOND_STUDENT_ITEM)
# The result is assumed to be sorted by student_id, which is not part of the specification
# of get_all_submissions(), but it is what it currently does.
self._assert_submission(submissions[0], ANSWER_ONE, second_student_item.pk, 2)
self.assertEqual(submissions[0]['student_id'], SECOND_STUDENT_ITEM['student_id'])
self._assert_submission(submissions[1], ANSWER_TWO, student_item.pk, 2)
self.assertEqual(submissions[1]['student_id'], STUDENT_ITEM['student_id'])
def test_get_submission(self): def test_get_submission(self):
# Test base case that we can create a submission and get it back # Test base case that we can create a submission and get it back
sub_dict1 = api.create_submission(STUDENT_ITEM, ANSWER_ONE) sub_dict1 = api.create_submission(STUDENT_ITEM, ANSWER_ONE)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment