Merge pull request #14 from open-craft/data-export

Add get_all_submissions method to the API

Merge pull request #14 from open-craft/data-export
Add get_all_submissions method to the API
7c766502 · Sven Marnach · e2361932 · e4091fb7 · 7c766502 · 7c766502
Commit 7c766502 authored Jun 05, 2015 by Sven Marnach
Show whitespace changes
Inline Side-by-side

Showing with 67 additions and 0 deletions

submissions/api.py
+45 -0

submissions/tests/test_api.py
+22 -0

No files found.
--- a/submissions/api.py
+++ b/submissions/api.py
@@ -3,7 +3,9 @@ Public interface for the submissions app.

 """
 import copy
+import itertools
 import logging
+import operator
 import json

 from django.conf import settings
@@ -373,6 +375,49 @@ def get_submissions(student_item_dict, limit=None):
    return SubmissionSerializer(submission_models, many=True).data


+def get_all_submissions(course_id, item_id, item_type, read_replica=True):
+    """For the given item, get the most recent submission for every student who has submitted.
+
+    This may return a very large result set! It is implemented as a generator for efficiency.
+
+    Args:
+        course_id, item_id, item_type (string): The values of the respective student_item fields
+            to filter the submissions by.
+        read_replica (bool): If true, attempt to use the read replica database.
+            If no read replica is available, use the default database.
+
+    Yields:
+        Dicts representing the submissions with the following fields:
+            student_item
+            student_id
+            attempt_number
+            submitted_at
+            created_at
+            answer
+
+    Raises:
+        Cannot fail unless there's a database error, but may return an empty iterable.
+    """
+    submission_qs = Submission.objects
+    if read_replica:
+        submission_qs = _use_read_replica(submission_qs)
+    # We cannot use SELECT DISTINCT ON because it's PostgreSQL only, so unfortunately
+    # our results will contain every entry of each student, not just the most recent.
+    # We sort by student_id, then by descending submitted_at and id, so the results will be
+    # grouped by student, with the most recent submission being the first one in each group.
+    query = submission_qs.select_related('student_item').filter(
+        student_item__course_id=course_id,
+        student_item__item_id=item_id,
+        student_item__item_type=item_type,
+    ).order_by('student_item__student_id', '-submitted_at', '-id').iterator()
+
+    for unused_student_id, row_iter in itertools.groupby(query, operator.attrgetter('student_item.student_id')):
+        submission = next(row_iter)
+        data = SubmissionSerializer(submission).data
+        data['student_id'] = submission.student_item.student_id
+        yield data
+
+
 def get_top_submissions(course_id, item_id, item_type, number_of_top_scores, use_cache=True, read_replica=True):
    """Get a number of top scores for an assessment based on a particular student item


--- a/submissions/tests/test_api.py
+++ b/submissions/tests/test_api.py
@@ -74,6 +74,28 @@ class TestSubmissionsApi(TestCase):
        self._assert_submission(submissions[1], ANSWER_ONE, student_item.pk, 1)
        self._assert_submission(submissions[0], ANSWER_TWO, student_item.pk, 2)

+    def test_get_all_submissions(self):
+        api.create_submission(SECOND_STUDENT_ITEM, ANSWER_TWO)
+        api.create_submission(STUDENT_ITEM, ANSWER_ONE)
+        api.create_submission(STUDENT_ITEM, ANSWER_TWO)
+        api.create_submission(SECOND_STUDENT_ITEM, ANSWER_ONE)
+        with self.assertNumQueries(1):
+            submissions = list(api.get_all_submissions(
+                STUDENT_ITEM['course_id'],
+                STUDENT_ITEM['item_id'],
+                STUDENT_ITEM['item_type'],
+                read_replica=False,
+            ))
+
+        student_item = self._get_student_item(STUDENT_ITEM)
+        second_student_item = self._get_student_item(SECOND_STUDENT_ITEM)
+        # The result is assumed to be sorted by student_id, which is not part of the specification
+        # of get_all_submissions(), but it is what it currently does.
+        self._assert_submission(submissions[0], ANSWER_ONE, second_student_item.pk, 2)
+        self.assertEqual(submissions[0]['student_id'], SECOND_STUDENT_ITEM['student_id'])
+        self._assert_submission(submissions[1], ANSWER_TWO, student_item.pk, 2)
+        self.assertEqual(submissions[1]['student_id'], STUDENT_ITEM['student_id'])
+
    def test_get_submission(self):
        # Test base case that we can create a submission and get it back
        sub_dict1 = api.create_submission(STUDENT_ITEM, ANSWER_ONE)