Merge pull request #54 from edx/course-problems-optimize

Optimized Courses Problems Endpoint

Merge pull request #54 from edx/course-problems-optimize
Optimized Courses Problems Endpoint
491e28ba · Clinton Blackburn · 6b88b6c4 · ef070668 · 491e28ba · 491e28ba
Commit 491e28ba authored Jan 28, 2015 by Clinton Blackburn
Hide whitespace changes
Inline Side-by-side

Showing with 55 additions and 36 deletions

analytics_data_api/utils.py
+10 -0

analytics_data_api/v0/serializers.py
+1 -0

analytics_data_api/v0/tests/views/test_courses.py
+13 -8

analytics_data_api/v0/views/courses.py
+31 -28

No files found.
--- a/analytics_data_api/utils.py
+++ b/analytics_data_api/utils.py
@@ -82,3 +82,13 @@ def consolidate_answers(problem):
        consolidated_answers.append(consolidated_answer)
    return consolidated_answers
+def dictfetchall(cursor):
+    """Return all rows from a cursor as a list of dicts.
+
+    Each fetched row becomes one dict keyed by column name, where the names
+    are the first element of every entry in ``cursor.description``.
+
+    Arguments:
+        cursor: An object exposing ``description`` and ``fetchall()`` on
+            which a query has already been executed.
+
+    Returns:
+        list: One ``{column_name: value}`` dict per row, in fetch order.
+    """
+    # Build the column names once rather than once per row. ``description``
+    # may be None when the last statement produced no result set.
+    columns = [col[0] for col in cursor.description or ()]
+    return [dict(zip(columns, row)) for row in cursor.fetchall()]
--- a/analytics_data_api/v0/serializers.py
+++ b/analytics_data_api/v0/serializers.py
@@ -48,6 +48,7 @@ class ProblemSerializer(serializers.Serializer):
    total_submissions = serializers.IntegerField(default=0)
    correct_submissions = serializers.IntegerField(default=0)
    part_ids = serializers.CharField()
+    created = serializers.DateTimeField(format=settings.DATETIME_FORMAT)
 class ProblemResponseAnswerDistributionSerializer(ModelSerializerWithCreatedField):

--- a/analytics_data_api/v0/tests/views/test_courses.py
+++ b/analytics_data_api/v0/tests/views/test_courses.py
@@ -600,29 +600,34 @@ class CourseProblemsListViewTests(DemoCourseMixin, TestCaseWithAuthentication):
        # This data should never be returned by the tests below because the course_id doesn't match.
        G(models.ProblemResponseAnswerDistribution)
-        # This test assumes the view is using Python's groupby for grouping. Create multiple objects here to test the
+        # Create multiple objects here to test the grouping. Add a model with a different module_id to break up the
-        # grouping. Add a model with a different module_id to break up the natural order and ensure the view properly
+        # natural order and ensure the view properly sorts the objects before grouping.
-        # sorts the objects before grouping.
        module_id = 'i4x://test/problem/1'
        alt_module_id = 'i4x://test/problem/2'
+        created = datetime.datetime.utcnow()
+        alt_created = created + datetime.timedelta(seconds=2)
        o1 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=module_id, correct=True,
-               count=100)
+               count=100, created=created)
        o2 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=alt_module_id,
-               correct=True, count=100)
+               correct=True, count=100, created=created)
        o3 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=module_id, correct=False,
-               count=200)
+               count=200, created=alt_created)
        expected = [
            {
                'module_id': module_id,
                'total_submissions': 300,
                'correct_submissions': 100,
-                'part_ids': [o1.part_id, o3.part_id]
+                'part_ids': [o1.part_id, o3.part_id],
+                'created': alt_created.strftime(settings.DATETIME_FORMAT)
            },
            {
                'module_id': alt_module_id,
                'total_submissions': 100,
                'correct_submissions': 100,
-                'part_ids': [o2.part_id]
+                'part_ids': [o2.part_id],
+                'created': created.strftime(settings.DATETIME_FORMAT)
            }
        ]

--- a/analytics_data_api/v0/views/courses.py
+++ b/analytics_data_api/v0/views/courses.py
@@ -4,6 +4,7 @@ import warnings
 from django.conf import settings
 from django.core.exceptions import ObjectDoesNotExist
+from django.db import connections
 from django.db.models import Max
 from django.http import Http404
 from django.utils.timezone import make_aware, utc
@@ -11,6 +12,7 @@ from rest_framework import generics
 from opaque_keys.edx.keys import CourseKey
 from analytics_data_api.constants import enrollment_modes
+from analytics_data_api.utils import dictfetchall
 from analytics_data_api.v0 import models, serializers
@@ -610,6 +612,7 @@ class CourseEnrollmentByLocationView(BaseCourseEnrollmentView):
        return returned_items
+# pylint: disable=abstract-method
 class ProblemsListView(BaseCourseView):
    """
    Get the problems.
@@ -627,36 +630,36 @@ class ProblemsListView(BaseCourseView):
            * correct_submissions: Total number of *correct* submissions.
            * part_ids: List of problem part IDs
    """
-    model = models.ProblemResponseAnswerDistribution
    serializer_class = serializers.ProblemSerializer
+    allow_empty = False
-    def apply_date_filtering(self, queryset):
-        # Date filtering is not possible for this data.
-        return queryset
    def get_queryset(self):
+        """
+        Get the aggregated submission data for each problem in the course.
+
+        Runs a single aggregate query against the analytics database and returns a list of dicts, one per
+        module_id, with the keys module_id, total_submissions, correct_submissions, part_ids, and created.
+        """
-        queryset = super(ProblemsListView, self).get_queryset()
+        sql = """
-        queryset = queryset.order_by('module_id', 'part_id')
+SELECT
+    module_id,
+    SUM(count) AS total_submissions,
+    SUM(CASE WHEN correct=1 THEN count ELSE 0 END) AS correct_submissions,
+    GROUP_CONCAT(DISTINCT part_id) AS part_ids,
+    MAX(created) AS created
+FROM answer_distribution
+WHERE course_id = %s
+GROUP BY module_id;
+        """
+        with connections[settings.ANALYTICS_DATABASE].cursor() as cursor:
+            cursor.execute(sql, [self.course_id])
+            rows = dictfetchall(cursor)
-        data = []
+        for row in rows:
+            # Convert the comma-separated list into a sorted array of strings. GROUP_CONCAT does not guarantee an
+            # order, and the previous implementation returned the part IDs sorted.
+            row['part_ids'] = sorted(row['part_ids'].split(','))
-        for problem_id, distribution in groupby(queryset, lambda x: x.module_id):
+            # Convert the aggregated decimal fields to integers.
-            total = 0
+            row['total_submissions'] = int(row['total_submissions'])
-            correct = 0
+            row['correct_submissions'] = int(row['correct_submissions'])
-            part_ids = set()    # Use a set to remove duplicate values.
+            # Rather than write custom SQL for the SQLite backend, simply parse the timestamp.
-            for answer in distribution:
+            created = row['created']
-                part_ids.add(answer.part_id)
+            if not isinstance(created, datetime.datetime):
-                count = answer.count
+                # A text timestamp may lack the fractional part (Python renders datetimes without one when the
+                # microseconds are zero), so choose the format that matches the value.
+                timestamp_format = '%Y-%m-%d %H:%M:%S.%f' if '.' in created else '%Y-%m-%d %H:%M:%S'
+                row['created'] = datetime.datetime.strptime(created, timestamp_format)
-                total += count
-                if answer.correct:
+        return rows
-                    correct += count
-            data.append({
-                'module_id': problem_id,
-                'total_submissions': total,
-                'correct_submissions': correct,
-                'part_ids': sorted(part_ids)
-            })
-        return data