Commit 491e28ba by Clinton Blackburn

Merge pull request #54 from edx/course-problems-optimize

Optimized Courses Problems Endpoint
parents 6b88b6c4 ef070668
...@@ -82,3 +82,13 @@ def consolidate_answers(problem): ...@@ -82,3 +82,13 @@ def consolidate_answers(problem):
consolidated_answers.append(consolidated_answer) consolidated_answers.append(consolidated_answer)
return consolidated_answers return consolidated_answers
def dictfetchall(cursor):
    """Return all remaining rows from a cursor as a list of dicts.

    Each dict maps a column name (the first item of every entry in
    ``cursor.description``) to the value found in that position of the row.

    Args:
        cursor: A cursor on which a query has already been executed. It must
            expose ``description`` and ``fetchall()``.

    Returns:
        list: One dict per fetched row, in the order ``fetchall()`` yields them.
    """
    # Column names are identical for every row, so derive them once instead
    # of rebuilding the list per row. ``description`` is None when the last
    # statement produced no result set; treat that as "no columns".
    columns = [col[0] for col in cursor.description or ()]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]
...@@ -48,6 +48,7 @@ class ProblemSerializer(serializers.Serializer): ...@@ -48,6 +48,7 @@ class ProblemSerializer(serializers.Serializer):
total_submissions = serializers.IntegerField(default=0) total_submissions = serializers.IntegerField(default=0)
correct_submissions = serializers.IntegerField(default=0) correct_submissions = serializers.IntegerField(default=0)
part_ids = serializers.CharField() part_ids = serializers.CharField()
created = serializers.DateTimeField(format=settings.DATETIME_FORMAT)
class ProblemResponseAnswerDistributionSerializer(ModelSerializerWithCreatedField): class ProblemResponseAnswerDistributionSerializer(ModelSerializerWithCreatedField):
......
...@@ -600,29 +600,34 @@ class CourseProblemsListViewTests(DemoCourseMixin, TestCaseWithAuthentication): ...@@ -600,29 +600,34 @@ class CourseProblemsListViewTests(DemoCourseMixin, TestCaseWithAuthentication):
# This data should never be returned by the tests below because the course_id doesn't match. # This data should never be returned by the tests below because the course_id doesn't match.
G(models.ProblemResponseAnswerDistribution) G(models.ProblemResponseAnswerDistribution)
# This test assumes the view is using Python's groupby for grouping. Create multiple objects here to test the # Create multiple objects here to test the grouping. Add a model with a different module_id to break up the
# grouping. Add a model with a different module_id to break up the natural order and ensure the view properly # natural order and ensure the view properly sorts the objects before grouping.
# sorts the objects before grouping.
module_id = 'i4x://test/problem/1' module_id = 'i4x://test/problem/1'
alt_module_id = 'i4x://test/problem/2' alt_module_id = 'i4x://test/problem/2'
created = datetime.datetime.utcnow()
alt_created = created + datetime.timedelta(seconds=2)
o1 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=module_id, correct=True, o1 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=module_id, correct=True,
count=100) count=100, created=created)
o2 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=alt_module_id, o2 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=alt_module_id,
correct=True, count=100) correct=True, count=100, created=created)
o3 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=module_id, correct=False, o3 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=module_id, correct=False,
count=200) count=200, created=alt_created)
expected = [ expected = [
{ {
'module_id': module_id, 'module_id': module_id,
'total_submissions': 300, 'total_submissions': 300,
'correct_submissions': 100, 'correct_submissions': 100,
'part_ids': [o1.part_id, o3.part_id] 'part_ids': [o1.part_id, o3.part_id],
'created': alt_created.strftime(settings.DATETIME_FORMAT)
}, },
{ {
'module_id': alt_module_id, 'module_id': alt_module_id,
'total_submissions': 100, 'total_submissions': 100,
'correct_submissions': 100, 'correct_submissions': 100,
'part_ids': [o2.part_id] 'part_ids': [o2.part_id],
'created': created.strftime(settings.DATETIME_FORMAT)
} }
] ]
......
...@@ -4,6 +4,7 @@ import warnings ...@@ -4,6 +4,7 @@ import warnings
from django.conf import settings from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ObjectDoesNotExist
from django.db import connections
from django.db.models import Max from django.db.models import Max
from django.http import Http404 from django.http import Http404
from django.utils.timezone import make_aware, utc from django.utils.timezone import make_aware, utc
...@@ -11,6 +12,7 @@ from rest_framework import generics ...@@ -11,6 +12,7 @@ from rest_framework import generics
from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.keys import CourseKey
from analytics_data_api.constants import enrollment_modes from analytics_data_api.constants import enrollment_modes
from analytics_data_api.utils import dictfetchall
from analytics_data_api.v0 import models, serializers from analytics_data_api.v0 import models, serializers
...@@ -610,6 +612,7 @@ class CourseEnrollmentByLocationView(BaseCourseEnrollmentView): ...@@ -610,6 +612,7 @@ class CourseEnrollmentByLocationView(BaseCourseEnrollmentView):
return returned_items return returned_items
# pylint: disable=abstract-method
class ProblemsListView(BaseCourseView): class ProblemsListView(BaseCourseView):
""" """
Get the problems. Get the problems.
...@@ -627,36 +630,36 @@ class ProblemsListView(BaseCourseView): ...@@ -627,36 +630,36 @@ class ProblemsListView(BaseCourseView):
* correct_submissions: Total number of *correct* submissions. * correct_submissions: Total number of *correct* submissions.
* part_ids: List of problem part IDs * part_ids: List of problem part IDs
""" """
model = models.ProblemResponseAnswerDistribution
serializer_class = serializers.ProblemSerializer serializer_class = serializers.ProblemSerializer
allow_empty = False
def apply_date_filtering(self, queryset):
    """Return ``queryset`` unchanged; no date filter is applied by this view."""
    # Date filtering is not possible for this data.
    return queryset
def get_queryset(self):
    """Return one aggregated dict per problem (``module_id``) in the course.

    The aggregation is done by the database with a single raw SQL query
    against the ``answer_distribution`` table, filtered by ``self.course_id``.

    Returns:
        list: Dicts with the keys ``module_id``, ``total_submissions`` (int),
        ``correct_submissions`` (int), ``part_ids`` (sorted list of strings)
        and ``created`` (``datetime.datetime``, or None if the database
        returned NULL).
    """
    sql = """
        SELECT
            module_id,
            SUM(count) AS total_submissions,
            SUM(CASE WHEN correct=1 THEN count ELSE 0 END) AS correct_submissions,
            GROUP_CONCAT(DISTINCT part_id) AS part_ids,
            MAX(created) AS created
        FROM answer_distribution
        WHERE course_id = %s
        GROUP BY module_id;
    """

    with connections[settings.ANALYTICS_DATABASE].cursor() as cursor:
        cursor.execute(sql, [self.course_id])
        rows = dictfetchall(cursor)

    for row in rows:
        # Convert the comma-separated list into an array of strings. The
        # order of GROUP_CONCAT output is not specified, so sort to keep the
        # response deterministic. A NULL aggregate becomes an empty list.
        part_ids = row['part_ids']
        row['part_ids'] = sorted(part_ids.split(',')) if part_ids else []

        # Convert the aggregated decimal fields to integers
        row['total_submissions'] = int(row['total_submissions'])
        row['correct_submissions'] = int(row['correct_submissions'])

        # Rather than write custom SQL for the SQLite backend, simply parse
        # the timestamp when it does not come back as a datetime already.
        created = row['created']
        if created is not None and not isinstance(created, datetime.datetime):
            try:
                created = datetime.datetime.strptime(created, '%Y-%m-%d %H:%M:%S.%f')
            except ValueError:
                # The text form may lack the fractional-seconds part.
                created = datetime.datetime.strptime(created, '%Y-%m-%d %H:%M:%S')
            row['created'] = created

    return rows
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment