Commit 3e63a2c2 by dylanrhodes

Make API compatible with old and new answer_dist schema.

parent a5be8559
...@@ -3,6 +3,8 @@ from collections import defaultdict ...@@ -3,6 +3,8 @@ from collections import defaultdict
from django.db.models import Q from django.db.models import Q
from rest_framework.authtoken.models import Token from rest_framework.authtoken.models import Token
from analytics_data_api.v0.models import ProblemResponseAnswerDistribution
def delete_user_auth_token(username): def delete_user_auth_token(username):
""" """
...@@ -71,13 +73,17 @@ def consolidate_answers(problem): ...@@ -71,13 +73,17 @@ def consolidate_answers(problem):
continue continue
for answer in answers: for answer in answers:
if not consolidated_answer: if consolidated_answer:
if type(consolidated_answer) == ProblemResponseAnswerDistribution:
consolidated_answer.count += answer.count
else:
consolidated_answer.first_response_count += answer.first_response_count
consolidated_answer.last_response_count += answer.last_response_count
else:
consolidated_answer = answer consolidated_answer = answer
consolidated_answer.variant = None consolidated_answer.variant = None
consolidated_answer.consolidated_variant = True consolidated_answer.consolidated_variant = True
else:
consolidated_answer.count += answer.count
consolidated_answers.append(consolidated_answer) consolidated_answers.append(consolidated_answer)
......
...@@ -93,17 +93,17 @@ class CourseEnrollmentByGender(BaseCourseEnrollment): ...@@ -93,17 +93,17 @@ class CourseEnrollmentByGender(BaseCourseEnrollment):
unique_together = [('course_id', 'date', 'gender')] unique_together = [('course_id', 'date', 'gender')]
class ProblemResponseAnswerDistribution(models.Model): class BaseProblemResponseAnswerDistribution(models.Model):
""" Each row stores the count of a particular answer to a response in a problem in a course (usage). """ """ Base model for the answer_distribution table. """
class Meta(object): class Meta(object):
db_table = 'answer_distribution' db_table = 'answer_distribution'
abstract = True
course_id = models.CharField(db_index=True, max_length=255) course_id = models.CharField(db_index=True, max_length=255)
module_id = models.CharField(db_index=True, max_length=255) module_id = models.CharField(db_index=True, max_length=255)
part_id = models.CharField(db_index=True, max_length=255) part_id = models.CharField(db_index=True, max_length=255)
correct = models.NullBooleanField() correct = models.NullBooleanField()
count = models.IntegerField()
value_id = models.CharField(db_index=True, max_length=255, null=True) value_id = models.CharField(db_index=True, max_length=255, null=True)
answer_value = models.TextField(null=True, db_column='answer_value_text') answer_value = models.TextField(null=True, db_column='answer_value_text')
variant = models.IntegerField(null=True) variant = models.IntegerField(null=True)
...@@ -112,6 +112,25 @@ class ProblemResponseAnswerDistribution(models.Model): ...@@ -112,6 +112,25 @@ class ProblemResponseAnswerDistribution(models.Model):
created = models.DateTimeField(auto_now_add=True) created = models.DateTimeField(auto_now_add=True)
class ProblemResponseAnswerDistribution(BaseProblemResponseAnswerDistribution):
    """
    Original answer distribution model.

    Each row carries a single ``count`` for a particular answer to a response to a problem in a course.
    """

    # Number of times this answer was recorded (old schema: one combined count).
    count = models.IntegerField()

    class Meta(BaseProblemResponseAnswerDistribution.Meta):
        # Options from the base Meta are inherited; Django is told not to manage this model's table.
        managed = False
class ProblemFirstLastResponseAnswerDistribution(BaseProblemResponseAnswerDistribution):
    """
    Updated answer distribution model.

    Replaces the single count of the original model with separate counts for the first and the last
    attempts at problems.
    """

    # Count of this answer among first responses.
    first_response_count = models.IntegerField()
    # Count of this answer among last responses.
    last_response_count = models.IntegerField()

    class Meta(BaseProblemResponseAnswerDistribution.Meta):
        # Options from the base Meta are inherited; only the verbose name is set here.
        verbose_name = 'first_last_answer_distribution'
class CourseEnrollmentByCountry(BaseCourseEnrollment): class CourseEnrollmentByCountry(BaseCourseEnrollment):
country_code = models.CharField(max_length=255, null=False, db_column='country_code') country_code = models.CharField(max_length=255, null=False, db_column='country_code')
......
...@@ -99,6 +99,44 @@ class ConsolidatedAnswerDistributionSerializer(ProblemResponseAnswerDistribution ...@@ -99,6 +99,44 @@ class ConsolidatedAnswerDistributionSerializer(ProblemResponseAnswerDistribution
return distribution return distribution
class ProblemFirstLastResponseAnswerDistributionSerializer(ProblemResponseAnswerDistributionSerializer):
    """
    Serializer for answer distribution table including counts of first and last response values.

    The field list is the parent serializer's field list with ``count`` removed and
    ``first_response_count`` / ``last_response_count`` appended.
    """

    class Meta(ProblemResponseAnswerDistributionSerializer.Meta):
        model = models.ProblemFirstLastResponseAnswerDistribution
        # Built in one expression with a generator: a list comprehension in a class body leaks its loop
        # variable under Python 2, which would leave a stray attribute on Meta.
        fields = tuple(
            name for name in ProblemResponseAnswerDistributionSerializer.Meta.fields if name != 'count'
        ) + (
            'first_response_count',
            'last_response_count',
        )
class ConsolidatedFirstLastAnswerDistributionSerializer(ProblemFirstLastResponseAnswerDistributionSerializer):
    """
    Serializer for consolidated answer distributions including first attempt counts.

    Extends the first/last serializer's field list with the ``consolidated_variant`` flag.
    """

    consolidated_variant = serializers.BooleanField()

    class Meta(ProblemFirstLastResponseAnswerDistributionSerializer.Meta):
        fields = ProblemFirstLastResponseAnswerDistributionSerializer.Meta.fields + ('consolidated_variant',)

    # pylint: disable=super-on-old-class
    def restore_object(self, attrs, instance=None):
        """
        Restore the object from the remaining attributes, then attach the non-model flag.

        ``consolidated_variant`` is removed from ``attrs`` before the parent restores the object and is
        set on the result afterwards (``None`` when it was not supplied).
        """
        flag = attrs.pop('consolidated_variant', None)
        parent = super(ConsolidatedFirstLastAnswerDistributionSerializer, self)
        restored = parent.restore_object(attrs, instance)
        restored.consolidated_variant = flag
        return restored
class GradeDistributionSerializer(ModelSerializerWithCreatedField): class GradeDistributionSerializer(ModelSerializerWithCreatedField):
""" """
Representation of the grade_distribution table without id Representation of the grade_distribution table without id
......
...@@ -596,7 +596,7 @@ class CourseProblemsListViewTests(DemoCourseMixin, TestCaseWithAuthentication): ...@@ -596,7 +596,7 @@ class CourseProblemsListViewTests(DemoCourseMixin, TestCaseWithAuthentication):
""" """
# This data should never be returned by the tests below because the course_id doesn't match. # This data should never be returned by the tests below because the course_id doesn't match.
G(models.ProblemResponseAnswerDistribution) G(models.ProblemFirstLastResponseAnswerDistribution)
# Create multiple objects here to test the grouping. Add a model with a different module_id to break up the # Create multiple objects here to test the grouping. Add a model with a different module_id to break up the
# natural order and ensure the view properly sorts the objects before grouping. # natural order and ensure the view properly sorts the objects before grouping.
...@@ -606,12 +606,12 @@ class CourseProblemsListViewTests(DemoCourseMixin, TestCaseWithAuthentication): ...@@ -606,12 +606,12 @@ class CourseProblemsListViewTests(DemoCourseMixin, TestCaseWithAuthentication):
alt_created = created + datetime.timedelta(seconds=2) alt_created = created + datetime.timedelta(seconds=2)
date_time_format = '%Y-%m-%d %H:%M:%S' date_time_format = '%Y-%m-%d %H:%M:%S'
o1 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=module_id, correct=True, o1 = G(models.ProblemFirstLastResponseAnswerDistribution, course_id=self.course_id, module_id=module_id,
count=100, created=created.strftime(date_time_format)) correct=True, last_response_count=100, created=created.strftime(date_time_format))
o2 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=alt_module_id, o2 = G(models.ProblemFirstLastResponseAnswerDistribution, course_id=self.course_id, module_id=alt_module_id,
correct=True, count=100, created=created.strftime(date_time_format)) correct=True, last_response_count=100, created=created.strftime(date_time_format))
o3 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=module_id, correct=False, o3 = G(models.ProblemFirstLastResponseAnswerDistribution, course_id=self.course_id, module_id=module_id,
count=200, created=alt_created.strftime(date_time_format)) correct=False, last_response_count=200, created=alt_created.strftime(date_time_format))
expected = [ expected = [
{ {
......
...@@ -6,9 +6,10 @@ ...@@ -6,9 +6,10 @@
# pylint: disable=no-member,no-value-for-parameter # pylint: disable=no-member,no-value-for-parameter
from django_dynamic_fixture import G from django_dynamic_fixture import G
import json
from analytics_data_api.v0 import models from analytics_data_api.v0 import models
from analytics_data_api.v0.serializers import ProblemResponseAnswerDistributionSerializer, \ from analytics_data_api.v0.serializers import ProblemFirstLastResponseAnswerDistributionSerializer, \
GradeDistributionSerializer, SequentialOpenDistributionSerializer GradeDistributionSerializer, SequentialOpenDistributionSerializer
from analyticsdataserver.tests import TestCaseWithAuthentication from analyticsdataserver.tests import TestCaseWithAuthentication
...@@ -31,7 +32,7 @@ class AnswerDistributionTests(TestCaseWithAuthentication): ...@@ -31,7 +32,7 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
cls.question_text = 'Question Text' cls.question_text = 'Question Text'
cls.ad1 = G( cls.ad1 = G(
models.ProblemResponseAnswerDistribution, models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id, course_id=cls.course_id,
module_id=cls.module_id1, module_id=cls.module_id1,
part_id=cls.part_id, part_id=cls.part_id,
...@@ -41,10 +42,11 @@ class AnswerDistributionTests(TestCaseWithAuthentication): ...@@ -41,10 +42,11 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
problem_display_name=cls.problem_display_name, problem_display_name=cls.problem_display_name,
question_text=cls.question_text, question_text=cls.question_text,
variant=123, variant=123,
count=1 first_response_count=1,
last_response_count=3,
) )
cls.ad2 = G( cls.ad2 = G(
models.ProblemResponseAnswerDistribution, models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id, course_id=cls.course_id,
module_id=cls.module_id1, module_id=cls.module_id1,
part_id=cls.part_id, part_id=cls.part_id,
...@@ -54,16 +56,17 @@ class AnswerDistributionTests(TestCaseWithAuthentication): ...@@ -54,16 +56,17 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
problem_display_name=cls.problem_display_name, problem_display_name=cls.problem_display_name,
question_text=cls.question_text, question_text=cls.question_text,
variant=345, variant=345,
count=2 first_reponse_count=0,
last_response_count=2,
) )
cls.ad3 = G( cls.ad3 = G(
models.ProblemResponseAnswerDistribution, models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id, course_id=cls.course_id,
module_id=cls.module_id1, module_id=cls.module_id1,
part_id=cls.part_id, part_id=cls.part_id,
) )
cls.ad4 = G( cls.ad4 = G(
models.ProblemResponseAnswerDistribution, models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id, course_id=cls.course_id,
module_id=cls.module_id2, module_id=cls.module_id2,
part_id=cls.part_id, part_id=cls.part_id,
...@@ -71,7 +74,7 @@ class AnswerDistributionTests(TestCaseWithAuthentication): ...@@ -71,7 +74,7 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
correct=True, correct=True,
) )
cls.ad5 = G( cls.ad5 = G(
models.ProblemResponseAnswerDistribution, models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id, course_id=cls.course_id,
module_id=cls.module_id2, module_id=cls.module_id2,
part_id=cls.part_id, part_id=cls.part_id,
...@@ -79,7 +82,7 @@ class AnswerDistributionTests(TestCaseWithAuthentication): ...@@ -79,7 +82,7 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
correct=True correct=True
) )
cls.ad6 = G( cls.ad6 = G(
models.ProblemResponseAnswerDistribution, models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id, course_id=cls.course_id,
module_id=cls.module_id2, module_id=cls.module_id2,
part_id=cls.part_id, part_id=cls.part_id,
...@@ -92,12 +95,15 @@ class AnswerDistributionTests(TestCaseWithAuthentication): ...@@ -92,12 +95,15 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
response = self.authenticated_get('/api/v0/problems/%s%s' % (self.module_id2, self.path)) response = self.authenticated_get('/api/v0/problems/%s%s' % (self.module_id2, self.path))
self.assertEquals(response.status_code, 200) self.assertEquals(response.status_code, 200)
expected_data = models.ProblemResponseAnswerDistribution.objects.filter(module_id=self.module_id2) expected_data = models.ProblemFirstLastResponseAnswerDistribution.objects.filter(module_id=self.module_id2)
expected_data = [ProblemResponseAnswerDistributionSerializer(answer).data for answer in expected_data] expected_data = [ProblemFirstLastResponseAnswerDistributionSerializer(answer).data for answer in expected_data]
for answer in expected_data: for answer in expected_data:
answer['consolidated_variant'] = False answer['consolidated_variant'] = False
response.data = set([json.dumps(answer) for answer in response.data])
expected_data = set([json.dumps(answer) for answer in expected_data])
self.assertEqual(response.data, expected_data) self.assertEqual(response.data, expected_data)
def test_consolidated_get(self): def test_consolidated_get(self):
...@@ -106,17 +112,21 @@ class AnswerDistributionTests(TestCaseWithAuthentication): ...@@ -106,17 +112,21 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
'/api/v0/problems/{0}{1}'.format(self.module_id1, self.path)) '/api/v0/problems/{0}{1}'.format(self.module_id1, self.path))
self.assertEquals(response.status_code, 200) self.assertEquals(response.status_code, 200)
expected_data = [ expected_data = [self.ad1, self.ad3]
ProblemResponseAnswerDistributionSerializer(self.ad1).data,
ProblemResponseAnswerDistributionSerializer(self.ad3).data, expected_data[0].first_response_count += self.ad2.first_response_count
] expected_data[0].last_response_count += self.ad2.last_response_count
expected_data = [ProblemFirstLastResponseAnswerDistributionSerializer(answer).data for answer in expected_data]
expected_data[0]['count'] += self.ad2.count
expected_data[0]['variant'] = None expected_data[0]['variant'] = None
expected_data[0]['consolidated_variant'] = True expected_data[0]['consolidated_variant'] = True
expected_data[1]['consolidated_variant'] = False expected_data[1]['consolidated_variant'] = False
response.data = set([json.dumps(answer) for answer in response.data])
expected_data = set([json.dumps(answer) for answer in expected_data])
self.assertEquals(response.data, expected_data) self.assertEquals(response.data, expected_data)
def test_get_404(self): def test_get_404(self):
......
...@@ -628,17 +628,18 @@ class ProblemsListView(BaseCourseView): ...@@ -628,17 +628,18 @@ class ProblemsListView(BaseCourseView):
allow_empty = False allow_empty = False
def get_queryset(self): def get_queryset(self):
sql = """ aggregation_query = """
SELECT SELECT
module_id, module_id,
SUM(count) AS total_submissions, SUM(last_response_count) AS total_submissions,
SUM(CASE WHEN correct=1 THEN count ELSE 0 END) AS correct_submissions, SUM(CASE WHEN correct=1 THEN last_response_count ELSE 0 END) AS correct_submissions,
GROUP_CONCAT(DISTINCT part_id) AS part_ids, GROUP_CONCAT(DISTINCT part_id) AS part_ids,
MAX(created) AS created MAX(created) AS created
FROM answer_distribution FROM answer_distribution
WHERE course_id = %s WHERE course_id = %s
GROUP BY module_id; GROUP BY module_id;
""" """
connection = connections[settings.ANALYTICS_DATABASE] connection = connections[settings.ANALYTICS_DATABASE]
with connection.cursor() as cursor: with connection.cursor() as cursor:
if connection.vendor == 'mysql': if connection.vendor == 'mysql':
...@@ -647,7 +648,18 @@ GROUP BY module_id; ...@@ -647,7 +648,18 @@ GROUP BY module_id;
# http://code.openark.org/blog/mysql/those-oversized-undersized-variables-defaults. # http://code.openark.org/blog/mysql/those-oversized-undersized-variables-defaults.
cursor.execute("SET @@group_concat_max_len = @@max_allowed_packet;") cursor.execute("SET @@group_concat_max_len = @@max_allowed_packet;")
cursor.execute(sql, [self.course_id]) cursor.execute("DESCRIBE answer_distribution;")
column_names = [row[0] for row in cursor.fetchall()]
# Alternate query for sqlite test database
else:
cursor.execute("PRAGMA table_info(answer_distribution)")
column_names = [row[1] for row in cursor.fetchall()]
if u'last_response_count' in column_names:
cursor.execute(aggregation_query, [self.course_id])
else:
cursor.execute(aggregation_query.replace('last_response_count', 'count'), [self.course_id])
rows = dictfetchall(cursor) rows = dictfetchall(cursor)
for row in rows: for row in rows:
......
...@@ -4,14 +4,21 @@ API methods for module level data. ...@@ -4,14 +4,21 @@ API methods for module level data.
from itertools import groupby from itertools import groupby
from django.db import OperationalError
from rest_framework import generics from rest_framework import generics
from analytics_data_api.v0.models import ProblemResponseAnswerDistribution from analytics_data_api.v0.models import (
from analytics_data_api.v0.serializers import ConsolidatedAnswerDistributionSerializer GradeDistribution,
from analytics_data_api.v0.models import GradeDistribution ProblemResponseAnswerDistribution,
from analytics_data_api.v0.serializers import GradeDistributionSerializer ProblemFirstLastResponseAnswerDistribution,
from analytics_data_api.v0.models import SequentialOpenDistribution SequentialOpenDistribution,
from analytics_data_api.v0.serializers import SequentialOpenDistributionSerializer )
from analytics_data_api.v0.serializers import (
ConsolidatedAnswerDistributionSerializer,
ConsolidatedFirstLastAnswerDistributionSerializer,
GradeDistributionSerializer,
SequentialOpenDistributionSerializer,
)
from analytics_data_api.utils import consolidate_answers from analytics_data_api.utils import consolidate_answers
...@@ -43,13 +50,6 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView): ...@@ -43,13 +50,6 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView):
* variant: For randomized problems, the random seed used. If problem * variant: For randomized problems, the random seed used. If problem
is not randomized, value is null. is not randomized, value is null.
* created: The date the count was computed. * created: The date the count was computed.
**Parameters**
You can request consolidation of response counts for erroneously randomized problems.
consolidate_variants -- If True, attempt to consolidate responses, otherwise, do not.
""" """
serializer_class = ConsolidatedAnswerDistributionSerializer serializer_class = ConsolidatedAnswerDistributionSerializer
...@@ -59,7 +59,12 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView): ...@@ -59,7 +59,12 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView):
"""Select all the answer distribution response having to do with this usage of the problem.""" """Select all the answer distribution response having to do with this usage of the problem."""
problem_id = self.kwargs.get('problem_id') problem_id = self.kwargs.get('problem_id')
queryset = ProblemResponseAnswerDistribution.objects.filter(module_id=problem_id).order_by('part_id') try:
queryset = list(ProblemResponseAnswerDistribution.objects.filter(module_id=problem_id).order_by('part_id'))
except OperationalError:
self.serializer_class = ConsolidatedFirstLastAnswerDistributionSerializer
queryset = list(ProblemFirstLastResponseAnswerDistribution.objects.filter(
module_id=problem_id).order_by('part_id'))
consolidated_rows = [] consolidated_rows = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment