Commit 3e63a2c2 by dylanrhodes

Make API compatible with old and new answer_dist schema.

parent a5be8559
......@@ -3,6 +3,8 @@ from collections import defaultdict
from django.db.models import Q
from rest_framework.authtoken.models import Token
from analytics_data_api.v0.models import ProblemResponseAnswerDistribution
def delete_user_auth_token(username):
"""
......@@ -71,13 +73,17 @@ def consolidate_answers(problem):
continue
for answer in answers:
if not consolidated_answer:
if consolidated_answer:
if type(consolidated_answer) == ProblemResponseAnswerDistribution:
consolidated_answer.count += answer.count
else:
consolidated_answer.first_response_count += answer.first_response_count
consolidated_answer.last_response_count += answer.last_response_count
else:
consolidated_answer = answer
consolidated_answer.variant = None
consolidated_answer.consolidated_variant = True
else:
consolidated_answer.count += answer.count
consolidated_answers.append(consolidated_answer)
......
......@@ -93,17 +93,17 @@ class CourseEnrollmentByGender(BaseCourseEnrollment):
unique_together = [('course_id', 'date', 'gender')]
class ProblemResponseAnswerDistribution(models.Model):
""" Each row stores the count of a particular answer to a response in a problem in a course (usage). """
class BaseProblemResponseAnswerDistribution(models.Model):
""" Base model for the answer_distribution table. """
class Meta(object):
db_table = 'answer_distribution'
abstract = True
course_id = models.CharField(db_index=True, max_length=255)
module_id = models.CharField(db_index=True, max_length=255)
part_id = models.CharField(db_index=True, max_length=255)
correct = models.NullBooleanField()
count = models.IntegerField()
value_id = models.CharField(db_index=True, max_length=255, null=True)
answer_value = models.TextField(null=True, db_column='answer_value_text')
variant = models.IntegerField(null=True)
......@@ -112,6 +112,25 @@ class ProblemResponseAnswerDistribution(models.Model):
created = models.DateTimeField(auto_now_add=True)
class ProblemResponseAnswerDistribution(BaseProblemResponseAnswerDistribution):
""" Original model for the count of a particular answer to a response to a problem in a course. """
# Extends the base model's Meta options. managed = False tells Django not to
# create, alter or drop a database table for this model.
# NOTE(review): presumably unmanaged because ProblemFirstLastResponseAnswerDistribution
# is mapped to the same table via the inherited Meta options -- confirm.
class Meta(BaseProblemResponseAnswerDistribution.Meta):
managed = False
# The single per-answer count described in the docstring above.
count = models.IntegerField()
class ProblemFirstLastResponseAnswerDistribution(BaseProblemResponseAnswerDistribution):
""" Updated model for answer_distribution table with counts of first and last attempts at problems. """
# Extends the base model's Meta options, adding only a human-readable name.
class Meta(BaseProblemResponseAnswerDistribution.Meta):
verbose_name = 'first_last_answer_distribution'
# Separate counts for first and last attempts, as described in the docstring above.
first_response_count = models.IntegerField()
last_response_count = models.IntegerField()
class CourseEnrollmentByCountry(BaseCourseEnrollment):
country_code = models.CharField(max_length=255, null=False, db_column='country_code')
......
......@@ -99,6 +99,44 @@ class ConsolidatedAnswerDistributionSerializer(ProblemResponseAnswerDistribution
return distribution
class ProblemFirstLastResponseAnswerDistributionSerializer(ProblemResponseAnswerDistributionSerializer):
    """
    Serializer for answer distribution table including counts of first and last response values.

    Exposes the parent serializer's fields with ``count`` removed and
    ``first_response_count`` / ``last_response_count`` appended.
    """

    class Meta(ProblemResponseAnswerDistributionSerializer.Meta):
        model = models.ProblemFirstLastResponseAnswerDistribution
        # Build the field tuple in a single assignment. A generator expression is
        # used instead of a list comprehension so that no temporary list is built
        # and, on Python 2, the loop variable does not leak into Meta's namespace.
        fields = tuple(
            name for name in ProblemResponseAnswerDistributionSerializer.Meta.fields if name != 'count'
        ) + (
            'first_response_count',
            'last_response_count',
        )
class ConsolidatedFirstLastAnswerDistributionSerializer(ProblemFirstLastResponseAnswerDistributionSerializer):
    """
    Serializer for consolidated answer distributions that carry first and last response counts.

    Adds the non-model ``consolidated_variant`` flag to the fields of the
    first/last answer distribution serializer.
    """

    consolidated_variant = serializers.BooleanField()

    class Meta(ProblemFirstLastResponseAnswerDistributionSerializer.Meta):
        fields = ProblemFirstLastResponseAnswerDistributionSerializer.Meta.fields + ('consolidated_variant',)

    # pylint: disable=super-on-old-class
    def restore_object(self, attrs, instance=None):
        """
        Restore the model instance, carrying the non-model ``consolidated_variant`` value across.

        The flag is removed from ``attrs`` before the parent restores the
        object, then set as a plain attribute on the restored object.
        """
        flag = attrs.pop('consolidated_variant', None)
        restored = super(ConsolidatedFirstLastAnswerDistributionSerializer, self).restore_object(attrs, instance)
        restored.consolidated_variant = flag
        return restored
class GradeDistributionSerializer(ModelSerializerWithCreatedField):
"""
Representation of the grade_distribution table without id
......
......@@ -596,7 +596,7 @@ class CourseProblemsListViewTests(DemoCourseMixin, TestCaseWithAuthentication):
"""
# This data should never be returned by the tests below because the course_id doesn't match.
G(models.ProblemResponseAnswerDistribution)
G(models.ProblemFirstLastResponseAnswerDistribution)
# Create multiple objects here to test the grouping. Add a model with a different module_id to break up the
# natural order and ensure the view properly sorts the objects before grouping.
......@@ -606,12 +606,12 @@ class CourseProblemsListViewTests(DemoCourseMixin, TestCaseWithAuthentication):
alt_created = created + datetime.timedelta(seconds=2)
date_time_format = '%Y-%m-%d %H:%M:%S'
o1 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=module_id, correct=True,
count=100, created=created.strftime(date_time_format))
o2 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=alt_module_id,
correct=True, count=100, created=created.strftime(date_time_format))
o3 = G(models.ProblemResponseAnswerDistribution, course_id=self.course_id, module_id=module_id, correct=False,
count=200, created=alt_created.strftime(date_time_format))
o1 = G(models.ProblemFirstLastResponseAnswerDistribution, course_id=self.course_id, module_id=module_id,
correct=True, last_response_count=100, created=created.strftime(date_time_format))
o2 = G(models.ProblemFirstLastResponseAnswerDistribution, course_id=self.course_id, module_id=alt_module_id,
correct=True, last_response_count=100, created=created.strftime(date_time_format))
o3 = G(models.ProblemFirstLastResponseAnswerDistribution, course_id=self.course_id, module_id=module_id,
correct=False, last_response_count=200, created=alt_created.strftime(date_time_format))
expected = [
{
......
......@@ -6,9 +6,10 @@
# pylint: disable=no-member,no-value-for-parameter
from django_dynamic_fixture import G
import json
from analytics_data_api.v0 import models
from analytics_data_api.v0.serializers import ProblemResponseAnswerDistributionSerializer, \
from analytics_data_api.v0.serializers import ProblemFirstLastResponseAnswerDistributionSerializer, \
GradeDistributionSerializer, SequentialOpenDistributionSerializer
from analyticsdataserver.tests import TestCaseWithAuthentication
......@@ -31,7 +32,7 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
cls.question_text = 'Question Text'
cls.ad1 = G(
models.ProblemResponseAnswerDistribution,
models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id,
module_id=cls.module_id1,
part_id=cls.part_id,
......@@ -41,10 +42,11 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
problem_display_name=cls.problem_display_name,
question_text=cls.question_text,
variant=123,
count=1
first_response_count=1,
last_response_count=3,
)
cls.ad2 = G(
models.ProblemResponseAnswerDistribution,
models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id,
module_id=cls.module_id1,
part_id=cls.part_id,
......@@ -54,16 +56,17 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
problem_display_name=cls.problem_display_name,
question_text=cls.question_text,
variant=345,
count=2
first_reponse_count=0,
last_response_count=2,
)
cls.ad3 = G(
models.ProblemResponseAnswerDistribution,
models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id,
module_id=cls.module_id1,
part_id=cls.part_id,
)
cls.ad4 = G(
models.ProblemResponseAnswerDistribution,
models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id,
module_id=cls.module_id2,
part_id=cls.part_id,
......@@ -71,7 +74,7 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
correct=True,
)
cls.ad5 = G(
models.ProblemResponseAnswerDistribution,
models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id,
module_id=cls.module_id2,
part_id=cls.part_id,
......@@ -79,7 +82,7 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
correct=True
)
cls.ad6 = G(
models.ProblemResponseAnswerDistribution,
models.ProblemFirstLastResponseAnswerDistribution,
course_id=cls.course_id,
module_id=cls.module_id2,
part_id=cls.part_id,
......@@ -92,12 +95,15 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
response = self.authenticated_get('/api/v0/problems/%s%s' % (self.module_id2, self.path))
self.assertEquals(response.status_code, 200)
expected_data = models.ProblemResponseAnswerDistribution.objects.filter(module_id=self.module_id2)
expected_data = [ProblemResponseAnswerDistributionSerializer(answer).data for answer in expected_data]
expected_data = models.ProblemFirstLastResponseAnswerDistribution.objects.filter(module_id=self.module_id2)
expected_data = [ProblemFirstLastResponseAnswerDistributionSerializer(answer).data for answer in expected_data]
for answer in expected_data:
answer['consolidated_variant'] = False
response.data = set([json.dumps(answer) for answer in response.data])
expected_data = set([json.dumps(answer) for answer in expected_data])
self.assertEqual(response.data, expected_data)
def test_consolidated_get(self):
......@@ -106,17 +112,21 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
'/api/v0/problems/{0}{1}'.format(self.module_id1, self.path))
self.assertEquals(response.status_code, 200)
expected_data = [
ProblemResponseAnswerDistributionSerializer(self.ad1).data,
ProblemResponseAnswerDistributionSerializer(self.ad3).data,
]
expected_data = [self.ad1, self.ad3]
expected_data[0].first_response_count += self.ad2.first_response_count
expected_data[0].last_response_count += self.ad2.last_response_count
expected_data = [ProblemFirstLastResponseAnswerDistributionSerializer(answer).data for answer in expected_data]
expected_data[0]['count'] += self.ad2.count
expected_data[0]['variant'] = None
expected_data[0]['consolidated_variant'] = True
expected_data[1]['consolidated_variant'] = False
response.data = set([json.dumps(answer) for answer in response.data])
expected_data = set([json.dumps(answer) for answer in expected_data])
self.assertEquals(response.data, expected_data)
def test_get_404(self):
......
......@@ -628,17 +628,18 @@ class ProblemsListView(BaseCourseView):
allow_empty = False
def get_queryset(self):
sql = """
aggregation_query = """
SELECT
module_id,
SUM(count) AS total_submissions,
SUM(CASE WHEN correct=1 THEN count ELSE 0 END) AS correct_submissions,
SUM(last_response_count) AS total_submissions,
SUM(CASE WHEN correct=1 THEN last_response_count ELSE 0 END) AS correct_submissions,
GROUP_CONCAT(DISTINCT part_id) AS part_ids,
MAX(created) AS created
FROM answer_distribution
WHERE course_id = %s
GROUP BY module_id;
"""
connection = connections[settings.ANALYTICS_DATABASE]
with connection.cursor() as cursor:
if connection.vendor == 'mysql':
......@@ -647,7 +648,18 @@ GROUP BY module_id;
# http://code.openark.org/blog/mysql/those-oversized-undersized-variables-defaults.
cursor.execute("SET @@group_concat_max_len = @@max_allowed_packet;")
cursor.execute(sql, [self.course_id])
cursor.execute("DESCRIBE answer_distribution;")
column_names = [row[0] for row in cursor.fetchall()]
# Alternate query for sqlite test database
else:
cursor.execute("PRAGMA table_info(answer_distribution)")
column_names = [row[1] for row in cursor.fetchall()]
if u'last_response_count' in column_names:
cursor.execute(aggregation_query, [self.course_id])
else:
cursor.execute(aggregation_query.replace('last_response_count', 'count'), [self.course_id])
rows = dictfetchall(cursor)
for row in rows:
......
......@@ -4,14 +4,21 @@ API methods for module level data.
from itertools import groupby
from django.db import OperationalError
from rest_framework import generics
from analytics_data_api.v0.models import ProblemResponseAnswerDistribution
from analytics_data_api.v0.serializers import ConsolidatedAnswerDistributionSerializer
from analytics_data_api.v0.models import GradeDistribution
from analytics_data_api.v0.serializers import GradeDistributionSerializer
from analytics_data_api.v0.models import SequentialOpenDistribution
from analytics_data_api.v0.serializers import SequentialOpenDistributionSerializer
from analytics_data_api.v0.models import (
GradeDistribution,
ProblemResponseAnswerDistribution,
ProblemFirstLastResponseAnswerDistribution,
SequentialOpenDistribution,
)
from analytics_data_api.v0.serializers import (
ConsolidatedAnswerDistributionSerializer,
ConsolidatedFirstLastAnswerDistributionSerializer,
GradeDistributionSerializer,
SequentialOpenDistributionSerializer,
)
from analytics_data_api.utils import consolidate_answers
......@@ -43,13 +50,6 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView):
* variant: For randomized problems, the random seed used. If problem
is not randomized, value is null.
* created: The date the count was computed.
**Parameters**
You can request consolidation of response counts for erroneously randomized problems.
consolidate_variants -- If True, attempt to consolidate responses, otherwise, do not.
"""
serializer_class = ConsolidatedAnswerDistributionSerializer
......@@ -59,7 +59,12 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView):
"""Select all the answer distribution response having to do with this usage of the problem."""
problem_id = self.kwargs.get('problem_id')
queryset = ProblemResponseAnswerDistribution.objects.filter(module_id=problem_id).order_by('part_id')
try:
queryset = list(ProblemResponseAnswerDistribution.objects.filter(module_id=problem_id).order_by('part_id'))
except OperationalError:
self.serializer_class = ConsolidatedFirstLastAnswerDistributionSerializer
queryset = list(ProblemFirstLastResponseAnswerDistribution.objects.filter(
module_id=problem_id).order_by('part_id'))
consolidated_rows = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment