Commit 3d0b5450 by Clinton Blackburn

Merge pull request #52 from edx/dylanrhodes/consolidate-variants

Consolidation parameter for extraneous variants
parents ecbc32cd 9fb03d6d
from collections import defaultdict
from django.db.models import Q
from rest_framework.authtoken.models import Token
......@@ -31,3 +33,53 @@ def set_user_auth_token(user, key):
Token.objects.create(user=user, key=key)
print "Set API key for user %s to %s" % (user, key)
def matching_tuple(answer):
    """
    Build the comparison key used when deciding whether answers can be merged.

    The key is a tuple of the answer's question text, textual and numeric
    answer values, problem display name and correctness flag, in that order.
    """
    match_fields = (
        'question_text',
        'answer_value_text',
        'answer_value_numeric',
        'problem_display_name',
        'correct',
    )
    return tuple(getattr(answer, field) for field in match_fields)
def consolidate_answers(problem):
    """
    Attempt to consolidate erroneously randomized answers.

    ``problem`` is an iterable of answer rows. Rows are grouped by
    ``value_id``. If the rows of any group disagree on the fields returned by
    ``matching_tuple``, nothing is merged and ``problem`` itself is returned.
    Otherwise every group holding more than one row is collapsed into its
    first row: the other rows' counts are added to it, its ``variant`` is
    cleared and its ``consolidated_variant`` flag is set to True.

    Side effects: every row gets ``consolidated_variant = False`` up front,
    and merging mutates the first row of each group in place.

    Returns a list with one row per ``value_id`` in first-seen order, or
    ``problem`` unchanged when consolidation is not possible.
    """
    # Remember the order in which value_ids first appear so that the result
    # order does not depend on dict iteration order.
    value_ids = []
    answer_sets = defaultdict(list)
    match_tuple_sets = defaultdict(set)

    for answer in problem:
        answer.consolidated_variant = False
        if answer.value_id not in answer_sets:
            value_ids.append(answer.value_id)
        answer_sets[answer.value_id].append(answer)
        match_tuple_sets[answer.value_id].add(matching_tuple(answer))

    # If a part has more than one unique tuple of matching fields, do not consolidate.
    if any(len(match_tuple_set) > 1 for match_tuple_set in match_tuple_sets.values()):
        return problem

    consolidated_answers = []
    for value_id in value_ids:
        answers = answer_sets[value_id]
        consolidated_answer = answers[0]
        if len(answers) > 1:
            # The first row becomes the merged row; the variant no longer
            # identifies a single seed, so it is cleared.
            consolidated_answer.variant = None
            consolidated_answer.consolidated_variant = True
            for duplicate in answers[1:]:
                consolidated_answer.count += duplicate.count
        consolidated_answers.append(consolidated_answer)

    return consolidated_answers
......@@ -75,6 +75,29 @@ class ProblemResponseAnswerDistributionSerializer(ModelSerializerWithCreatedFiel
)
class ConsolidatedAnswerDistributionSerializer(ProblemResponseAnswerDistributionSerializer):
    """
    Answer distribution serializer that also exposes ``consolidated_variant``.

    The flag is declared as an extra boolean field and appended to the field
    list inherited from the parent serializer's ``Meta``.
    """

    consolidated_variant = serializers.BooleanField()

    class Meta(ProblemResponseAnswerDistributionSerializer.Meta):
        fields = ProblemResponseAnswerDistributionSerializer.Meta.fields + ('consolidated_variant',)

    # pylint: disable=super-on-old-class
    def restore_object(self, attrs, instance=None):
        """
        Restore the distribution while handling the non-model field separately.

        ``consolidated_variant`` is removed from ``attrs`` before delegating to
        the parent implementation, then set on the restored object (``None``
        when it was not present in ``attrs``).
        """
        flag = attrs.pop('consolidated_variant', None)
        restored = super(ConsolidatedAnswerDistributionSerializer, self).restore_object(attrs, instance)
        restored.consolidated_variant = flag
        return restored
class GradeDistributionSerializer(ModelSerializerWithCreatedField):
"""
Representation of the grade_distribution table without id
......
......@@ -20,23 +20,107 @@ class AnswerDistributionTests(TestCaseWithAuthentication):
@classmethod
def setUpClass(cls):
    """
    Create the answer distribution fixtures shared by the tests.

    Rows for ``module_id1``:
      * ``ad1`` and ``ad2`` share the same ``value_id`` and matching fields
        and differ only in ``variant`` (123 / 345) and ``count`` (1 / 2).
      * ``ad3`` only pins course, module and part; every other field is left
        to the fixture generator.

    Rows for ``module_id2``:
      * ``ad4`` and ``ad6`` share ``value_id1`` but differ in ``correct``.
      * ``ad5`` uses ``value_id2``.
    """
    cls.course_id = "org/num/run"
    cls.module_id1 = "i4x://org/num/run/problem/RANDOMNUMBER"
    cls.module_id2 = "i4x://org/num/run/problem/OTHERRANDOM"
    cls.part_id = "i4x-org-num-run-problem-RANDOMNUMBER_2_1"
    # Older attribute names, kept as aliases (same values as before) so that
    # tests still referring to them keep working.
    cls.module_id = cls.module_id1
    cls.part_id1 = cls.part_id
    cls.correct = True
    cls.value_id1 = '3'
    cls.value_id2 = '4'
    cls.answer_value_text = '3'
    cls.answer_value_numeric = 3.0
    cls.problem_display_name = 'Test Problem'
    cls.question_text = 'Question Text'

    cls.ad1 = G(
        models.ProblemResponseAnswerDistribution,
        course_id=cls.course_id,
        module_id=cls.module_id1,
        part_id=cls.part_id,
        correct=cls.correct,
        value_id=cls.value_id1,
        answer_value_text=cls.answer_value_text,
        answer_value_numeric=cls.answer_value_numeric,
        problem_display_name=cls.problem_display_name,
        question_text=cls.question_text,
        variant=123,
        count=1
    )
    cls.ad2 = G(
        models.ProblemResponseAnswerDistribution,
        course_id=cls.course_id,
        module_id=cls.module_id1,
        part_id=cls.part_id,
        correct=cls.correct,
        value_id=cls.value_id1,
        answer_value_text=cls.answer_value_text,
        answer_value_numeric=cls.answer_value_numeric,
        problem_display_name=cls.problem_display_name,
        question_text=cls.question_text,
        variant=345,
        count=2
    )
    cls.ad3 = G(
        models.ProblemResponseAnswerDistribution,
        course_id=cls.course_id,
        module_id=cls.module_id1,
        part_id=cls.part_id,
    )
    cls.ad4 = G(
        models.ProblemResponseAnswerDistribution,
        course_id=cls.course_id,
        module_id=cls.module_id2,
        part_id=cls.part_id,
        value_id=cls.value_id1,
        correct=True,
    )
    cls.ad5 = G(
        models.ProblemResponseAnswerDistribution,
        course_id=cls.course_id,
        module_id=cls.module_id2,
        part_id=cls.part_id,
        value_id=cls.value_id2,
        correct=True
    )
    cls.ad6 = G(
        models.ProblemResponseAnswerDistribution,
        course_id=cls.course_id,
        module_id=cls.module_id2,
        part_id=cls.part_id,
        value_id=cls.value_id1,
        correct=False,
    )
def test_get(self):
    """ Verify that requesting an existing problem's distribution succeeds. """
    response = self.authenticated_get('/api/v0/problems/%s%s' % (self.module_id, self.path))
    # Without an assertion this test could never fail on a bad response.
    self.assertEquals(response.status_code, 200)
def test_nonconsolidated_get(self):
    """ Verify that answers which should not be consolidated are not. """
    response = self.authenticated_get('/api/v0/problems/%s%s' % (self.module_id2, self.path))
    self.assertEqual(response.status_code, 200)

    # Every stored row for the module is expected back, each flagged as
    # not consolidated.
    stored_answers = models.ProblemResponseAnswerDistribution.objects.filter(module_id=self.module_id2)
    expected_data = [ProblemResponseAnswerDistributionSerializer(answer).data for answer in stored_answers]
    for answer in expected_data:
        answer['consolidated_variant'] = False

    self.assertEqual(response.data, expected_data)
def test_consolidated_get(self):
    """ Verify that valid consolidation does occur. """
    url = '/api/v0/problems/{0}{1}'.format(self.module_id1, self.path)
    response = self.authenticated_get(url)
    self.assertEquals(response.status_code, 200)

    merged = ProblemResponseAnswerDistributionSerializer(self.ad1).data
    untouched = ProblemResponseAnswerDistributionSerializer(self.ad3).data

    # ad2 is expected to be folded into ad1: counts summed, variant cleared.
    merged['count'] += self.ad2.count
    merged['variant'] = None
    merged['consolidated_variant'] = True
    untouched['consolidated_variant'] = False

    self.assertEquals(response.data, [merged, untouched])
def test_get_404(self):
response = self.authenticated_get('/api/v0/problems/%s%s' % ("DOES-NOT-EXIST", self.path))
......
"""
API methods for module level data.
"""
from itertools import groupby
from rest_framework import generics
from analytics_data_api.v0.models import ProblemResponseAnswerDistribution
from analytics_data_api.v0.serializers import ProblemResponseAnswerDistributionSerializer
from analytics_data_api.v0.serializers import ConsolidatedAnswerDistributionSerializer
from analytics_data_api.v0.models import GradeDistribution
from analytics_data_api.v0.serializers import GradeDistributionSerializer
from analytics_data_api.v0.models import SequentialOpenDistribution
from analytics_data_api.v0.serializers import SequentialOpenDistributionSerializer
from analytics_data_api.utils import consolidate_answers
class ProblemResponseAnswerDistributionView(generics.ListAPIView):
......@@ -36,15 +43,30 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView):
* variant: For randomized problems, the random seed used. If problem
is not randomized, value is null.
* created: The date the count was computed.
**Parameters**
You can request consolidation of response counts for erroneously randomized problems.
consolidate_variants -- If True, attempt to consolidate responses, otherwise, do not.
"""
# Rows are serialized with the extra ``consolidated_variant`` flag.
serializer_class = ConsolidatedAnswerDistributionSerializer
allow_empty = False
def get_queryset(self):
    """
    Select all the answer distribution response having to do with this usage of the problem.

    Rows are fetched ordered by ``part_id``, grouped per part, and each
    group is passed through ``consolidate_answers``. The result is a plain
    list of rows rather than a queryset.
    """
    # NOTE(review): the class docstring describes a ``consolidate_variants``
    # parameter, but nothing here reads it; consolidation is always
    # attempted. Confirm whether the parameter should be honoured.
    problem_id = self.kwargs.get('problem_id')

    # groupby only groups adjacent items, so the rows must arrive ordered by part_id.
    queryset = ProblemResponseAnswerDistribution.objects.filter(module_id=problem_id).order_by('part_id')

    consolidated_rows = []
    for _, part in groupby(queryset, lambda x: x.part_id):
        consolidated_rows.extend(consolidate_answers(list(part)))

    return consolidated_rows
class GradeDistributionView(generics.ListAPIView):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment