Commit dc785961 by Clinton Blackburn

Merge pull request #27 from edx/parts-unknown-redux

Grouping Unknown Locations More Efficiently
parents f86ecabb 33cedb77
from iso3166 import Country
# Placeholder code used when a country code is not valid ISO-3166.
UNKNOWN_COUNTRY_CODE = u'UNKNOWN'
# Country record whose name is the placeholder code; all other fields are None.
UNKNOWN_COUNTRY = Country(UNKNOWN_COUNTRY_CODE, None, None, None)
from django.db import models from django.db import models
from iso3166 import countries, Country from iso3166 import countries
from analytics_data_api.v0.constants import UNKNOWN_COUNTRY
class CourseActivityWeekly(models.Model): class CourseActivityWeekly(models.Model):
...@@ -99,7 +101,6 @@ class ProblemResponseAnswerDistribution(models.Model): ...@@ -99,7 +101,6 @@ class ProblemResponseAnswerDistribution(models.Model):
class CourseEnrollmentByCountry(BaseCourseEnrollment): class CourseEnrollmentByCountry(BaseCourseEnrollment):
UNKNOWN_COUNTRY_CODE = 'UNKNOWN'
country_code = models.CharField(max_length=255, null=False, db_column='country_code') country_code = models.CharField(max_length=255, null=False, db_column='country_code')
@property @property
...@@ -108,13 +109,10 @@ class CourseEnrollmentByCountry(BaseCourseEnrollment): ...@@ -108,13 +109,10 @@ class CourseEnrollmentByCountry(BaseCourseEnrollment):
Returns a Country object representing the country in this model's country_code. Returns a Country object representing the country in this model's country_code.
""" """
try: try:
if self.country_code == self.UNKNOWN_COUNTRY_CODE:
return Country(self.UNKNOWN_COUNTRY_CODE, None, None, None)
return countries.get(self.country_code) return countries.get(self.country_code)
except (KeyError, ValueError): except (KeyError, ValueError, AttributeError):
# Country code is not valid ISO-3166 # Country code is not valid ISO-3166
return None return UNKNOWN_COUNTRY
class Meta(BaseCourseEnrollment.Meta): class Meta(BaseCourseEnrollment.Meta):
db_table = 'course_enrollment_location_current' db_table = 'course_enrollment_location_current'
......
from django.test import TestCase from django.test import TestCase
from django_dynamic_fixture import G from django_dynamic_fixture import G
from iso3166 import countries from iso3166 import countries
from analytics_data_api.v0 import models from analytics_data_api.v0 import models
from analytics_data_api.v0.constants import UNKNOWN_COUNTRY
class EducationLevelTests(TestCase): class EducationLevelTests(TestCase):
...@@ -26,10 +26,13 @@ class CourseEnrollmentByCountryTests(TestCase): ...@@ -26,10 +26,13 @@ class CourseEnrollmentByCountryTests(TestCase):
def test_invalid_country(self): def test_invalid_country(self):
instance = G(models.CourseEnrollmentByCountry, country_code='') instance = G(models.CourseEnrollmentByCountry, country_code='')
self.assertIsNone(instance.country) self.assertEqual(instance.country, UNKNOWN_COUNTRY)
instance = G(models.CourseEnrollmentByCountry, country_code='A1') instance = G(models.CourseEnrollmentByCountry, country_code='A1')
self.assertIsNone(instance.country) self.assertEqual(instance.country, UNKNOWN_COUNTRY)
instance = G(models.CourseEnrollmentByCountry, country_code='GobbledyGoop!') instance = G(models.CourseEnrollmentByCountry, country_code='GobbledyGoop!')
self.assertIsNone(instance.country) self.assertEqual(instance.country, UNKNOWN_COUNTRY)
instance = G(models.CourseEnrollmentByCountry, country_code='UNKNOWN')
self.assertEqual(instance.country, UNKNOWN_COUNTRY)
...@@ -12,6 +12,7 @@ from iso3166 import countries ...@@ -12,6 +12,7 @@ from iso3166 import countries
import pytz import pytz
from analytics_data_api.v0 import models from analytics_data_api.v0 import models
from analytics_data_api.v0.constants import UNKNOWN_COUNTRY, UNKNOWN_COUNTRY_CODE
from analytics_data_api.v0.models import CourseActivityWeekly from analytics_data_api.v0.models import CourseActivityWeekly
from analytics_data_api.v0.serializers import ProblemResponseAnswerDistributionSerializer from analytics_data_api.v0.serializers import ProblemResponseAnswerDistributionSerializer
from analytics_data_api.v0.tests.utils import flatten from analytics_data_api.v0.tests.utils import flatten
...@@ -324,24 +325,23 @@ class CourseEnrollmentByLocationViewTests(CourseEnrollmentViewTestCaseMixin, Tes ...@@ -324,24 +325,23 @@ class CourseEnrollmentByLocationViewTests(CourseEnrollmentViewTestCaseMixin, Tes
def format_as_response(self, *args): def format_as_response(self, *args):
unknown = {'course_id': None, 'count': 0, 'date': None, unknown = {'course_id': None, 'count': 0, 'date': None,
'country': {'alpha2': None, 'alpha3': None, 'name': u'UNKNOWN'}} 'country': {'alpha2': None, 'alpha3': None, 'name': UNKNOWN_COUNTRY_CODE}}
for arg in args: for arg in args:
if not arg.country: if arg.country.name == UNKNOWN_COUNTRY_CODE:
unknown['course_id'] = arg.course_id unknown['course_id'] = arg.course_id
unknown['date'] = arg.date.strftime(settings.DATE_FORMAT) unknown['date'] = arg.date.strftime(settings.DATE_FORMAT)
unknown['count'] += arg.count unknown['count'] += arg.count
args = [arg for arg in args if arg.country_code not in ['', 'A1', 'A2', 'AP', 'EU', 'O1', 'UNKNOWN']] args = [arg for arg in args if arg.country != UNKNOWN_COUNTRY]
args = sorted(args, key=lambda item: (item.date, item.course_id, item.country.alpha3)) args = sorted(args, key=lambda item: (item.date, item.course_id, item.country.alpha3))
response = [
response = [unknown]
response += [
{'course_id': str(ce.course_id), 'count': ce.count, 'date': ce.date.strftime(settings.DATE_FORMAT), {'course_id': str(ce.course_id), 'count': ce.count, 'date': ce.date.strftime(settings.DATE_FORMAT),
'country': {'alpha2': ce.country.alpha2, 'alpha3': ce.country.alpha3, 'name': ce.country.name}} for ce in 'country': {'alpha2': ce.country.alpha2, 'alpha3': ce.country.alpha3, 'name': ce.country.name}} for ce in
args] args]
# Unknown comes last
response.append(unknown)
return response return response
def setUp(self): def setUp(self):
......
...@@ -332,38 +332,35 @@ class CourseEnrollmentByLocationView(BaseCourseEnrollmentView): ...@@ -332,38 +332,35 @@ class CourseEnrollmentByLocationView(BaseCourseEnrollmentView):
model = models.CourseEnrollmentByCountry model = models.CourseEnrollmentByCountry
def get_queryset(self): def get_queryset(self):
queryset = super(CourseEnrollmentByLocationView, self).get_queryset()
# Get all of the data from the database # Get all of the data from the database
queryset = super(CourseEnrollmentByLocationView, self).get_queryset()
items = queryset.all() items = queryset.all()
# Split into known and unknown # Data must be sorted in order for groupby to work properly
knowns = [] items = sorted(items, key=lambda x: x.country.alpha2)
unknowns = []
for item in items:
if item.country:
knowns.append(item)
else:
unknowns.append(item)
# Group the unknowns by date and combine the counts
for key, group in groupby(unknowns, lambda x: (x.date, x.course_id)):
date = key[0]
course_id = key[1]
# Items to be returned by this method
returned_items = []
# Group data by date, country, and course ID
for key, group in groupby(items, lambda x: (x.date, x.country.alpha2, x.course_id)):
count = 0 count = 0
date = key[0]
country_code = key[1]
course_id = key[2]
for item in group: for item in group:
count += item.count count += item.count
# pylint: disable=unexpected-keyword-arg,no-value-for-parameter # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
knowns.append(models.CourseEnrollmentByCountry( returned_items.append(models.CourseEnrollmentByCountry(
course_id=course_id, course_id=course_id,
date=date, date=date,
country_code=models.CourseEnrollmentByCountry.UNKNOWN_COUNTRY_CODE, country_code=country_code,
count=count count=count
)) ))
# Note: We are returning a list, instead of a queryset. This is # Note: We are returning a list, instead of a queryset. This is
# acceptable since the consuming code simply expects the returned # acceptable since the consuming code simply expects the returned
# value to be iterable, not necessarily a queryset. # value to be iterable, not necessarily a queryset.
return knowns return returned_items
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment