Commit 5001a93a by Dennis Jen

Merge pull request #36 from edx/dsjen/aggregate-genders

Updated enrollment/gender endpoint to aggregate gender counts by day.
parents 5f21b965 8b5c66c9
Ben Patterson <bpatterson@edx.org>
Carlos Andrés Rocha <rocha@edx.org>
Clinton Blackburn <cblackburn@edx.org>
Dennis Jen <djen@edx.org>
Ed Zarecor <ed@edx.org>
Gabe Mulley <gabe@edx.org>
Jason Bau <jbau@stanford.edu>
......
......@@ -2,3 +2,8 @@ from iso3166 import Country
UNKNOWN_COUNTRY_CODE = u'UNKNOWN'
UNKNOWN_COUNTRY = Country(UNKNOWN_COUNTRY_CODE, None, None, None)
# Full-name gender labels. The CourseEnrollmentByGender model maps its stored
# single-letter codes ('f', 'm', 'o') onto the first three, and falls back to
# UNKNOWN_GENDER for any value without a mapping (including null/None).
FEMALE_GENDER = u'female'
MALE_GENDER = u'male'
OTHER_GENDER = u'other'
UNKNOWN_GENDER = u'unknown'
from django.db import models
from iso3166 import countries
from analytics_data_api.v0.constants import UNKNOWN_COUNTRY
from analytics_data_api.v0.constants import UNKNOWN_COUNTRY, FEMALE_GENDER, MALE_GENDER, OTHER_GENDER, UNKNOWN_GENDER
class CourseActivityWeekly(models.Model):
......@@ -75,7 +75,20 @@ class CourseEnrollmentByEducation(BaseCourseEnrollment):
class CourseEnrollmentByGender(BaseCourseEnrollment):
gender = models.CharField(max_length=255, null=False)
CLEANED_GENDERS = {
'f': FEMALE_GENDER,
'm': MALE_GENDER,
'o': OTHER_GENDER
}
gender = models.CharField(max_length=255, null=True, db_column='gender')
@property
def cleaned_gender(self):
    """
    Full-name form of the stored gender code.

    The raw ``gender`` value is looked up in ``CLEANED_GENDERS``; any value
    without an entry there (including null/None) yields ``UNKNOWN_GENDER``.
    """
    code = self.gender
    full_names = self.CLEANED_GENDERS
    return full_names[code] if code in full_names else UNKNOWN_GENDER
class Meta(BaseCourseEnrollment.Meta):
db_table = 'course_enrollment_gender_daily'
......
from django.conf import settings
from rest_framework import serializers
from analytics_data_api.v0 import models
from analytics_data_api.v0.models import CourseActivityWeekly
class CourseActivityByWeekSerializer(serializers.ModelSerializer):
......@@ -129,9 +128,14 @@ class CourseEnrollmentByCountrySerializer(BaseCourseEnrollmentModelSerializer):
class CourseEnrollmentByGenderSerializer(BaseCourseEnrollmentModelSerializer):
female = serializers.IntegerField(required=False)
male = serializers.IntegerField(required=False)
other = serializers.IntegerField(required=False)
unknown = serializers.IntegerField(required=False)
class Meta(object):
model = models.CourseEnrollmentByGender
fields = ('course_id', 'date', 'gender', 'count', 'created')
fields = ('course_id', 'date', 'female', 'male', 'other', 'unknown', 'created')
class CourseEnrollmentByEducationSerializer(BaseCourseEnrollmentModelSerializer):
......@@ -158,6 +162,6 @@ class CourseActivityWeeklySerializer(serializers.ModelSerializer):
created = serializers.DateTimeField(format=settings.DATETIME_FORMAT)
class Meta(object):
model = CourseActivityWeekly
model = models.CourseActivityWeekly
# TODO: Add 'posted_forum' here to restore forum data
fields = ('interval_start', 'interval_end', 'course_id', 'any', 'attempted_problem', 'played_video', 'created')
......@@ -316,18 +316,40 @@ class CourseEnrollmentByGenderViewTests(CourseEnrollmentViewTestCaseMixin, TestC
def generate_data(self, course_id=None):
course_id = course_id or self.course_id
G(self.model, course_id=course_id, gender='m', date=self.date, count=34)
G(self.model, course_id=course_id, gender='f', date=self.date, count=45)
G(self.model, course_id=course_id, gender='f', date=self.date - datetime.timedelta(days=2), count=45)
genders = ['f', 'm', 'o', None]
days = 2
for day in range(days):
for gender in genders:
G(self.model,
course_id=course_id,
date=self.date - datetime.timedelta(days=day),
gender=gender,
count=100 + day)
def setUp(self):
    """
    Run the parent test setup, then create the enrollment rows via generate_data().
    """
    super(CourseEnrollmentByGenderViewTests, self).setUp()
    self.generate_data()
def format_as_response(self, *args):
return [
{'course_id': unicode(ce.course_id), 'count': ce.count, 'date': ce.date.strftime(settings.DATE_FORMAT),
'gender': ce.gender, 'created': ce.created.strftime(settings.DATETIME_FORMAT)} for ce in args]
response = []
# Group by date
for _key, group in groupby(args, lambda x: x.date):
# Iterate over groups and create a single item with genders
item = {}
for enrollment in group:
item.update({
'created': enrollment.created.strftime(settings.DATETIME_FORMAT),
'course_id': unicode(enrollment.course_id),
'date': enrollment.date.strftime(settings.DATE_FORMAT),
enrollment.cleaned_gender: enrollment.count
})
response.append(item)
return response
# pylint: disable=no-member,no-value-for-parameter
......
......@@ -83,7 +83,7 @@ class CourseActivityWeeklyView(BaseCourseView):
<dd>The number of unique users who created a new post, responded to a post, or submitted a comment on any forum in the course.</dd>
</dl>
If no start or end dates are passed, the data for the latest date is returned. All dates should are in the UTC zone.
If no start or end dates are passed, the data for the latest date is returned. All dates are in the UTC zone.
Data is sorted chronologically (earliest to latest).
......@@ -248,7 +248,7 @@ class CourseEnrollmentByBirthYearView(BaseCourseEnrollmentView):
Returns the enrollment of a course with users binned by their birth years.
If no start or end dates are passed, the data for the latest date is returned. All dates should are in the UTC zone.
If no start or end dates are passed, the data for the latest date is returned. All dates are in the UTC zone.
Data is sorted chronologically (earliest to latest).
......@@ -269,7 +269,7 @@ class CourseEnrollmentByEducationView(BaseCourseEnrollmentView):
Returns the enrollment of a course with users binned by their education levels.
If no start or end dates are passed, the data for the latest date is returned. All dates should are in the UTC zone.
If no start or end dates are passed, the data for the latest date is returned. All dates are in the UTC zone.
Data is sorted chronologically (earliest to latest).
......@@ -287,14 +287,15 @@ class CourseEnrollmentByGenderView(BaseCourseEnrollmentView):
"""
Course enrollment broken down by user gender
Returns the enrollment of a course with users binned by their genders.
Returns the enrollment of a course where each row/item contains user genders for the day.
Genders:
m - male
f - female
o - other
male
female
other
unknown
If no start or end dates are passed, the data for the latest date is returned. All dates should are in the UTC zone.
If no start or end dates are passed, the data for the latest date is returned. All dates are in the UTC zone.
Data is sorted chronologically (earliest to latest).
......@@ -307,12 +308,35 @@ class CourseEnrollmentByGenderView(BaseCourseEnrollmentView):
serializer_class = serializers.CourseEnrollmentByGenderSerializer
model = models.CourseEnrollmentByGender
def get_queryset(self):
    """
    Collapse the per-gender enrollment rows into one item per course and day.

    Returns a list of dicts. Each dict carries ``course_id``, ``date``, one
    key per cleaned gender name present for that day (holding the summed
    count), and ``created`` set to the most recent ``created`` value among
    the rows that were merged into the item.

    Items are emitted in the order their (course_id, date) pair is first
    encountered in the parent queryset, so the parent's ordering is kept.
    """
    # Function-scope import so this method brings its own dependency with it.
    from collections import OrderedDict

    queryset = super(CourseEnrollmentByGenderView, self).get_queryset()

    # itertools.groupby starts a new group whenever the key changes, so rows
    # for the same day that are not adjacent would produce duplicate items.
    # An ordered mapping merges them regardless of adjacency while still
    # preserving first-seen order.
    items_by_day = OrderedDict()
    for enrollment in queryset:
        key = (enrollment.course_id, enrollment.date)
        item = items_by_day.get(key)
        if item is None:
            item = items_by_day[key] = {
                u'course_id': enrollment.course_id,
                u'date': enrollment.date,
                u'created': None,
            }

        # Sum the counts of all rows sharing the same cleaned gender name.
        gender = enrollment.cleaned_gender.lower()
        item[gender] = item.get(gender, 0) + enrollment.count

        # Keep the latest creation timestamp seen for this day.
        if not item[u'created'] or enrollment.created > item[u'created']:
            item[u'created'] = enrollment.created

    return list(items_by_day.values())
class CourseEnrollmentView(BaseCourseEnrollmentView):
"""
Returns the enrollment count for the specified course.
If no start or end dates are passed, the data for the latest date is returned. All dates should are in the UTC zone.
If no start or end dates are passed, the data for the latest date is returned. All dates are in the UTC zone.
Data is sorted chronologically (earliest to latest).
......@@ -337,7 +361,7 @@ class CourseEnrollmentByLocationView(BaseCourseEnrollmentView):
Countries are denoted by their <a href="http://www.iso.org/iso/country_codes/country_codes" target="_blank">ISO 3166 country code</a>.
If no start or end dates are passed, the data for the latest date is returned. All dates should are in the UTC zone.
If no start or end dates are passed, the data for the latest date is returned. All dates are in the UTC zone.
Data is sorted chronologically (earliest to latest).
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment