Commit 0c6b3341 by Clinton Blackburn

Added Activity Resource

This replaces the now-deprecated recent activity resource. The new resource follows a data format similar to that of the enrollment resources, and it now returns an array. When no parameters are passed with the request, only the most recent data is returned.
parent 5e6dedcd
......@@ -109,7 +109,7 @@ class Command(BaseCommand):
activity_types = ['PLAYED_VIDEO', 'ATTEMPTED_PROBLEM', 'POSTED_FORUM']
start = start_date
models.CourseActivityByWeek.objects.all().delete()
models.CourseActivityWeekly.objects.all().delete()
logger.info("Deleted all weekly course activity.")
logger.info("Generating new weekly course activity data...")
......@@ -121,10 +121,10 @@ class Command(BaseCommand):
counts = constrained_sum_sample_pos(len(activity_types), active_students)
for activity_type, count in zip(activity_types, counts):
models.CourseActivityByWeek.objects.create(course_id=course_id, activity_type=activity_type,
models.CourseActivityWeekly.objects.create(course_id=course_id, activity_type=activity_type,
count=count, interval_start=start, interval_end=end)
models.CourseActivityByWeek.objects.create(course_id=course_id, activity_type='ACTIVE', count=active_students,
models.CourseActivityWeekly.objects.create(course_id=course_id, activity_type='ACTIVE', count=active_students,
interval_start=start, interval_end=end)
start = end
......
......@@ -2,16 +2,18 @@ from django.db import models
from iso3166 import countries
class CourseActivityByWeek(models.Model):
class CourseActivityWeekly(models.Model):
"""A count of unique users who performed a particular action during a week."""
class Meta(object):
db_table = 'course_activity'
index_together = [['course_id', 'activity_type']]
ordering = ('interval_end', 'interval_start', 'course_id')
get_latest_by = 'interval_end'
course_id = models.CharField(db_index=True, max_length=255)
interval_start = models.DateTimeField()
interval_end = models.DateTimeField()
interval_end = models.DateTimeField(db_index=True)
activity_type = models.CharField(db_index=True, max_length=255, db_column='label')
count = models.IntegerField()
......
from django.conf import settings
from rest_framework import serializers
from analytics_data_api.v0 import models
from analytics_data_api.v0.models import CourseActivityWeekly
class CourseActivityByWeekSerializer(serializers.ModelSerializer):
......@@ -25,7 +26,7 @@ class CourseActivityByWeekSerializer(serializers.ModelSerializer):
return activity_type
class Meta(object):
model = models.CourseActivityByWeek
model = models.CourseActivityWeekly
fields = ('interval_start', 'interval_end', 'activity_type', 'count', 'course_id')
......@@ -112,3 +113,17 @@ class CourseEnrollmentByBirthYearSerializer(BaseCourseEnrollmentModelSerializer)
class Meta(object):
model = models.CourseEnrollmentByBirthYear
fields = ('course_id', 'date', 'birth_year', 'count')
class CourseActivityWeeklySerializer(serializers.ModelSerializer):
    """
    Serializer for weekly course activity.

    Exposes the interval bounds and course ID together with one optional integer
    count per activity type.
    """
    # Interval bounds are rendered using the project-wide DATETIME_FORMAT setting.
    interval_start = serializers.DateTimeField(format=settings.DATETIME_FORMAT)
    interval_end = serializers.DateTimeField(format=settings.DATETIME_FORMAT)

    # Per-activity-type counts. None of them is required to be present.
    any = serializers.IntegerField(required=False)
    attempted_problem = serializers.IntegerField(required=False)
    played_video = serializers.IntegerField(required=False)
    posted_forum = serializers.IntegerField(required=False)

    class Meta(object):
        model = CourseActivityWeekly
        fields = (
            'interval_start',
            'interval_end',
            'course_id',
            'any',
            'attempted_problem',
            'played_video',
            'posted_forum',
        )
......@@ -4,6 +4,7 @@
import StringIO
import csv
import datetime
from itertools import groupby
from django.conf import settings
from django_dynamic_fixture import G
......@@ -11,6 +12,7 @@ from iso3166 import countries
import pytz
from analytics_data_api.v0 import models
from analytics_data_api.v0.models import CourseActivityWeekly
from analytics_data_api.v0.serializers import ProblemResponseAnswerDistributionSerializer
from analytics_data_api.v0.tests.utils import flatten
from analyticsdataserver.tests import TestCaseWithAuthentication
......@@ -89,25 +91,28 @@ class CourseViewTestCaseMixin(object):
def assertIntervalFilteringWorks(self, expected_response, start_date, end_date):
# If start date is after date of existing data, no data should be returned
date = (start_date + datetime.timedelta(days=30)).strftime(settings.DATE_FORMAT)
response = self.authenticated_get('%scourses/%s%s?start_date=%s' % (self.api_root_path, self.course_id, self.path, date))
response = self.authenticated_get(
'%scourses/%s%s?start_date=%s' % (self.api_root_path, self.course_id, self.path, date))
self.assertEquals(response.status_code, 200)
self.assertListEqual([], response.data)
# If end date is before date of existing data, no data should be returned
date = (start_date - datetime.timedelta(days=30)).strftime(settings.DATE_FORMAT)
response = self.authenticated_get('%scourses/%s%s?end_date=%s' % (self.api_root_path, self.course_id, self.path, date))
response = self.authenticated_get(
'%scourses/%s%s?end_date=%s' % (self.api_root_path, self.course_id, self.path, date))
self.assertEquals(response.status_code, 200)
self.assertListEqual([], response.data)
# If data falls in date range, data should be returned
start_date = start_date.strftime(settings.DATE_FORMAT)
end_date = end_date.strftime(settings.DATE_FORMAT)
response = self.authenticated_get(
'%scourses/%s%s?start_date=%s&end_date=%s' % (self.api_root_path, self.course_id, self.path, start_date, end_date))
response = self.authenticated_get('%scourses/%s%s?start_date=%s&end_date=%s' % (
self.api_root_path, self.course_id, self.path, start_date, end_date))
self.assertEquals(response.status_code, 200)
self.assertListEqual(response.data, expected_response)
# pylint: disable=abstract-method
class CourseEnrollmentViewTestCaseMixin(CourseViewTestCaseMixin):
def setUp(self):
super(CourseEnrollmentViewTestCaseMixin, self).setUp()
......@@ -127,18 +132,18 @@ class CourseActivityLastWeekTest(TestCaseWithAuthentication):
def setUp(self):
super(CourseActivityLastWeekTest, self).setUp()
self.course_id = 'edX/DemoX/Demo_Course'
interval_start = '2014-05-24T00:00:00Z'
interval_end = '2014-06-01T00:00:00Z'
G(models.CourseActivityByWeek, course_id=self.course_id, interval_start=interval_start,
interval_start = datetime.datetime(2014, 1, 1, tzinfo=pytz.utc)
interval_end = interval_start + datetime.timedelta(weeks=1)
G(models.CourseActivityWeekly, course_id=self.course_id, interval_start=interval_start,
interval_end=interval_end,
activity_type='POSTED_FORUM', count=100)
G(models.CourseActivityByWeek, course_id=self.course_id, interval_start=interval_start,
G(models.CourseActivityWeekly, course_id=self.course_id, interval_start=interval_start,
interval_end=interval_end,
activity_type='ATTEMPTED_PROBLEM', count=200)
G(models.CourseActivityByWeek, course_id=self.course_id, interval_start=interval_start,
G(models.CourseActivityWeekly, course_id=self.course_id, interval_start=interval_start,
interval_end=interval_end,
activity_type='ACTIVE', count=300)
G(models.CourseActivityByWeek, course_id=self.course_id, interval_start=interval_start,
G(models.CourseActivityWeekly, course_id=self.course_id, interval_start=interval_start,
interval_end=interval_end,
activity_type='PLAYED_VIDEO', count=400)
......@@ -157,8 +162,8 @@ class CourseActivityLastWeekTest(TestCaseWithAuthentication):
def get_activity_record(**kwargs):
default = {
'course_id': 'edX/DemoX/Demo_Course',
'interval_start': datetime.datetime(2014, 5, 24, 0, 0, tzinfo=pytz.utc),
'interval_end': datetime.datetime(2014, 6, 1, 0, 0, tzinfo=pytz.utc),
'interval_start': datetime.datetime(2014, 1, 1, 0, 0, tzinfo=pytz.utc),
'interval_end': datetime.datetime(2014, 1, 8, 0, 0, tzinfo=pytz.utc),
'activity_type': 'any',
'count': 300,
}
......@@ -208,7 +213,7 @@ class CourseEnrollmentByBirthYearViewTests(CourseEnrollmentViewTestCaseMixin, Te
path = '/enrollment/birth_year'
model = models.CourseEnrollmentByBirthYear
order_by = ['birth_year']
def setUp(self):
super(CourseEnrollmentByBirthYearViewTests, self).setUp()
G(self.model, course_id=self.course_id, date=self.date, birth_year=1956)
......@@ -228,10 +233,6 @@ class CourseEnrollmentByBirthYearViewTests(CourseEnrollmentViewTestCaseMixin, Te
expected = self.format_as_response(*self.model.objects.filter(date=self.date))
self.assertEquals(response.data, expected)
def test_get_with_intervals(self):
expected = self.format_as_response(*self.model.objects.filter(date=self.date))
self.assertIntervalFilteringWorks(expected, self.date, self.date + datetime.timedelta(days=1))
class CourseEnrollmentByEducationViewTests(CourseEnrollmentViewTestCaseMixin, TestCaseWithAuthentication):
path = '/enrollment/education/'
......@@ -334,10 +335,77 @@ class CourseEnrollmentByLocationViewTests(CourseEnrollmentViewTestCaseMixin, Tes
self.country = countries.get('US')
G(self.model, course_id=self.course_id, country_code='US', count=455, date=self.date)
G(self.model, course_id=self.course_id, country_code='CA', count=356, date=self.date)
G(self.model, course_id=self.course_id, country_code='IN', count=12, date=self.date - datetime.timedelta(days=29))
G(self.model, course_id=self.course_id, country_code='IN', count=12,
date=self.date - datetime.timedelta(days=29))
G(self.model, course_id=self.course_id, country_code='', count=356, date=self.date)
G(self.model, course_id=self.course_id, country_code='A1', count=1, date=self.date)
G(self.model, course_id=self.course_id, country_code='A2', count=2, date=self.date)
G(self.model, course_id=self.course_id, country_code='AP', count=1, date=self.date)
G(self.model, course_id=self.course_id, country_code='EU', count=4, date=self.date)
G(self.model, course_id=self.course_id, country_code='O1', count=7, date=self.date)
class CourseActivityWeeklyViewTests(CourseViewTestCaseMixin, TestCaseWithAuthentication):
    """ Tests for the weekly course activity endpoint (``/activity/``). """
    path = '/activity/'
    default_order_by = 'interval_end'
    model = CourseActivityWeekly
    activity_types = ['ACTIVE', 'ATTEMPTED_PROBLEM', 'PLAYED_VIDEO', 'POSTED_FORUM']

    def _create_weekly_activity(self, interval_start, interval_end, count):
        """ Create one record per activity type for the given interval, each with the given count. """
        for activity_type in self.activity_types:
            G(CourseActivityWeekly,
              course_id=self.course_id,
              interval_start=interval_start,
              interval_end=interval_end,
              activity_type=activity_type,
              count=count)

    def setUp(self):
        super(CourseActivityWeeklyViewTests, self).setUp()
        self.course_id = 'edX/DemoX/Demo_Course'
        self.interval_start = datetime.datetime(2014, 1, 1, tzinfo=pytz.utc)
        self.interval_end = self.interval_start + datetime.timedelta(weeks=1)
        self._create_weekly_activity(self.interval_start, self.interval_end, 100)

    def get_latest_data(self):
        """ Return the records for this course whose interval ends at ``self.interval_end``. """
        return self.model.objects.filter(course_id=self.course_id, interval_end=self.interval_end)

    def format_as_response(self, *args):
        """ Collapse activity records into one dict per ``interval_end``, keyed by lower-cased activity type. """
        response = []

        # Consecutive records sharing an interval_end are merged into a single item.
        for _interval_end, records in groupby(args, lambda record: record.interval_end):
            item = {}
            for record in records:
                label = record.activity_type.lower()
                # 'active' is reported as 'any' in the response.
                label = 'any' if label == 'active' else label

                item[u'course_id'] = record.course_id
                item[u'interval_start'] = record.interval_start.strftime(settings.DATETIME_FORMAT)
                item[u'interval_end'] = record.interval_end.strftime(settings.DATETIME_FORMAT)
                item[label] = record.count
            response.append(item)

        return response

    def test_get_with_intervals(self):
        """ Verify the endpoint returns multiple data points when supplied with an interval of dates. """
        # Add a second week of data directly after the one created in setUp.
        one_week = datetime.timedelta(weeks=1)
        interval_start = self.interval_start + one_week
        interval_end = self.interval_end + one_week
        self._create_weekly_activity(interval_start, interval_end, 200)

        expected = self.format_as_response(*self.model.objects.all())
        self.assertEqual(len(expected), 2)
        self.assertIntervalFilteringWorks(expected, self.interval_start, interval_end + datetime.timedelta(days=1))
......@@ -6,6 +6,7 @@ from analytics_data_api.v0.views import courses as views
COURSE_URLS = [
('activity', views.CourseActivityWeeklyView, 'activity'),
('recent_activity', views.CourseActivityMostRecentWeekView, 'recent_activity'),
('enrollment', views.CourseEnrollmentView, 'enrollment_latest'),
('enrollment/birth_year', views.CourseEnrollmentByBirthYearView, 'enrollment_by_birth_year'),
......
import datetime
from itertools import groupby
import warnings
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db.models import Max
from django.http import Http404
from django.utils.timezone import make_aware, utc
from rest_framework import generics
from analytics_data_api.v0 import models, serializers
class BaseCourseView(generics.ListAPIView):
    """
    Base list view for course data that can be filtered by a date range.

    The ``start_date`` and ``end_date`` query parameters, when supplied, are parsed using
    ``settings.DATE_FORMAT``, made timezone-aware in UTC, and stored on the view before
    the request is handled. Subclasses must implement ``apply_date_filtering``.
    """
    start_date = None
    end_date = None

    @staticmethod
    def _parse_date(value):
        """ Convert a date string to a UTC-aware datetime. Falsy values are returned unchanged. """
        if not value:
            return value
        return make_aware(datetime.datetime.strptime(value, settings.DATE_FORMAT), utc)

    def get(self, request, *args, **kwargs):
        params = request.QUERY_PARAMS
        raw_start = params.get('start_date')
        raw_end = params.get('end_date')

        # Parse both values before storing either on the view.
        parsed_start = self._parse_date(raw_start)
        parsed_end = self._parse_date(raw_end)
        self.start_date, self.end_date = parsed_start, parsed_end

        return super(BaseCourseView, self).get(request, *args, **kwargs)

    def verify_course_exists_or_404(self, course_id):
        """ Return True if the model has any row for the course; otherwise raise Http404. """
        course_found = self.model.objects.filter(course_id=course_id).exists()
        if not course_found:
            raise Http404
        return True

    def apply_date_filtering(self, queryset):
        """ Restrict the queryset by date. Must be implemented by subclasses. """
        raise NotImplementedError

    def get_queryset(self):
        """ Return the date-filtered rows for the course named in the URL kwargs. """
        course_id = self.kwargs.get('course_id')
        self.verify_course_exists_or_404(course_id)
        course_rows = self.model.objects.filter(course_id=course_id)
        return self.apply_date_filtering(course_rows)
# pylint: disable=line-too-long
class CourseActivityWeeklyView(BaseCourseView):
    """
    Weekly course activity

    Returns the course activity. Each row/item will contain all activity types for the course-week.

    <strong>Activity Types</strong>
    <dl>
        <dt>ANY</dt>
        <dd>The number of unique users who performed any action within the course, including actions not enumerated below.</dd>
        <dt>ATTEMPTED_PROBLEM</dt>
        <dd>The number of unique users who answered any loncapa based question in the course.</dd>
        <dt>PLAYED_VIDEO</dt>
        <dd>The number of unique users who started watching any video in the course.</dd>
        <dt>POSTED_FORUM</dt>
        <dd>The number of unique users who created a new post, responded to a post, or submitted a comment on any forum in the course.</dd>
    </dl>

    If no start or end dates are passed, the data for the latest date is returned. All dates are in the UTC zone.

    Data is sorted chronologically (earliest to latest).

    Date format: YYYY-mm-dd (e.g. 2014-01-31)

    start_date -- Date after which all data should be returned (inclusive)
    end_date -- Date before which all data should be returned (exclusive)
    """

    model = models.CourseActivityWeekly
    serializer_class = serializers.CourseActivityWeeklySerializer

    def apply_date_filtering(self, queryset):
        """
        Restrict the queryset to the requested date range.

        When neither a start nor an end date was supplied, only the rows for the
        latest ``interval_end`` are returned.
        """
        if self.start_date or self.end_date:
            # Filter by start/end date
            if self.start_date:
                queryset = queryset.filter(interval_start__gte=self.start_date)
            if self.end_date:
                queryset = queryset.filter(interval_end__lt=self.end_date)
            return queryset

        # No date filter supplied, so only return data for the latest date.
        # aggregate() always returns a (truthy) dict, so test the aggregated value itself.
        latest_date = queryset.aggregate(Max('interval_end'))['interval_end__max']
        if latest_date is not None:
            queryset = queryset.filter(interval_end=latest_date)
        return queryset

    def get_queryset(self):
        """ Return the filtered activity rows combined into one item per course-week. """
        queryset = super(CourseActivityWeeklyView, self).get_queryset()
        return self.format_data(queryset)

    def _format_activity_type(self, activity_type):
        """ Return the lower-cased activity type as it should appear in the API response. """
        activity_type = activity_type.lower()

        # The data pipeline stores "any" as "active"; however, the API should display "any".
        if activity_type == 'active':
            activity_type = 'any'

        return activity_type

    def format_data(self, data):
        """
        Group the data by date and combine multiple activity rows into a single row/element.

        Arguments
            data (iterable) -- Data to be formatted. Each element must expose course_id,
                interval_start, interval_end, activity_type and count attributes.

        Returns a list of dicts, one per (course_id, interval_start, interval_end), each
        holding those three values plus one count per formatted activity type.
        """
        def interval_key(activity):
            return activity.course_id, activity.interval_start, activity.interval_end

        formatted_data = []

        # groupby() only merges *adjacent* elements with equal keys, so sort by the same key
        # first. sorted() is stable, so rows within a group keep their relative order.
        for key, group in groupby(sorted(data, key=interval_key), interval_key):
            course_id, interval_start, interval_end = key

            # Iterate over groups and create a single item with all activity types
            item = {
                u'course_id': course_id,
                u'interval_start': interval_start,
                u'interval_end': interval_end,
            }

            for activity in group:
                activity_type = self._format_activity_type(activity.activity_type)
                item[activity_type] = activity.count

            formatted_data.append(item)

        return formatted_data
class CourseActivityMostRecentWeekView(generics.RetrieveAPIView):
"""
Counts of users who performed various actions at least once during the most recently computed week.
......@@ -67,34 +197,26 @@ class CourseActivityMostRecentWeekView(generics.RetrieveAPIView):
def get_object(self, queryset=None):
"""Select the activity report for the given course and activity type."""
warnings.warn('CourseActivityMostRecentWeekView has been deprecated! Use CourseActivityWeeklyView instead.',
DeprecationWarning)
course_id = self.kwargs.get('course_id')
activity_type = self._get_activity_type()
try:
return models.CourseActivityByWeek.get_most_recent(course_id, activity_type)
return models.CourseActivityWeekly.get_most_recent(course_id, activity_type)
except ObjectDoesNotExist:
raise Http404
class BaseCourseEnrollmentView(generics.ListAPIView):
def verify_course_exists_or_404(self, course_id):
if self.model.objects.filter(course_id=course_id).exists():
return True
raise Http404
class BaseCourseEnrollmentView(BaseCourseView):
def apply_date_filtering(self, queryset):
if 'start_date' in self.request.QUERY_PARAMS or 'end_date' in self.request.QUERY_PARAMS:
if self.start_date or self.end_date:
# Filter by start/end date
start_date = self.request.QUERY_PARAMS.get('start_date')
if start_date:
start_date = datetime.datetime.strptime(start_date, settings.DATE_FORMAT)
queryset = queryset.filter(date__gte=start_date)
end_date = self.request.QUERY_PARAMS.get('end_date')
if end_date:
end_date = datetime.datetime.strptime(end_date, settings.DATE_FORMAT)
queryset = queryset.filter(date__lt=end_date)
if self.start_date:
queryset = queryset.filter(date__gte=self.start_date)
if self.end_date:
queryset = queryset.filter(date__lt=self.end_date)
else:
# No date filter supplied, so only return data for the latest date
latest_date = queryset.aggregate(Max('date'))
......@@ -103,13 +225,6 @@ class BaseCourseEnrollmentView(generics.ListAPIView):
queryset = queryset.filter(date=latest_date)
return queryset
def get_queryset(self):
course_id = self.kwargs.get('course_id')
self.verify_course_exists_or_404(course_id)
queryset = self.model.objects.filter(course_id=course_id)
queryset = self.apply_date_filtering(queryset)
return queryset
class CourseEnrollmentByBirthYearView(BaseCourseEnrollmentView):
"""
......
......@@ -53,7 +53,7 @@ DATABASES = {
########## GENERAL CONFIGURATION
# See: https://docs.djangoproject.com/en/dev/ref/settings/#time-zone
TIME_ZONE = 'America/New_York'
TIME_ZONE = 'UTC'
# See: https://docs.djangoproject.com/en/dev/ref/settings/#language-code
LANGUAGE_CODE = 'en-us'
......@@ -62,10 +62,10 @@ LANGUAGE_CODE = 'en-us'
SITE_ID = 1
# See: https://docs.djangoproject.com/en/dev/ref/settings/#use-i18n
USE_I18N = True
USE_I18N = False
# See: https://docs.djangoproject.com/en/dev/ref/settings/#use-l10n
USE_L10N = True
USE_L10N = False
# See: https://docs.djangoproject.com/en/dev/ref/settings/#use-tz
USE_TZ = True
......@@ -268,3 +268,4 @@ ENABLE_ADMIN_SITE = False
########## END ANALYTICS DATA API CONFIGURATION
DATE_FORMAT = '%Y-%m-%d'
DATETIME_FORMAT = '%Y-%m-%dT%H%M%S'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment