Commit 25ef8334 by Clinton Blackburn

Merge pull request #21 from edx/activity-update

Added Activity Resource
parents 0674b790 0c6b3341
...@@ -20,7 +20,7 @@ clean: ...@@ -20,7 +20,7 @@ clean:
coverage erase coverage erase
test: clean test: clean
. ./.test_env && ./manage.py test --settings=analyticsdataserver.settings.test \ . ./.test_env && ./manage.py test --settings=analyticsdataserver.settings.test --with-ignore-docstrings \
--exclude-dir=analyticsdataserver/settings --with-coverage --cover-inclusive --cover-branches \ --exclude-dir=analyticsdataserver/settings --with-coverage --cover-inclusive --cover-branches \
--cover-html --cover-html-dir=$(COVERAGE)/html/ \ --cover-html --cover-html-dir=$(COVERAGE)/html/ \
--cover-xml --cover-xml-file=$(COVERAGE)/coverage.xml \ --cover-xml --cover-xml-file=$(COVERAGE)/coverage.xml \
......
...@@ -109,7 +109,7 @@ class Command(BaseCommand): ...@@ -109,7 +109,7 @@ class Command(BaseCommand):
activity_types = ['PLAYED_VIDEO', 'ATTEMPTED_PROBLEM', 'POSTED_FORUM'] activity_types = ['PLAYED_VIDEO', 'ATTEMPTED_PROBLEM', 'POSTED_FORUM']
start = start_date start = start_date
models.CourseActivityByWeek.objects.all().delete() models.CourseActivityWeekly.objects.all().delete()
logger.info("Deleted all weekly course activity.") logger.info("Deleted all weekly course activity.")
logger.info("Generating new weekly course activity data...") logger.info("Generating new weekly course activity data...")
...@@ -121,10 +121,10 @@ class Command(BaseCommand): ...@@ -121,10 +121,10 @@ class Command(BaseCommand):
counts = constrained_sum_sample_pos(len(activity_types), active_students) counts = constrained_sum_sample_pos(len(activity_types), active_students)
for activity_type, count in zip(activity_types, counts): for activity_type, count in zip(activity_types, counts):
models.CourseActivityByWeek.objects.create(course_id=course_id, activity_type=activity_type, models.CourseActivityWeekly.objects.create(course_id=course_id, activity_type=activity_type,
count=count, interval_start=start, interval_end=end) count=count, interval_start=start, interval_end=end)
models.CourseActivityByWeek.objects.create(course_id=course_id, activity_type='ACTIVE', count=active_students, models.CourseActivityWeekly.objects.create(course_id=course_id, activity_type='ACTIVE', count=active_students,
interval_start=start, interval_end=end) interval_start=start, interval_end=end)
start = end start = end
......
...@@ -2,16 +2,18 @@ from django.db import models ...@@ -2,16 +2,18 @@ from django.db import models
from iso3166 import countries from iso3166 import countries
class CourseActivityByWeek(models.Model): class CourseActivityWeekly(models.Model):
"""A count of unique users who performed a particular action during a week.""" """A count of unique users who performed a particular action during a week."""
class Meta(object): class Meta(object):
db_table = 'course_activity' db_table = 'course_activity'
index_together = [['course_id', 'activity_type']] index_together = [['course_id', 'activity_type']]
ordering = ('interval_end', 'interval_start', 'course_id')
get_latest_by = 'interval_end'
course_id = models.CharField(db_index=True, max_length=255) course_id = models.CharField(db_index=True, max_length=255)
interval_start = models.DateTimeField() interval_start = models.DateTimeField()
interval_end = models.DateTimeField() interval_end = models.DateTimeField(db_index=True)
activity_type = models.CharField(db_index=True, max_length=255, db_column='label') activity_type = models.CharField(db_index=True, max_length=255, db_column='label')
count = models.IntegerField() count = models.IntegerField()
......
from django.conf import settings from django.conf import settings
from rest_framework import serializers from rest_framework import serializers
from analytics_data_api.v0 import models from analytics_data_api.v0 import models
from analytics_data_api.v0.models import CourseActivityWeekly
class CourseActivityByWeekSerializer(serializers.ModelSerializer): class CourseActivityByWeekSerializer(serializers.ModelSerializer):
...@@ -25,7 +26,7 @@ class CourseActivityByWeekSerializer(serializers.ModelSerializer): ...@@ -25,7 +26,7 @@ class CourseActivityByWeekSerializer(serializers.ModelSerializer):
return activity_type return activity_type
class Meta(object): class Meta(object):
model = models.CourseActivityByWeek model = models.CourseActivityWeekly
fields = ('interval_start', 'interval_end', 'activity_type', 'count', 'course_id') fields = ('interval_start', 'interval_end', 'activity_type', 'count', 'course_id')
...@@ -112,3 +113,17 @@ class CourseEnrollmentByBirthYearSerializer(BaseCourseEnrollmentModelSerializer) ...@@ -112,3 +113,17 @@ class CourseEnrollmentByBirthYearSerializer(BaseCourseEnrollmentModelSerializer)
class Meta(object): class Meta(object):
model = models.CourseEnrollmentByBirthYear model = models.CourseEnrollmentByBirthYear
fields = ('course_id', 'date', 'birth_year', 'count') fields = ('course_id', 'date', 'birth_year', 'count')
class CourseActivityWeeklySerializer(serializers.ModelSerializer):
    """
    Serializer for weekly course activity, with one count field per activity type.

    The interval bounds are rendered with ``settings.DATETIME_FORMAT``. Every
    per-activity count is declared with ``required=False``.
    """

    # Bounds of the reporting interval.
    interval_start = serializers.DateTimeField(format=settings.DATETIME_FORMAT)
    interval_end = serializers.DateTimeField(format=settings.DATETIME_FORMAT)

    # Per-activity counts. NOTE(review): `any` shadows the builtin inside this
    # class body only; it is the published field name, so it must not be renamed.
    any = serializers.IntegerField(required=False)
    attempted_problem = serializers.IntegerField(required=False)
    played_video = serializers.IntegerField(required=False)
    posted_forum = serializers.IntegerField(required=False)

    class Meta(object):
        model = CourseActivityWeekly
        fields = (
            'interval_start',
            'interval_end',
            'course_id',
            'any',
            'attempted_problem',
            'played_video',
            'posted_forum',
        )
...@@ -6,6 +6,7 @@ from analytics_data_api.v0.views import courses as views ...@@ -6,6 +6,7 @@ from analytics_data_api.v0.views import courses as views
COURSE_URLS = [ COURSE_URLS = [
('activity', views.CourseActivityWeeklyView, 'activity'),
('recent_activity', views.CourseActivityMostRecentWeekView, 'recent_activity'), ('recent_activity', views.CourseActivityMostRecentWeekView, 'recent_activity'),
('enrollment', views.CourseEnrollmentView, 'enrollment_latest'), ('enrollment', views.CourseEnrollmentView, 'enrollment_latest'),
('enrollment/birth_year', views.CourseEnrollmentByBirthYearView, 'enrollment_by_birth_year'), ('enrollment/birth_year', views.CourseEnrollmentByBirthYearView, 'enrollment_by_birth_year'),
......
import datetime import datetime
from itertools import groupby
import warnings
from django.conf import settings from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ObjectDoesNotExist
from django.db.models import Max from django.db.models import Max
from django.http import Http404 from django.http import Http404
from django.utils.timezone import make_aware, utc
from rest_framework import generics from rest_framework import generics
from analytics_data_api.v0 import models, serializers from analytics_data_api.v0 import models, serializers
class BaseCourseView(generics.ListAPIView):
    """
    Base list view for course-scoped resources with optional date filtering.

    ``get`` reads the optional ``start_date`` and ``end_date`` query parameters
    (format: ``settings.DATE_FORMAT``), converts them to UTC-aware datetimes and
    stores them on the view as ``self.start_date`` / ``self.end_date``.
    Subclasses must set ``model`` and implement ``apply_date_filtering``.
    """

    start_date = None
    end_date = None

    def _parse_date_param(self, request, name):
        """
        Return the query parameter ``name`` as a UTC-aware datetime.

        A missing or empty parameter is returned unchanged (falsy), so callers
        can keep testing ``if self.start_date``.

        Raises:
            ParseError: the value does not match ``settings.DATE_FORMAT``. This
                produces an HTTP 400 response rather than an unhandled
                ValueError (HTTP 500).
        """
        # Local import keeps this block independent of the module's import list.
        from rest_framework.exceptions import ParseError

        raw_value = request.QUERY_PARAMS.get(name)
        if not raw_value:
            return raw_value

        try:
            parsed = datetime.datetime.strptime(raw_value, settings.DATE_FORMAT)
        except ValueError:
            raise ParseError(
                'Invalid {0}: expected a date in the format {1}.'.format(name, settings.DATE_FORMAT))

        return make_aware(parsed, utc)

    def get(self, request, *args, **kwargs):
        """Parse the date query parameters, then delegate to the standard list handler."""
        self.start_date = self._parse_date_param(request, 'start_date')
        self.end_date = self._parse_date_param(request, 'end_date')
        return super(BaseCourseView, self).get(request, *args, **kwargs)

    def verify_course_exists_or_404(self, course_id):
        """Return True if ``self.model`` has any row for ``course_id``; otherwise raise Http404."""
        if self.model.objects.filter(course_id=course_id).exists():
            return True
        raise Http404

    def apply_date_filtering(self, queryset):
        """Restrict ``queryset`` using ``self.start_date`` / ``self.end_date``; subclasses must override."""
        raise NotImplementedError

    def get_queryset(self):
        """Return the date-filtered rows for the course named by the ``course_id`` URL kwarg."""
        course_id = self.kwargs.get('course_id')
        self.verify_course_exists_or_404(course_id)
        queryset = self.model.objects.filter(course_id=course_id)
        return self.apply_date_filtering(queryset)
# pylint: disable=line-too-long
class CourseActivityWeeklyView(BaseCourseView):
    """
    Weekly course activity

    Returns the course activity. Each row/item will contain all activity types for the course-week.

    <strong>Activity Types</strong>
    <dl>
        <dt>ANY</dt>
        <dd>The number of unique users who performed any action within the course, including actions not enumerated below.</dd>
        <dt>ATTEMPTED_PROBLEM</dt>
        <dd>The number of unique users who answered any loncapa based question in the course.</dd>
        <dt>PLAYED_VIDEO</dt>
        <dd>The number of unique users who started watching any video in the course.</dd>
        <dt>POSTED_FORUM</dt>
        <dd>The number of unique users who created a new post, responded to a post, or submitted a comment on any forum in the course.</dd>
    </dl>

    If no start or end dates are passed, the data for the latest date is returned. All dates are in the UTC zone.

    Data is sorted chronologically (earliest to latest).

    Date format: YYYY-mm-dd (e.g. 2014-01-31)

    start_date -- Date after which all data should be returned (inclusive)
    end_date -- Date before which all data should be returned (exclusive)
    """

    model = models.CourseActivityWeekly
    serializer_class = serializers.CourseActivityWeeklySerializer

    def apply_date_filtering(self, queryset):
        """
        Filter ``queryset`` by the requested date range, or to the latest interval.

        With a start and/or end date, rows are kept when
        ``interval_start >= start_date`` and ``interval_end < end_date``.
        With neither, only rows whose ``interval_end`` equals the maximum
        ``interval_end`` in ``queryset`` are returned.
        """
        if self.start_date or self.end_date:
            if self.start_date:
                queryset = queryset.filter(interval_start__gte=self.start_date)
            if self.end_date:
                queryset = queryset.filter(interval_end__lt=self.end_date)
            return queryset

        # No date filter supplied, so only return data for the latest date.
        # aggregate() always returns a dict containing this key, so the result
        # needs no truthiness check before indexing.
        latest_date = queryset.aggregate(Max('interval_end'))['interval_end__max']
        return queryset.filter(interval_end=latest_date)

    def get_queryset(self):
        """Return the filtered activity rows collapsed into one item per course-week."""
        queryset = super(CourseActivityWeeklyView, self).get_queryset()
        return self.format_data(queryset)

    def _format_activity_type(self, activity_type):
        """Return the lower-cased API name for a stored activity type."""
        activity_type = activity_type.lower()

        # The data pipeline stores "any" as "active"; however, the API should display "any".
        if activity_type == 'active':
            activity_type = 'any'

        return activity_type

    def format_data(self, data):
        """
        Group the data by date and combine multiple activity rows into a single row/element.

        Arguments
            data (iterable) -- Activity rows exposing course_id, interval_start,
                interval_end, activity_type and count.

        Returns a list of dicts, one per (course_id, interval_start, interval_end),
        ordered by interval_end, interval_start, course_id. Each dict holds the
        three key fields plus one entry per activity type mapped to its count.
        """
        def interval_key(activity):
            return activity.interval_end, activity.interval_start, activity.course_id

        # groupby() only merges *adjacent* rows, so sort by the grouping key first
        # instead of relying on the caller's ordering. sorted() is stable, so rows
        # that already arrive in this order are left exactly as they were.
        ordered_rows = sorted(data, key=interval_key)

        formatted_data = []
        for (interval_end, interval_start, course_id), group in groupby(ordered_rows, interval_key):
            # Create a single item carrying every activity type for this interval.
            item = {
                u'course_id': course_id,
                u'interval_start': interval_start,
                u'interval_end': interval_end,
            }
            for activity in group:
                item[self._format_activity_type(activity.activity_type)] = activity.count
            formatted_data.append(item)

        return formatted_data
class CourseActivityMostRecentWeekView(generics.RetrieveAPIView): class CourseActivityMostRecentWeekView(generics.RetrieveAPIView):
""" """
Counts of users who performed various actions at least once during the most recently computed week. Counts of users who performed various actions at least once during the most recently computed week.
...@@ -67,34 +197,26 @@ class CourseActivityMostRecentWeekView(generics.RetrieveAPIView): ...@@ -67,34 +197,26 @@ class CourseActivityMostRecentWeekView(generics.RetrieveAPIView):
def get_object(self, queryset=None): def get_object(self, queryset=None):
"""Select the activity report for the given course and activity type.""" """Select the activity report for the given course and activity type."""
warnings.warn('CourseActivityMostRecentWeekView has been deprecated! Use CourseActivityWeeklyView instead.',
DeprecationWarning)
course_id = self.kwargs.get('course_id') course_id = self.kwargs.get('course_id')
activity_type = self._get_activity_type() activity_type = self._get_activity_type()
try: try:
return models.CourseActivityByWeek.get_most_recent(course_id, activity_type) return models.CourseActivityWeekly.get_most_recent(course_id, activity_type)
except ObjectDoesNotExist: except ObjectDoesNotExist:
raise Http404 raise Http404
class BaseCourseEnrollmentView(generics.ListAPIView): class BaseCourseEnrollmentView(BaseCourseView):
def verify_course_exists_or_404(self, course_id):
if self.model.objects.filter(course_id=course_id).exists():
return True
raise Http404
def apply_date_filtering(self, queryset): def apply_date_filtering(self, queryset):
if 'start_date' in self.request.QUERY_PARAMS or 'end_date' in self.request.QUERY_PARAMS: if self.start_date or self.end_date:
# Filter by start/end date # Filter by start/end date
start_date = self.request.QUERY_PARAMS.get('start_date') if self.start_date:
if start_date: queryset = queryset.filter(date__gte=self.start_date)
start_date = datetime.datetime.strptime(start_date, settings.DATE_FORMAT)
queryset = queryset.filter(date__gte=start_date)
end_date = self.request.QUERY_PARAMS.get('end_date') if self.end_date:
if end_date: queryset = queryset.filter(date__lt=self.end_date)
end_date = datetime.datetime.strptime(end_date, settings.DATE_FORMAT)
queryset = queryset.filter(date__lt=end_date)
else: else:
# No date filter supplied, so only return data for the latest date # No date filter supplied, so only return data for the latest date
latest_date = queryset.aggregate(Max('date')) latest_date = queryset.aggregate(Max('date'))
...@@ -103,13 +225,6 @@ class BaseCourseEnrollmentView(generics.ListAPIView): ...@@ -103,13 +225,6 @@ class BaseCourseEnrollmentView(generics.ListAPIView):
queryset = queryset.filter(date=latest_date) queryset = queryset.filter(date=latest_date)
return queryset return queryset
def get_queryset(self):
course_id = self.kwargs.get('course_id')
self.verify_course_exists_or_404(course_id)
queryset = self.model.objects.filter(course_id=course_id)
queryset = self.apply_date_filtering(queryset)
return queryset
class CourseEnrollmentByBirthYearView(BaseCourseEnrollmentView): class CourseEnrollmentByBirthYearView(BaseCourseEnrollmentView):
""" """
......
...@@ -53,7 +53,7 @@ DATABASES = { ...@@ -53,7 +53,7 @@ DATABASES = {
########## GENERAL CONFIGURATION ########## GENERAL CONFIGURATION
# See: https://docs.djangoproject.com/en/dev/ref/settings/#time-zone # See: https://docs.djangoproject.com/en/dev/ref/settings/#time-zone
TIME_ZONE = 'America/New_York' TIME_ZONE = 'UTC'
# See: https://docs.djangoproject.com/en/dev/ref/settings/#language-code # See: https://docs.djangoproject.com/en/dev/ref/settings/#language-code
LANGUAGE_CODE = 'en-us' LANGUAGE_CODE = 'en-us'
...@@ -62,10 +62,10 @@ LANGUAGE_CODE = 'en-us' ...@@ -62,10 +62,10 @@ LANGUAGE_CODE = 'en-us'
SITE_ID = 1 SITE_ID = 1
# See: https://docs.djangoproject.com/en/dev/ref/settings/#use-i18n # See: https://docs.djangoproject.com/en/dev/ref/settings/#use-i18n
USE_I18N = True USE_I18N = False
# See: https://docs.djangoproject.com/en/dev/ref/settings/#use-l10n # See: https://docs.djangoproject.com/en/dev/ref/settings/#use-l10n
USE_L10N = True USE_L10N = False
# See: https://docs.djangoproject.com/en/dev/ref/settings/#use-tz # See: https://docs.djangoproject.com/en/dev/ref/settings/#use-tz
USE_TZ = True USE_TZ = True
...@@ -268,3 +268,4 @@ ENABLE_ADMIN_SITE = False ...@@ -268,3 +268,4 @@ ENABLE_ADMIN_SITE = False
########## END ANALYTICS DATA API CONFIGURATION ########## END ANALYTICS DATA API CONFIGURATION
DATE_FORMAT = '%Y-%m-%d' DATE_FORMAT = '%Y-%m-%d'
DATETIME_FORMAT = '%Y-%m-%dT%H%M%S'
...@@ -11,3 +11,4 @@ pep257==0.3.2 ...@@ -11,3 +11,4 @@ pep257==0.3.2
pep8==1.5.7 pep8==1.5.7
pylint==1.2.1 pylint==1.2.1
pytz==2012h pytz==2012h
nose-ignore-docstring==0.2
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment