Commit d7c39e6e by Gabe Mulley

Course user activity

Change-Id: I6dac7d5bb0d041728c64c09e1999fb61339bee92
parent fe7f3099
......@@ -2,6 +2,7 @@
ROOT = $(shell echo "$$PWD")
COVERAGE = $(ROOT)/build/coverage
PACKAGES = analyticsdata analyticsdataclient
DATABASES = default analytics
validate: test.requirements test quality
......@@ -45,3 +46,6 @@ quality:
# Ignore module level docstrings and all test files
pep257 --ignore=D100 --match='(?!test).*py' $(PACKAGES)
# syncdb is a command, not a file; declare it phony so a stray file named
# "syncdb" cannot make the target look up to date.
.PHONY: syncdb

# Create and migrate the schema in every database alias listed in DATABASES.
# The per-database commands are chained with `&&` (terminated by `true`) so
# the recipe stops at, and reports, the first failing database; a `;`-joined
# list would only report the exit status of the last one.
syncdb:
	$(foreach db_name,$(DATABASES),./manage.py syncdb --migrate --database=$(db_name) &&) true
[
{
"pk": 40,
"model": "analyticsdata.courseactivitybyweek",
"fields": {
"course_id": "edX/DemoX/Demo_Course",
"interval_start": "2014-05-24T00:00:00Z",
"label": "POSTED_FORUM",
"count": 100,
"interval_end": "2014-06-01T00:00:00Z"
}
},
{
"pk": 106,
"model": "analyticsdata.courseactivitybyweek",
"fields": {
"course_id": "edX/DemoX/Demo_Course",
"interval_start": "2014-05-24T00:00:00Z",
"label": "ATTEMPTED_PROBLEM",
"count": 200,
"interval_end": "2014-06-01T00:00:00Z"
}
},
{
"pk": 201,
"model": "analyticsdata.courseactivitybyweek",
"fields": {
"course_id": "edX/DemoX/Demo_Course",
"interval_start": "2014-05-24T00:00:00Z",
"label": "ACTIVE",
"count": 300,
"interval_end": "2014-06-01T00:00:00Z"
}
},
{
"pk": 725,
"model": "analyticsdata.courseactivitybyweek",
"fields": {
"course_id": "edX/DemoX/Demo_Course",
"interval_start": "2014-05-24T00:00:00Z",
"label": "PLAYED_VIDEO",
"count": 400,
"interval_end": "2014-06-01T00:00:00Z"
}
}
]
\ No newline at end of file
from django.db import models
from rest_framework import serializers
class CourseActivityByWeek(models.Model):
    """Number of unique users who performed one kind of action in a course during one week."""

    # Name of the Django setting holding the database alias for this model;
    # DatabaseFromSettingRouter looks this attribute up to route queries.
    db_from_setting = 'ANALYTICS_DATABASE'

    course_id = models.CharField(db_index=True, max_length=255)
    interval_start = models.DateTimeField()
    interval_end = models.DateTimeField()
    label = models.CharField(db_index=True, max_length=255)
    count = models.IntegerField()

    class Meta:  # pylint: disable=old-style-class
        db_table = 'course_activity'

    @classmethod
    def get_most_recent(cls, course_id, label):
        """
        Return the activity record for the most recently computed week.

        Arguments:
            course_id (str): Identifier of the course to look up.
            label (str): The type of activity to look up, e.g. ``ACTIVE``.

        Returns:
            The matching record with the greatest ``interval_end``.
        """
        candidates = cls.objects.filter(course_id=course_id, label=label)
        return candidates.latest('interval_end')
class CourseActivityByWeekSerializer(serializers.ModelSerializer):
    """
    Serialized form of CourseActivityByWeek that leaves out the id field.

    The underlying table is managed by the data pipeline, which may remove and add records at any time. The id of a
    given record is therefore likely to change unexpectedly, so it is not exposed.
    """

    class Meta:  # pylint: disable=old-style-class
        model = CourseActivityByWeek
        # Explicit list of exposed fields; the primary key is intentionally absent.
        fields = (
            'course_id',
            'interval_start',
            'interval_end',
            'label',
            'count',
        )
from contextlib import contextmanager
from datetime import datetime
from functools import partial
from django.conf import settings
from django.contrib.auth.models import User
......@@ -7,6 +9,7 @@ from django.db.utils import ConnectionHandler
from django.test import TestCase
from django.test.utils import override_settings
from mock import patch
import pytz
from rest_framework.authtoken.models import Token
from analyticsdata.views import handle_internal_server_error, handle_missing_resource_error
......@@ -16,7 +19,16 @@ from analyticsdata.views import handle_internal_server_error, handle_missing_res
# for subsequent versions if there are breaking changes introduced in those versions.
class OperationalEndpointsTest(TestCase):
class TestCaseWithAutenticatation(TestCase):
    """Base test case that offers a GET helper carrying a valid token authorization header."""

    def setUp(self):
        """Create a user with an API token and bind ``self.authenticated_get`` to it."""
        super(TestCaseWithAutenticatation, self).setUp()
        user = User.objects.create_user('tester', 'test@example.com', 'testpassword')
        api_token = Token.objects.create(user=user)
        authorization = 'Token ' + api_token.key
        # Same as self.client.get, but every request is sent with the token header.
        self.authenticated_get = partial(self.client.get, HTTP_AUTHORIZATION=authorization)
class OperationalEndpointsTest(TestCaseWithAutenticatation):
def test_status(self):
response = self.client.get('/api/v0/status')
......@@ -27,9 +39,7 @@ class OperationalEndpointsTest(TestCase):
self.assertEquals(response.status_code, 401)
def test_authentication_check_success(self):
test_user = User.objects.create_user('tester', 'test@example.com', 'testpassword')
token = Token.objects.create(user=test_user)
response = self.client.get('/api/v0/authenticated', HTTP_AUTHORIZATION='Token ' + token.key)
response = self.authenticated_get('/api/v0/authenticated')
self.assertEquals(response.status_code, 200)
def test_health(self):
......@@ -97,3 +107,55 @@ class ErrorHandlingTest(TestCase):
def test_missing_resource_handling(self):
response = handle_missing_resource_error(None)
self.validate_error_response(response, 404)
class CourseActivityLastWeekTest(TestCaseWithAutenticatation):
    """Tests for the ``recent_activity`` course endpoint, run against the single course activity fixture."""

    fixtures = ['single_course_activity']
    COURSE_ID = 'edX/DemoX/Demo_Course'

    @staticmethod
    def get_activity_record(**kwargs):
        """Return the expected response data for the ACTIVE record, with any ``kwargs`` overriding fields."""
        expected = {
            'course_id': 'edX/DemoX/Demo_Course',
            'interval_start': datetime(2014, 5, 24, 0, 0, tzinfo=pytz.utc),
            'interval_end': datetime(2014, 6, 1, 0, 0, tzinfo=pytz.utc),
            'label': 'ACTIVE',
            'count': 300,
        }
        return dict(expected, **kwargs)

    def _assert_activity(self, url, **overrides):
        """GET ``url`` with authentication and check for a 200 carrying the expected record."""
        response = self.authenticated_get(url)
        self.assertEquals(response.status_code, 200)
        self.assertEquals(response.data, self.get_activity_record(**overrides))

    def _assert_not_found(self, url):
        """GET ``url`` with authentication and check for a 404."""
        response = self.authenticated_get(url)
        self.assertEquals(response.status_code, 404)

    def test_activity(self):
        self._assert_activity('/api/v0/courses/{0}/recent_activity'.format(self.COURSE_ID))

    def test_activity_auth(self):
        # Deliberately uses the plain client: no credentials are sent.
        url = '/api/v0/courses/{0}/recent_activity'.format(self.COURSE_ID)
        response = self.client.get(url)
        self.assertEquals(response.status_code, 401)

    def test_url_encoded_course_id(self):
        self._assert_activity('/api/v0/courses/edX%2FDemoX%2FDemo_Course/recent_activity')

    def test_video_activity(self):
        label = 'PLAYED_VIDEO'
        url = '/api/v0/courses/{0}/recent_activity?label={1}'.format(self.COURSE_ID, label)
        self._assert_activity(url, label=label, count=400)

    def test_unknown_activity(self):
        label = 'MISSING_ACTIVITY_TYPE'
        self._assert_not_found('/api/v0/courses/{0}/recent_activity?label={1}'.format(self.COURSE_ID, label))

    def test_unknown_course_id(self):
        self._assert_not_found('/api/v0/courses/{0}/recent_activity'.format('foo'))

    def test_missing_course_id(self):
        self._assert_not_found('/api/v0/courses/recent_activity')
......@@ -7,6 +7,9 @@ urlpatterns = patterns(
url(r'^status$', views.status),
url(r'^authenticated$', views.authenticated),
url(r'^health$', views.health),
# Course Activity
url(r'^courses/(?P<course_id>.+)/recent_activity$', views.CourseActivityMostRecentWeekView.as_view())
)
urlpatterns = format_suffix_patterns(urlpatterns)
from rest_framework import generics
from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import AllowAny
from rest_framework.renderers import JSONRenderer
from rest_framework.response import Response
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db import connections
from django.http import HttpResponse
from django.http import HttpResponse, Http404
from analyticsdata.models import CourseActivityByWeek
from analyticsdata.models import CourseActivityByWeekSerializer
@api_view(['GET'])
......@@ -90,3 +96,50 @@ def _handle_error(status_code):
renderer = JSONRenderer()
content_type = '{media}; charset={charset}'.format(media=renderer.media_type, charset=renderer.charset)
return HttpResponse(renderer.render(info), content_type=content_type, status=status_code)
class CourseActivityMostRecentWeekView(generics.RetrieveAPIView):
    """
    Counts of users who performed various actions at least once during the most recently computed week.

    The default is all users who performed *any* action in the course.

    The representation has the following fields:

    - course_id: The ID of the course whose activity is described.
    - interval_start: All data from this timestamp up to the `interval_end` was considered when computing this data
      point.
    - interval_end: All data from `interval_start` up to this timestamp was considered when computing this data point.
      Note that data produced at exactly this time is **not** included.
    - label: The type of activity requested. Possible values are:
        - ACTIVE: The number of unique users who performed any action within the course, including actions not
          enumerated below.
        - PLAYED_VIDEO: The number of unique users who started watching any video in the course.
        - ATTEMPTED_PROBLEM: The number of unique users who answered any loncapa based question in the course.
        - POSTED_FORUM: The number of unique users who created a new post, responded to a post, or submitted a comment
          on any forum in the course.
    - count: The number of users who performed the activity indicated by the `label`.

    Parameters:

    - course_id (string): Unique identifier for the course.
    - label (string): The type of activity. Possible values:
        - `ACTIVE`
        - `PLAYED_VIDEO`
        - `ATTEMPTED_PROBLEM`
        - `POSTED_FORUM`
    """

    serializer_class = CourseActivityByWeekSerializer

    def get_object(self):  # pylint: disable=arguments-differ
        """
        Look up the most recent activity record for the requested course and label.

        The course comes from the ``course_id`` URL keyword argument and the label from the ``label`` query
        parameter, falling back to ``ACTIVE`` when the parameter is absent. Raises Http404 when no record matches.
        """
        requested_label = self.request.QUERY_PARAMS.get('label', 'ACTIVE')
        requested_course = self.kwargs.get('course_id')
        try:
            activity = CourseActivityByWeek.get_most_recent(requested_course, requested_label)
        except ObjectDoesNotExist:
            # Translate the ORM's "no such row" into an HTTP 404 response.
            raise Http404
        return activity
class Course(object):
    """Course scoped analytics."""

    # TODO: Should we have an acceptance test that runs the hadoop job to populate the database, serves the data with
    # the API server and uses the client to retrieve it and validate the various transports?

    def __init__(self, client, course_key):
        """
        Initialize the course scoped analytics accessor.

        Arguments:

            client (analyticsdataclient.client.Client): The client to use to access remote resources.
            course_key (mixed): An object that when passed to unicode() returns the unique identifier for the course as
                it is represented in the data pipeline results.

        """
        self.course_key = course_key
        self.client = client

    def _get(self, resource_template):
        """Fill the course identifier into `resource_template` and fetch the resulting resource via the client."""
        course_id = unicode(self.course_key)
        resource = resource_template.format(course_id)
        return self.client.get(resource)

    @property
    def recent_active_user_count(self):
        """A count of users who have recently interacted with the course in any way."""
        # TODO: should we return something more structured than a python dict?
        return self._get('courses/{0}/recent_activity')

    @property
    def recent_problem_activity_count(self):
        """A count of users who have recently attempted a problem."""
        # TODO: Can we avoid passing around strings like "ATTEMPTED_PROBLEM" in the data pipeline and the client?
        return self._get('courses/{0}/recent_activity?label=ATTEMPTED_PROBLEM')
from analyticsdataclient.client import Client, ClientError
class InMemoryClient(Client):
    """A fake client that serves resources previously stored in its ``resources`` dict."""

    def __init__(self):
        """Initialize the fake client with no stored resources."""
        super(InMemoryClient, self).__init__()
        # Maps resource path -> the value returned by get() for that path.
        self.resources = {}

    def has_resource(self, resource, timeout=None):
        """Return True iff `resource` has been stored, i.e. iff get() succeeds for it."""
        try:
            self.get(resource, timeout=timeout)
        except ClientError:
            return False
        else:
            return True

    def get(self, resource, timeout=None):
        """Return the stored value for `resource`; raise ClientError if nothing is stored for it."""
        try:
            stored = self.resources[resource]
        except KeyError:
            raise ClientError('Unable to find requested resource')
        return stored
from unittest import TestCase
from analyticsdataclient.course import Course
from analyticsdataclient.tests import InMemoryClient
class CourseTest(TestCase):
    """Checks that Course reads its data from the expected client resources."""

    def setUp(self):
        """Build a Course backed by an in-memory client."""
        in_memory_client = InMemoryClient()
        self.client = in_memory_client
        self.course = Course(in_memory_client, 'edX/DemoX/Demo_Course')

    def test_recent_activity(self):
        # These tests don't feel terribly useful, since it's not really testing any substantial code... just that mock
        # values are returned. The risky part of the interface (the URL and the response data) is not tested at all
        # since it is mocked out.
        resource = 'courses/{0}/recent_activity'.format('edX/DemoX/Demo_Course')
        expected_result = {
            'course_id': 'edX/DemoX/Demo_Course',
            'interval_start': '2014-05-24T00:00:00Z',
            'interval_end': '2014-06-01T00:00:00Z',
            'label': 'ACTIVE',
            'count': 300,
        }
        self.client.resources[resource] = expected_result

        actual_result = self.course.recent_active_user_count
        self.assertEquals(actual_result, expected_result)
from unittest import TestCase
from analyticsdataclient.client import Client, ClientError
from analyticsdataclient.status import Status
from analyticsdataclient.tests import InMemoryClient
class StatusTest(TestCase):
......@@ -46,23 +46,3 @@ class StatusTest(TestCase):
self.client.resources['health'] = {}
self.assertEquals(self.status.healthy, False)
class InMemoryClient(Client):
    """Test double for Client that answers requests from an in-memory dict."""

    def __init__(self):
        super(InMemoryClient, self).__init__()
        # Resource path -> canned value handed back by get().
        self.resources = {}

    def has_resource(self, resource, timeout=None):
        """Report whether get() would succeed for `resource`."""
        try:
            self.get(resource, timeout=timeout)
        except ClientError:
            found = False
        else:
            found = True
        return found

    def get(self, resource, timeout=None):
        """Return the canned value for `resource`, or raise ClientError when none was set."""
        try:
            value = self.resources[resource]
        except KeyError:
            raise ClientError('Unable to find requested resource')
        return value
from django.conf import settings
class DatabaseFromSettingRouter(object):
    """
    Database router that sends a model to the database alias named by a Django setting.

    A model opts in by defining a ``db_from_setting`` attribute containing the *name* of a setting (for example
    ``'ANALYTICS_DATABASE'``). The value of that setting is used as the database alias, falling back to ``'default'``
    when the setting is not defined. For models without the attribute every method returns None, which tells Django
    that this router has no opinion.
    """

    def db_for_read(self, model, **hints):
        """Return the alias to read `model` from, or None if the model is not managed by this router."""
        return self._get_database(model)

    def db_for_write(self, model, **hints):
        """Return the alias to write `model` to, or None if the model is not managed by this router."""
        return self._get_database(model)

    def allow_relation(self, obj1, obj2, **hints):
        """
        Decide whether a relation between `obj1` and `obj2` is allowed.

        Returns True/False only when *both* objects are managed by this router (True iff they map to the same
        alias). When either object is unmanaged the router does not know where it lives, so it returns None and
        lets Django apply its default rule instead of force-allowing or force-denying the relation.
        """
        first_db = self._get_database(obj1)
        second_db = self._get_database(obj2)
        if first_db is None or second_db is None:
            return None
        return first_db == second_db

    def allow_syncdb(self, db, model):
        """Return whether `model` should be synced into `db`, or None if the model is not managed by this router."""
        dest_db = self._get_database(model)
        if dest_db is None:
            return None
        return db == dest_db

    def _get_database(self, model):
        """Resolve the alias for `model` (a model class or instance) via its ``db_from_setting``, else None."""
        setting_name = getattr(model, 'db_from_setting', None)
        if not setting_name:
            return None
        return getattr(settings, setting_name, 'default')
......@@ -237,9 +237,6 @@ WSGI_APPLICATION = '%s.wsgi.application' % SITE_NAME
########## REST FRAMEWORK CONFIGURATION
REST_FRAMEWORK = {
'DEFAULT_MODEL_SERIALIZER_CLASS':
'rest_framework.serializers.HyperlinkedModelSerializer',
'DEFAULT_PERMISSION_CLASSES': [
'rest_framework.permissions.IsAuthenticated'
],
......@@ -257,6 +254,8 @@ REST_FRAMEWORK = {
########## ANALYTICS DATA API CONFIGURATION
ANALYTICS_DATABASE = 'default'
DATABASE_ROUTERS = ['analyticsdataserver.router.DatabaseFromSettingRouter']
ENABLE_ADMIN_SITE = False
########## END ANALYTICS DATA API CONFIGURATION
......@@ -31,6 +31,14 @@ DATABASES = {
'PASSWORD': '',
'HOST': '',
'PORT': '',
},
'analytics': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': normpath(join(DJANGO_ROOT, 'analytics.db')),
'USER': '',
'PASSWORD': '',
'HOST': '',
'PORT': '',
}
}
########## END DATABASE CONFIGURATION
......@@ -69,6 +77,7 @@ DEBUG_TOOLBAR_CONFIG = {
########## ANALYTICS DATA API CONFIGURATION
ANALYTICS_DATABASE = 'analytics'
ENABLE_ADMIN_SITE = True
########## END ANALYTICS DATA API CONFIGURATION
......@@ -75,39 +75,34 @@ Validates provided credentials. Returns no data, a simple 200 OK status code is
# Group Course
## User Activity [/api/v0/courses/{course_id}/user_activity{?from_date,to_date,group_by}]
## Activity [/api/v0/courses/{course_id}/recent_activity{?label}]
Counts of unique users who performed various actions of interest. A unique user is defined as a user who performed at least one action within a time interval specified by the `group_by` parameter. This time interval used for grouping results is referred to as the `unit` and can either be a week or a day.
Counts of users who performed various actions at least once in the past week. The default is all users who performed *any* action in the course.
Each data point has the following fields:
The representation has the following fields:
- from_date (timestamp): All data from this timestamp up to the `to_date` was considered when computing this data point.
- to_date (timestamp): All data from `from_date` up to this timestamp was considered when computing this data point. Note that data produced at exactly this time is **not** included.
- visited (integer): The number of unique users who visited the course.
- started_video (integer): The number of unique users who started watching any video in the course.
- answered_question (integer): The number of unique users who answered any capa based question in the course.
- posted_forum (integer): The number of unique users who created a new post, responded to a post, or submitted a comment on any forum in the course.
- course_id: The ID of the course whose activity is described.
- interval_start: All data from this timestamp up to the `interval_end` was considered when computing this data point.
- interval_end: All data from `interval_start` up to this timestamp was considered when computing this data point. Note that data produced at exactly this time is **not** included.
- label: The type of activity requested. Possible values are:
- ACTIVE: The number of unique users who visited the course.
- PLAYED_VIDEO: The number of unique users who started watching any video in the course.
- ATTEMPTED_PROBLEM: The number of unique users who answered any loncapa based question in the course.
- POSTED_FORUM: The number of unique users who created a new post, responded to a post, or submitted a comment on any forum in the course.
- count: The number of users who performed the activity indicated by the `label`.
+ Parameters
+ course_id (string) ... ID of the course.
Currently accepts url encoded slash separated course key values. In the future will also accept other course identifying strings.
+ from_date (optional, timestamp) ... A time within the first unit to include in the results.
Defaults to midnight on the first day of the most recent complete unit at the UTC time the server processes the request.
+ to_date (optional, timestamp) ... A time within the unit after the last unit to include in the results.
Defaults to midnight on the day after the last day of the most recent complete unit at the UTC time the server processes the request.
+ group_by = `week` (optional, string) ... Specifies the granularity of groups returned.
This string should uniquely identify the course.
Users that appear multiple times in this interval will be counted only once.
+ label = `ACTIVE` (optional, string) ... The type of activity.
+ Values
+ `week`
+ `day`
+ `ACTIVE`
+ `PLAYED_VIDEO`
+ `ATTEMPTED_PROBLEM`
+ `POSTED_FORUM`
### User Activity over Time [GET]
......@@ -122,20 +117,11 @@ Each data point has the following fields:
+ Body
{
"count": 1,
"next": null,
"previous": null,
"results": [
{
"from_date": "2014-05-17T00:00:00.000Z",
"to_date": "2014-05-24T00:00:00.000Z",
"course_id": "edX/Demo_Course/2013_T1",
"visited": 1047,
"started_video": 931,
"answered_question": 452,
"posted_forum": 318
}
]
"course_id": "edx/Demo_Course/2014T2",
"interval_start": "2014-05-17T00:00:00.000Z",
"interval_end": "2014-05-24T00:00:00.000Z",
"label": "ACTIVE",
"count": 1024
}
# Group Problem
......
......@@ -3,4 +3,3 @@ django-model-utils==1.4.0
South==0.8.1
djangorestframework==2.3.5
Markdown==2.4.1
distribute>=0.6.28, <0.7
......@@ -3,4 +3,3 @@
-r test.txt
django-debug-toolbar==0.9.4
Sphinx==1.2b1
requests==2.3.0 # nose requires this since it loads the client during test discovery
......@@ -3,4 +3,4 @@
-r base.txt
gunicorn==0.17.4
MySQL-python==1.2.4
MySQL-python==1.2.5
......@@ -9,3 +9,4 @@ mock==1.0.1
pep257==0.3.2
httpretty==0.8.0
requests==2.3.0
pytz==2012h
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment