Commit 05ee144f by Dennis Jen

Merge pull request #110 from edx/learner-analytics

Learner analytics
parents 3b4aa462 010ae3fa
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
omit = analyticsdataserver/settings* omit = analyticsdataserver/settings*
*wsgi.py *wsgi.py
analytics_data_api/management/commands/generate_fake_course_data.py analytics_data_api/management/commands/generate_fake_course_data.py
source = analyticsdataserver, analytics_data_api
branch = True
[report] [report]
# Regexes for lines to exclude from consideration # Regexes for lines to exclude from consideration
...@@ -10,3 +12,9 @@ exclude_lines = ...@@ -10,3 +12,9 @@ exclude_lines =
pragma: no cover pragma: no cover
raise NotImplementedError raise NotImplementedError
[html]
directory = ${COVERAGE_DIR}/html/
[xml]
output = ${COVERAGE_DIR}/coverage.xml
...@@ -2,16 +2,19 @@ language: python ...@@ -2,16 +2,19 @@ language: python
python: "2.7" python: "2.7"
install: install:
- "pip install coveralls" - "pip install coveralls"
- "make test.install_elasticsearch"
cache: pip cache: pip
# Use docker for builds # Use docker for builds
sudo: false sudo: false
before_script:
- make test.run_elasticsearch
# Recommended by Travis to make sure Elasticsearch has actually started
# before the tests run. See https://docs.travis-ci.com/user/database-setup/#ElasticSearch
- sleep 10
script: script:
- make validate - make validate
- git fetch origin master:refs/remotes/origin/master # https://github.com/edx/diff-cover#troubleshooting - git fetch origin master:refs/remotes/origin/master # https://github.com/edx/diff-cover#troubleshooting
- make diff.report - make diff.report
branches:
only:
- master
after_success: after_success:
- coveralls - coveralls
- bash ./scripts/build-stats-to-datadog.sh - bash ./scripts/build-stats-to-datadog.sh
ROOT = $(shell echo "$$PWD") ROOT = $(shell echo "$$PWD")
COVERAGE = $(ROOT)/build/coverage COVERAGE_DIR = $(ROOT)/build/coverage
PACKAGES = analyticsdataserver analytics_data_api PACKAGES = analyticsdataserver analytics_data_api
DATABASES = default analytics DATABASES = default analytics
ELASTICSEARCH_VERSION = 1.5.2
TEST_SETTINGS = analyticsdataserver.settings.test
.PHONY: requirements develop clean diff.report view.diff.report quality .PHONY: requirements develop clean diff.report view.diff.report quality
requirements: requirements:
pip install -q -r requirements/base.txt pip install -q -r requirements/base.txt
# Download and unpack the pinned Elasticsearch release (ELASTICSEARCH_VERSION)
# into the current directory.
#   --fail : make curl exit non-zero on an HTTP error instead of saving the
#            error page under the .zip name, which would only surface later
#            as a confusing unzip failure.
#   -o     : let unzip overwrite files from a previous run without prompting,
#            so the target can be re-run non-interactively.
test.install_elasticsearch:
	curl --fail -L -O https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-$(ELASTICSEARCH_VERSION).zip
	unzip -o elasticsearch-$(ELASTICSEARCH_VERSION).zip
# Start Elasticsearch from the directory unpacked by test.install_elasticsearch.
# The cd and the launch share one recipe line because each recipe line runs in
# its own shell. NOTE(review): -d is presumably Elasticsearch's "run in the
# background" flag -- confirm against the pinned version's CLI docs.
test.run_elasticsearch:
cd elasticsearch-$(ELASTICSEARCH_VERSION) && ./bin/elasticsearch -d
test.requirements: requirements test.requirements: requirements
pip install -q -r requirements/test.txt pip install -q -r requirements/test.txt
...@@ -20,22 +28,22 @@ clean: ...@@ -20,22 +28,22 @@ clean:
coverage erase coverage erase
test: clean test: clean
. ./.test_env && ./manage.py test --settings=analyticsdataserver.settings.test --with-ignore-docstrings \ coverage run ./manage.py test --settings=$(TEST_SETTINGS) \
--exclude-dir=analyticsdataserver/settings --with-coverage --cover-inclusive --cover-branches \ --with-ignore-docstrings --exclude-dir=analyticsdataserver/settings \
--cover-html --cover-html-dir=$(COVERAGE)/html/ \
--cover-xml --cover-xml-file=$(COVERAGE)/coverage.xml \
$(foreach package,$(PACKAGES),--cover-package=$(package)) \
$(PACKAGES) $(PACKAGES)
export COVERAGE_DIR=$(COVERAGE_DIR) && \
coverage html && \
coverage xml
diff.report: diff.report:
diff-cover $(COVERAGE)/coverage.xml --html-report $(COVERAGE)/diff_cover.html diff-cover $(COVERAGE_DIR)/coverage.xml --html-report $(COVERAGE_DIR)/diff_cover.html
diff-quality --violations=pep8 --html-report $(COVERAGE)/diff_quality_pep8.html diff-quality --violations=pep8 --html-report $(COVERAGE_DIR)/diff_quality_pep8.html
diff-quality --violations=pylint --html-report $(COVERAGE)/diff_quality_pylint.html diff-quality --violations=pylint --html-report $(COVERAGE_DIR)/diff_quality_pylint.html
view.diff.report: view.diff.report:
xdg-open file:///$(COVERAGE)/diff_cover.html xdg-open file:///$(COVERAGE_DIR)/diff_cover.html
xdg-open file:///$(COVERAGE)/diff_quality_pep8.html xdg-open file:///$(COVERAGE_DIR)/diff_quality_pep8.html
xdg-open file:///$(COVERAGE)/diff_quality_pylint.html xdg-open file:///$(COVERAGE_DIR)/diff_quality_pylint.html
quality: quality:
pep8 $(PACKAGES) pep8 $(PACKAGES)
......
...@@ -29,6 +29,23 @@ Getting Started ...@@ -29,6 +29,23 @@ Getting Started
$ ./manage.py migrate --noinput $ ./manage.py migrate --noinput
$ ./manage.py migrate --noinput --database=analytics $ ./manage.py migrate --noinput --database=analytics
The learner API endpoints require elasticsearch with a mapping
defined on this `wiki page <https://openedx.atlassian.net/wiki/display/AN/Learner+Analytics#LearnerAnalytics-ElasticSearch>`_.
The connection to elasticsearch can be configured by the
``ELASTICSEARCH_LEARNERS_HOST`` and
``ELASTICSEARCH_LEARNERS_INDEX`` django settings. For testing, you
can install elasticsearch locally:
::
$ make test.install_elasticsearch
To run the cluster for testing:
::
$ make test.run_elasticsearch
3. Create a user and authentication token. Note that the user will be 3. Create a user and authentication token. Note that the user will be
created if one does not exist. created if one does not exist.
......
# Singular entity-type identifiers.
DISCUSSION = 'discussion'
PROBLEM = 'problem'
VIDEO = 'video'
# Entity types in their singular form; DISCUSSION appears in both lists.
INDIVIDUAL_TYPES = [DISCUSSION, PROBLEM, VIDEO]
# Plural entity-type identifiers.
PROBLEMS = 'problems'
VIDEOS = 'videos'
# Entity types in their plural form (DISCUSSION has no separate plural constant).
AGGREGATE_TYPES = [DISCUSSION, PROBLEMS, VIDEOS]
from analytics_data_api.constants import engagement_entity_types
# Event identifiers that can be recorded against an engagement entity.
ATTEMPTED = 'attempted'
COMPLETED = 'completed'
CONTRIBUTED = 'contributed'
VIEWED = 'viewed'
# map entity types to events
# Both the singular and plural spelling of an entity type map to the same
# list of events; DISCUSSION has a single spelling.
EVENTS = {
engagement_entity_types.DISCUSSION: [CONTRIBUTED],
engagement_entity_types.PROBLEM: [ATTEMPTED, COMPLETED],
engagement_entity_types.PROBLEMS: [ATTEMPTED, COMPLETED],
engagement_entity_types.VIDEO: [VIEWED],
engagement_entity_types.VIDEOS: [VIEWED],
}
from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED
class EngagementType(object):
    """
    Encapsulates:
        - The API consumer-facing display name for engagement types
        - The internal question of whether the metric should be counted in terms
          of the entity type or the raw number of events.
    """
    # Defines the current canonical set of engagement types used in the Learner
    # Analytics API.
    ALL_TYPES = (
        'problems_attempted',
        'problems_completed',
        'videos_viewed',
        'discussion_contributions',
    )

    def __init__(self, entity_type, event_type):
        """
        Initializes an EngagementType for a particular entity and event type.

        Arguments:
            entity_type (str): the type of module interacted with
            event_type (str): the type of interaction on that entity

        Raises:
            ValueError: if the (entity_type, event_type) pair is not one of
                the supported combinations.  This includes a known entity
                type paired with an event it does not support (for example
                a problem that is "viewed"), which previously slipped
                through and left ``name``/``is_counted_by_entity`` unset.
        """
        # (entity type, event type) -> (display name, is_counted_by_entity)
        supported = {
            (PROBLEM, ATTEMPTED): ('problems_attempted', True),
            (PROBLEM, COMPLETED): ('problems_completed', True),
            (VIDEO, VIEWED): ('videos_viewed', True),
            # Note that the discussion contribution metric counts
            # total discussion contributions, not number of
            # discussions contributed to.
            (DISCUSSION, CONTRIBUTED): ('discussion_contributions', False),
        }
        try:
            self.name, self.is_counted_by_entity = supported[(entity_type, event_type)]
        except (KeyError, TypeError):
            # TypeError covers unhashable arguments, which can never match.
            raise ValueError(
                'No display name found for entity type "{entity_type}" and event type "{event_type}"'.format(
                    entity_type=entity_type,
                    event_type=event_type,
                )
            )
# NOTE(review): by its name, the default page size for learner list responses;
# the code that reads it is not visible here -- confirm.
LEARNER_API_DEFAULT_LIST_PAGE_SIZE = 25
# Names of the learner segments.
SEGMENTS = ["highly_engaged", "disengaging", "struggling", "inactive", "unenrolled"]
from elasticsearch import Elasticsearch
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from analytics_data_api.management.utils import elasticsearch_settings_defined
class Command(BaseCommand):
    """
    Creates the indices named by ``settings.ELASTICSEARCH_LEARNERS_INDEX`` and
    ``settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX``.  An index that already
    exists is left untouched and a notice is written to stderr instead.
    """
    help = 'Creates Elasticsearch indices used by the Analytics Data API.'

    def handle(self, *args, **options):
        """
        Entry point of the command.

        Raises:
            CommandError: if any of the three Elasticsearch learner settings
                is undefined.
        """
        if not elasticsearch_settings_defined():
            raise CommandError(
                'You must define settings.ELASTICSEARCH_LEARNERS_HOST, '
                'settings.ELASTICSEARCH_LEARNERS_INDEX, and settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX'
            )

        client = Elasticsearch([settings.ELASTICSEARCH_LEARNERS_HOST])
        self._create_unless_present(client, settings.ELASTICSEARCH_LEARNERS_INDEX, self._roster_body())
        self._create_unless_present(client, settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX, self._marker_body())

    def _create_unless_present(self, client, index_name, body):
        """Create `index_name` with `body`, or report on stderr that it already exists."""
        if client.indices.exists(index_name):
            self.stderr.write('"{}" index already exists.'.format(index_name))
            return
        client.indices.create(index=index_name, body=body)

    @staticmethod
    def _roster_body():
        """Index body holding the 'roster_entry' mapping."""
        properties = {
            'name': {'type': 'string'},
            'username': {'type': 'string', 'index': 'not_analyzed'},
            'email': {'type': 'string', 'index': 'not_analyzed', 'doc_values': True},
            'course_id': {'type': 'string', 'index': 'not_analyzed'},
            'enrollment_mode': {'type': 'string', 'index': 'not_analyzed', 'doc_values': True},
            'segments': {'type': 'string'},
            'cohort': {'type': 'string', 'index': 'not_analyzed', 'doc_values': True},
            'discussion_contributions': {'type': 'integer', 'doc_values': True},
            'problems_attempted': {'type': 'integer', 'doc_values': True},
            'problems_completed': {'type': 'integer', 'doc_values': True},
            'problem_attempts_per_completed': {'type': 'float', 'doc_values': True},
            'attempt_ratio_order': {'type': 'integer', 'doc_values': True},
            'videos_viewed': {'type': 'integer', 'doc_values': True},
            'enrollment_date': {'type': 'date', 'doc_values': True},
        }
        return {'mappings': {'roster_entry': {'properties': properties}}}

    @staticmethod
    def _marker_body():
        """Index body holding the 'marker' mapping."""
        properties = {
            'date': {'type': 'date', 'doc_values': True},
            'target_index': {'type': 'string'},
        }
        return {'mappings': {'marker': {'properties': properties}}}
from elasticsearch import Elasticsearch
from django.conf import settings
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    """
    Deletes the indices named by ``settings.ELASTICSEARCH_LEARNERS_INDEX`` and
    ``settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX`` when they exist.
    """
    help = 'Removes Elasticsearch indices used by the Analytics Data API'

    def handle(self, *args, **options):
        """Delete each configured index that is currently present on the cluster."""
        es = Elasticsearch([settings.ELASTICSEARCH_LEARNERS_HOST])
        for index in [settings.ELASTICSEARCH_LEARNERS_INDEX, settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX]:
            # Check the index being deleted, not always the update index:
            # otherwise a missing update index would leave the learners index
            # in place, and a missing learners index would make delete() fail.
            if es.indices.exists(index):
                es.indices.delete(index=index)
...@@ -8,7 +8,7 @@ import random ...@@ -8,7 +8,7 @@ import random
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.utils import timezone from django.utils import timezone
from analytics_data_api.v0 import models from analytics_data_api.v0 import models
from analytics_data_api.constants import engagement_entity_types, engagement_events
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -32,7 +32,11 @@ class Command(BaseCommand): ...@@ -32,7 +32,11 @@ class Command(BaseCommand):
help = 'Generate fake data' help = 'Generate fake data'
option_list = BaseCommand.option_list + ( option_list = BaseCommand.option_list + (
make_option('-n', '--num-weeks', action='store', type="int", dest='num_weeks', make_option('-n', '--num-weeks', action='store', type="int", dest='num_weeks',
help='"Number of weeks worth of data to generate.'), help='Number of weeks worth of data to generate.'),
make_option('-c', '--course_id', action='store', type='string', dest='course_id',
help='Course ID for which to generate fake data'),
make_option('-u', '--username', action='store', type='string', dest='username',
help='Username for which to generate fake data'),
) )
def generate_daily_data(self, course_id, start_date, end_date): def generate_daily_data(self, course_id, start_date, end_date):
...@@ -182,8 +186,48 @@ class Command(BaseCommand): ...@@ -182,8 +186,48 @@ class Command(BaseCommand):
users_at_start=users_at_start, users_at_start=users_at_start,
users_at_end=random.randint(100, users_at_start)) users_at_end=random.randint(100, users_at_start))
def generate_learner_engagement_data(self, course_id, username, start_date, end_date, max_value=100):
    """
    Replace all ModuleEngagement rows with random fake data for one learner.

    The day cursor is advanced *before* rows are written, so the first day
    written is ``start_date`` plus one day.  For every individual entity type
    and each of its events, a random number (0..max_value) of rows is created
    per day, each with a random ``count``.
    """
    logger.info("Deleting learner engagement module data...")
    models.ModuleEngagement.objects.all().delete()

    logger.info("Generating learner engagement module data...")
    one_day = datetime.timedelta(days=1)
    day = start_date
    while day < end_date:
        day = day + one_day
        for entity_type in engagement_entity_types.INDIVIDUAL_TYPES:
            for event in engagement_events.EVENTS[entity_type]:
                # A draw of zero simply results in no rows for this pair.
                rows_to_create = random.randint(0, max_value)
                for _ in xrange(rows_to_create):
                    count = random.randint(0, max_value / 20)
                    models.ModuleEngagement.objects.create(
                        course_id=course_id,
                        username=username,
                        date=day,
                        entity_type=entity_type,
                        entity_id='an-id-{}-{}'.format(entity_type, event),
                        event=event,
                        count=count,
                    )
    logger.info("Done!")
def generate_learner_engagement_range_data(self, course_id, start_date, end_date, max_value=100):
    """
    Replace all ModuleEngagementMetricRanges rows with random fake ranges.

    For every aggregate entity type / event pair, a 'low' range from 0 up to a
    random ceiling and a 'high' range from a random floor (at or above that
    ceiling) up to ``max_value`` are created.
    """
    logger.info("Deleting engagement range data...")
    models.ModuleEngagementMetricRanges.objects.all().delete()

    logger.info("Generating engagement range data...")
    for entity_type in engagement_entity_types.AGGREGATE_TYPES:
        for event in engagement_events.EVENTS[entity_type]:
            shared_fields = {
                'course_id': course_id,
                'start_date': start_date,
                'end_date': end_date,
                'metric': '{0}_{1}'.format(entity_type, event),
            }

            low_ceil = random.random() * max_value * 0.5
            models.ModuleEngagementMetricRanges.objects.create(
                range_type='low', low_value=0, high_value=low_ceil, **shared_fields)

            high_floor = random.random() * max_value * 0.5 + low_ceil
            models.ModuleEngagementMetricRanges.objects.create(
                range_type='high', low_value=high_floor, high_value=max_value, **shared_fields)
def handle(self, *args, **options): def handle(self, *args, **options):
course_id = 'edX/DemoX/Demo_Course' course_id = options.get('course_id', 'edX/DemoX/Demo_Course')
username = options.get('username', 'ed_xavier')
video_id = '0fac49ba' video_id = '0fac49ba'
video_module_id = 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9' video_module_id = 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9'
start_date = datetime.datetime(year=2015, month=1, day=1, tzinfo=timezone.utc) start_date = datetime.datetime(year=2015, month=1, day=1, tzinfo=timezone.utc)
...@@ -199,3 +243,5 @@ class Command(BaseCommand): ...@@ -199,3 +243,5 @@ class Command(BaseCommand):
self.generate_daily_data(course_id, start_date, end_date) self.generate_daily_data(course_id, start_date, end_date)
self.generate_video_data(course_id, video_id, video_module_id) self.generate_video_data(course_id, video_id, video_module_id)
self.generate_video_timeline_data(video_id) self.generate_video_timeline_data(video_id)
self.generate_learner_engagement_data(course_id, username, start_date, end_date)
self.generate_learner_engagement_range_data(course_id, start_date, end_date)
from django.conf import settings
def elasticsearch_settings_defined():
    """
    Return True when none of the Elasticsearch learner settings (host, index
    and update index) is None, False otherwise.
    """
    required_values = (
        settings.ELASTICSEARCH_LEARNERS_HOST,
        settings.ELASTICSEARCH_LEARNERS_INDEX,
        settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX,
    )
    return not any(value is None for value in required_values)
import datetime
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.core.management import call_command, CommandError from django.core.management import call_command, CommandError
from django.test import TestCase from django.test import TestCase
...@@ -6,7 +8,7 @@ from rest_framework.authtoken.models import Token ...@@ -6,7 +8,7 @@ from rest_framework.authtoken.models import Token
from analytics_data_api.constants.country import get_country, UNKNOWN_COUNTRY from analytics_data_api.constants.country import get_country, UNKNOWN_COUNTRY
from analytics_data_api.utils import delete_user_auth_token, set_user_auth_token from analytics_data_api.utils import date_range, delete_user_auth_token, set_user_auth_token
class UtilsTests(TestCase): class UtilsTests(TestCase):
...@@ -91,3 +93,30 @@ class CountryTests(TestCase): ...@@ -91,3 +93,30 @@ class CountryTests(TestCase):
# Return unknown country if code is invalid # Return unknown country if code is invalid
self.assertEqual(get_country('A1'), UNKNOWN_COUNTRY) self.assertEqual(get_country('A1'), UNKNOWN_COUNTRY)
self.assertEqual(get_country(None), UNKNOWN_COUNTRY) self.assertEqual(get_country(None), UNKNOWN_COUNTRY)
class DateRangeTests(TestCase):
    """Checks the half-open [start, end) behaviour of ``date_range``."""

    def test_empty_range(self):
        # Identical start and end: nothing is produced.
        moment = datetime.datetime(2016, 1, 1)
        self.assertEqual(list(date_range(moment, moment)), [])

    def test_range_exclusive(self):
        # The end date itself is never produced.
        first_day = datetime.datetime(2016, 1, 1)
        second_day = datetime.datetime(2016, 1, 2)
        self.assertEqual(list(date_range(first_day, second_day)), [first_day])

    def test_delta_goes_past_end_date(self):
        # A step larger than the whole range still produces the start date.
        first_day = datetime.datetime(2016, 1, 1)
        third_day = datetime.datetime(2016, 1, 3)
        big_step = datetime.timedelta(days=5)
        self.assertEqual(list(date_range(first_day, third_day, big_step)), [first_day])

    def test_general_range(self):
        # Default step of one day across a four-day span.
        first_day = datetime.datetime(2016, 1, 1)
        fifth_day = datetime.datetime(2016, 1, 5)
        expected = [datetime.datetime(2016, 1, day) for day in (1, 2, 3, 4)]
        self.assertEqual(list(date_range(first_day, fifth_day)), expected)
from collections import defaultdict import datetime
from importlib import import_module
from django.db.models import Q from django.db.models import Q
from rest_framework.authtoken.models import Token from rest_framework.authtoken.models import Token
from analytics_data_api.v0.models import ProblemResponseAnswerDistribution
def delete_user_auth_token(username): def delete_user_auth_token(username):
""" """
...@@ -47,49 +46,6 @@ def matching_tuple(answer): ...@@ -47,49 +46,6 @@ def matching_tuple(answer):
) )
# Groups the answers in `problem` by value_id and merges each multi-answer
# group into its first answer by summing counts.  Returns `problem` unchanged
# (apart from the consolidated_variant flag set below) when any group contains
# more than one distinct matching_tuple; otherwise returns a new list.
def consolidate_answers(problem):
""" Attempt to consolidate erroneously randomized answers. """
answer_sets = defaultdict(list)
match_tuple_sets = defaultdict(set)
# Every answer is flagged as not consolidated up front; note this mutates
# the input objects even on the early-return path.
for answer in problem:
answer.consolidated_variant = False
answer_sets[answer.value_id].append(answer)
match_tuple_sets[answer.value_id].add(matching_tuple(answer))
# If a part has more than one unique tuple of matching fields, do not consolidate.
for _, match_tuple_set in match_tuple_sets.iteritems():
if len(match_tuple_set) > 1:
return problem
consolidated_answers = []
for _, answers in answer_sets.iteritems():
consolidated_answer = None
# A group with a single answer is passed through as-is.
if len(answers) == 1:
consolidated_answers.append(answers[0])
continue
for answer in answers:
# Relies on the answer objects being truthy: the first answer of the
# group becomes the accumulator, later ones are added into it.
if consolidated_answer:
if isinstance(consolidated_answer, ProblemResponseAnswerDistribution):
consolidated_answer.count += answer.count
else:
consolidated_answer.first_response_count += answer.first_response_count
consolidated_answer.last_response_count += answer.last_response_count
else:
consolidated_answer = answer
consolidated_answer.variant = None
consolidated_answer.consolidated_variant = True
consolidated_answers.append(consolidated_answer)
return consolidated_answers
def dictfetchall(cursor): def dictfetchall(cursor):
"""Returns all rows from a cursor as a dict""" """Returns all rows from a cursor as a dict"""
...@@ -98,3 +54,33 @@ def dictfetchall(cursor): ...@@ -98,3 +54,33 @@ def dictfetchall(cursor):
dict(zip([col[0] for col in desc], row)) dict(zip([col[0] for col in desc], row))
for row in cursor.fetchall() for row in cursor.fetchall()
] ]
def load_fully_qualified_definition(definition):
    """
    Resolve a dotted path such as ``'package.module.Name'`` to the object it
    names: everything before the last dot is imported as a module and the
    final component is looked up as an attribute of that module.
    """
    module_path, attribute_name = definition.rsplit('.', 1)
    return getattr(import_module(module_path), attribute_name)
def date_range(start_date, end_date, delta=datetime.timedelta(days=1)):
    """
    Returns a generator that iterates over the date range [start_date, end_date)
    (start_date inclusive, end_date exclusive).  Each date in the range is
    offset from the previous date by a change of `delta`, which defaults
    to one day.

    Arguments:
        start_date (datetime.datetime): The start date of the range, inclusive
        end_date (datetime.datetime): The end date of the range, exclusive
        delta (datetime.timedelta): The change in time between dates in the
            range.

    Returns:
        Generator: A generator which iterates over all dates in the specified
            range.

    Raises:
        ValueError: on first iteration, if the range is non-empty and `delta`
            is zero or negative.  Such a step can never reach `end_date`, so
            the generator would otherwise never terminate.
    """
    cur_date = start_date
    # Only reject the combination that cannot terminate; an empty range with
    # any delta keeps yielding nothing, exactly as before.
    if cur_date < end_date and delta <= datetime.timedelta(0):
        raise ValueError('delta must be positive to iterate from start_date to end_date')
    while cur_date < end_date:
        yield cur_date
        cur_date += delta
default_app_config = 'analytics_data_api.v0.apps.ApiAppConfig'
from django.apps import AppConfig
from django.conf import settings
from elasticsearch_dsl import connections
from analytics_data_api.utils import load_fully_qualified_definition
# App config for analytics_data_api.v0; ready() builds the parameters for an
# elasticsearch-dsl connection from Django settings and registers it.
# NOTE(review): indentation is lost in this view, so exactly which of the
# statements below sit inside each `if` should be confirmed against the file.
class ApiAppConfig(AppConfig):
name = 'analytics_data_api.v0'
def ready(self):
super(ApiAppConfig, self).ready()
if settings.ELASTICSEARCH_LEARNERS_HOST:
connection_params = {'hosts': [settings.ELASTICSEARCH_LEARNERS_HOST]}
# ELASTICSEARCH_CONNECTION_CLASS is a dotted path, resolved to the object it names.
if settings.ELASTICSEARCH_CONNECTION_CLASS:
connection_params['connection_class'] = \
load_fully_qualified_definition(settings.ELASTICSEARCH_CONNECTION_CLASS)
# aws settings
connection_params['aws_access_key_id'] = settings.ELASTICSEARCH_AWS_ACCESS_KEY_ID
connection_params['aws_secret_access_key'] = settings.ELASTICSEARCH_AWS_SECRET_ACCESS_KEY
connection_params['region'] = settings.ELASTICSEARCH_CONNECTION_DEFAULT_REGION
# Remove 'None' values so that we don't overwrite defaults
connection_params = {key: val for key, val in connection_params.items() if val is not None}
connections.connections.create_connection(**connection_params)
import json
import time
from boto.connection import AWSAuthConnection
from elasticsearch import Connection
class BotoHttpConnection(Connection):
    """
    Elasticsearch connection that sends every request through an
    ``ESConnection`` so that it is signed with the configured AWS
    credentials before reaching the elasticsearch nodes.
    """

    connection = None

    def __init__(self, host='localhost', port=443, aws_access_key_id=None, aws_secret_access_key=None,
                 region=None, **kwargs):
        super(BotoHttpConnection, self).__init__(host=host, port=port, **kwargs)

        # Credentials left as None are not forwarded, so boto can fall back
        # to its own defaults (e.g. environment variables).
        candidates = (
            ('host', host),
            ('port', port),
            ('aws_access_key_id', aws_access_key_id),
            ('aws_secret_access_key', aws_secret_access_key),
            ('region', region),
        )
        supplied = {name: value for name, value in candidates if value is not None}
        self.connection = ESConnection(**supplied)

    # pylint: disable=unused-argument
    def perform_request(self, method, url, params=None, body=None, timeout=None, ignore=()):
        """
        Send a signed request to elasticsearch and return a
        ``(status, headers dict, raw body)`` tuple.

        A non-2xx status that is not listed in `ignore` is logged as a failure
        and raised through ``_raise_error``.

        Note: the "timeout" kwarg is ignored in this case.  Boto manages the timeout
        and the default is 70 seconds.
        See: https://github.com/boto/boto/blob/develop/boto/connection.py#L533
        """
        # Anything that is not already a string is serialized as JSON.
        payload = body if isinstance(body, basestring) else json.dumps(body)

        started_at = time.time()
        response = self.connection.make_request(method, url, params=params, data=payload)
        elapsed = time.time() - started_at
        raw_data = response.read()

        # raise errors based on http status codes and let the client handle them
        succeeded = 200 <= response.status < 300
        if not succeeded and response.status not in ignore:
            self.log_request_fail(method, url, payload, elapsed, response.status)
            self._raise_error(response.status, raw_data)

        self.log_request_success(method, url, url, payload, response.status, raw_data, elapsed)
        return response.status, dict(response.getheaders()), raw_data
class ESConnection(AWSAuthConnection):
    """
    AWS connection used to sign requests sent to an AWS hosted
    elasticsearch cluster.
    """

    def __init__(self, *args, **kwargs):
        """
        Accepts the parent's arguments plus an optional ``region`` keyword,
        which is removed before the parent initializer is called.  The
        connection is secure unless the caller passes ``is_secure`` itself.
        """
        requested_region = kwargs.pop('region', None)
        if 'is_secure' not in kwargs:
            kwargs['is_secure'] = True
        super(ESConnection, self).__init__(*args, **kwargs)
        self.auth_region_name = requested_region
        self.auth_service_name = 'es'

    def _required_auth_capability(self):
        """
        Names the auth capability this connection requires, so that requests
        to AWS are signed using HMAC-4.
        """
        capabilities = ['hmac-v4']
        return capabilities
import abc
class BaseError(Exception):
    """
    Base error.

    Subclasses set ``message`` in their initializer; ``str()`` of the error
    returns that message.
    """
    __metaclass__ = abc.ABCMeta
    # Human-readable description; None until a subclass (or caller) sets it.
    message = None

    def __str__(self):
        # Returning None from __str__ raises TypeError, so fall back to the
        # standard Exception text while no message has been set.
        if self.message is None:
            return super(BaseError, self).__str__()
        return self.message
class LearnerNotFoundError(BaseError):
    """
    Error for a learner that could not be found in a course.

    Requires the ``course_id`` and ``username`` keyword arguments; both are
    removed from the kwargs before the parent initializer is called.
    """

    def __init__(self, *args, **kwargs):
        details = {
            'course_id': kwargs.pop('course_id'),
            'username': kwargs.pop('username'),
        }
        super(LearnerNotFoundError, self).__init__(*args, **kwargs)
        self.message = self.message_template.format(**details)

    @property
    def message_template(self):
        """Format string with ``{username}`` and ``{course_id}`` placeholders."""
        return 'Learner {username} not found for course {course_id}.'
class LearnerEngagementTimelineNotFoundError(BaseError):
    """
    Error for a learner whose engagement timeline could not be found in a
    course.

    Requires the ``course_id`` and ``username`` keyword arguments; both are
    removed from the kwargs before the parent initializer is called.
    """

    def __init__(self, *args, **kwargs):
        details = {
            'course_id': kwargs.pop('course_id'),
            'username': kwargs.pop('username'),
        }
        super(LearnerEngagementTimelineNotFoundError, self).__init__(*args, **kwargs)
        self.message = self.message_template.format(**details)

    @property
    def message_template(self):
        """Format string with ``{username}`` and ``{course_id}`` placeholders."""
        return 'Learner {username} engagement timeline not found for course {course_id}.'
class CourseNotSpecifiedError(BaseError):
    """
    Error for a request that did not specify a course.  The message is fixed
    and does not depend on the constructor arguments.
    """

    def __init__(self, *args, **kwargs):
        super(CourseNotSpecifiedError, self).__init__(*args, **kwargs)
        fixed_message = 'Course id/key not specified.'
        self.message = fixed_message
class CourseKeyMalformedError(BaseError):
    """
    Error for a malformed course id/key.

    Requires the ``course_id`` keyword argument, which is removed from the
    kwargs before the parent initializer is called.
    """

    def __init__(self, *args, **kwargs):
        offending_id = kwargs.pop('course_id')
        super(CourseKeyMalformedError, self).__init__(*args, **kwargs)
        self.message = self.message_template.format(course_id=offending_id)

    @property
    def message_template(self):
        """Format string with a ``{course_id}`` placeholder."""
        return 'Course id/key {course_id} malformed.'
class ParameterValueError(BaseError):
    """
    Error for illegal or mutually incompatible parameter values.

    The first positional argument is the message; it is stored on the
    instance and is not passed on to the parent initializer.
    """

    def __init__(self, message, *args, **kwargs):
        super(ParameterValueError, self).__init__(*args, **kwargs)
        self.message = message
import abc
from django.http.response import JsonResponse
from rest_framework import status
from analytics_data_api.v0.exceptions import (
CourseKeyMalformedError,
CourseNotSpecifiedError,
LearnerEngagementTimelineNotFoundError,
LearnerNotFoundError,
ParameterValueError,
)
class BaseProcessErrorMiddleware(object):
    """
    Base class for middleware that turns one exception type into a JSON error
    response.  Subclasses supply the exception class, the error code placed in
    the response body and the HTTP status code.
    """

    __metaclass__ = abc.ABCMeta

    @abc.abstractproperty
    def error(self):
        """ Error class to catch. """

    @abc.abstractproperty
    def error_code(self):
        """ Error code to return. """

    @abc.abstractproperty
    def status_code(self):
        """ HTTP status code to return. """

    def process_exception(self, _request, exception):
        """
        Return a JsonResponse describing `exception` when it is an instance of
        ``self.error``; otherwise return None.
        """
        if not isinstance(exception, self.error):
            return None

        payload = {
            "error_code": self.error_code,
            "developer_message": str(exception)
        }
        return JsonResponse(payload, status=self.status_code)
class LearnerNotFoundErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Responds with 404 and the 'no_learner_for_course' code when a
    LearnerNotFoundError is raised.
    """

    @property
    def error(self):
        """Exception class handled by this middleware."""
        return LearnerNotFoundError

    @property
    def error_code(self):
        """Error code placed in the response body."""
        return 'no_learner_for_course'

    @property
    def status_code(self):
        """HTTP status of the response."""
        return status.HTTP_404_NOT_FOUND
class LearnerEngagementTimelineNotFoundErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Responds with 404 and the 'no_learner_engagement_timeline' code when a
    LearnerEngagementTimelineNotFoundError is raised.
    """

    @property
    def error(self):
        """Exception class handled by this middleware."""
        return LearnerEngagementTimelineNotFoundError

    @property
    def error_code(self):
        """Error code placed in the response body."""
        return 'no_learner_engagement_timeline'

    @property
    def status_code(self):
        """HTTP status of the response."""
        return status.HTTP_404_NOT_FOUND
class CourseNotSpecifiedErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Responds with 400 and the 'course_not_specified' code when a
    CourseNotSpecifiedError is raised.
    """

    @property
    def error(self):
        """Exception class handled by this middleware."""
        return CourseNotSpecifiedError

    @property
    def error_code(self):
        """Error code placed in the response body."""
        return 'course_not_specified'

    @property
    def status_code(self):
        """HTTP status of the response."""
        return status.HTTP_400_BAD_REQUEST
class CourseKeyMalformedErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Responds with 400 and the 'course_key_malformed' code when a
    CourseKeyMalformedError is raised.
    """

    @property
    def error(self):
        """Exception class handled by this middleware."""
        return CourseKeyMalformedError

    @property
    def error_code(self):
        """Error code placed in the response body."""
        return 'course_key_malformed'

    @property
    def status_code(self):
        """HTTP status of the response."""
        return status.HTTP_400_BAD_REQUEST
class ParameterValueErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Responds with 400 and the 'illegal_parameter_values' code when a
    ParameterValueError is raised.
    """

    @property
    def error(self):
        """Exception class handled by this middleware."""
        return ParameterValueError

    @property
    def error_code(self):
        """Error code placed in the response body."""
        return 'illegal_parameter_values'

    @property
    def status_code(self):
        """HTTP status of the response."""
        return status.HTTP_400_BAD_REQUEST
from urlparse import urljoin
from django.conf import settings from django.conf import settings
from rest_framework import serializers from rest_framework import pagination, serializers
from analytics_data_api.constants import enrollment_modes, genders from analytics_data_api.constants import (
engagement_entity_types,
engagement_events,
enrollment_modes,
genders,
)
from analytics_data_api.v0 import models from analytics_data_api.v0 import models
...@@ -169,13 +175,16 @@ class SequentialOpenDistributionSerializer(ModelSerializerWithCreatedField): ...@@ -169,13 +175,16 @@ class SequentialOpenDistributionSerializer(ModelSerializerWithCreatedField):
) )
class BaseCourseEnrollmentModelSerializer(ModelSerializerWithCreatedField): class DefaultIfNoneMixin(object):
date = serializers.DateField(format=settings.DATE_FORMAT)
def default_if_none(self, value, default=0): def default_if_none(self, value, default=0):
return value if value is not None else default return value if value is not None else default
class BaseCourseEnrollmentModelSerializer(DefaultIfNoneMixin, ModelSerializerWithCreatedField):
date = serializers.DateField(format=settings.DATE_FORMAT)
class CourseEnrollmentDailySerializer(BaseCourseEnrollmentModelSerializer): class CourseEnrollmentDailySerializer(BaseCourseEnrollmentModelSerializer):
""" Representation of course enrollment for a single day and course. """ """ Representation of course enrollment for a single day and course. """
...@@ -306,3 +315,150 @@ class VideoTimelineSerializer(ModelSerializerWithCreatedField): ...@@ -306,3 +315,150 @@ class VideoTimelineSerializer(ModelSerializerWithCreatedField):
'num_views', 'num_views',
'created' 'created'
) )
# Exposes the source object's `date` attribute as `last_updated`, rendered
# with settings.DATE_FORMAT.
class LastUpdatedSerializer(serializers.Serializer):
last_updated = serializers.DateField(source='date', format=settings.DATE_FORMAT)
class LearnerSerializer(serializers.Serializer, DefaultIfNoneMixin):
    """
    Serializes a single learner: identity fields, segments, cohort,
    enrollment details, an account URL and a dictionary of engagement totals.
    """
    username = serializers.CharField(source='username')
    enrollment_mode = serializers.CharField(source='enrollment_mode')
    name = serializers.CharField(source='name')
    account_url = serializers.SerializerMethodField('get_account_url')
    email = serializers.CharField(source='email')
    segments = serializers.Field(source='segments')
    engagements = serializers.SerializerMethodField('get_engagements')
    enrollment_date = serializers.DateField(source='enrollment_date', format=settings.DATE_FORMAT)
    cohort = serializers.CharField(source='cohort')

    def transform_segments(self, _obj, value):
        """Any falsy value (None, empty string, empty list) becomes an empty list."""
        if value:
            return value
        return []

    def transform_cohort(self, _obj, value):
        """Any falsy value (such as an empty string) becomes None."""
        return value if value else None

    def get_account_url(self, obj):
        """
        Join the learner's username onto settings.LMS_USER_ACCOUNT_BASE_URL,
        or return None when that setting is empty.
        """
        base_url = settings.LMS_USER_ACCOUNT_BASE_URL
        if not base_url:
            return None
        return urljoin(settings.LMS_USER_ACCOUNT_BASE_URL, obj.username)

    def get_engagements(self, obj):
        """
        Build the engagement totals for the learner.
        """
        # Counts that are missing or None are reported as 0.
        zero_filled = ('discussion_contributions', 'problems_attempted',
                       'problems_completed', 'videos_viewed')
        totals = {
            metric: self.default_if_none(getattr(obj, metric, None), 0)
            for metric in zero_filled
        }
        # The attempts-per-completed ratio keeps its None when absent.
        totals['problem_attempts_per_completed'] = getattr(obj, 'problem_attempts_per_completed', None)
        return totals
class EdxPaginationSerializer(pagination.PaginationSerializer):
"""
Adds values to the response according to edX REST API Conventions.
"""
# Both fields are read from the `paginator` attribute of the serialized object.
count = serializers.Field(source='paginator.count')
num_pages = serializers.Field(source='paginator.num_pages')
class ElasticsearchDSLSearchSerializer(EdxPaginationSerializer):
    """Pagination serializer for pages backed by an elasticsearch-dsl search."""

    def __init__(self, *args, **kwargs):
        """Make sure that the elasticsearch query is executed."""
        # The Page object arrives as the ``instance`` keyword argument and its
        # ``object_list`` is an elasticsearch-dsl search object. That object
        # does not share the queryset API the django Paginator expects, so
        # the query is run by hand and the results stored back on the page.
        page = kwargs['instance']
        page.object_list = page.object_list.execute()
        super(ElasticsearchDSLSearchSerializer, self).__init__(*args, **kwargs)
class EngagementDaySerializer(DefaultIfNoneMixin, serializers.Serializer):
    """Serializes one day of engagement counts, reporting missing counts as 0."""

    date = serializers.DateField(format=settings.DATE_FORMAT)
    problems_attempted = serializers.IntegerField(required=True, default=0)
    problems_completed = serializers.IntegerField(required=True, default=0)
    discussion_contributions = serializers.IntegerField(required=True, default=0)
    videos_viewed = serializers.IntegerField(required=True, default=0)

    def _zero_if_none(self, _obj, value):
        # Shared transform: a None count is replaced with 0.
        return self.default_if_none(value, 0)

    # Every count field uses the same transform.
    transform_problems_attempted = _zero_if_none
    transform_problems_completed = _zero_if_none
    transform_discussion_contributions = _zero_if_none
    transform_videos_viewed = _zero_if_none
class DateRangeSerializer(serializers.Serializer):
    """Exposes ``start_date`` / ``end_date`` as ``start`` / ``end``, formatted with ``settings.DATE_FORMAT``."""

    start = serializers.DateTimeField(
        source='start_date',
        format=settings.DATE_FORMAT,
    )
    end = serializers.DateTimeField(
        source='end_date',
        format=settings.DATE_FORMAT,
    )
class EnagementRangeMetricSerializer(serializers.Serializer):
    """
    Serializes ModuleEngagementMetricRanges (low_range and high_range) into
    the below_average, average, above_average ranges represented as arrays.

    The serialized object is a mapping with ``low_range`` and ``high_range``
    entries, either of which may be None.
    """

    below_average = serializers.SerializerMethodField('get_below_average_range')
    average = serializers.SerializerMethodField('get_average_range')
    above_average = serializers.SerializerMethodField('get_above_average_range')

    def get_average_range(self, obj):
        # The average band runs from the top of the low range to the bottom
        # of the high range; a missing range leaves that end as None.
        low_range = obj['low_range']
        lower_bound = low_range.high_value if low_range else None
        high_range = obj['high_range']
        upper_bound = high_range.low_value if high_range else None
        return [lower_bound, upper_bound]

    def get_below_average_range(self, obj):
        return self._get_range(obj['low_range'])

    def get_above_average_range(self, obj):
        return self._get_range(obj['high_range'])

    def _get_range(self, metric_range):
        # A missing range serializes as a pair of nulls.
        if not metric_range:
            return [None, None]
        return [metric_range.low_value, metric_range.high_value]
class CourseLearnerMetadataSerializer(serializers.Serializer):
    """
    Serializes course-level learner metadata: the ``es_data`` values plus the
    engagement ranges for every aggregate metric.
    """

    enrollment_modes = serializers.Field(source='es_data.enrollment_modes')
    segments = serializers.Field(source='es_data.segments')
    cohorts = serializers.Field(source='es_data.cohorts')
    engagement_ranges = serializers.SerializerMethodField('get_engagement_ranges')

    def get_engagement_ranges(self, obj):
        """
        Build a dict holding ``date_range`` plus one entry per
        ``<entity_type>_<event>`` metric.
        """
        query_set = obj['engagement_ranges']

        # The date range is taken from the first row, when there is one.
        first_row = query_set[0] if len(query_set) else None
        engagement_ranges = {'date_range': DateRangeSerializer(first_row).data}

        # go through each entity and event type combination and fill in the ranges
        for entity_type in engagement_entity_types.AGGREGATE_TYPES:
            for event in engagement_events.EVENTS[entity_type]:
                metric = '{0}_{1}'.format(entity_type, event)
                low_rows = query_set.filter(metric=metric, range_type='low')
                high_rows = query_set.filter(metric=metric, range_type='high')
                bounds = {
                    'low_range': low_rows[0] if len(low_rows) else None,
                    'high_range': high_rows[0] if len(high_rows) else None,
                }
                engagement_ranges[metric] = EnagementRangeMetricSerializer(bounds).data

        return engagement_ranges
import socket
from django.test import TestCase
from elasticsearch.exceptions import ElasticsearchException
from mock import patch
from analytics_data_api.v0.connections import BotoHttpConnection, ESConnection
class ESConnectionTests(TestCase):
    """Tests for ``ESConnection``: constructor parameters, request signing and timeouts."""

    def test_constructor_params(self):
        """The credentials and region passed in are exposed on the connection."""
        connection = ESConnection('mockservice.cc-zone-1.amazonaws.com',
                                  aws_access_key_id='access_key',
                                  aws_secret_access_key='secret',
                                  region='region_123')
        self.assertEqual(connection.auth_region_name, 'region_123')
        self.assertEqual(connection.aws_access_key_id, 'access_key')
        self.assertEqual(connection.aws_secret_access_key, 'secret')

    def test_signing(self):
        """An authorized request carries the signing method and the access key id."""
        connection = ESConnection('mockservice.cc-zone-1.amazonaws.com',
                                  aws_access_key_id='my_access_key',
                                  aws_secret_access_key='secret',
                                  region='region_123')

        # create a request and sign it
        request = connection.build_base_http_request('GET', '/', None)
        request.authorize(connection)

        # confirm the header contains signing method and key id
        auth_header = request.headers['Authorization']
        self.assertTrue('AWS4-HMAC-SHA256' in auth_header)
        self.assertTrue('my_access_key' in auth_header)

    def test_timeout(self):
        """A socket timeout while connecting surfaces as a ``socket.error``."""
        connection = ESConnection('mockservice.cc-zone-1.amazonaws.com',
                                  aws_access_key_id='access_key',
                                  aws_secret_access_key='secret',
                                  region='region_123')
        connection.num_retries = 0

        # patch() puts the real socket.create_connection back when the block
        # exits. Assigning the fake directly onto the socket module would
        # leave it in place for every test that runs afterwards.
        with patch('socket.create_connection', side_effect=socket.timeout('fake error')):
            with self.assertRaises(socket.error):
                connection.make_request('GET', 'https://example.com')
class BotoHttpConnectionTests(TestCase):
    """Tests for ``BotoHttpConnection.perform_request`` with ``ESConnection.make_request`` mocked out."""

    @patch('analytics_data_api.v0.connections.ESConnection.make_request')
    def test_perform_request_success(self, mock_response):
        """A 200 response is returned to the caller and the request is logged at info level."""
        mock_response.return_value.status = 200
        connection = BotoHttpConnection(aws_access_key_id='access_key', aws_secret_access_key='secret')
        with patch('elasticsearch.connection.base.logger.info') as mock_logger:
            status, _header, _data = connection.perform_request('get', 'http://example.com')
        self.assertEqual(status, 200)
        self.assertGreater(mock_logger.call_count, 0)

    @patch('analytics_data_api.v0.connections.ESConnection.make_request')
    def test_perform_request_error(self, mock_response):
        """A 500 response raises ElasticsearchException and the failure is logged at debug level."""
        mock_response.return_value.status = 500
        connection = BotoHttpConnection(aws_access_key_id='access_key', aws_secret_access_key='secret')
        # The logger patch wraps assertRaises so that the logging assertion
        # runs after the exception has been caught. Placed after the raising
        # call inside assertRaises, it would never execute.
        with patch('elasticsearch.connection.base.logger.debug') as mock_logger:
            with self.assertRaises(ElasticsearchException):
                connection.perform_request('get', 'http://example.com')
        self.assertGreater(mock_logger.call_count, 0)
import json
from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.keys import CourseKey
from rest_framework import status
DEMO_COURSE_ID = u'course-v1:edX+DemoX+Demo_2014' DEMO_COURSE_ID = u'course-v1:edX+DemoX+Demo_2014'
...@@ -12,3 +15,24 @@ class DemoCourseMixin(object): ...@@ -12,3 +15,24 @@ class DemoCourseMixin(object):
cls.course_id = DEMO_COURSE_ID cls.course_id = DEMO_COURSE_ID
cls.course_key = CourseKey.from_string(cls.course_id) cls.course_key = CourseKey.from_string(cls.course_id)
super(DemoCourseMixin, cls).setUpClass() super(DemoCourseMixin, cls).setUpClass()
class VerifyCourseIdMixin(object):
    """Assertion helpers for the error responses returned for a missing or malformed course ID."""

    def _verify_bad_request(self, response, error_code, developer_message):
        # Shared check: HTTP 400 whose JSON body is exactly the error payload.
        self.assertEquals(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertDictEqual(
            json.loads(response.content),
            {
                u"error_code": error_code,
                u"developer_message": developer_message,
            }
        )

    def verify_no_course_id(self, response):
        """ Assert that a course ID must be provided. """
        self._verify_bad_request(
            response,
            u"course_not_specified",
            u"Course id/key not specified.",
        )

    def verify_bad_course_id(self, response, course_id='malformed-course-id'):
        """ Assert that a course ID must be valid. """
        self._verify_bad_request(
            response,
            u"course_key_malformed",
            u"Course id/key {} malformed.".format(course_id),
        )
import datetime
import json
import ddt
from django.utils.http import urlquote
from django_dynamic_fixture import G
import pytz
from rest_framework import status
from analyticsdataserver.tests import TestCaseWithAuthentication
from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED
from analytics_data_api.v0 import models
from analytics_data_api.v0.tests.views import DemoCourseMixin, VerifyCourseIdMixin
@ddt.ddt
class EngagementTimelineTests(DemoCourseMixin, VerifyCourseIdMixin, TestCaseWithAuthentication):
    """Tests for the learner engagement timeline endpoint."""

    DEFAULT_USERNAME = 'ed_xavier'
    path_template = '/api/v0/engagement_timelines/{}/?course_id={}'

    def create_engagement(self, entity_type, event_type, entity_id, count, date=None):
        """Create a ModuleEngagement model"""
        if date is None:
            date = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
        G(
            models.ModuleEngagement,
            course_id=self.course_id,
            username=self.DEFAULT_USERNAME,
            date=date,
            entity_type=entity_type,
            entity_id=entity_id,
            event=event_type,
            count=count,
        )

    @staticmethod
    def _expected_day(date, **metrics):
        """Return the expected payload for one day: every metric is 0 unless overridden."""
        day = {
            'date': date,
            'discussion_contributions': 0,
            'problems_attempted': 0,
            'problems_completed': 0,
            'videos_viewed': 0,
        }
        day.update(metrics)
        return day

    @ddt.data(
        (PROBLEM, ATTEMPTED, 'problems_attempted', True),
        (PROBLEM, COMPLETED, 'problems_completed', True),
        (VIDEO, VIEWED, 'videos_viewed', True),
        (DISCUSSION, CONTRIBUTED, 'discussion_contributions', False),
    )
    @ddt.unpack
    def test_metric_aggregation(self, entity_type, event_type, metric_display_name, expect_id_aggregation):
        """
        Verify that some metrics are counted by unique ID, while some are
        counted by total interactions.
        """
        self.create_engagement(entity_type, event_type, 'entity-id', count=5)
        self.create_engagement(entity_type, event_type, 'entity-id', count=5)

        # Two records of count 5 each: ID aggregation is expected to report 2,
        # total-interaction aggregation is expected to report 5 + 5.
        expected_count = 2 if expect_id_aggregation else 10
        expected_data = {
            'days': [
                self._expected_day('2015-01-01', **{metric_display_name: expected_count}),
            ]
        }

        path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
        response = self.authenticated_get(path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data, expected_data)

    def test_timeline(self):
        """
        Smoke test the learner engagement timeline.
        """
        path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
        day_one = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
        day_two = datetime.datetime(2015, 1, 2, tzinfo=pytz.utc)
        self.create_engagement(PROBLEM, ATTEMPTED, 'id-1', count=100, date=day_one)
        self.create_engagement(PROBLEM, COMPLETED, 'id-2', count=12, date=day_one)
        self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-3', count=6, date=day_one)
        self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-4', count=10, date=day_two)
        self.create_engagement(VIDEO, VIEWED, 'id-5', count=44, date=day_two)
        self.create_engagement(PROBLEM, ATTEMPTED, 'id-6', count=8, date=day_two)
        self.create_engagement(PROBLEM, ATTEMPTED, 'id-7', count=4, date=day_two)
        response = self.authenticated_get(path)
        self.assertEqual(response.status_code, 200)
        expected = {
            'days': [
                self._expected_day(
                    '2015-01-01',
                    discussion_contributions=6,
                    problems_attempted=1,
                    problems_completed=1,
                ),
                self._expected_day(
                    '2015-01-02',
                    discussion_contributions=10,
                    problems_attempted=2,
                    videos_viewed=1,
                ),
            ]
        }
        self.assertEqual(response.data, expected)

    def test_day_gap(self):
        """A day with no engagement between two active days is reported with all-zero metrics."""
        path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
        first_day = datetime.datetime(2015, 5, 26, tzinfo=pytz.utc)
        last_day = datetime.datetime(2015, 5, 28, tzinfo=pytz.utc)
        self.create_engagement(VIDEO, VIEWED, 'id-1', count=1, date=first_day)
        self.create_engagement(PROBLEM, ATTEMPTED, entity_id='id-2', count=1, date=last_day)
        response = self.authenticated_get(path)
        self.assertEqual(response.status_code, 200)
        expected = {
            'days': [
                self._expected_day('2015-05-26', videos_viewed=1),
                self._expected_day('2015-05-27'),
                self._expected_day('2015-05-28', problems_attempted=1),
            ]
        }
        self.assertEqual(response.data, expected)

    def test_not_found(self):
        """With no engagement records the endpoint answers 404 with a descriptive error payload."""
        path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
        response = self.authenticated_get(path)
        self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
        expected = {
            u"error_code": u"no_learner_engagement_timeline",
            u"developer_message": u"Learner {} engagement timeline not found for course {}.".format(
                self.DEFAULT_USERNAME, self.course_id)
        }
        self.assertDictEqual(json.loads(response.content), expected)

    def test_no_course_id(self):
        """Omitting the course_id query parameter is rejected."""
        base_path = '/api/v0/engagement_timelines/{}'
        # Use the shared constant rather than repeating the username literal.
        response = self.authenticated_get(base_path.format(self.DEFAULT_USERNAME))
        self.verify_no_course_id(response)

    def test_bad_course_id(self):
        """A malformed course_id query parameter is rejected."""
        path = self.path_template.format(self.DEFAULT_USERNAME, 'malformed-course-id')
        response = self.authenticated_get(path)
        self.verify_bad_course_id(response)
...@@ -2,11 +2,14 @@ from django.conf.urls import patterns, url, include ...@@ -2,11 +2,14 @@ from django.conf.urls import patterns, url, include
from django.core.urlresolvers import reverse_lazy from django.core.urlresolvers import reverse_lazy
from django.views.generic import RedirectView from django.views.generic import RedirectView
COURSE_ID_PATTERN = r'(?P<course_id>[^/+]+[/+][^/+]+[/+][^/]+)'
urlpatterns = patterns( urlpatterns = patterns(
'', '',
url(r'^courses/', include('analytics_data_api.v0.urls.courses', namespace='courses')), url(r'^courses/', include('analytics_data_api.v0.urls.courses', namespace='courses')),
url(r'^problems/', include('analytics_data_api.v0.urls.problems', namespace='problems')), url(r'^problems/', include('analytics_data_api.v0.urls.problems', namespace='problems')),
url(r'^videos/', include('analytics_data_api.v0.urls.videos', namespace='videos')), url(r'^videos/', include('analytics_data_api.v0.urls.videos', namespace='videos')),
url('^', include('analytics_data_api.v0.urls.learners', namespace='learners')),
# pylint: disable=no-value-for-parameter # pylint: disable=no-value-for-parameter
url(r'^authenticated/$', RedirectView.as_view(url=reverse_lazy('authenticated')), name='authenticated'), url(r'^authenticated/$', RedirectView.as_view(url=reverse_lazy('authenticated')), name='authenticated'),
......
from django.conf.urls import patterns, url from django.conf.urls import patterns, url
from analytics_data_api.v0.urls import COURSE_ID_PATTERN
from analytics_data_api.v0.views import courses as views from analytics_data_api.v0.views import courses as views
COURSE_ID_PATTERN = r'(?P<course_id>[^/+]+[/+][^/+]+[/+][^/]+)'
COURSE_URLS = [ COURSE_URLS = [
('activity', views.CourseActivityWeeklyView, 'activity'), ('activity', views.CourseActivityWeeklyView, 'activity'),
('recent_activity', views.CourseActivityMostRecentWeekView, 'recent_activity'), ('recent_activity', views.CourseActivityMostRecentWeekView, 'recent_activity'),
......
from django.conf.urls import patterns, url
from analytics_data_api.v0.urls import COURSE_ID_PATTERN
from analytics_data_api.v0.views import learners as views
# Named group capturing a username: word characters plus '.', '+' and '-'.
USERNAME_PATTERN = r'(?P<username>[\w.+-]+)'

# Routes keyed on a username or a course id are built up front so the
# pattern list below reads as a plain route table.
LEARNER_ROUTE = r'^learners/{}/$'.format(USERNAME_PATTERN)
ENGAGEMENT_TIMELINE_ROUTE = r'^engagement_timelines/{}/$'.format(USERNAME_PATTERN)
COURSE_LEARNER_METADATA_ROUTE = r'^course_learner_metadata/{}/$'.format(COURSE_ID_PATTERN)

urlpatterns = patterns(
    '',
    url(r'^learners/$', views.LearnerListView.as_view(), name='learners'),
    url(LEARNER_ROUTE, views.LearnerView.as_view(), name='learner'),
    url(ENGAGEMENT_TIMELINE_ROUTE, views.EngagementTimelineView.as_view(), name='engagement_timelines'),
    url(COURSE_LEARNER_METADATA_ROUTE, views.CourseLearnerMetadata.as_view(), name='course_learner_metadata'),
)
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey
from analytics_data_api.v0.exceptions import (CourseNotSpecifiedError, CourseKeyMalformedError)
class CourseViewMixin(object):
    """
    Captures the course_id from the url and validates it.

    The id is taken from the URL keyword arguments when present, otherwise
    from the ``course_id`` query parameter.
    """

    course_id = None

    def get(self, request, *args, **kwargs):
        """
        Store and validate the course id, then defer to the next ``get``.

        Raises CourseNotSpecifiedError when no course id is supplied and
        CourseKeyMalformedError when it cannot be parsed as a course key.
        """
        from_query_string = request.QUERY_PARAMS.get('course_id', None)
        self.course_id = self.kwargs.get('course_id', from_query_string)

        if not self.course_id:
            raise CourseNotSpecifiedError()

        # Parsing is done purely for validation; the parsed key is discarded.
        try:
            CourseKey.from_string(self.course_id)
        except InvalidKeyError:
            raise CourseKeyMalformedError(course_id=self.course_id)

        return super(CourseViewMixin, self).get(request, *args, **kwargs)
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
API methods for module level data. API methods for module level data.
""" """
from collections import defaultdict
from itertools import groupby from itertools import groupby
from django.db import OperationalError from django.db import OperationalError
...@@ -19,7 +20,7 @@ from analytics_data_api.v0.serializers import ( ...@@ -19,7 +20,7 @@ from analytics_data_api.v0.serializers import (
GradeDistributionSerializer, GradeDistributionSerializer,
SequentialOpenDistributionSerializer, SequentialOpenDistributionSerializer,
) )
from analytics_data_api.utils import consolidate_answers from analytics_data_api.utils import matching_tuple
class ProblemResponseAnswerDistributionView(generics.ListAPIView): class ProblemResponseAnswerDistributionView(generics.ListAPIView):
...@@ -55,6 +56,48 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView): ...@@ -55,6 +56,48 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView):
serializer_class = ConsolidatedAnswerDistributionSerializer serializer_class = ConsolidatedAnswerDistributionSerializer
allow_empty = False allow_empty = False
@classmethod
def consolidate_answers(cls, problem):
    """
    Attempt to consolidate erroneously randomized answers.

    ``problem`` is an iterable of answer rows. Every row gets
    ``consolidated_variant`` set to False up front. Rows are grouped by
    ``value_id``; if any group has more than one distinct matching tuple the
    rows are returned unmerged. Otherwise each multi-row group is folded into
    its first row, which has its counts summed, ``variant`` cleared and
    ``consolidated_variant`` set to True.
    """
    rows_by_value = defaultdict(list)
    tuples_by_value = defaultdict(set)

    for row in problem:
        row.consolidated_variant = False
        rows_by_value[row.value_id].append(row)
        tuples_by_value[row.value_id].add(matching_tuple(row))

    # If a part has more than one unique tuple of matching fields, do not consolidate.
    if any(len(distinct) > 1 for distinct in tuples_by_value.itervalues()):
        return problem

    consolidated = []
    for rows in rows_by_value.itervalues():
        merged = rows[0]
        if len(rows) > 1:
            # The first row becomes the accumulator for the whole group.
            merged.variant = None
            merged.consolidated_variant = True
            for extra in rows[1:]:
                if isinstance(merged, ProblemResponseAnswerDistribution):
                    merged.count += extra.count
                else:
                    merged.first_response_count += extra.first_response_count
                    merged.last_response_count += extra.last_response_count
        consolidated.append(merged)

    return consolidated
def get_queryset(self): def get_queryset(self):
"""Select all the answer distribution response having to do with this usage of the problem.""" """Select all the answer distribution response having to do with this usage of the problem."""
problem_id = self.kwargs.get('problem_id') problem_id = self.kwargs.get('problem_id')
...@@ -69,7 +112,7 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView): ...@@ -69,7 +112,7 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView):
consolidated_rows = [] consolidated_rows = []
for _, part in groupby(queryset, lambda x: x.part_id): for _, part in groupby(queryset, lambda x: x.part_id):
consolidated_rows += consolidate_answers(list(part)) consolidated_rows += self.consolidate_answers(list(part))
return consolidated_rows return consolidated_rows
......
"""Utilities for view-level API logic."""
def split_query_argument(argument):
    """
    Turn a comma-separated querystring argument into a list of its parts.

    A falsy argument (None or an empty string) yields None rather than a list.
    """
    return argument.split(',') if argument else None
...@@ -54,9 +54,19 @@ DATABASES = { ...@@ -54,9 +54,19 @@ DATABASES = {
########## ELASTICSEARCH CONFIGURATION ########## ELASTICSEARCH CONFIGURATION
ELASTICSEARCH_LEARNERS_HOST = environ.get('ELASTICSEARCH_LEARNERS_HOST', None) ELASTICSEARCH_LEARNERS_HOST = environ.get('ELASTICSEARCH_LEARNERS_HOST', None)
ELASTICSEARCH_LEARNERS_INDEX = environ.get('ELASTICSEARCH_LEARNERS_INDEX', None) ELASTICSEARCH_LEARNERS_INDEX = environ.get('ELASTICSEARCH_LEARNERS_INDEX', None)
ELASTICSEARCH_LEARNERS_UPDATE_INDEX = environ.get('ELASTICSEARCH_LEARNERS_UPDATE_INDEX', None)
# access credentials for signing requests to AWS.
# For more information see http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html
ELASTICSEARCH_AWS_ACCESS_KEY_ID = None
ELASTICSEARCH_AWS_SECRET_ACCESS_KEY = None
# overrides the default elasticsearch connection class; useful for signing requests,
# e.g. 'analytics_data_api.v0.connections.BotoHttpConnection'
ELASTICSEARCH_CONNECTION_CLASS = None
# only needed with BotoHttpConnection, e.g. 'us-east-1'
ELASTICSEARCH_CONNECTION_DEFAULT_REGION = None
########## END ELASTICSEARCH CONFIGURATION ########## END ELASTICSEARCH CONFIGURATION
########## GENERAL CONFIGURATION ########## GENERAL CONFIGURATION
# See: https://docs.djangoproject.com/en/dev/ref/settings/#time-zone # See: https://docs.djangoproject.com/en/dev/ref/settings/#time-zone
TIME_ZONE = 'UTC' TIME_ZONE = 'UTC'
...@@ -165,6 +175,11 @@ MIDDLEWARE_CLASSES = ( ...@@ -165,6 +175,11 @@ MIDDLEWARE_CLASSES = (
'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware', 'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware',
'analytics_data_api.v0.middleware.LearnerEngagementTimelineNotFoundErrorMiddleware',
'analytics_data_api.v0.middleware.LearnerNotFoundErrorMiddleware',
'analytics_data_api.v0.middleware.CourseNotSpecifiedErrorMiddleware',
'analytics_data_api.v0.middleware.CourseKeyMalformedErrorMiddleware',
'analytics_data_api.v0.middleware.ParameterValueErrorMiddleware',
) )
########## END MIDDLEWARE CONFIGURATION ########## END MIDDLEWARE CONFIGURATION
...@@ -271,7 +286,11 @@ DATABASE_ROUTERS = ['analyticsdataserver.router.AnalyticsApiRouter'] ...@@ -271,7 +286,11 @@ DATABASE_ROUTERS = ['analyticsdataserver.router.AnalyticsApiRouter']
ENABLE_ADMIN_SITE = False ENABLE_ADMIN_SITE = False
# base url to generate link to user api
LMS_USER_ACCOUNT_BASE_URL = None
########## END ANALYTICS DATA API CONFIGURATION ########## END ANALYTICS DATA API CONFIGURATION
DATE_FORMAT = '%Y-%m-%d' DATE_FORMAT = '%Y-%m-%d'
DATETIME_FORMAT = '%Y-%m-%dT%H%M%S' DATETIME_FORMAT = '%Y-%m-%dT%H%M%S'
...@@ -19,10 +19,10 @@ DATABASES = { ...@@ -19,10 +19,10 @@ DATABASES = {
}, },
'analytics': { 'analytics': {
'ENGINE': 'django.db.backends.mysql', 'ENGINE': 'django.db.backends.mysql',
'NAME': 'analytics', 'NAME': 'reports_2_0',
'USER': 'root', 'USER': 'readonly001',
'PASSWORD': '', 'PASSWORD': 'meringues unfreehold sisterize morsing',
'HOST': '', 'HOST': 'stage-edx-analytics-report-rds.edx.org',
'PORT': '', 'PORT': '3306',
} }
} }
\ No newline at end of file
...@@ -18,4 +18,11 @@ INSTALLED_APPS += ( ...@@ -18,4 +18,11 @@ INSTALLED_APPS += (
'django_nose', 'django_nose',
) )
LMS_USER_ACCOUNT_BASE_URL = 'http://lms-host'
TEST_RUNNER = 'django_nose.NoseTestSuiteRunner' TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'
# Default elasticsearch port when running locally
ELASTICSEARCH_LEARNERS_HOST = 'http://localhost:9200/'
ELASTICSEARCH_LEARNERS_INDEX = 'roster_test'
ELASTICSEARCH_LEARNERS_UPDATE_INDEX = 'index_update_test'
boto==2.22.1 # MIT
Django==1.7.5 # BSD License Django==1.7.5 # BSD License
Markdown==2.6 # BSD
django-model-utils==2.2 # BSD django-model-utils==2.2 # BSD
djangorestframework==2.4.4 # BSD djangorestframework==2.4.4 # BSD
ipython==2.4.1 # BSD
django-rest-swagger==0.2.8 # BSD django-rest-swagger==0.2.8 # BSD
djangorestframework-csv==1.3.3 # BSD djangorestframework-csv==1.3.3 # BSD
django-countries==3.2 # MIT django-countries==3.2 # MIT
elasticsearch-dsl==0.0.9 # Apache 2.0
# markdown is used by swagger for rendering the api docs
Markdown==2.6 # BSD
-e git+https://github.com/edx/opaque-keys.git@d45d0bd8d64c69531be69178b9505b5d38806ce0#egg=opaque-keys -e git+https://github.com/edx/opaque-keys.git@d45d0bd8d64c69531be69178b9505b5d38806ce0#egg=opaque-keys
# Test dependencies go here. # Test dependencies go here.
-r base.txt -r base.txt
coverage==3.7.1 coverage==3.7.1
ddt==1.0.1
diff-cover >= 0.2.1 diff-cover >= 0.2.1
django-dynamic-fixture==1.8.1 django-dynamic-fixture==1.8.1
django-nose==1.4.1 django-nose==1.4.1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment