Commit 05ee144f by Dennis Jen

Merge pull request #110 from edx/learner-analytics

Learner analytics
parents 3b4aa462 010ae3fa
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
omit = analyticsdataserver/settings* omit = analyticsdataserver/settings*
*wsgi.py *wsgi.py
analytics_data_api/management/commands/generate_fake_course_data.py analytics_data_api/management/commands/generate_fake_course_data.py
source = analyticsdataserver, analytics_data_api
branch = True
[report] [report]
# Regexes for lines to exclude from consideration # Regexes for lines to exclude from consideration
...@@ -9,4 +11,10 @@ exclude_lines = ...@@ -9,4 +11,10 @@ exclude_lines =
# Have to re-enable the standard pragma # Have to re-enable the standard pragma
pragma: no cover pragma: no cover
raise NotImplementedError raise NotImplementedError
\ No newline at end of file
[html]
directory = ${COVERAGE_DIR}/html/
[xml]
output = ${COVERAGE_DIR}/coverage.xml
...@@ -2,16 +2,19 @@ language: python ...@@ -2,16 +2,19 @@ language: python
python: "2.7" python: "2.7"
install: install:
- "pip install coveralls" - "pip install coveralls"
- "make test.install_elasticsearch"
cache: pip cache: pip
# Use docker for builds # Use docker for builds
sudo: false sudo: false
before_script:
- make test.run_elasticsearch
# Recommended by Travis in order to make sure ElasticSearch
# actually starts up. See https://docs.travis-ci.com/user/database-setup/#ElasticSearch
- sleep 10
script: script:
- make validate - make validate
- git fetch origin master:refs/remotes/origin/master # https://github.com/edx/diff-cover#troubleshooting - git fetch origin master:refs/remotes/origin/master # https://github.com/edx/diff-cover#troubleshooting
- make diff.report - make diff.report
branches:
only:
- master
after_success: after_success:
- coveralls - coveralls
- bash ./scripts/build-stats-to-datadog.sh - bash ./scripts/build-stats-to-datadog.sh
ROOT = $(shell echo "$$PWD") ROOT = $(shell echo "$$PWD")
COVERAGE = $(ROOT)/build/coverage COVERAGE_DIR = $(ROOT)/build/coverage
PACKAGES = analyticsdataserver analytics_data_api PACKAGES = analyticsdataserver analytics_data_api
DATABASES = default analytics DATABASES = default analytics
ELASTICSEARCH_VERSION = 1.5.2
TEST_SETTINGS = analyticsdataserver.settings.test
.PHONY: requirements develop clean diff.report view.diff.report quality .PHONY: requirements develop clean diff.report view.diff.report quality
requirements: requirements:
pip install -q -r requirements/base.txt pip install -q -r requirements/base.txt
test.install_elasticsearch:
curl -L -O https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-$(ELASTICSEARCH_VERSION).zip
unzip elasticsearch-$(ELASTICSEARCH_VERSION).zip
test.run_elasticsearch:
cd elasticsearch-$(ELASTICSEARCH_VERSION) && ./bin/elasticsearch -d
test.requirements: requirements test.requirements: requirements
pip install -q -r requirements/test.txt pip install -q -r requirements/test.txt
...@@ -20,22 +28,22 @@ clean: ...@@ -20,22 +28,22 @@ clean:
coverage erase coverage erase
test: clean test: clean
. ./.test_env && ./manage.py test --settings=analyticsdataserver.settings.test --with-ignore-docstrings \ coverage run ./manage.py test --settings=$(TEST_SETTINGS) \
--exclude-dir=analyticsdataserver/settings --with-coverage --cover-inclusive --cover-branches \ --with-ignore-docstrings --exclude-dir=analyticsdataserver/settings \
--cover-html --cover-html-dir=$(COVERAGE)/html/ \
--cover-xml --cover-xml-file=$(COVERAGE)/coverage.xml \
$(foreach package,$(PACKAGES),--cover-package=$(package)) \
$(PACKAGES) $(PACKAGES)
export COVERAGE_DIR=$(COVERAGE_DIR) && \
coverage html && \
coverage xml
diff.report: diff.report:
diff-cover $(COVERAGE)/coverage.xml --html-report $(COVERAGE)/diff_cover.html diff-cover $(COVERAGE_DIR)/coverage.xml --html-report $(COVERAGE_DIR)/diff_cover.html
diff-quality --violations=pep8 --html-report $(COVERAGE)/diff_quality_pep8.html diff-quality --violations=pep8 --html-report $(COVERAGE_DIR)/diff_quality_pep8.html
diff-quality --violations=pylint --html-report $(COVERAGE)/diff_quality_pylint.html diff-quality --violations=pylint --html-report $(COVERAGE_DIR)/diff_quality_pylint.html
view.diff.report: view.diff.report:
xdg-open file:///$(COVERAGE)/diff_cover.html xdg-open file:///$(COVERAGE_DIR)/diff_cover.html
xdg-open file:///$(COVERAGE)/diff_quality_pep8.html xdg-open file:///$(COVERAGE_DIR)/diff_quality_pep8.html
xdg-open file:///$(COVERAGE)/diff_quality_pylint.html xdg-open file:///$(COVERAGE_DIR)/diff_quality_pylint.html
quality: quality:
pep8 $(PACKAGES) pep8 $(PACKAGES)
......
...@@ -29,6 +29,23 @@ Getting Started ...@@ -29,6 +29,23 @@ Getting Started
$ ./manage.py migrate --noinput $ ./manage.py migrate --noinput
$ ./manage.py migrate --noinput --database=analytics $ ./manage.py migrate --noinput --database=analytics
The learner API endpoints require elasticsearch with a mapping
defined on this `wiki page <https://openedx.atlassian.net/wiki/display/AN/Learner+Analytics#LearnerAnalytics-ElasticSearch>`_.
The connection to elasticsearch can be configured by the
``ELASTICSEARCH_LEARNERS_HOST`` and
``ELASTICSEARCH_LEARNERS_INDEX`` django settings. For testing, you
can install elasticsearch locally:
::
$ make test.install_elasticsearch
To run the cluster for testing:
::
$ make test.run_elasticsearch
3. Create a user and authentication token. Note that the user will be 3. Create a user and authentication token. Note that the user will be
created if one does not exist. created if one does not exist.
......
# Entity types for individual modules a learner can engage with.
DISCUSSION = 'discussion'
PROBLEM = 'problem'
VIDEO = 'video'
# Entity types used when engagement is recorded per individual module.
INDIVIDUAL_TYPES = [DISCUSSION, PROBLEM, VIDEO]
# Pluralized entity types used when engagement is aggregated across modules.
PROBLEMS = 'problems'
VIDEOS = 'videos'
AGGREGATE_TYPES = [DISCUSSION, PROBLEMS, VIDEOS]
from analytics_data_api.constants import engagement_entity_types

# Event names describing how a learner interacted with a module.
ATTEMPTED = 'attempted'
COMPLETED = 'completed'
CONTRIBUTED = 'contributed'
VIEWED = 'viewed'

# map entity types to events
# Both the singular (per-module) and plural (aggregate) entity types map to
# the same event lists.
EVENTS = {
    engagement_entity_types.DISCUSSION: [CONTRIBUTED],
    engagement_entity_types.PROBLEM: [ATTEMPTED, COMPLETED],
    engagement_entity_types.PROBLEMS: [ATTEMPTED, COMPLETED],
    engagement_entity_types.VIDEO: [VIEWED],
    engagement_entity_types.VIDEOS: [VIEWED],
}
from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED
class EngagementType(object):
    """
    Maps an (entity_type, event_type) pair to its canonical engagement metric.

    Encapsulates:
        - The API consumer-facing display name for engagement types
        - The internal question of whether the metric should be counted in
          terms of the entity type or the raw number of events.
    """

    # Defines the current canonical set of engagement types used in the Learner
    # Analytics API.
    ALL_TYPES = (
        'problems_attempted',
        'problems_completed',
        'videos_viewed',
        'discussion_contributions',
    )

    def __init__(self, entity_type, event_type):
        """
        Initializes an EngagementType for a particular entity and event type.

        Arguments:
            entity_type (str): the type of module interacted with
            event_type (str): the type of interaction on that entity

        Raises:
            ValueError: if the (entity_type, event_type) combination is not
                recognized. (The previous implementation raised only for an
                unknown entity type; an unknown event on a known entity
                silently produced an object with no `name` attribute.)
        """
        # (name, is_counted_by_entity) for every supported combination.
        # Note that the discussion contribution metric counts total discussion
        # contributions, not the number of discussions contributed to, hence
        # is_counted_by_entity is False for it.
        supported_types = {
            (PROBLEM, ATTEMPTED): ('problems_attempted', True),
            (PROBLEM, COMPLETED): ('problems_completed', True),
            (VIDEO, VIEWED): ('videos_viewed', True),
            (DISCUSSION, CONTRIBUTED): ('discussion_contributions', False),
        }
        try:
            self.name, self.is_counted_by_entity = supported_types[(entity_type, event_type)]
        except KeyError:
            raise ValueError(
                'No display name found for entity type "{entity_type}" and event type "{event_type}"'.format(
                    entity_type=entity_type,
                    event_type=event_type,
                )
            )
# Default page size for paginated learner list endpoints.
LEARNER_API_DEFAULT_LIST_PAGE_SIZE = 25
# Valid learner segment names accepted by the roster query API.
SEGMENTS = ["highly_engaged", "disengaging", "struggling", "inactive", "unenrolled"]
from elasticsearch import Elasticsearch
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from analytics_data_api.management.utils import elasticsearch_settings_defined
class Command(BaseCommand):
    help = 'Creates Elasticsearch indices used by the Analytics Data API.'

    def handle(self, *args, **options):
        """
        Creates the learner roster index and the roster-update marker index,
        skipping any index that already exists.

        Raises:
            CommandError: if any of the required elasticsearch settings are
                undefined.
        """
        if not elasticsearch_settings_defined():
            raise CommandError(
                'You must define settings.ELASTICSEARCH_LEARNERS_HOST, '
                'settings.ELASTICSEARCH_LEARNERS_INDEX, and settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX'
            )
        es = Elasticsearch([settings.ELASTICSEARCH_LEARNERS_HOST])
        self._create_index(es, settings.ELASTICSEARCH_LEARNERS_INDEX, {
            'roster_entry': {
                'properties': {
                    'name': {
                        'type': 'string'
                    },
                    'username': {
                        'type': 'string', 'index': 'not_analyzed'
                    },
                    'email': {
                        'type': 'string', 'index': 'not_analyzed', 'doc_values': True
                    },
                    'course_id': {
                        'type': 'string', 'index': 'not_analyzed'
                    },
                    'enrollment_mode': {
                        'type': 'string', 'index': 'not_analyzed', 'doc_values': True
                    },
                    'segments': {
                        'type': 'string'
                    },
                    'cohort': {
                        'type': 'string', 'index': 'not_analyzed', 'doc_values': True
                    },
                    'discussion_contributions': {
                        'type': 'integer', 'doc_values': True
                    },
                    'problems_attempted': {
                        'type': 'integer', 'doc_values': True
                    },
                    'problems_completed': {
                        'type': 'integer', 'doc_values': True
                    },
                    'problem_attempts_per_completed': {
                        'type': 'float', 'doc_values': True
                    },
                    'attempt_ratio_order': {
                        'type': 'integer', 'doc_values': True
                    },
                    'videos_viewed': {
                        'type': 'integer', 'doc_values': True
                    },
                    'enrollment_date': {
                        'type': 'date', 'doc_values': True
                    },
                }
            }
        })
        self._create_index(es, settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX, {
            'marker': {
                'properties': {
                    'date': {
                        'type': 'date', 'doc_values': True
                    },
                    'target_index': {
                        'type': 'string'
                    },
                }
            }
        })

    def _create_index(self, es, index, mappings):
        """
        Creates `index` with the given `mappings` unless it already exists, in
        which case a message is written to stderr instead.

        Arguments:
            es (Elasticsearch): client connected to the learners cluster
            index (str): name of the index to create
            mappings (dict): doc_type -> mapping definition for the index
        """
        if es.indices.exists(index):
            self.stderr.write('"{}" index already exists.'.format(index))
        else:
            es.indices.create(index=index, body={'mappings': mappings})
from elasticsearch import Elasticsearch
from django.conf import settings
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    help = 'Removes Elasticsearch indices used by the Analytics Data API'

    def handle(self, *args, **options):
        """
        Deletes the learner roster index and the roster-update marker index,
        skipping any index that does not exist.
        """
        es = Elasticsearch([settings.ELASTICSEARCH_LEARNERS_HOST])
        for index in [settings.ELASTICSEARCH_LEARNERS_INDEX, settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX]:
            # Fix: check the index actually being deleted. The previous code
            # always tested ELASTICSEARCH_LEARNERS_UPDATE_INDEX, which could
            # attempt to delete a missing index (raising an error) or skip
            # deleting an existing one.
            if es.indices.exists(index):
                es.indices.delete(index=index)
...@@ -8,7 +8,7 @@ import random ...@@ -8,7 +8,7 @@ import random
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.utils import timezone from django.utils import timezone
from analytics_data_api.v0 import models from analytics_data_api.v0 import models
from analytics_data_api.constants import engagement_entity_types, engagement_events
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -32,7 +32,11 @@ class Command(BaseCommand): ...@@ -32,7 +32,11 @@ class Command(BaseCommand):
help = 'Generate fake data' help = 'Generate fake data'
option_list = BaseCommand.option_list + ( option_list = BaseCommand.option_list + (
make_option('-n', '--num-weeks', action='store', type="int", dest='num_weeks', make_option('-n', '--num-weeks', action='store', type="int", dest='num_weeks',
help='"Number of weeks worth of data to generate.'), help='Number of weeks worth of data to generate.'),
make_option('-c', '--course_id', action='store', type='string', dest='course_id',
help='Course ID for which to generate fake data'),
make_option('-u', '--username', action='store', type='string', dest='username',
help='Username for which to generate fake data'),
) )
def generate_daily_data(self, course_id, start_date, end_date): def generate_daily_data(self, course_id, start_date, end_date):
...@@ -182,8 +186,48 @@ class Command(BaseCommand): ...@@ -182,8 +186,48 @@ class Command(BaseCommand):
users_at_start=users_at_start, users_at_start=users_at_start,
users_at_end=random.randint(100, users_at_start)) users_at_end=random.randint(100, users_at_start))
def generate_learner_engagement_data(self, course_id, username, start_date, end_date, max_value=100):
    """
    Regenerates fake per-learner module engagement rows.

    Deletes ALL existing ModuleEngagement rows, then, for each day in the
    range and each individual entity type/event pair, creates a random
    number of engagement rows for `username` in `course_id`.

    Arguments:
        course_id (str): course to generate data for
        username (str): learner to generate data for
        start_date (datetime.datetime): beginning of the date range
        end_date (datetime.datetime): end of the date range
        max_value (int): upper bound for the random event counts
    """
    logger.info("Deleting learner engagement module data...")
    models.ModuleEngagement.objects.all().delete()
    logger.info("Generating learner engagement module data...")
    current = start_date
    while current < end_date:
        # NOTE(review): `current` is advanced before any rows are created, so
        # no data is generated for `start_date` itself while data may be
        # generated for `end_date` — confirm this off-by-one is intended.
        current = current + datetime.timedelta(days=1)
        for entity_type in engagement_entity_types.INDIVIDUAL_TYPES:
            for event in engagement_events.EVENTS[entity_type]:
                num_events = random.randint(0, max_value)
                if num_events:
                    for _ in xrange(num_events):
                        # Python 2 integer division: count is in [0, max_value // 20].
                        count = random.randint(0, max_value / 20)
                        entity_id = 'an-id-{}-{}'.format(entity_type, event)
                        models.ModuleEngagement.objects.create(
                            course_id=course_id, username=username, date=current,
                            entity_type=entity_type, entity_id=entity_id, event=event, count=count)
    logger.info("Done!")
def generate_learner_engagement_range_data(self, course_id, start_date, end_date, max_value=100):
    """
    Regenerates fake engagement metric ranges for a course.

    Deletes ALL existing ModuleEngagementMetricRanges rows, then creates a
    random 'low' range [0, low_ceil) and 'high' range [high_floor, max_value)
    for each aggregate metric (e.g. 'problems_attempted').

    Arguments:
        course_id (str): course to generate ranges for
        start_date (datetime.datetime): start of the range period
        end_date (datetime.datetime): end of the range period
        max_value (int): upper bound of the 'high' range
    """
    logger.info("Deleting engagement range data...")
    models.ModuleEngagementMetricRanges.objects.all().delete()
    logger.info("Generating engagement range data...")
    for entity_type in engagement_entity_types.AGGREGATE_TYPES:
        for event in engagement_events.EVENTS[entity_type]:
            # Metric name matches the roster field naming, e.g. 'videos_viewed'.
            metric = '{0}_{1}'.format(entity_type, event)
            low_ceil = random.random() * max_value * 0.5
            models.ModuleEngagementMetricRanges.objects.create(
                course_id=course_id, start_date=start_date, end_date=end_date, metric=metric,
                range_type='low', low_value=0, high_value=low_ceil)
            # high_floor >= low_ceil, leaving a gap for the "middle" range.
            high_floor = random.random() * max_value * 0.5 + low_ceil
            models.ModuleEngagementMetricRanges.objects.create(
                course_id=course_id, start_date=start_date, end_date=end_date, metric=metric,
                range_type='high', low_value=high_floor, high_value=max_value)
def handle(self, *args, **options): def handle(self, *args, **options):
course_id = 'edX/DemoX/Demo_Course' course_id = options.get('course_id', 'edX/DemoX/Demo_Course')
username = options.get('username', 'ed_xavier')
video_id = '0fac49ba' video_id = '0fac49ba'
video_module_id = 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9' video_module_id = 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9'
start_date = datetime.datetime(year=2015, month=1, day=1, tzinfo=timezone.utc) start_date = datetime.datetime(year=2015, month=1, day=1, tzinfo=timezone.utc)
...@@ -199,3 +243,5 @@ class Command(BaseCommand): ...@@ -199,3 +243,5 @@ class Command(BaseCommand):
self.generate_daily_data(course_id, start_date, end_date) self.generate_daily_data(course_id, start_date, end_date)
self.generate_video_data(course_id, video_id, video_module_id) self.generate_video_data(course_id, video_id, video_module_id)
self.generate_video_timeline_data(video_id) self.generate_video_timeline_data(video_id)
self.generate_learner_engagement_data(course_id, username, start_date, end_date)
self.generate_learner_engagement_range_data(course_id, start_date, end_date)
from django.conf import settings
def elasticsearch_settings_defined():
    """
    Returns True only when every elasticsearch-related Django setting
    (host, learners index, and update index) is defined (not None).
    """
    required_settings = (
        settings.ELASTICSEARCH_LEARNERS_HOST,
        settings.ELASTICSEARCH_LEARNERS_INDEX,
        settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX
    )
    for value in required_settings:
        if value is None:
            return False
    return True
import datetime
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.core.management import call_command, CommandError from django.core.management import call_command, CommandError
from django.test import TestCase from django.test import TestCase
...@@ -6,7 +8,7 @@ from rest_framework.authtoken.models import Token ...@@ -6,7 +8,7 @@ from rest_framework.authtoken.models import Token
from analytics_data_api.constants.country import get_country, UNKNOWN_COUNTRY from analytics_data_api.constants.country import get_country, UNKNOWN_COUNTRY
from analytics_data_api.utils import delete_user_auth_token, set_user_auth_token from analytics_data_api.utils import date_range, delete_user_auth_token, set_user_auth_token
class UtilsTests(TestCase): class UtilsTests(TestCase):
...@@ -91,3 +93,30 @@ class CountryTests(TestCase): ...@@ -91,3 +93,30 @@ class CountryTests(TestCase):
# Return unknown country if code is invalid # Return unknown country if code is invalid
self.assertEqual(get_country('A1'), UNKNOWN_COUNTRY) self.assertEqual(get_country('A1'), UNKNOWN_COUNTRY)
self.assertEqual(get_country(None), UNKNOWN_COUNTRY) self.assertEqual(get_country(None), UNKNOWN_COUNTRY)
class DateRangeTests(TestCase):
    """Tests for analytics_data_api.utils.date_range."""

    def test_empty_range(self):
        # start == end yields nothing, since the end date is exclusive.
        single_day = datetime.datetime(2016, 1, 1)
        self.assertEqual(list(date_range(single_day, single_day)), [])

    def test_range_exclusive(self):
        # Only the start date is included; the end date is excluded.
        first = datetime.datetime(2016, 1, 1)
        second = datetime.datetime(2016, 1, 2)
        self.assertEqual(list(date_range(first, second)), [first])

    def test_delta_goes_past_end_date(self):
        # A delta larger than the range still yields the start date only.
        range_start = datetime.datetime(2016, 1, 1)
        range_end = datetime.datetime(2016, 1, 3)
        big_step = datetime.timedelta(days=5)
        self.assertEqual(list(date_range(range_start, range_end, big_step)), [range_start])

    def test_general_range(self):
        # Default one-day delta yields each day up to but excluding the end.
        begin = datetime.datetime(2016, 1, 1)
        finish = datetime.datetime(2016, 1, 5)
        expected = [datetime.datetime(2016, 1, day) for day in (1, 2, 3, 4)]
        self.assertEqual(list(date_range(begin, finish)), expected)
from collections import defaultdict import datetime
from importlib import import_module
from django.db.models import Q from django.db.models import Q
from rest_framework.authtoken.models import Token from rest_framework.authtoken.models import Token
from analytics_data_api.v0.models import ProblemResponseAnswerDistribution
def delete_user_auth_token(username): def delete_user_auth_token(username):
""" """
...@@ -47,49 +46,6 @@ def matching_tuple(answer): ...@@ -47,49 +46,6 @@ def matching_tuple(answer):
) )
def consolidate_answers(problem):
""" Attempt to consolidate erroneously randomized answers. """
answer_sets = defaultdict(list)
match_tuple_sets = defaultdict(set)
for answer in problem:
answer.consolidated_variant = False
answer_sets[answer.value_id].append(answer)
match_tuple_sets[answer.value_id].add(matching_tuple(answer))
# If a part has more than one unique tuple of matching fields, do not consolidate.
for _, match_tuple_set in match_tuple_sets.iteritems():
if len(match_tuple_set) > 1:
return problem
consolidated_answers = []
for _, answers in answer_sets.iteritems():
consolidated_answer = None
if len(answers) == 1:
consolidated_answers.append(answers[0])
continue
for answer in answers:
if consolidated_answer:
if isinstance(consolidated_answer, ProblemResponseAnswerDistribution):
consolidated_answer.count += answer.count
else:
consolidated_answer.first_response_count += answer.first_response_count
consolidated_answer.last_response_count += answer.last_response_count
else:
consolidated_answer = answer
consolidated_answer.variant = None
consolidated_answer.consolidated_variant = True
consolidated_answers.append(consolidated_answer)
return consolidated_answers
def dictfetchall(cursor): def dictfetchall(cursor):
"""Returns all rows from a cursor as a dict""" """Returns all rows from a cursor as a dict"""
...@@ -98,3 +54,33 @@ def dictfetchall(cursor): ...@@ -98,3 +54,33 @@ def dictfetchall(cursor):
dict(zip([col[0] for col in desc], row)) dict(zip([col[0] for col in desc], row))
for row in cursor.fetchall() for row in cursor.fetchall()
] ]
def load_fully_qualified_definition(definition):
    """
    Resolves a dotted path such as 'package.module.ClassName' to the object
    it names: everything before the final dot is imported as a module, and
    the final component is looked up on it.
    """
    module_path, _, attribute_name = definition.rpartition('.')
    return getattr(import_module(module_path), attribute_name)
def date_range(start_date, end_date, delta=datetime.timedelta(days=1)):
    """
    Generates the dates in the half-open range [start_date, end_date)
    (start_date inclusive, end_date exclusive). Successive dates differ by
    `delta`, which defaults to one day.

    Arguments:
        start_date (datetime.datetime): The start date of the range, inclusive
        end_date (datetime.datetime): The end date of the range, exclusive
        delta (datetime.timedelta): The change in time between dates in the
            range.

    Returns:
        Generator: A generator which iterates over all dates in the specified
            range.
    """
    next_date = start_date
    while True:
        # Stop as soon as the candidate reaches or passes the exclusive end.
        if next_date >= end_date:
            return
        yield next_date
        next_date = next_date + delta
# Tells Django to use ApiAppConfig, whose ready() hook configures the
# elasticsearch connection for this app.
default_app_config = 'analytics_data_api.v0.apps.ApiAppConfig'
from django.apps import AppConfig
from django.conf import settings
from elasticsearch_dsl import connections
from analytics_data_api.utils import load_fully_qualified_definition
class ApiAppConfig(AppConfig):
    """
    App config whose ready() hook creates the elasticsearch-dsl connection
    used by the learner API, when a host is configured.
    """

    name = 'analytics_data_api.v0'

    def ready(self):
        """
        Creates the default elasticsearch-dsl connection from the
        ELASTICSEARCH_* Django settings. Does nothing when no host is set.
        """
        super(ApiAppConfig, self).ready()
        if not settings.ELASTICSEARCH_LEARNERS_HOST:
            return
        params = {'hosts': [settings.ELASTICSEARCH_LEARNERS_HOST]}
        if settings.ELASTICSEARCH_CONNECTION_CLASS:
            params['connection_class'] = load_fully_qualified_definition(settings.ELASTICSEARCH_CONNECTION_CLASS)
            # aws settings (assumed relevant only with a custom connection
            # class such as BotoHttpConnection)
            params['aws_access_key_id'] = settings.ELASTICSEARCH_AWS_ACCESS_KEY_ID
            params['aws_secret_access_key'] = settings.ELASTICSEARCH_AWS_SECRET_ACCESS_KEY
            params['region'] = settings.ELASTICSEARCH_CONNECTION_DEFAULT_REGION
        # Remove 'None' values so that we don't overwrite defaults
        params = {key: value for key, value in params.items() if value is not None}
        connections.connections.create_connection(**params)
import json
import time
from boto.connection import AWSAuthConnection
from elasticsearch import Connection
class BotoHttpConnection(Connection):
    """
    Uses AWS configured connection to sign requests before they're sent to
    elasticsearch nodes.
    """

    # ESConnection instance used to sign and send requests; set in __init__.
    connection = None

    def __init__(self, host='localhost', port=443, aws_access_key_id=None, aws_secret_access_key=None,
                 region=None, **kwargs):
        """
        Builds an ESConnection for the given host/port and AWS credentials.

        Arguments:
            host (str): elasticsearch host
            port (int): elasticsearch port (443, i.e. HTTPS, by default)
            aws_access_key_id (str): AWS access key; optional
            aws_secret_access_key (str): AWS secret key; optional
            region (str): AWS region; optional
        """
        super(BotoHttpConnection, self).__init__(host=host, port=port, **kwargs)
        connection_params = {'host': host, 'port': port}
        # If not provided, boto will attempt to use default environment variables to fill
        # the access credentials.
        connection_params['aws_access_key_id'] = aws_access_key_id
        connection_params['aws_secret_access_key'] = aws_secret_access_key
        connection_params['region'] = region
        # Remove 'None' values so that we don't overwrite defaults
        connection_params = {key: val for key, val in connection_params.items() if val is not None}
        self.connection = ESConnection(**connection_params)

    # pylint: disable=unused-argument
    def perform_request(self, method, url, params=None, body=None, timeout=None, ignore=()):
        """
        Called when making requests elasticsearch. Requests are signed and
        http status, headers, and response is returned.

        Note: the "timeout" kwarg is ignored in this case. Boto manages the timeout
        and the default is 70 seconds.
        See: https://github.com/boto/boto/blob/develop/boto/connection.py#L533
        """
        # boto's make_request expects a string body; serialize anything else.
        if not isinstance(body, basestring):
            body = json.dumps(body)
        start = time.time()
        response = self.connection.make_request(method, url, params=params, data=body)
        duration = time.time() - start
        raw_data = response.read()
        # raise errors based on http status codes and let the client handle them
        if not (200 <= response.status < 300) and response.status not in ignore:
            self.log_request_fail(method, url, body, duration, response.status)
            self._raise_error(response.status, raw_data)
        self.log_request_success(method, url, url, body, response.status, raw_data, duration)
        return response.status, dict(response.getheaders()), raw_data
class ESConnection(AWSAuthConnection):
    """
    boto AWSAuthConnection subclass used to sign requests for an AWS hosted
    elasticsearch cluster.
    """

    def __init__(self, *args, **kwargs):
        aws_region = kwargs.pop('region', None)
        # Default to HTTPS unless the caller explicitly opts out.
        kwargs.setdefault('is_secure', True)
        super(ESConnection, self).__init__(*args, **kwargs)
        self.auth_region_name = aws_region
        self.auth_service_name = 'es'

    def _required_auth_capability(self):
        """
        Supplies the capabilities of the auth handler: sign requests to AWS
        using HMAC-4.
        """
        return ['hmac-v4']
import abc
class BaseError(Exception):
    """
    Base error.

    Abstract base class for API errors; concrete subclasses set `message`
    to a human-readable description.
    """

    __metaclass__ = abc.ABCMeta

    # Human-readable description of the error; populated by subclasses.
    message = None

    def __str__(self):
        return self.message
class LearnerNotFoundError(BaseError):
    """
    Raised when a learner cannot be found in a given course.

    Requires `course_id` and `username` keyword arguments.
    """

    def __init__(self, *args, **kwargs):
        missing_username = kwargs.pop('username')
        in_course_id = kwargs.pop('course_id')
        super(LearnerNotFoundError, self).__init__(*args, **kwargs)
        self.message = self.message_template.format(
            username=missing_username, course_id=in_course_id)

    @property
    def message_template(self):
        return 'Learner {username} not found for course {course_id}.'
class LearnerEngagementTimelineNotFoundError(BaseError):
    """
    Raised when no engagement timeline exists for a learner in a course.

    Requires `course_id` and `username` keyword arguments.
    """

    def __init__(self, *args, **kwargs):
        learner_username = kwargs.pop('username')
        for_course_id = kwargs.pop('course_id')
        super(LearnerEngagementTimelineNotFoundError, self).__init__(*args, **kwargs)
        self.message = self.message_template.format(
            username=learner_username, course_id=for_course_id)

    @property
    def message_template(self):
        return 'Learner {username} engagement timeline not found for course {course_id}.'
class CourseNotSpecifiedError(BaseError):
    """
    Raise if course not specified.
    """

    def __init__(self, *args, **kwargs):
        super(CourseNotSpecifiedError, self).__init__(*args, **kwargs)
        # Fixed message; this error takes no course-identifying arguments.
        self.message = 'Course id/key not specified.'
class CourseKeyMalformedError(BaseError):
    """
    Raised when a supplied course id/key cannot be parsed.

    Requires a `course_id` keyword argument.
    """

    def __init__(self, *args, **kwargs):
        malformed_id = kwargs.pop('course_id')
        super(CourseKeyMalformedError, self).__init__(*args, **kwargs)
        self.message = self.message_template.format(course_id=malformed_id)

    @property
    def message_template(self):
        return 'Course id/key {course_id} malformed.'
class ParameterValueError(BaseError):
    """Raise if multiple incompatible parameters were provided."""

    def __init__(self, message, *args, **kwargs):
        """
        Arguments:
            message (str): description of the invalid parameter value(s);
                exposed directly as this error's message.
        """
        super(ParameterValueError, self).__init__(*args, **kwargs)
        self.message = message
import abc
from django.http.response import JsonResponse
from rest_framework import status
from analytics_data_api.v0.exceptions import (
CourseKeyMalformedError,
CourseNotSpecifiedError,
LearnerEngagementTimelineNotFoundError,
LearnerNotFoundError,
ParameterValueError,
)
class BaseProcessErrorMiddleware(object):
    """
    Abstract middleware that converts one exception type into a JSON error
    response. Subclasses declare the exception to intercept (`error`), the
    API error code to report (`error_code`), and the HTTP status to return
    (`status_code`).
    """

    __metaclass__ = abc.ABCMeta

    @abc.abstractproperty
    def error(self):
        """ Error class to catch. """
        pass

    @abc.abstractproperty
    def error_code(self):
        """ Error code to return. """
        pass

    @abc.abstractproperty
    def status_code(self):
        """ HTTP status code to return. """
        pass

    def process_exception(self, _request, exception):
        """
        Returns a JSON error response when `exception` is of type
        `self.error`; returns None otherwise so other handlers may run.
        """
        if not isinstance(exception, self.error):
            return None
        return JsonResponse(
            {
                "error_code": self.error_code,
                "developer_message": str(exception)
            },
            status=self.status_code
        )
class LearnerNotFoundErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Raise 404 if learner not found.
    """

    @property
    def error(self):
        # Exception type translated into the error response.
        return LearnerNotFoundError

    @property
    def error_code(self):
        # Machine-readable code included in the JSON body.
        return 'no_learner_for_course'

    @property
    def status_code(self):
        return status.HTTP_404_NOT_FOUND
class LearnerEngagementTimelineNotFoundErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Raise 404 if learner engagement timeline not found.
    """

    @property
    def error(self):
        # Exception type translated into the error response.
        return LearnerEngagementTimelineNotFoundError

    @property
    def error_code(self):
        # Machine-readable code included in the JSON body.
        return 'no_learner_engagement_timeline'

    @property
    def status_code(self):
        return status.HTTP_404_NOT_FOUND
class CourseNotSpecifiedErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Raise 400 course not specified.
    """

    @property
    def error(self):
        # Exception type translated into the error response.
        return CourseNotSpecifiedError

    @property
    def error_code(self):
        # Machine-readable code included in the JSON body.
        return 'course_not_specified'

    @property
    def status_code(self):
        return status.HTTP_400_BAD_REQUEST
class CourseKeyMalformedErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Raise 400 if course key is malformed.
    """

    @property
    def error(self):
        # Exception type translated into the error response.
        return CourseKeyMalformedError

    @property
    def error_code(self):
        # Machine-readable code included in the JSON body.
        return 'course_key_malformed'

    @property
    def status_code(self):
        return status.HTTP_400_BAD_REQUEST
class ParameterValueErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Raise 400 if illegal parameter values are provided.
    """

    @property
    def error(self):
        # Exception type translated into the error response.
        return ParameterValueError

    @property
    def error_code(self):
        # Machine-readable code included in the JSON body.
        return 'illegal_parameter_values'

    @property
    def status_code(self):
        return status.HTTP_400_BAD_REQUEST
import datetime
from itertools import groupby
from django.conf import settings
from django.db import models from django.db import models
from django.db.models import Count, Sum
# some fields (e.g. Float, Integer) are dynamic and your IDE may highlight them as unavailable
from elasticsearch_dsl import Date, DocType, Float, Integer, Q, String # pylint: disable=no-name-in-module
from analytics_data_api.constants import country, genders from analytics_data_api.constants import country, genders, learner
from analytics_data_api.constants.engagement_types import EngagementType
from analytics_data_api.utils import date_range
class CourseActivityWeekly(models.Model): class CourseActivityWeekly(models.Model):
...@@ -206,3 +215,270 @@ class Video(BaseVideo): ...@@ -206,3 +215,270 @@ class Video(BaseVideo):
class Meta(BaseVideo.Meta): class Meta(BaseVideo.Meta):
db_table = 'video' db_table = 'video'
class RosterUpdate(DocType):
    """
    elasticsearch document marking when a roster index was last updated.
    """

    date = Date()

    # pylint: disable=old-style-class
    class Meta:
        index = settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX
        doc_type = 'marker'

    @classmethod
    def get_last_updated(cls):
        # NOTE(review): returns the full elasticsearch response for markers
        # whose target_index is the learners index, not a single date —
        # presumably callers read the date off the hits; verify against usage.
        return cls.search().query('term', target_index=settings.ELASTICSEARCH_LEARNERS_INDEX).execute()
class RosterEntry(DocType):
    """
    Elasticsearch document describing one learner's membership and engagement
    within a course: identity fields, cohort/segment membership, and
    per-metric engagement counts.
    """
    course_id = String()
    username = String()
    name = String()
    email = String()
    enrollment_mode = String()
    cohort = String()
    segments = String()  # segments is an array/list of strings
    problems_attempted = Integer()
    problems_completed = Integer()
    problem_attempts_per_completed = Float()
    # Useful for ordering problem_attempts_per_completed (because results can include null, which is
    # different from zero). attempt_ratio_order is equal to the number of problem attempts if
    # problem_attempts_per_completed is > 1 and set to -problem_attempts if
    # problem_attempts_per_completed = 1.
    attempt_ratio_order = Integer()
    discussion_contributions = Integer()
    # NOTE(review): the rest of this module (the sort options below, the
    # serializers, and the tests) consistently uses the name 'videos_viewed';
    # confirm whether this field should be named 'videos_viewed' as well.
    videos_watched = Integer()
    enrollment_date = Date()
    last_updated = Date()

    # pylint: disable=old-style-class
    class Meta:
        index = settings.ELASTICSEARCH_LEARNERS_INDEX
        doc_type = 'roster_entry'

    @classmethod
    def get_course_user(cls, course_id, username):
        """
        Execute a lookup of a single learner by course ID and username,
        returning the executed search response.
        """
        return cls.search().query('term', course_id=course_id).query(
            'term', username=username).execute()

    @classmethod
    def get_users_in_course(
            cls,
            course_id,
            segments=None,
            ignore_segments=None,
            cohort=None,
            enrollment_mode=None,
            text_search=None,
            sort_policies=None,
    ):
        """
        Construct a search query for all users in `course_id` and return
        the Search object.

        sort_policies is an array, where the first element is the primary sort.
        Elements in the array are dicts with fields: order_by (field to sort by)
        and sort_order (either 'asc' or 'desc'). Default to 'username' and 'asc'.

        Raises `ValueError` if both `segments` and `ignore_segments` are provided,
        or if any segment / sort option is unrecognized.

        Note: the search is built but NOT executed; callers execute it
        (e.g. via pagination).
        """
        if not sort_policies:
            sort_policies = [{
                'order_by': None,
                'sort_order': None
            }]
        # set default sort policy to 'username' and 'asc'
        for field, default in [('order_by', 'username'), ('sort_order', 'asc')]:
            if sort_policies[0][field] is None:
                sort_policies[0][field] = default

        # Error handling
        if segments and ignore_segments:
            raise ValueError('Cannot combine `segments` and `ignore_segments` parameters.')
        for segment in (segments or list()) + (ignore_segments or list()):
            if segment not in learner.SEGMENTS:
                raise ValueError("segments/ignore_segments value '{segment}' must be one of: ({segments})".format(
                    segment=segment, segments=', '.join(learner.SEGMENTS)
                ))
        order_by_options = (
            'username', 'email', 'discussion_contributions', 'problems_attempted', 'problems_completed',
            'problem_attempts_per_completed', 'attempt_ratio_order', 'videos_viewed'
        )
        sort_order_options = ('asc', 'desc')
        for sort_policy in sort_policies:
            if sort_policy['order_by'] not in order_by_options:
                raise ValueError("order_by value '{order_by}' must be one of: ({order_by_options})".format(
                    order_by=sort_policy['order_by'], order_by_options=', '.join(order_by_options)
                ))
            if sort_policy['sort_order'] not in sort_order_options:
                raise ValueError("sort_order value '{sort_order}' must be one of: ({sort_order_options})".format(
                    sort_order=sort_policy['sort_order'], sort_order_options=', '.join(sort_order_options)
                ))

        search = cls.search()
        # Replace the default query with a bool query scoped to the course;
        # later clauses are appended directly onto its `must` list.
        search.query = Q('bool', must=[Q('term', course_id=course_id)])

        # Filtering/Search
        if segments:
            # OR together the requested segments (match any of them).
            search.query.must.append(Q('bool', should=[Q('term', segments=segment) for segment in segments]))
        elif ignore_segments:
            # Exclude each ignored segment with a negated term query.
            for segment in ignore_segments:
                search = search.query(~Q('term', segments=segment))
        if cohort:
            search = search.query('term', cohort=cohort)
        if enrollment_mode:
            search = search.query('term', enrollment_mode=enrollment_mode)
        if text_search:
            search.query.must.append(Q('multi_match', query=text_search, fields=['name', 'username', 'email']))

        # construct the sort hierarchy
        search = search.sort(*[
            {
                sort_policy['order_by']: {
                    'order': sort_policy['sort_order'],
                    # ordering of missing fields
                    'missing': '_last' if sort_policy['sort_order'] == 'asc' else '_first'
                }
            }
            for sort_policy in sort_policies
        ])

        return search

    @classmethod
    def get_course_metadata(cls, course_id):
        """
        Returns the number of students belonging to particular cohorts,
        segments, and enrollment modes within a course. Returns data in the
        following format:

        {
            'cohorts': {
                <cohort_name>: <learner_count>
            },
            'segments': {
                <segment_name>: <learner_count>
            },
            'enrollment_modes': {
                <enrollment_mode_name>: <learner_count>
            }
        }
        """
        search = cls.search()
        search.query = Q('bool', must=[Q('term', course_id=course_id)])
        # One terms aggregation per facet; bucket counts become the values.
        search.aggs.bucket('enrollment_modes', 'terms', field='enrollment_mode')
        search.aggs.bucket('segments', 'terms', field='segments')
        search.aggs.bucket('cohorts', 'terms', field='cohort')
        response = search.execute()
        # Build up the map of aggregation name to count
        aggregations = {
            aggregation_name: {
                bucket.key: bucket.doc_count
                for bucket in response.aggregations[aggregation_name].buckets
            }
            for aggregation_name in response.aggregations
        }
        # Add default values of 0 for segments with no learners
        for segment in learner.SEGMENTS:
            if segment not in aggregations['segments']:
                aggregations['segments'][segment] = 0

        return aggregations
class ModuleEngagementTimelineManager(models.Manager):
    """
    Modifies the ModuleEngagement queryset to aggregate engagement data for
    the learner engagement timeline.
    """
    def get_timeline(self, course_id, username):
        """
        Return a list of per-day engagement dicts for the given learner in
        the given course, ordered by date.  Each dict has a ``date`` key plus
        one count per engagement type; days with no stored data between the
        first and last recorded days are filled in with zero counts.
        """
        # Aggregate stored rows per (date, entity_type, event): total_count is
        # the sum of interaction counts, distinct_entity_count the number of
        # distinct entities involved.
        queryset = ModuleEngagement.objects.all().filter(course_id=course_id, username=username) \
            .values('date', 'entity_type', 'event') \
            .annotate(total_count=Sum('count')) \
            .annotate(distinct_entity_count=Count('entity_id')) \
            .order_by('date')
        timelines = []

        # groupby relies on the order_by('date') above to group contiguous rows.
        for date, engagements in groupby(queryset, lambda x: (x['date'])):
            # Iterate over engagements for this day and create a single day with
            # engagement data.
            day = {
                u'date': date,
            }
            for engagement in engagements:
                engagement_type = EngagementType(engagement['entity_type'], engagement['event'])

                # Some metrics count distinct entities, others total interactions.
                if engagement_type.is_counted_by_entity:
                    count_delta = engagement['distinct_entity_count']
                else:
                    count_delta = engagement['total_count']

                day[engagement_type.name] = day.get(engagement_type.name, 0) + count_delta
            timelines.append(day)

        # Fill in dates that may be missing, since the result store doesn't
        # store empty engagement entries.
        full_timeline = []
        default_timeline_entry = {engagement_type: 0 for engagement_type in EngagementType.ALL_TYPES}
        for index, current_date in enumerate(timelines):
            full_timeline.append(current_date)
            try:
                next_date = timelines[index + 1]
            except IndexError:
                # Last recorded day: nothing to fill after it.
                continue
            one_day = datetime.timedelta(days=1)
            if next_date['date'] > current_date['date'] + one_day:
                # Insert an all-zero entry for each skipped day in the gap.
                full_timeline += [
                    dict(date=date, **default_timeline_entry)
                    for date in date_range(current_date['date'] + one_day, next_date['date'])
                ]

        return full_timeline
class ModuleEngagement(models.Model):
    """
    User interactions with entities within the courseware.

    Each row records how many times (``count``) a user performed one kind of
    interaction (``event``) with one entity (``entity_id``) on one day.
    """
    course_id = models.CharField(db_index=True, max_length=255)
    username = models.CharField(max_length=255)
    date = models.DateTimeField()
    # This will be one of "problem", "video" or "discussion"
    entity_type = models.CharField(max_length=255)
    # For problems this will be the usage key, for videos it will be the html encoded module ID,
    # for forums it will be the commentable_id
    entity_id = models.CharField(max_length=255)
    # A description of what interaction occurred, e.g. "contributed" or "viewed"
    event = models.CharField(max_length=255)
    # The number of times the user interacted with this entity in this way on this day.
    count = models.IntegerField()

    # Custom manager that aggregates rows into the per-day engagement timeline.
    objects = ModuleEngagementTimelineManager()

    class Meta(object):
        db_table = 'module_engagement'
class ModuleEngagementMetricRanges(models.Model):
    """
    Represents the low and high values for a module engagement entity and event pair,
    known as the metric. The range_type will either be high or low, bounded by
    low_value and high_value.
    """
    course_id = models.CharField(db_index=True, max_length=255)
    start_date = models.DateTimeField()
    # This is a left-closed interval. No data from the end_date is included in the analysis.
    end_date = models.DateTimeField()
    # e.g. "problem_attempted" -- the "<entity_type>_<event>" pair the range applies to.
    metric = models.CharField(max_length=50)
    # Either 'low' or 'high' -- presumably; confirm against the producing job.
    range_type = models.CharField(max_length=50)

    # Also a left-closed interval, so any metric whose value is equal to the high_value
    # is not included in this range.
    high_value = models.FloatField()
    low_value = models.FloatField()

    class Meta(object):
        db_table = 'module_engagement_metric_ranges'
from urlparse import urljoin
from django.conf import settings
from rest_framework import pagination, serializers

from analytics_data_api.constants import (
    engagement_entity_types,
    engagement_events,
    enrollment_modes,
    genders,
)
from analytics_data_api.v0 import models
...@@ -169,13 +175,16 @@ class SequentialOpenDistributionSerializer(ModelSerializerWithCreatedField): ...@@ -169,13 +175,16 @@ class SequentialOpenDistributionSerializer(ModelSerializerWithCreatedField):
) )
class DefaultIfNoneMixin(object):

    def default_if_none(self, value, default=0):
        return value if value is not None else default


class BaseCourseEnrollmentModelSerializer(DefaultIfNoneMixin, ModelSerializerWithCreatedField):
    date = serializers.DateField(format=settings.DATE_FORMAT)
class CourseEnrollmentDailySerializer(BaseCourseEnrollmentModelSerializer):
    """ Representation of course enrollment for a single day and course. """
...@@ -306,3 +315,150 @@ class VideoTimelineSerializer(ModelSerializerWithCreatedField): ...@@ -306,3 +315,150 @@ class VideoTimelineSerializer(ModelSerializerWithCreatedField):
'num_views', 'num_views',
'created' 'created'
) )
class LastUpdatedSerializer(serializers.Serializer):
    """Exposes an object's ``date`` attribute as a ``last_updated`` field."""
    last_updated = serializers.DateField(source='date', format=settings.DATE_FORMAT)
class LearnerSerializer(DefaultIfNoneMixin, serializers.Serializer):
    """
    Serializes a learner roster entry (identity, cohort/segments, and
    engagement totals) for the learner API.

    Note: the mixin is listed first, consistent with EngagementDaySerializer
    elsewhere in this module (conventional mixin-first MRO order).
    """
    username = serializers.CharField(source='username')
    enrollment_mode = serializers.CharField(source='enrollment_mode')
    name = serializers.CharField(source='name')
    account_url = serializers.SerializerMethodField('get_account_url')
    email = serializers.CharField(source='email')
    segments = serializers.Field(source='segments')
    engagements = serializers.SerializerMethodField('get_engagements')
    enrollment_date = serializers.DateField(source='enrollment_date', format=settings.DATE_FORMAT)
    cohort = serializers.CharField(source='cohort')

    def transform_segments(self, _obj, value):
        # Return an empty list instead of null/empty values.
        return value or []

    def transform_cohort(self, _obj, value):
        # Return None instead of an empty string.
        return value or None

    def get_account_url(self, obj):
        """
        Return the LMS account URL for this learner, or None when no base
        URL is configured.
        """
        if settings.LMS_USER_ACCOUNT_BASE_URL:
            return urljoin(settings.LMS_USER_ACCOUNT_BASE_URL, obj.username)
        else:
            return None

    def get_engagements(self, obj):
        """
        Add the engagement totals.
        """
        engagements = {}

        # Fill in these fields with 0 if values are not returned/found.
        default_if_none_fields = ['discussion_contributions', 'problems_attempted',
                                  'problems_completed', 'videos_viewed']
        for field in default_if_none_fields:
            engagements[field] = self.default_if_none(getattr(obj, field, None), 0)

        # preserve null values for problem attempts per completed
        engagements['problem_attempts_per_completed'] = getattr(obj, 'problem_attempts_per_completed', None)

        return engagements
class EdxPaginationSerializer(pagination.PaginationSerializer):
    """
    Adds values to the response according to edX REST API Conventions.
    """
    # Total results and total pages, read from the Django Paginator backing
    # the serialized page.
    count = serializers.Field(source='paginator.count')
    num_pages = serializers.Field(source='paginator.num_pages')
class ElasticsearchDSLSearchSerializer(EdxPaginationSerializer):
    """
    Pagination serializer that accepts a page whose object_list is an
    elasticsearch-dsl Search object rather than a Django queryset.
    """
    def __init__(self, *args, **kwargs):
        """Make sure that the elasticsearch query is executed."""
        # Because the elasticsearch-dsl search object has a different
        # API from the queryset object that's expected by the django
        # Paginator object, we have to manually execute the query.
        # Note that the `kwargs['instance']` is the Page object, and
        # `kwargs['instance'].object_list` is actually an
        # elasticsearch-dsl search object.
        kwargs['instance'].object_list = kwargs['instance'].object_list.execute()
        super(ElasticsearchDSLSearchSerializer, self).__init__(*args, **kwargs)
class EngagementDaySerializer(DefaultIfNoneMixin, serializers.Serializer):
    """
    Serializes a single day of a learner's engagement timeline, coercing
    missing metric values to zero.
    """
    date = serializers.DateField(format=settings.DATE_FORMAT)
    problems_attempted = serializers.IntegerField(required=True, default=0)
    problems_completed = serializers.IntegerField(required=True, default=0)
    discussion_contributions = serializers.IntegerField(required=True, default=0)
    videos_viewed = serializers.IntegerField(required=True, default=0)

    def transform_problems_attempted(self, _obj, value):
        # A null count serializes as 0 (the mixin's default fill value).
        return self.default_if_none(value)

    def transform_problems_completed(self, _obj, value):
        # A null count serializes as 0.
        return self.default_if_none(value)

    def transform_discussion_contributions(self, _obj, value):
        # A null count serializes as 0.
        return self.default_if_none(value)

    def transform_videos_viewed(self, _obj, value):
        # A null count serializes as 0.
        return self.default_if_none(value)
class DateRangeSerializer(serializers.Serializer):
    """Exposes an object's ``start_date``/``end_date`` as ``start``/``end``."""
    start = serializers.DateTimeField(source='start_date', format=settings.DATE_FORMAT)
    end = serializers.DateTimeField(source='end_date', format=settings.DATE_FORMAT)
class EnagementRangeMetricSerializer(serializers.Serializer):
    """
    Serializes ModuleEngagementMetricRanges (low_range and high_range) into
    the below_average, average, above_average ranges represented as arrays.
    """
    below_average = serializers.SerializerMethodField('get_below_average_range')
    average = serializers.SerializerMethodField('get_average_range')
    above_average = serializers.SerializerMethodField('get_above_average_range')

    def get_average_range(self, obj):
        """The middle band: top of the low range up to the bottom of the high range."""
        low_range = obj['low_range']
        high_range = obj['high_range']
        lower_bound = low_range.high_value if low_range else None
        upper_bound = high_range.low_value if high_range else None
        return [lower_bound, upper_bound]

    def get_below_average_range(self, obj):
        """The low band as a [low_value, high_value] pair."""
        return self._get_range(obj['low_range'])

    def get_above_average_range(self, obj):
        """The high band as a [low_value, high_value] pair."""
        return self._get_range(obj['high_range'])

    def _get_range(self, metric_range):
        # A missing range serializes as the unbounded pair [None, None].
        if metric_range:
            return [metric_range.low_value, metric_range.high_value]
        return [None, None]
class CourseLearnerMetadataSerializer(serializers.Serializer):
    """
    Serializes course-level learner metadata: per-cohort/segment/enrollment
    mode learner counts (from elasticsearch) plus engagement metric ranges.
    """
    enrollment_modes = serializers.Field(source='es_data.enrollment_modes')
    segments = serializers.Field(source='es_data.segments')
    cohorts = serializers.Field(source='es_data.cohorts')
    engagement_ranges = serializers.SerializerMethodField('get_engagement_ranges')

    def get_engagement_ranges(self, obj):
        """
        Build a dict with the overall date_range plus one
        "<entity_type>_<event>" entry per metric, each serialized via
        EnagementRangeMetricSerializer.
        """
        query_set = obj['engagement_ranges']
        # Use the first row (if any) for the overall date range.
        engagement_ranges = {
            'date_range': DateRangeSerializer(query_set[0] if len(query_set) else None).data
        }

        # go through each entity and event type combination and fill in the ranges
        for entity_type in engagement_entity_types.AGGREGATE_TYPES:
            for event in engagement_events.EVENTS[entity_type]:
                metric = '{0}_{1}'.format(entity_type, event)
                low_range_queryset = query_set.filter(metric=metric, range_type='low')
                high_range_queryset = query_set.filter(metric=metric, range_type='high')
                engagement_ranges.update({
                    metric: EnagementRangeMetricSerializer({
                        'low_range': low_range_queryset[0] if len(low_range_queryset) else None,
                        'high_range': high_range_queryset[0] if len(high_range_queryset) else None,
                    }).data
                })

        return engagement_ranges
import socket
from django.test import TestCase
from elasticsearch.exceptions import ElasticsearchException
from mock import patch
from analytics_data_api.v0.connections import BotoHttpConnection, ESConnection
class ESConnectionTests(TestCase):
    """Tests for ESConnection construction, request signing, and timeout handling."""

    def test_constructor_params(self):
        """Keyword arguments should be stored on the connection object."""
        connection = ESConnection('mockservice.cc-zone-1.amazonaws.com',
                                  aws_access_key_id='access_key',
                                  aws_secret_access_key='secret',
                                  region='region_123')
        self.assertEqual(connection.auth_region_name, 'region_123')
        self.assertEqual(connection.aws_access_key_id, 'access_key')
        self.assertEqual(connection.aws_secret_access_key, 'secret')

    def test_signing(self):
        """Authorizing a request should add an AWS4-HMAC-SHA256 Authorization header."""
        connection = ESConnection('mockservice.cc-zone-1.amazonaws.com',
                                  aws_access_key_id='my_access_key',
                                  aws_secret_access_key='secret',
                                  region='region_123')
        # create a request and sign it
        request = connection.build_base_http_request('GET', '/', None)
        request.authorize(connection)

        # confirm the header contains signing method and key id
        auth_header = request.headers['Authorization']
        self.assertTrue('AWS4-HMAC-SHA256' in auth_header)
        self.assertTrue('my_access_key' in auth_header)

    def test_timeout(self):
        """A socket timeout while connecting should surface as socket.error."""
        def fake_connection(_address):
            raise socket.timeout('fake error')

        # Patch socket.create_connection for this test only.  The previous
        # implementation replaced it globally and never restored it, leaking
        # the fake into every test that ran afterwards in the same process.
        original_create_connection = socket.create_connection
        self.addCleanup(setattr, socket, 'create_connection', original_create_connection)
        socket.create_connection = fake_connection

        connection = ESConnection('mockservice.cc-zone-1.amazonaws.com',
                                  aws_access_key_id='access_key',
                                  aws_secret_access_key='secret',
                                  region='region_123')
        connection.num_retries = 0
        with self.assertRaises(socket.error):
            connection.make_request('GET', 'https://example.com')
class BotoHttpConnectionTests(TestCase):
    """Tests that BotoHttpConnection maps ESConnection responses/errors correctly."""

    @patch('analytics_data_api.v0.connections.ESConnection.make_request')
    def test_perform_request_success(self, mock_response):
        """A 2xx response is returned and logged at info level."""
        mock_response.return_value.status = 200
        connection = BotoHttpConnection(aws_access_key_id='access_key', aws_secret_access_key='secret')
        with patch('elasticsearch.connection.base.logger.info') as mock_logger:
            status, _header, _data = connection.perform_request('get', 'http://example.com')
            self.assertEqual(status, 200)
            self.assertGreater(mock_logger.call_count, 0)

    @patch('analytics_data_api.v0.connections.ESConnection.make_request')
    def test_perform_request_error(self, mock_response):
        """A 5xx response raises ElasticsearchException and is logged at debug level."""
        mock_response.return_value.status = 500
        connection = BotoHttpConnection(aws_access_key_id='access_key', aws_secret_access_key='secret')
        with self.assertRaises(ElasticsearchException):
            with patch('elasticsearch.connection.base.logger.debug') as mock_logger:
                connection.perform_request('get', 'http://example.com')
                self.assertGreater(mock_logger.call_count, 0)
import json
from opaque_keys.edx.keys import CourseKey
from rest_framework import status

DEMO_COURSE_ID = u'course-v1:edX+DemoX+Demo_2014'
...@@ -12,3 +15,24 @@ class DemoCourseMixin(object): ...@@ -12,3 +15,24 @@ class DemoCourseMixin(object):
        cls.course_id = DEMO_COURSE_ID
        cls.course_key = CourseKey.from_string(cls.course_id)
        super(DemoCourseMixin, cls).setUpClass()
class VerifyCourseIdMixin(object):
    """
    Mixin with assertions for API error responses about missing or malformed
    course IDs.  Must be mixed into a unittest TestCase.

    (Uses assertEqual rather than the deprecated assertEquals alias.)
    """

    def verify_no_course_id(self, response):
        """ Assert that a course ID must be provided. """
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        expected = {
            u"error_code": u"course_not_specified",
            u"developer_message": u"Course id/key not specified."
        }
        self.assertDictEqual(json.loads(response.content), expected)

    def verify_bad_course_id(self, response, course_id='malformed-course-id'):
        """ Assert that a course ID must be valid. """
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        expected = {
            u"error_code": u"course_key_malformed",
            u"developer_message": u"Course id/key {} malformed.".format(course_id)
        }
        self.assertDictEqual(json.loads(response.content), expected)
import datetime
import json
import ddt
from django.utils.http import urlquote
from django_dynamic_fixture import G
import pytz
from rest_framework import status
from analyticsdataserver.tests import TestCaseWithAuthentication
from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED
from analytics_data_api.v0 import models
from analytics_data_api.v0.tests.views import DemoCourseMixin, VerifyCourseIdMixin
@ddt.ddt
class EngagementTimelineTests(DemoCourseMixin, VerifyCourseIdMixin, TestCaseWithAuthentication):
    """Tests for the per-learner engagement timeline endpoint."""
    DEFAULT_USERNAME = 'ed_xavier'
    path_template = '/api/v0/engagement_timelines/{}/?course_id={}'

    def create_engagement(self, entity_type, event_type, entity_id, count, date=None):
        """Create a ModuleEngagement model"""
        # Default fixture date is 2015-01-01 UTC.
        if date is None:
            date = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
        G(
            models.ModuleEngagement,
            course_id=self.course_id,
            username=self.DEFAULT_USERNAME,
            date=date,
            entity_type=entity_type,
            entity_id=entity_id,
            event=event_type,
            count=count,
        )

    @ddt.data(
        (PROBLEM, ATTEMPTED, 'problems_attempted', True),
        (PROBLEM, COMPLETED, 'problems_completed', True),
        (VIDEO, VIEWED, 'videos_viewed', True),
        (DISCUSSION, CONTRIBUTED, 'discussion_contributions', False),
    )
    @ddt.unpack
    def test_metric_aggregation(self, entity_type, event_type, metric_display_name, expect_id_aggregation):
        """
        Verify that some metrics are counted by unique ID, while some are
        counted by total interactions.
        """
        # Two engagements with the same entity id: entity-counted metrics
        # should report 2 (two rows), interaction-counted metrics 10 (5 + 5).
        self.create_engagement(entity_type, event_type, 'entity-id', count=5)
        self.create_engagement(entity_type, event_type, 'entity-id', count=5)
        expected_data = {
            'days': [
                {
                    'date': '2015-01-01',
                    'discussion_contributions': 0,
                    'problems_attempted': 0,
                    'problems_completed': 0,
                    'videos_viewed': 0,
                }
            ]
        }
        if expect_id_aggregation:
            expected_data['days'][0][metric_display_name] = 2
        else:
            expected_data['days'][0][metric_display_name] = 10
        path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
        response = self.authenticated_get(path)
        self.assertEquals(response.status_code, 200)
        self.assertEquals(
            response.data,
            expected_data
        )

    def test_timeline(self):
        """
        Smoke test the learner engagement timeline.
        """
        path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
        day_one = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
        day_two = datetime.datetime(2015, 1, 2, tzinfo=pytz.utc)
        self.create_engagement(PROBLEM, ATTEMPTED, 'id-1', count=100, date=day_one)
        self.create_engagement(PROBLEM, COMPLETED, 'id-2', count=12, date=day_one)
        self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-3', count=6, date=day_one)
        self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-4', count=10, date=day_two)
        self.create_engagement(VIDEO, VIEWED, 'id-5', count=44, date=day_two)
        self.create_engagement(PROBLEM, ATTEMPTED, 'id-6', count=8, date=day_two)
        self.create_engagement(PROBLEM, ATTEMPTED, 'id-7', count=4, date=day_two)
        response = self.authenticated_get(path)
        self.assertEquals(response.status_code, 200)
        # Problem/video metrics count distinct entity ids per day;
        # discussion contributions count total interactions.
        expected = {
            'days': [
                {
                    'date': '2015-01-01',
                    'discussion_contributions': 6,
                    'problems_attempted': 1,
                    'problems_completed': 1,
                    'videos_viewed': 0
                },
                {
                    'date': '2015-01-02',
                    'discussion_contributions': 10,
                    'problems_attempted': 2,
                    'problems_completed': 0,
                    'videos_viewed': 1
                },
            ]
        }
        self.assertEquals(response.data, expected)

    def test_day_gap(self):
        """Days with no engagement between recorded days are zero-filled."""
        path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
        first_day = datetime.datetime(2015, 5, 26, tzinfo=pytz.utc)
        last_day = datetime.datetime(2015, 5, 28, tzinfo=pytz.utc)
        self.create_engagement(VIDEO, VIEWED, 'id-1', count=1, date=first_day)
        self.create_engagement(PROBLEM, ATTEMPTED, entity_id='id-2', count=1, date=last_day)
        response = self.authenticated_get(path)
        self.assertEquals(response.status_code, 200)
        expected = {
            'days': [
                {
                    'date': '2015-05-26',
                    'discussion_contributions': 0,
                    'problems_attempted': 0,
                    'problems_completed': 0,
                    'videos_viewed': 1
                },
                {
                    'date': '2015-05-27',
                    'discussion_contributions': 0,
                    'problems_attempted': 0,
                    'problems_completed': 0,
                    'videos_viewed': 0
                },
                {
                    'date': '2015-05-28',
                    'discussion_contributions': 0,
                    'problems_attempted': 1,
                    'problems_completed': 0,
                    'videos_viewed': 0
                },
            ]
        }
        self.assertEquals(response.data, expected)

    def test_not_found(self):
        """With no engagement data stored, the endpoint returns a 404 error body."""
        path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
        response = self.authenticated_get(path)
        self.assertEquals(response.status_code, status.HTTP_404_NOT_FOUND)
        expected = {
            u"error_code": u"no_learner_engagement_timeline",
            u"developer_message": u"Learner {} engagement timeline not found for course {}.".format(
                self.DEFAULT_USERNAME, self.course_id)
        }
        self.assertDictEqual(json.loads(response.content), expected)

    def test_no_course_id(self):
        """Omitting course_id yields the standard course_not_specified error."""
        base_path = '/api/v0/engagement_timelines/{}'
        response = self.authenticated_get((base_path).format('ed_xavier'))
        self.verify_no_course_id(response)

    def test_bad_course_id(self):
        """A malformed course_id yields the standard course_key_malformed error."""
        path = self.path_template.format(self.DEFAULT_USERNAME, 'malformed-course-id')
        response = self.authenticated_get(path)
        self.verify_bad_course_id(response)
import copy
import datetime
from itertools import groupby
import json
from urllib import urlencode
import ddt
from django_dynamic_fixture import G
from elasticsearch import Elasticsearch
from mock import patch, Mock
import pytz
from rest_framework import status
from django.conf import settings
from django.core import management
from analyticsdataserver.tests import TestCaseWithAuthentication
from analytics_data_api.constants import engagement_entity_types, engagement_events
from analytics_data_api.v0.models import ModuleEngagementMetricRanges
from analytics_data_api.v0.tests.views import DemoCourseMixin, VerifyCourseIdMixin
class LearnerAPITestMixin(object):
    """Manages an elasticsearch index for testing the learner API."""

    def setUp(self):
        """Creates the index and defines a mapping."""
        super(LearnerAPITestMixin, self).setUp()
        self._es = Elasticsearch([settings.ELASTICSEARCH_LEARNERS_HOST])
        management.call_command('create_elasticsearch_learners_indices')
        # Ensure the indices are removed even if the test fails.
        self.addCleanup(lambda: management.call_command('delete_elasticsearch_learners_indices'))

    def _create_learner(
            self,
            username,
            course_id,
            name=None,
            email=None,
            enrollment_mode='honor',
            segments=None,
            cohort='Team edX',
            discussion_contributions=0,
            problems_attempted=0,
            problems_completed=0,
            problem_attempts_per_completed=None,
            attempt_ratio_order=0,
            videos_viewed=0,
            enrollment_date='2015-01-28',
    ):
        """Create a single learner roster entry in the elasticsearch index."""
        body = {
            'username': username,
            'course_id': course_id,
            'name': name if name is not None else username,
            'email': email if email is not None else '{}@example.com'.format(username),
            'enrollment_mode': enrollment_mode,
            'discussion_contributions': discussion_contributions,
            'problems_attempted': problems_attempted,
            'problems_completed': problems_completed,
            'attempt_ratio_order': attempt_ratio_order,
            'videos_viewed': videos_viewed,
            'enrollment_date': enrollment_date,
        }

        # leave null fields from being stored in the index. Otherwise, they will have
        # an explicit null value and we want to test for the case when they're not returned
        # NOTE(review): this truthiness check also skips falsy non-null values
        # (e.g. 0 or '') -- confirm that is intended.
        optional_fields = [('segments', segments), ('cohort', cohort),
                           ('problem_attempts_per_completed', problem_attempts_per_completed)]
        for optional_field in optional_fields:
            if optional_field[1]:
                body[optional_field[0]] = optional_field[1]

        self._es.create(
            index=settings.ELASTICSEARCH_LEARNERS_INDEX,
            doc_type='roster_entry',
            body=body
        )

    def create_learners(self, learners):
        """
        Creates multiple learner roster entries.  `learners` is a list of
        dicts, each representing a learner which must at least contain
        the keys 'username' and 'course_id'.  Other learner fields can
        be provided as additional keys in the dict - see the mapping
        defined in `setUp`.
        """
        for learner in learners:
            self._create_learner(**learner)
        # Refresh so the documents are immediately searchable.
        self._es.indices.refresh(index=settings.ELASTICSEARCH_LEARNERS_INDEX)

    def create_update_index(self, date=None):
        """
        Created an index with the date of when the learner index was updated.
        """
        self._es.create(
            index=settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX,
            doc_type='marker',
            body={
                'date': date,
                'target_index': settings.ELASTICSEARCH_LEARNERS_INDEX,
            }
        )
        self._es.indices.refresh(index=settings.ELASTICSEARCH_LEARNERS_UPDATE_INDEX)
@ddt.ddt
class LearnerTests(VerifyCourseIdMixin, LearnerAPITestMixin, TestCaseWithAuthentication):
    """Tests for the single learner endpoint."""
    path_template = '/api/v0/learners/{}/?course_id={}'

    @ddt.data(
        # Fully-populated learner, and one with only the required fields
        # (optional engagement values default to None).
        ('ed_xavier', 'Edward Xavier', 'edX/DemoX/Demo_Course', 'honor', ['has_potential'], 'Team edX',
         43, 3, 6, 0, 8.4, 2, '2015-04-24', '2015-08-05'),
        ('ed_xavier', 'Edward Xavier', 'edX/DemoX/Demo_Course', 'verified'),
    )
    @ddt.unpack
    def test_get_user(self, username, name, course_id, enrollment_mode, segments=None, cohort=None,
                      problems_attempted=None, problems_completed=None, videos_viewed=None,
                      discussion_contributions=None, problem_attempts_per_completed=None,
                      attempt_ratio_order=None, enrollment_date=None, last_updated=None):
        """The endpoint returns the full learner representation, with missing counts coerced to 0."""
        self.create_learners([{
            "username": username,
            "name": name,
            "course_id": course_id,
            "enrollment_mode": enrollment_mode,
            "segments": segments,
            "cohort": cohort,
            "problems_attempted": problems_attempted,
            "problems_completed": problems_completed,
            "videos_viewed": videos_viewed,
            "discussion_contributions": discussion_contributions,
            "problem_attempts_per_completed": problem_attempts_per_completed,
            "attempt_ratio_order": attempt_ratio_order,
            "enrollment_date": enrollment_date,
        }])
        self.create_update_index(last_updated)

        response = self.authenticated_get(self.path_template.format(username, course_id))
        self.assertEquals(response.status_code, 200)

        expected = {
            "username": username,
            "enrollment_mode": enrollment_mode,
            "name": name,
            "email": "{}@example.com".format(username),
            "account_url": "http://lms-host/{}".format(username),
            "segments": segments or [],
            "cohort": cohort,
            "engagements": {
                "problems_attempted": problems_attempted or 0,
                "problems_completed": problems_completed or 0,
                "videos_viewed": videos_viewed or 0,
                "discussion_contributions": discussion_contributions or 0,
                "problem_attempts_per_completed": problem_attempts_per_completed,
            },
            "enrollment_date": enrollment_date,
            "last_updated": last_updated,
        }
        self.assertDictEqual(expected, response.data)

    @patch('analytics_data_api.v0.models.RosterEntry.get_course_user', Mock(return_value=[]))
    def test_not_found(self):
        """An unknown learner returns a 404 with the no_learner_for_course error body."""
        user_name = 'a_user'
        course_id = 'edX/DemoX/Demo_Course'
        response = self.authenticated_get(self.path_template.format(user_name, course_id))
        self.assertEquals(response.status_code, status.HTTP_404_NOT_FOUND)
        expected = {
            u"error_code": u"no_learner_for_course",
            u"developer_message": u"Learner a_user not found for course edX/DemoX/Demo_Course."
        }
        self.assertDictEqual(json.loads(response.content), expected)

    def test_no_course_id(self):
        """Omitting course_id yields the standard course_not_specified error."""
        base_path = '/api/v0/learners/{}'
        response = self.authenticated_get((base_path).format('ed_xavier'))
        self.verify_no_course_id(response)

    def test_bad_course_id(self):
        """A malformed course_id yields the standard course_key_malformed error."""
        path = self.path_template.format('ed_xavier', 'malformed-course-id')
        response = self.authenticated_get(path)
        self.verify_bad_course_id(response)
@ddt.ddt
class LearnerListTests(LearnerAPITestMixin, VerifyCourseIdMixin, TestCaseWithAuthentication):
"""Tests for the learner list endpoint."""
def setUp(self):
super(LearnerListTests, self).setUp()
self.course_id = 'edX/DemoX/Demo_Course'
self.create_update_index('2015-09-28')
def _get(self, course_id, **query_params):
"""Helper to send a GET request to the API."""
query_params['course_id'] = course_id
return self.authenticated_get('/api/v0/learners/', query_params)
def assert_learners_returned(self, response, expected_learners):
"""
Verify that the learners in the response match the expected
learners, in order. Each learner in `expected_learners` is a
dictionary subset of the expected returned representation. If
`expected_learners` is None, assert that no learners were
returned.
"""
self.assertEqual(response.status_code, 200)
payload = json.loads(response.content)
returned_learners = payload['results']
if expected_learners is None:
self.assertEqual(returned_learners, list())
else:
self.assertEqual(len(expected_learners), len(returned_learners))
for expected_learner, returned_learner in zip(expected_learners, returned_learners):
self.assertDictContainsSubset(expected_learner, returned_learner)
def test_all_learners(self):
usernames = ['dan', 'dennis', 'victor', 'olga', 'gabe', 'brian', 'alison']
self.create_learners([{'username': username, 'course_id': self.course_id} for username in usernames])
response = self._get(self.course_id)
# Default ordering is by username
self.assert_learners_returned(response, [{'username': username} for username in sorted(usernames)])
def test_course_id(self):
self.create_learners([
{'username': 'user_1', 'course_id': self.course_id},
{'username': 'user_2', 'course_id': 'other/course/id'}
])
response = self._get(self.course_id)
self.assert_learners_returned(response, [{'username': 'user_1'}])
def test_data(self):
self.create_learners([{
'username': 'user_1',
'course_id': self.course_id,
'enrollment_mode': 'honor',
'segments': ['a', 'b'],
'cohort': 'alpha',
"problems_attempted": 43,
"problems_completed": 3,
"videos_viewed": 6,
"discussion_contributions": 0,
"problem_attempts_per_completed": 23.14,
}])
response = self._get(self.course_id)
self.assert_learners_returned(response, [{
'username': 'user_1',
'enrollment_mode': 'honor',
'segments': ['a', 'b'],
'cohort': 'alpha',
"engagements": {
"problems_attempted": 43,
"problems_completed": 3,
"videos_viewed": 6,
"discussion_contributions": 0,
"problem_attempts_per_completed": 23.14,
},
'last_updated': '2015-09-28',
}])
    # Rows: (attribute_name, attribute_value, filter_key, filter_value, expect_learner).
    @ddt.data(
        ('segments', ['highly_engaged'], 'segments', 'highly_engaged', True),
        ('segments', ['highly_engaged', 'struggling'], 'segments', 'highly_engaged', True),
        ('segments', ['highly_engaged', 'struggling'], 'segments', 'struggling', True),
        ('segments', ['highly_engaged', 'struggling'], 'segments', 'highly_engaged,struggling', True),
        ('segments', ['highly_engaged', 'struggling'], 'segments', '', True),
        ('segments', ['highly_engaged', 'struggling'], 'segments', 'disengaging', False),
        ('segments', ['highly_engaged'], 'ignore_segments', 'highly_engaged', False),
        ('segments', ['highly_engaged', 'struggling'], 'ignore_segments', 'highly_engaged', False),
        ('segments', ['highly_engaged', 'struggling'], 'ignore_segments', 'struggling', False),
        ('segments', ['highly_engaged', 'struggling'], 'ignore_segments', 'highly_engaged,struggling', False),
        ('segments', ['highly_engaged', 'struggling'], 'ignore_segments', '', True),
        ('segments', ['highly_engaged', 'struggling'], 'ignore_segments', 'disengaging', True),
        ('cohort', 'a', 'cohort', 'a', True),
        ('cohort', 'a', 'cohort', '', True),
        ('cohort', 'a', 'cohort', 'b', False),
        ('enrollment_mode', 'a', 'enrollment_mode', 'a', True),
        ('enrollment_mode', 'a', 'enrollment_mode', '', True),
        ('enrollment_mode', 'a', 'enrollment_mode', 'b', False),
        ('name', 'daniel', 'text_search', 'daniel', True),
        ('username', 'daniel', 'text_search', 'daniel', True),
        ('email', 'daniel@example.com', 'text_search', 'daniel@example.com', True),
        ('name', 'daniel', 'text_search', 'dan', False),
        ('email', 'daniel@example.com', 'text_search', 'alfred', False),
    )
    @ddt.unpack
    def test_filters(
            self,
            attribute_name,
            attribute_value,
            filter_key,
            filter_value,
            expect_learner
    ):
        """
        Tests filtering and searching logic. Sets up a single learner
        with a given attribute value, then makes a GET request to the
        API with the specified query parameter set to the specified
        value. If `expect_learner` is True, we assert that the user was
        returned, otherwise we assert that no users were returned.
        """
        learner = {'username': 'user', 'course_id': self.course_id}
        learner[attribute_name] = attribute_value
        self.create_learners([learner])
        # course_id is an input to learner creation but is not part of the
        # serialized learner representation, so drop it from the expectation.
        learner.pop('course_id')
        response = self._get(self.course_id, **{filter_key: filter_value})
        expected_learners = [learner] if expect_learner else None
        self.assert_learners_returned(response, expected_learners)
    # Rows: (learners, order_by, sort_order, expected_users). Passing None
    # for order_by/sort_order exercises the API defaults (the first two rows
    # show the default order is ascending username).
    @ddt.data(
        ([{'username': 'a'}, {'username': 'b'}], None, None, [{'username': 'a'}, {'username': 'b'}]),
        ([{'username': 'a'}, {'username': 'b'}], None, 'desc', [{'username': 'b'}, {'username': 'a'}]),
        ([{'username': 'a'}, {'username': 'b'}], 'username', 'desc', [{'username': 'b'}, {'username': 'a'}]),
        ([{'username': 'a'}, {'username': 'b'}], 'email', 'asc', [{'username': 'a'}, {'username': 'b'}]),
        ([{'username': 'a'}, {'username': 'b'}], 'email', 'desc', [{'username': 'b'}, {'username': 'a'}]),
        (
            [{'username': 'a', 'discussion_contributions': 0}, {'username': 'b', 'discussion_contributions': 1}],
            'discussion_contributions', 'asc', [{'username': 'a'}, {'username': 'b'}]
        ),
        (
            [{'username': 'a', 'discussion_contributions': 0}, {'username': 'b', 'discussion_contributions': 1}],
            'discussion_contributions', 'desc', [{'username': 'b'}, {'username': 'a'}]
        ),
        (
            [{'username': 'a', 'problems_attempted': 0}, {'username': 'b', 'problems_attempted': 1}],
            'problems_attempted', 'asc', [{'username': 'a'}, {'username': 'b'}]
        ),
        (
            [{'username': 'a', 'problems_attempted': 0}, {'username': 'b', 'problems_attempted': 1}],
            'problems_attempted', 'desc', [{'username': 'b'}, {'username': 'a'}]
        ),
        (
            [{'username': 'a', 'problems_completed': 0}, {'username': 'b', 'problems_completed': 1}],
            'problems_completed', 'asc', [{'username': 'a'}, {'username': 'b'}]
        ),
        (
            [{'username': 'a', 'problems_completed': 0}, {'username': 'b', 'problems_completed': 1}],
            'problems_completed', 'desc', [{'username': 'b'}, {'username': 'a'}]
        ),
        (
            [{'username': 'a', 'videos_viewed': 0}, {'username': 'b', 'videos_viewed': 1}],
            'videos_viewed', 'asc', [{'username': 'a'}, {'username': 'b'}]
        ),
        (
            [{'username': 'a', 'videos_viewed': 0}, {'username': 'b', 'videos_viewed': 1}],
            'videos_viewed', 'desc', [{'username': 'b'}, {'username': 'a'}]
        ),
        # The problem_attempts_per_completed rows include users without that
        # field, exercising the attempt_ratio_order secondary sort.
        (
            [{'username': 'a', 'problem_attempts_per_completed': 1.0, 'attempt_ratio_order': 1},
             {'username': 'b', 'problem_attempts_per_completed': 2.0, 'attempt_ratio_order': 10},
             {'username': 'c', 'problem_attempts_per_completed': 2.0, 'attempt_ratio_order': 2},
             {'username': 'd', 'attempt_ratio_order': 0},
             {'username': 'e', 'attempt_ratio_order': -10}],
            'problem_attempts_per_completed', 'asc', [
                {'username': 'a'}, {'username': 'b'}, {'username': 'c'}, {'username': 'd'}, {'username': 'e'}
            ]
        ),
        (
            [{'username': 'a', 'problem_attempts_per_completed': 1.0, 'attempt_ratio_order': 1},
             {'username': 'b', 'problem_attempts_per_completed': 2.0, 'attempt_ratio_order': 10},
             {'username': 'c', 'problem_attempts_per_completed': 2.0, 'attempt_ratio_order': 2},
             {'username': 'd', 'attempt_ratio_order': 0},
             {'username': 'e', 'attempt_ratio_order': -10}],
            'problem_attempts_per_completed', 'desc', [
                {'username': 'e'}, {'username': 'd'}, {'username': 'c'}, {'username': 'b'}, {'username': 'a'}]
        ),
    )
    @ddt.unpack
    def test_sort(self, learners, order_by, sort_order, expected_users):
        """
        Creates the given learners in the course, requests the learner list
        with the given ordering parameters (omitting either when None), and
        verifies the learners come back in the expected order.
        """
        for learner in learners:
            learner['course_id'] = self.course_id
        self.create_learners(learners)
        params = dict()
        if order_by:
            params['order_by'] = order_by
        if sort_order:
            params['sort_order'] = sort_order
        response = self._get(self.course_id, **params)
        self.assert_learners_returned(response, expected_users)
def test_pagination(self):
usernames = ['a', 'b', 'c', 'd', 'e']
expected_page_url_template = 'http://testserver/api/v0/learners/?' \
'{course_query}&page={page}&page_size={page_size}'
self.create_learners([{'username': username, 'course_id': self.course_id} for username in usernames])
response = self._get(self.course_id, page_size=2)
payload = json.loads(response.content)
self.assertDictContainsSubset(
{
'count': len(usernames),
'previous': None,
'next': expected_page_url_template.format(
course_query=urlencode({'course_id': self.course_id}), page=2, page_size=2
),
'num_pages': 3
},
payload
)
self.assert_learners_returned(response, [{'username': 'a'}, {'username': 'b'}])
response = self._get(self.course_id, page_size=2, page=3)
payload = json.loads(response.content)
self.assertDictContainsSubset(
{
'count': len(usernames),
'previous': expected_page_url_template.format(
course_query=urlencode({'course_id': self.course_id}), page=2, page_size=2
),
'next': None,
'num_pages': 3
},
payload
)
self.assert_learners_returned(response, [{'username': 'e'}])
    # Error cases
    # Rows: (query parameters, expected error_code in the JSON body).
    @ddt.data(
        ({}, 'course_not_specified'),
        ({'course_id': ''}, 'course_not_specified'),
        ({'course_id': 'bad_course_id'}, 'course_key_malformed'),
        ({'course_id': 'edX/DemoX/Demo_Course', 'segments': 'a', 'ignore_segments': 'b'}, 'illegal_parameter_values'),
        ({'course_id': 'edX/DemoX/Demo_Course', 'order_by': 'a_non_existent_field'}, 'illegal_parameter_values'),
        ({'course_id': 'edX/DemoX/Demo_Course', 'sort_order': 'bad_value'}, 'illegal_parameter_values'),
        ({'course_id': 'edX/DemoX/Demo_Course', 'page': -1}, 'illegal_parameter_values'),
        ({'course_id': 'edX/DemoX/Demo_Course', 'page': 0}, 'illegal_parameter_values'),
        ({'course_id': 'edX/DemoX/Demo_Course', 'page': 'bad_value'}, 'illegal_parameter_values'),
        ({'course_id': 'edX/DemoX/Demo_Course', 'page_size': 'bad_value'}, 'illegal_parameter_values'),
        ({'course_id': 'edX/DemoX/Demo_Course', 'page_size': 101}, 'illegal_parameter_values'),
        ({'course_id': 'edX/DemoX/Demo_Course', 'segments': 'a_non_existent_segment'}, 'illegal_parameter_values'),
        ({'course_id': 'edX/DemoX/Demo_Course', 'ignore_segments': 'a_non_existent_segment'},
         'illegal_parameter_values'),
    )
    @ddt.unpack
    def test_bad_request(self, parameters, expected_error_code):
        """Each invalid querystring yields HTTP 400 with the expected error_code."""
        response = self.authenticated_get('/api/v0/learners/', parameters)
        self.assertEqual(response.status_code, 400)
        self.assertEqual(json.loads(response.content)['error_code'], expected_error_code)
@ddt.ddt
class CourseLearnerMetadataTests(DemoCourseMixin, VerifyCourseIdMixin,
                                 LearnerAPITestMixin, TestCaseWithAuthentication):
    """
    Tests for the course learner metadata endpoint.
    """
    def _get(self, course_id):
        """Helper to send a GET request to the API."""
        return self.authenticated_get('/api/v0/course_learner_metadata/{}/'.format(course_id))
    def get_expected_json(self, segments, enrollment_modes, cohorts):
        """
        Builds the complete expected response body: the fully-populated
        engagement ranges plus the given segment/enrollment-mode/cohort counts.
        """
        expected_json = self._get_full_engagement_ranges()
        expected_json['segments'] = segments
        expected_json['enrollment_modes'] = enrollment_modes
        expected_json['cohorts'] = cohorts
        return expected_json
    def assert_response_matches(self, response, expected_status_code, expected_data):
        """Asserts both the HTTP status and an exact JSON body match."""
        self.assertEqual(response.status_code, expected_status_code)
        self.assertDictEqual(json.loads(response.content), expected_data)
    def test_no_course_id(self):
        # The endpoint requires a course ID in the URL; without one the
        # route does not resolve.
        response = self.authenticated_get('/api/v0/course_learner_metadata/')
        self.assertEqual(response.status_code, 404)
    # Each dict maps segment name -> number of learners in that segment.
    @ddt.data(
        {},
        {'highly_engaged': 1},
        {'disengaging': 1},
        {'struggling': 1},
        {'inactive': 1},
        {'unenrolled': 1},
        {'highly_engaged': 3, 'disengaging': 1},
        {'disengaging': 10, 'inactive': 12},
        {'highly_engaged': 1, 'disengaging': 2, 'struggling': 3, 'inactive': 4, 'unenrolled': 5},
    )
    def test_segments_unique_learners(self, segments):
        """
        Tests segment counts when each learner belongs to at most one segment.
        """
        learners = [
            {'username': '{}_{}'.format(segment, i), 'course_id': self.course_id, 'segments': [segment]}
            for segment, count in segments.items()
            for i in xrange(count)
        ]
        self.create_learners(learners)
        # Segments absent from the parameter are expected to count zero.
        expected_segments = {"highly_engaged": 0, "disengaging": 0, "struggling": 0, "inactive": 0, "unenrolled": 0}
        expected_segments.update(segments)
        expected = self.get_expected_json(
            segments=expected_segments,
            enrollment_modes={'honor': len(learners)} if learners else {},
            cohorts={'Team edX': len(learners)} if learners else {},
        )
        self.assert_response_matches(self._get(self.course_id), 200, expected)
    def test_segments_same_learner(self):
        """
        Tests segment counts when each learner belongs to multiple segments.
        """
        self.create_learners([
            {'username': 'user_1', 'course_id': self.course_id, 'segments': ['struggling', 'disengaging']},
            {'username': 'user_2', 'course_id': self.course_id, 'segments': ['disengaging']}
        ])
        expected = self.get_expected_json(
            segments={'disengaging': 2, 'struggling': 1, 'highly_engaged': 0, 'inactive': 0, 'unenrolled': 0},
            enrollment_modes={'honor': 2},
            cohorts={'Team edX': 2},
        )
        self.assert_response_matches(self._get(self.course_id), 200, expected)
    @ddt.data(
        [],
        ['honor'],
        ['verified'],
        ['audit'],
        ['nonexistent-enrollment-tracks-still-show-up'],
        ['honor', 'verified', 'audit'],
        ['honor', 'honor', 'verified', 'verified', 'audit', 'audit'],
    )
    def test_enrollment_modes(self, enrollment_modes):
        """One learner is created per entry; duplicate modes should aggregate."""
        self.create_learners([
            {'username': 'user_{}'.format(i), 'course_id': self.course_id, 'enrollment_mode': enrollment_mode}
            for i, enrollment_mode in enumerate(enrollment_modes)
        ])
        expected_enrollment_modes = {}
        # NOTE(review): groupby only merges *adjacent* equal values, so the
        # test data keeps duplicate modes adjacent.
        for enrollment_mode, group in groupby(enrollment_modes):
            # can't call 'len' directly on a group object
            count = len([mode for mode in group])
            expected_enrollment_modes[enrollment_mode] = count
        expected = self.get_expected_json(
            segments={'disengaging': 0, 'struggling': 0, 'highly_engaged': 0, 'inactive': 0, 'unenrolled': 0},
            enrollment_modes=expected_enrollment_modes,
            cohorts={'Team edX': len(enrollment_modes)} if enrollment_modes else {},
        )
        self.assert_response_matches(self._get(self.course_id), 200, expected)
    @ddt.data(
        [],
        ['Yellow'],
        ['Blue'],
        ['Red', 'Red', 'yellow team', 'yellow team', 'green'],
    )
    def test_cohorts(self, cohorts):
        """One learner is created per entry; repeated cohort names aggregate."""
        self.create_learners([
            {'username': 'user_{}'.format(i), 'course_id': self.course_id, 'cohort': cohort}
            for i, cohort in enumerate(cohorts)
        ])
        # As above, duplicates in the test data are adjacent so groupby works.
        expected_cohorts = {
            cohort: len([mode for mode in group]) for cohort, group in groupby(cohorts)
        }
        expected = self.get_expected_json(
            segments={'disengaging': 0, 'struggling': 0, 'highly_engaged': 0, 'inactive': 0, 'unenrolled': 0},
            enrollment_modes={'honor': len(cohorts)} if cohorts else {},
            cohorts=expected_cohorts,
        )
        self.assert_response_matches(self._get(self.course_id), 200, expected)
    @property
    def empty_engagement_ranges(self):
        """ Returns the engagement ranges where all fields are set to None. """
        empty_engagement_ranges = {
            'engagement_ranges': {
                'date_range': {
                    'start': None,
                    'end': None
                }
            }
        }
        empty_range = {
            range_type: [None, None] for range_type in ['below_average', 'average', 'above_average']
        }
        for metric in self.engagement_metrics:
            # deepcopy so each metric gets an independent dict instance.
            empty_engagement_ranges['engagement_ranges'][metric] = copy.deepcopy(empty_range)
        return empty_engagement_ranges
    @property
    def engagement_metrics(self):
        """ Convenience method for getting the metric types. """
        # Metric names are '<entity_type>_<event>' for every aggregate
        # entity type and each of its events.
        metrics = []
        for entity_type in engagement_entity_types.AGGREGATE_TYPES:
            for event in engagement_events.EVENTS[entity_type]:
                metrics.append('{0}_{1}'.format(entity_type, event))
        return metrics
    def test_no_engagement_ranges(self):
        # With no ModuleEngagementMetricRanges rows, every range is None.
        response = self._get(self.course_id)
        self.assertEqual(response.status_code, 200)
        self.assertDictContainsSubset(self.empty_engagement_ranges, json.loads(response.content))
    def test_one_engagement_range(self):
        # A single 'high' range populates the average/above_average bounds
        # for that one metric; all other metrics stay None.
        metric_type = 'problems_completed'
        start_date = datetime.datetime(2015, 7, 1, tzinfo=pytz.utc)
        end_date = datetime.datetime(2015, 7, 21, tzinfo=pytz.utc)
        G(ModuleEngagementMetricRanges, course_id=self.course_id, start_date=start_date, end_date=end_date,
          metric=metric_type, range_type='high', low_value=90, high_value=6120)
        expected_ranges = self.empty_engagement_ranges
        expected_ranges['engagement_ranges'].update({
            'date_range': {
                'start': '2015-07-01',
                'end': '2015-07-21'
            },
            metric_type: {
                'below_average': [None, None],
                'average': [None, 90.0],
                'above_average': [90.0, 6120.0]
            }
        })
        response = self._get(self.course_id)
        self.assertEqual(response.status_code, 200)
        self.assertDictContainsSubset(expected_ranges, json.loads(response.content))
    def _get_full_engagement_ranges(self):
        """ Populates a full set of engagement ranges and returns the expected engagement ranges. """
        start_date = datetime.datetime(2015, 7, 1, tzinfo=pytz.utc)
        end_date = datetime.datetime(2015, 7, 21, tzinfo=pytz.utc)
        expected = {
            'engagement_ranges': {
                'date_range': {
                    'start': '2015-07-01',
                    'end': '2015-07-21'
                }
            }
        }
        max_value = 1000.0
        # One 'low' and one 'high' range per metric; 'average' is the gap
        # between them.
        for metric_type in self.engagement_metrics:
            low_ceil = 100.5
            G(ModuleEngagementMetricRanges, course_id=self.course_id, start_date=start_date, end_date=end_date,
              metric=metric_type, range_type='low', low_value=0, high_value=low_ceil)
            high_floor = 800.8
            G(ModuleEngagementMetricRanges, course_id=self.course_id, start_date=start_date, end_date=end_date,
              metric=metric_type, range_type='high', low_value=high_floor, high_value=max_value)
            expected['engagement_ranges'][metric_type] = {
                'below_average': [0.0, low_ceil],
                'average': [low_ceil, high_floor],
                'above_average': [high_floor, max_value]
            }
        return expected
    def test_engagement_ranges_only(self):
        # Fully-populated ranges are returned verbatim.
        expected = self._get_full_engagement_ranges()
        response = self._get(self.course_id)
        self.assertEqual(response.status_code, 200)
        self.assertDictContainsSubset(expected, json.loads(response.content))
...@@ -2,11 +2,14 @@ from django.conf.urls import patterns, url, include ...@@ -2,11 +2,14 @@ from django.conf.urls import patterns, url, include
from django.core.urlresolvers import reverse_lazy from django.core.urlresolvers import reverse_lazy
from django.views.generic import RedirectView from django.views.generic import RedirectView
COURSE_ID_PATTERN = r'(?P<course_id>[^/+]+[/+][^/+]+[/+][^/]+)'
urlpatterns = patterns( urlpatterns = patterns(
'', '',
url(r'^courses/', include('analytics_data_api.v0.urls.courses', namespace='courses')), url(r'^courses/', include('analytics_data_api.v0.urls.courses', namespace='courses')),
url(r'^problems/', include('analytics_data_api.v0.urls.problems', namespace='problems')), url(r'^problems/', include('analytics_data_api.v0.urls.problems', namespace='problems')),
url(r'^videos/', include('analytics_data_api.v0.urls.videos', namespace='videos')), url(r'^videos/', include('analytics_data_api.v0.urls.videos', namespace='videos')),
url('^', include('analytics_data_api.v0.urls.learners', namespace='learners')),
# pylint: disable=no-value-for-parameter # pylint: disable=no-value-for-parameter
url(r'^authenticated/$', RedirectView.as_view(url=reverse_lazy('authenticated')), name='authenticated'), url(r'^authenticated/$', RedirectView.as_view(url=reverse_lazy('authenticated')), name='authenticated'),
......
from django.conf.urls import patterns, url from django.conf.urls import patterns, url
from analytics_data_api.v0.urls import COURSE_ID_PATTERN
from analytics_data_api.v0.views import courses as views from analytics_data_api.v0.views import courses as views
COURSE_ID_PATTERN = r'(?P<course_id>[^/+]+[/+][^/+]+[/+][^/]+)'
COURSE_URLS = [ COURSE_URLS = [
('activity', views.CourseActivityWeeklyView, 'activity'), ('activity', views.CourseActivityWeeklyView, 'activity'),
('recent_activity', views.CourseActivityMostRecentWeekView, 'recent_activity'), ('recent_activity', views.CourseActivityMostRecentWeekView, 'recent_activity'),
......
from django.conf.urls import patterns, url
from analytics_data_api.v0.urls import COURSE_ID_PATTERN
from analytics_data_api.v0.views import learners as views
# Matches usernames made of word characters plus '.', '+', and '-'.
USERNAME_PATTERN = r'(?P<username>[\w.+-]+)'
# Learner analytics routes: learner list/detail, per-learner engagement
# timelines, and per-course learner metadata.
urlpatterns = patterns(
    '',
    url(r'^learners/$', views.LearnerListView.as_view(), name='learners'),
    url(r'^learners/{}/$'.format(USERNAME_PATTERN), views.LearnerView.as_view(), name='learner'),
    url(r'^engagement_timelines/{}/$'.format(USERNAME_PATTERN),
        views.EngagementTimelineView.as_view(), name='engagement_timelines'),
    url(r'^course_learner_metadata/{}/$'.format(COURSE_ID_PATTERN),
        views.CourseLearnerMetadata.as_view(), name='course_learner_metadata'),
)
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey
from analytics_data_api.v0.exceptions import (CourseNotSpecifiedError, CourseKeyMalformedError)
class CourseViewMixin(object):
    """
    Captures the course_id from the url and validates it.
    """
    # Set during GET from the URL kwarg or the `course_id` query parameter.
    course_id = None
    def get(self, request, *args, **kwargs):
        # Prefer the course_id captured from the URL; fall back to the
        # ?course_id= query parameter.
        self.course_id = self.kwargs.get('course_id', request.QUERY_PARAMS.get('course_id', None))
        if not self.course_id:
            raise CourseNotSpecifiedError()
        try:
            # Parsed only for validation; the CourseKey itself is discarded.
            CourseKey.from_string(self.course_id)
        except InvalidKeyError:
            raise CourseKeyMalformedError(course_id=self.course_id)
        return super(CourseViewMixin, self).get(request, *args, **kwargs)
"""
API methods for module level data.
"""
import logging
from rest_framework import generics, status
from analytics_data_api.constants import (
learner
)
from analytics_data_api.v0.exceptions import (
LearnerEngagementTimelineNotFoundError,
LearnerNotFoundError,
ParameterValueError,
)
from analytics_data_api.v0.models import (
ModuleEngagement,
ModuleEngagementMetricRanges,
RosterEntry,
RosterUpdate,
)
from analytics_data_api.v0.serializers import (
CourseLearnerMetadataSerializer,
ElasticsearchDSLSearchSerializer,
EngagementDaySerializer,
LastUpdatedSerializer,
LearnerSerializer,
)
from analytics_data_api.v0.views import CourseViewMixin
from analytics_data_api.v0.views.utils import split_query_argument
logger = logging.getLogger(__name__)
class LastUpdateMixin(object):
    """
    Mixin supplying the serialized `last_updated` date taken from the most
    recent RosterUpdate record.
    """

    @classmethod
    def get_last_updated(cls):
        """ Returns the serialized RosterUpdate last_updated field. """
        roster_update = RosterUpdate.get_last_updated()
        # Default to a null date so the serializer still emits the field
        # when no roster update record exists.
        last_updated = {'date': None}
        if len(roster_update) == 1:
            last_updated = roster_update[0]
        else:
            # `warning` instead of the deprecated `warn` alias.
            logger.warning('RosterUpdate not found.')
        return LastUpdatedSerializer(last_updated).data
class LearnerView(LastUpdateMixin, CourseViewMixin, generics.RetrieveAPIView):
    """
    Get data for a particular learner in a particular course.

    **Example Request**

        GET /api/v0/learners/{username}/?course_id={course_id}

    **Response Values**

        Returns metadata and engagement data for the learner in JSON format.

            * username: The username of the enrolled learner.
            * enrollment_mode: The learner's selected learning track (for
              example, "audit" or "verified").
            * name: The learner's full name.
            * email: The learner's email address.
            * segments: Classification, based on engagement, of this learner's
              work in this course (for example, "highly_engaged" or
              "struggling").
            * engagements: Summary of engagement events for a time span.
                * videos_viewed: Number of times any course video was played.
                * problems_completed: Number of unique problems the learner
                  answered correctly.
                * problems_attempted: Number of unique problems attempted.
                  This is a count of the individual problems the learner
                  tried. Each problem in a course can increment this count by
                  a maximum of 1.
                * discussion_contributions: Number of posts, responses, or
                  comments the learner contributed to course discussions.

    **Parameters**

        You can specify the course ID for which you want data.

        course_id -- The course identifier for which user data is requested.
            For example, edX/DemoX/Demo_Course.
    """
    serializer_class = LearnerSerializer
    username = None
    lookup_field = 'username'

    def get(self, request, *args, **kwargs):
        # Capture the username from the URL before the standard retrieve flow.
        self.username = self.kwargs.get('username')
        return super(LearnerView, self).get(request, *args, **kwargs)

    def retrieve(self, request, *args, **kwargs):
        """
        Adds the last_updated field to the result.
        """
        # Fix: forward *args/**kwargs unpacked. The original passed the args
        # tuple and kwargs dict as two positional arguments, collapsing both
        # into the parent's *args.
        response = super(LearnerView, self).retrieve(request, *args, **kwargs)
        response.data.update(self.get_last_updated())
        return response

    def get_queryset(self):
        # Roster lookup scoped to this course/user pair.
        return RosterEntry.get_course_user(self.course_id, self.username)

    def get_object(self, queryset=None):
        # `queryset` is accepted for interface compatibility but ignored.
        queryset = self.get_queryset()
        if len(queryset) == 1:
            return queryset[0]
        raise LearnerNotFoundError(username=self.username, course_id=self.course_id)
class LearnerListView(LastUpdateMixin, CourseViewMixin, generics.ListAPIView):
    """
    Get a paginated list of data for all learners in a course.

    **Example Request**

        GET /api/v0/learners/?course_id={course_id}

    **Response Values**

        Returns a paginated list of learner metadata and engagement data.

        Pagination data is returned in the top level of the returned JSON
        object.

            * count: The number of learners that match the query.
            * page: The current one-indexed page number.
            * next: A hyperlink to the next page if one exists, otherwise null.
            * previous: A hyperlink to the previous page if one exists,
              otherwise null.

        The 'results' key in the returned object maps to an array of
        learners that contains, at most, a full page's worth of learners. For
        each learner there is an object that contains the following keys.

            * username: The username of an enrolled learner.
            * enrollment_mode: The learner's selected learning track (for
              example, "audit" or "verified").
            * name: The learner's full name.
            * email: The learner's email address.
            * segments: Classification, based on engagement, of each learner's
              work in this course (for example, "highly_engaged" or
              "struggling").
            * engagements: Summary of engagement events for a time span.
                * videos_viewed: Number of times any course video was played.
                * problems_completed: Number of unique problems the learner
                  answered correctly.
                * problems_attempted: Number of unique problems attempted.
                  This is a count of the individual problems the learner
                  tried. Each problem in a course can increment this count by
                  a maximum of 1.
                * discussions_contributed: Number of posts, responses, or
                  comments the learner contributed to course discussions.

    **Parameters**

        You can filter the list of learners by course ID and by other
        parameters, including enrollment mode and text search. You can also
        control the page size and page number of the response, as well as sort
        the learners in the response.

        course_id -- The course identifier for which user data is requested.
            For example, edX/DemoX/Demo_Course.
        page -- The page of results that should be returned.
        page_size -- The maximum number of results to return per page.
        text_search -- An alphanumeric string that is used to search name,
            username, and email address values to find learners.
        segments -- A comma-separated list of segment names that is used
            to select learners. Only learners who are categorized in at least
            one of the segments are returned. Cannot be used in combination
            with the `ignore_segments` argument.
        ignore_segments -- A comma-separated list of segment names that is
            used to exclude learners. Only learners who are NOT categorized
            in any of the segments are returned. Cannot be used in combination
            with the `segments` argument.
        cohort -- The cohort to which all returned learners must
            belong.
        enrollment_mode -- The learning track to which all returned
            learners must belong.
        order_by -- The field for sorting the response. Defaults to 'username'.
        sort_order -- The sort direction. One of 'asc' (ascending) or 'desc'
            (descending). Defaults to 'asc'.
    """
    serializer_class = LearnerSerializer
    pagination_serializer_class = ElasticsearchDSLSearchSerializer
    paginate_by_param = 'page_size'
    paginate_by = learner.LEARNER_API_DEFAULT_LIST_PAGE_SIZE
    max_paginate_by = 100  # TODO -- tweak during load testing

    def _validate_query_params(self):
        """Validates the `page` and `page_size` querystring parameters."""
        query_params = self.request.QUERY_PARAMS
        page = query_params.get('page')
        if page:
            try:
                page = int(page)
            except ValueError:
                raise ParameterValueError('Page must be an integer')
            # Fix: this bounds check used to live in a `finally` block, so it
            # also ran after the ValueError above with `page` still a string
            # (a silent no-op on Python 2, a TypeError on Python 3). It now
            # runs only once `page` has been parsed successfully.
            if page < 1:
                raise ParameterValueError(
                    'Page numbers are one-indexed, therefore the page value must be greater than 0'
                )
        page_size = query_params.get('page_size')
        if page_size:
            try:
                page_size = int(page_size)
            except ValueError:
                raise ParameterValueError('Page size must be an integer')
            # Same fix as for `page`: check bounds only after a clean parse.
            if page_size > self.max_paginate_by or page_size < 1:
                raise ParameterValueError('Page size must be in the range [1, {}]'.format(self.max_paginate_by))

    def list(self, request, *args, **kwargs):
        """
        Adds the last_updated field to the results.
        """
        # Fix: forward *args/**kwargs unpacked; the original passed the
        # tuple and dict as two positional arguments.
        response = super(LearnerListView, self).list(request, *args, **kwargs)
        last_updated = self.get_last_updated()
        for result in response.data['results']:
            result.update(last_updated)
        return response

    def get_queryset(self):
        """
        Fetches the user list and last updated from elasticsearch, returned
        as an array of dicts with fields "learner" and "last_updated".
        """
        self._validate_query_params()
        query_params = self.request.QUERY_PARAMS
        order_by = query_params.get('order_by')
        sort_order = query_params.get('sort_order')
        sort_policies = [{
            'order_by': order_by,
            'sort_order': sort_order
        }]
        # Ordering by problem_attempts_per_completed can be ambiguous because
        # values could be infinite (e.g. divide by zero) if no problems were completed.
        # Instead, secondary sorting by attempt_ratio_order will produce a sensible ordering.
        if order_by == 'problem_attempts_per_completed':
            sort_policies.append({
                'order_by': 'attempt_ratio_order',
                'sort_order': 'asc' if sort_order == 'desc' else 'desc'
            })
        params = {
            'segments': split_query_argument(query_params.get('segments')),
            'ignore_segments': split_query_argument(query_params.get('ignore_segments')),
            'cohort': query_params.get('cohort'),
            'enrollment_mode': query_params.get('enrollment_mode'),
            'text_search': query_params.get('text_search'),
            'sort_policies': sort_policies,
        }
        # Remove None values from `params` so that we don't overwrite default
        # parameter values in `get_users_in_course`.
        params = {key: val for key, val in params.items() if val is not None}
        try:
            return RosterEntry.get_users_in_course(self.course_id, **params)
        except ValueError as e:
            raise ParameterValueError(e.message)
class EngagementTimelineView(CourseViewMixin, generics.ListAPIView):
    """
    Get a particular learner's engagement timeline for a particular course.
    Days without data are not returned.
    **Example Request**
    GET /api/v0/engagement_timeline/{username}/?course_id={course_id}
    **Response Values**
    Returns the engagement timeline in an array.
    * days: An array of the learner's daily engagement timeline.
    * problems_attempted: Number of unique problems attempted.
    This is a count of the individual problems the learner
    tried. Each problem in a course can increment this count by
    a maximum of 1.
    * problems_completed: Number of unique problems the learner
    answered correctly.
    * discussion_contributions: Number of times the learner
    contributed to course discussions through posts, responses,
    or comments.
    * videos_viewed: Number of times any course video was played.
    * problem_attempts_per_completed: Number of attempts per
    correctly answered problem. If no problems were answered
    correctly, null is returned.
    **Parameters**
    You can specify the course ID for which you want data.
    course_id -- The course identifier for which user data is requested.
    For example, edX/DemoX/Demo_Course.
    """
    serializer_class = EngagementDaySerializer
    username = None
    lookup_field = 'username'
    def list(self, request, *args, **kwargs):
        # Wrap the serialized array of days under a top-level 'days' key.
        response = super(EngagementTimelineView, self).list(request, *args, **kwargs)
        if response.status_code == status.HTTP_200_OK:
            response.data = {'days': response.data}
        return response
    def get(self, request, *args, **kwargs):
        # Capture the username from the URL before the standard list flow.
        self.username = self.kwargs.get('username')
        return super(EngagementTimelineView, self).get(request, *args, **kwargs)
    def get_queryset(self):
        queryset = ModuleEngagement.objects.get_timeline(self.course_id, self.username)
        # An empty timeline is reported as "not found" rather than an empty 200.
        if len(queryset) == 0:
            raise LearnerEngagementTimelineNotFoundError(username=self.username, course_id=self.course_id)
        return queryset
class CourseLearnerMetadata(CourseViewMixin, generics.RetrieveAPIView):
    """
    Get metadata about the learners in a course. Includes data on segments,
    cohorts, and enrollment modes. Also includes an engagement rubric.
    **Example Request**
    GET /api/v0/course_learner_metadata/{course_id}/
    **Response Values**
    Returns an object with the following keys.
    * cohorts: An object that maps the names of cohorts in the course
    to the number of learners belonging to those cohorts.
    * segments: An object that maps the names of segments in the course
    to the number of learners belonging to those segments. The
    current set of segments is "highly_engaged", "disengaging",
    "struggling", "inactive", and "unenrolled".
    * enrollment_modes: An object that maps the names of learning
    tracks in the course to the number of learners belonging to those
    tracks. Examples include "audit" and "verified".
    * engagement_ranges: An object containing ranges of learner
    engagement with the courseware. Each range has 'below_average',
    'average', and 'above_average' keys. These keys map to
    two-element arrays, in which the first element is the lower bound
    (inclusive) and the second element is the upper bound
    (exclusive). It has the following keys.
    * date_range: The time period to which this data applies.
    * problems_attempted: Engagement ranges for the number of
    unique problems tried in the date range.
    * problems_completed: Engagement ranges for the number of
    unique problems answered correctly in the date range.
    * problem_attempts_per_completed: Engagement ranges for the
    number of problem attempts per completed problem in the date
    range.
    * discussion_contributions: Engagement ranges for the number of
    times learners participated in discussions in the date range.
    """
    serializer_class = CourseLearnerMetadataSerializer
    def get_object(self, queryset=None):
        # `queryset` is accepted for interface compatibility but unused.
        # Because we're serializing data from both Elasticsearch and MySQL into
        # the same JSON object, we have to pass both sources of data in a dict
        # to our custom course metadata serializer.
        return {
            'es_data': RosterEntry.get_course_metadata(self.course_id),
            'engagement_ranges': ModuleEngagementMetricRanges.objects.filter(course_id=self.course_id)
        }
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
API methods for module level data. API methods for module level data.
""" """
from collections import defaultdict
from itertools import groupby from itertools import groupby
from django.db import OperationalError from django.db import OperationalError
...@@ -19,7 +20,7 @@ from analytics_data_api.v0.serializers import ( ...@@ -19,7 +20,7 @@ from analytics_data_api.v0.serializers import (
GradeDistributionSerializer, GradeDistributionSerializer,
SequentialOpenDistributionSerializer, SequentialOpenDistributionSerializer,
) )
from analytics_data_api.utils import consolidate_answers from analytics_data_api.utils import matching_tuple
class ProblemResponseAnswerDistributionView(generics.ListAPIView): class ProblemResponseAnswerDistributionView(generics.ListAPIView):
...@@ -55,6 +56,48 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView): ...@@ -55,6 +56,48 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView):
serializer_class = ConsolidatedAnswerDistributionSerializer serializer_class = ConsolidatedAnswerDistributionSerializer
allow_empty = False allow_empty = False
    @classmethod
    def consolidate_answers(cls, problem):
        """ Attempt to consolidate erroneously randomized answers. """
        # Bucket the answer rows by their value_id, and for each value_id
        # collect the set of "matching tuples" (the fields that must agree
        # for two variants to be considered the same answer).
        answer_sets = defaultdict(list)
        match_tuple_sets = defaultdict(set)

        for answer in problem:
            # Mark every row as non-consolidated by default; flipped below
            # only for rows that absorb other variants.
            answer.consolidated_variant = False
            answer_sets[answer.value_id].append(answer)
            match_tuple_sets[answer.value_id].add(matching_tuple(answer))

        # If a part has more than one unique tuple of matching fields, do not consolidate.
        # In that case the randomization was real, so return the rows untouched.
        for _, match_tuple_set in match_tuple_sets.iteritems():
            if len(match_tuple_set) > 1:
                return problem

        consolidated_answers = []

        for _, answers in answer_sets.iteritems():
            consolidated_answer = None

            # A value_id seen only once needs no consolidation.
            if len(answers) == 1:
                consolidated_answers.append(answers[0])
                continue

            for answer in answers:
                if consolidated_answer:
                    # Fold this variant's counts into the first row seen.
                    # ProblemResponseAnswerDistribution carries a single
                    # 'count'; the other (first/last response) distribution
                    # carries two counters — accumulate whichever applies.
                    if isinstance(consolidated_answer, ProblemResponseAnswerDistribution):
                        consolidated_answer.count += answer.count
                    else:
                        consolidated_answer.first_response_count += answer.first_response_count
                        consolidated_answer.last_response_count += answer.last_response_count
                else:
                    # First row for this value_id becomes the consolidated row:
                    # drop its variant marker and flag it as consolidated.
                    consolidated_answer = answer
                    consolidated_answer.variant = None
                    consolidated_answer.consolidated_variant = True

            consolidated_answers.append(consolidated_answer)

        # NOTE: .iteritems() makes this Python 2 only, matching the rest of
        # the file (Django 1.7 era).
        return consolidated_answers
def get_queryset(self): def get_queryset(self):
"""Select all the answer distribution response having to do with this usage of the problem.""" """Select all the answer distribution response having to do with this usage of the problem."""
problem_id = self.kwargs.get('problem_id') problem_id = self.kwargs.get('problem_id')
...@@ -69,7 +112,7 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView): ...@@ -69,7 +112,7 @@ class ProblemResponseAnswerDistributionView(generics.ListAPIView):
consolidated_rows = [] consolidated_rows = []
for _, part in groupby(queryset, lambda x: x.part_id): for _, part in groupby(queryset, lambda x: x.part_id):
consolidated_rows += consolidate_answers(list(part)) consolidated_rows += self.consolidate_answers(list(part))
return consolidated_rows return consolidated_rows
......
"""Utilities for view-level API logic."""
def split_query_argument(argument):
    """
    Splits a comma-separated querystring argument into a list.

    Returns None if the argument is empty.
    """
    # Guard clause: treat None and '' (and any other falsy value) uniformly.
    if not argument:
        return None
    return argument.split(',')
...@@ -54,9 +54,19 @@ DATABASES = { ...@@ -54,9 +54,19 @@ DATABASES = {
########## ELASTICSEARCH CONFIGURATION ########## ELASTICSEARCH CONFIGURATION
ELASTICSEARCH_LEARNERS_HOST = environ.get('ELASTICSEARCH_LEARNERS_HOST', None) ELASTICSEARCH_LEARNERS_HOST = environ.get('ELASTICSEARCH_LEARNERS_HOST', None)
ELASTICSEARCH_LEARNERS_INDEX = environ.get('ELASTICSEARCH_LEARNERS_INDEX', None) ELASTICSEARCH_LEARNERS_INDEX = environ.get('ELASTICSEARCH_LEARNERS_INDEX', None)
ELASTICSEARCH_LEARNERS_UPDATE_INDEX = environ.get('ELASTICSEARCH_LEARNERS_UPDATE_INDEX', None)
# access credentials for signing requests to AWS.
# For more information see http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html
ELASTICSEARCH_AWS_ACCESS_KEY_ID = None
ELASTICSEARCH_AWS_SECRET_ACCESS_KEY = None
# override the default elasticsearch connection class and useful for signing certificates
# e.g. 'analytics_data_api.v0.connections.BotoHttpConnection'
ELASTICSEARCH_CONNECTION_CLASS = None
# only needed with BotoHttpConnection, e.g. 'us-east-1'
ELASTICSEARCH_CONNECTION_DEFAULT_REGION = None
########## END ELASTICSEARCH CONFIGURATION ########## END ELASTICSEARCH CONFIGURATION
########## GENERAL CONFIGURATION ########## GENERAL CONFIGURATION
# See: https://docs.djangoproject.com/en/dev/ref/settings/#time-zone # See: https://docs.djangoproject.com/en/dev/ref/settings/#time-zone
TIME_ZONE = 'UTC' TIME_ZONE = 'UTC'
...@@ -165,6 +175,11 @@ MIDDLEWARE_CLASSES = ( ...@@ -165,6 +175,11 @@ MIDDLEWARE_CLASSES = (
'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware', 'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware',
'analytics_data_api.v0.middleware.LearnerEngagementTimelineNotFoundErrorMiddleware',
'analytics_data_api.v0.middleware.LearnerNotFoundErrorMiddleware',
'analytics_data_api.v0.middleware.CourseNotSpecifiedErrorMiddleware',
'analytics_data_api.v0.middleware.CourseKeyMalformedErrorMiddleware',
'analytics_data_api.v0.middleware.ParameterValueErrorMiddleware',
) )
########## END MIDDLEWARE CONFIGURATION ########## END MIDDLEWARE CONFIGURATION
...@@ -271,7 +286,11 @@ DATABASE_ROUTERS = ['analyticsdataserver.router.AnalyticsApiRouter'] ...@@ -271,7 +286,11 @@ DATABASE_ROUTERS = ['analyticsdataserver.router.AnalyticsApiRouter']
ENABLE_ADMIN_SITE = False ENABLE_ADMIN_SITE = False
# base url to generate link to user api
LMS_USER_ACCOUNT_BASE_URL = None
########## END ANALYTICS DATA API CONFIGURATION ########## END ANALYTICS DATA API CONFIGURATION
DATE_FORMAT = '%Y-%m-%d' DATE_FORMAT = '%Y-%m-%d'
DATETIME_FORMAT = '%Y-%m-%dT%H%M%S' DATETIME_FORMAT = '%Y-%m-%dT%H%M%S'
...@@ -19,10 +19,10 @@ DATABASES = { ...@@ -19,10 +19,10 @@ DATABASES = {
}, },
'analytics': { 'analytics': {
'ENGINE': 'django.db.backends.mysql', 'ENGINE': 'django.db.backends.mysql',
'NAME': 'analytics', 'NAME': 'reports_2_0',
'USER': 'root', 'USER': 'readonly001',
'PASSWORD': '', 'PASSWORD': 'meringues unfreehold sisterize morsing',
'HOST': '', 'HOST': 'stage-edx-analytics-report-rds.edx.org',
'PORT': '', 'PORT': '3306',
} }
} }
\ No newline at end of file
...@@ -18,4 +18,11 @@ INSTALLED_APPS += ( ...@@ -18,4 +18,11 @@ INSTALLED_APPS += (
'django_nose', 'django_nose',
) )
LMS_USER_ACCOUNT_BASE_URL = 'http://lms-host'
TEST_RUNNER = 'django_nose.NoseTestSuiteRunner' TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'
# Default elasticsearch port when running locally
ELASTICSEARCH_LEARNERS_HOST = 'http://localhost:9200/'
ELASTICSEARCH_LEARNERS_INDEX = 'roster_test'
ELASTICSEARCH_LEARNERS_UPDATE_INDEX = 'index_update_test'
boto==2.22.1 # MIT
Django==1.7.5 # BSD License Django==1.7.5 # BSD License
Markdown==2.6 # BSD
django-model-utils==2.2 # BSD django-model-utils==2.2 # BSD
djangorestframework==2.4.4 # BSD djangorestframework==2.4.4 # BSD
ipython==2.4.1 # BSD
django-rest-swagger==0.2.8 # BSD django-rest-swagger==0.2.8 # BSD
djangorestframework-csv==1.3.3 # BSD djangorestframework-csv==1.3.3 # BSD
django-countries==3.2 # MIT django-countries==3.2 # MIT
elasticsearch-dsl==0.0.9 # Apache 2.0
# markdown is used by swagger for rendering the api docs
Markdown==2.6 # BSD
-e git+https://github.com/edx/opaque-keys.git@d45d0bd8d64c69531be69178b9505b5d38806ce0#egg=opaque-keys -e git+https://github.com/edx/opaque-keys.git@d45d0bd8d64c69531be69178b9505b5d38806ce0#egg=opaque-keys
# Test dependencies go here. # Test dependencies go here.
-r base.txt -r base.txt
coverage==3.7.1 coverage==3.7.1
ddt==1.0.1
diff-cover >= 0.2.1 diff-cover >= 0.2.1
django-dynamic-fixture==1.8.1 django-dynamic-fixture==1.8.1
django-nose==1.4.1 django-nose==1.4.1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment