Commit 9fb50c73 by Dennis Jen Committed by Daniel Friedman

Implement learner course metadata endpoint

AN-6318

Authors:
- Dennis Jen
- Daniel Friedman
parent e7df0f3c
LEARNER_API_DEFAULT_LIST_PAGE_SIZE = 25
SEGMENTS = ["highly_engaged", "disengaging", "struggling", "inactive", "unenrolled"]
......@@ -182,7 +182,7 @@ class Command(BaseCommand):
users_at_start=users_at_start,
users_at_end=random.randint(100, users_at_start))
def generate_learner_engagement_data(self, course_id, username, start_date, end_date):
def generate_learner_engagement_data(self, course_id, username, start_date, end_date, max_value=100):
logger.info("Deleting learner engagement module data...")
models.ModuleEngagement.objects.all().delete()
......@@ -192,7 +192,7 @@ class Command(BaseCommand):
current = current + datetime.timedelta(days=1)
for entity_type in engagement_entity_types.INDIVIDUAL_TYPES:
for event in engagement_events.EVENTS[entity_type]:
count = random.randint(0, 100)
count = random.randint(0, max_value)
if count:
entity_id = 'an-id-{}-{}'.format(entity_type, event)
models.ModuleEngagement.objects.create(
......@@ -200,6 +200,25 @@ class Command(BaseCommand):
entity_type=entity_type, entity_id=entity_id, event=event, count=count)
logger.info("Done!")
def generate_learner_engagement_range_data(self, course_id, start_date, end_date, max_value=100):
    """
    Replace all stored engagement metric ranges with random fake data.

    Every existing ModuleEngagementMetricRanges row is deleted first. Then,
    for each aggregate entity type / event pair, one 'low' and one 'high'
    range row is created for the given course and date window:

    * low:  [0, low_ceiling), where low_ceiling is a random value in
      [0, max_value / 2)
    * high: [high_floor, max_value), where high_floor lies a random
      distance in [0, max_value / 2) above low_ceiling
    """
    range_manager = models.ModuleEngagementMetricRanges.objects

    logger.info("Deleting engagement range data...")
    range_manager.all().delete()

    logger.info("Generating engagement range data...")
    for entity_type in engagement_entity_types.AGGREGATE_TYPES:
        for event in engagement_events.EVENTS[entity_type]:
            # Attributes shared by the low and the high row of this metric.
            shared_fields = {
                'course_id': course_id,
                'start_date': start_date,
                'end_date': end_date,
                'metric': '{0}_{1}'.format(entity_type, event),
            }

            low_ceiling = random.random() * max_value * 0.5
            range_manager.create(range_type='low', low_value=0, high_value=low_ceiling, **shared_fields)

            # The high range always starts at or above the low ceiling.
            high_floor = random.random() * max_value * 0.5 + low_ceiling
            range_manager.create(range_type='high', low_value=high_floor, high_value=max_value, **shared_fields)
def handle(self, *args, **options):
course_id = 'edX/DemoX/Demo_Course'
video_id = '0fac49ba'
......@@ -218,3 +237,4 @@ class Command(BaseCommand):
self.generate_video_data(course_id, video_id, video_module_id)
self.generate_video_timeline_data(video_id)
self.generate_learner_engagement_data(course_id, 'ed_xavier', start_date, end_date)
self.generate_learner_engagement_range_data(course_id, start_date, end_date)
......@@ -5,7 +5,7 @@ from django.db import models
from django.db.models import Sum
from elasticsearch_dsl import DocType, Q
from analytics_data_api.constants import country, engagement_entity_types, genders
from analytics_data_api.constants import country, engagement_entity_types, genders, learner
class CourseActivityWeekly(models.Model):
......@@ -242,8 +242,26 @@ class RosterEntry(DocType):
the Search object. Raises `ValueError` if both `segments` and
`ignore_segments` are provided.
"""
# Error handling
if segments and ignore_segments:
raise ValueError('Cannot combine `segments` and `ignore_segments` parameters.')
for segment in (segments or list()) + (ignore_segments or list()):
if segment not in learner.SEGMENTS:
raise ValueError("segments/ignore_segments value '{segment}' must be one of: ({segments})".format(
segment=segment, segments=', '.join(learner.SEGMENTS)
))
order_by_options = (
'username', 'email', 'discussions_contributed', 'problems_attempted', 'problems_completed', 'videos_viewed'
)
sort_order_options = ('asc', 'desc')
if order_by not in order_by_options:
raise ValueError("order_by value '{order_by}' must be one of: ({order_by_options})".format(
order_by=order_by, order_by_options=', '.join(order_by_options)
))
if sort_order not in sort_order_options:
raise ValueError("sort_order value '{sort_order}' must be one of: ({sort_order_options})".format(
sort_order=sort_order, sort_order_options=', '.join(sort_order_options)
))
search = cls.search()
search.query = Q('bool', must=[Q('term', course_id=course_id)])
......@@ -263,19 +281,51 @@ class RosterEntry(DocType):
search.query.must.append(Q('multi_match', query=text_search, fields=['name', 'username', 'email']))
# Sorting
order_by_options = (
'username', 'email', 'discussions_contributed', 'problems_attempted', 'problems_completed', 'videos_viewed'
)
sort_order_options = ('asc', 'desc')
if order_by not in order_by_options:
raise ValueError('order_by value must be one of: {}'.format(', '.join(order_by_options)))
if sort_order not in sort_order_options:
raise ValueError('sort_order value must be one of: {}'.format(', '.join(sort_order_options)))
sort_term = order_by if sort_order == 'asc' else '-{}'.format(order_by)
search = search.sort(sort_term)
return search
@classmethod
def get_course_metadata(cls, course_id):
    """
    Count the learners of a course per segment and per enrollment mode.

    The counts come from Elasticsearch terms aggregations over the roster
    entries of `course_id`. The result has the following shape:

        {
            'segments': {
                <segment_name>: <learner_count>
            },
            'enrollment_modes': {
                <enrollment_mode_name>: <learner_count>
            }
        }

    Every segment listed in `learner.SEGMENTS` is present in 'segments';
    segments without learners are reported with a count of 0. A 'cohorts'
    mapping of the same form is planned but not produced yet (see the TODO
    below).
    """
    search = cls.search()
    search.query = Q('bool', must=[Q('term', course_id=course_id)])

    # (aggregation name, document field) pairs to bucket by.
    term_aggregations = (
        ('enrollment_modes', 'enrollment_mode'),
        ('segments', 'segments'),
    )
    for name, field in term_aggregations:
        search.aggs.bucket(name, 'terms', field=field)
    # TODO: enable during https://openedx.atlassian.net/browse/AN-6319
    # search.aggs.bucket('group_by_cohorts', 'terms', field='cohort')

    response = search.execute()

    # Flatten every aggregation into a {bucket key: document count} mapping.
    aggregations = {}
    for aggregation_name in response.aggregations:
        buckets = response.aggregations[aggregation_name].buckets
        aggregations[aggregation_name] = {bucket.key: bucket.doc_count for bucket in buckets}

    # Segments with no learners produce no bucket, so default them to 0.
    segment_counts = aggregations['segments']
    for segment in learner.SEGMENTS:
        segment_counts.setdefault(segment, 0)

    return aggregations
class ModuleEngagementTimelineManager(models.Manager):
"""
......@@ -326,3 +376,25 @@ class ModuleEngagement(models.Model):
class Meta(object):
db_table = 'module_engagement'
class ModuleEngagementMetricRanges(models.Model):
"""
Represents the low and high values for a module engagement entity and event pair,
known as the metric. The range_type will either be high or low, bounded by
low_value and high_value.

Each row describes a single range of a single metric for one course over the
window that starts at start_date and ends just before end_date.
"""
# Course the range applies to; indexed for lookups by course.
course_id = models.CharField(db_index=True, max_length=255)
# First instant covered by the analysis window.
start_date = models.DateTimeField()
# This is a left-closed interval. No data from the end_date is included in the analysis.
end_date = models.DateTimeField()
# Name of the metric, i.e. the entity and event pair (for example '<entity_type>_<event>').
metric = models.CharField(max_length=50)
# Which range of the metric this row describes: high or low.
range_type = models.CharField(max_length=50)
# Also a left-closed interval, so any metric whose value is equal to the high_value
# is not included in this range.
high_value = models.FloatField()
# Lower bound of the range; included in the range.
low_value = models.FloatField()
class Meta(object):
db_table = 'module_engagement_metric_ranges'
......@@ -388,3 +388,73 @@ class EngagementDaySerializer(DefaultIfNoneMixin, serializers.Serializer):
def transform_videos_viewed(self, _obj, value):
return self.default_if_none(value, 0)
class DateRangeSerializer(serializers.Serializer):
"""
Serializes an object's start_date and end_date attributes as the `start` and
`end` keys, both formatted with settings.DATE_FORMAT.
"""
start = serializers.DateTimeField(source='start_date', format=settings.DATE_FORMAT)
end = serializers.DateTimeField(source='end_date', format=settings.DATE_FORMAT)
class EnagementRangeMetricSerializer(serializers.Serializer):
    """
    Turns a pair of ModuleEngagementMetricRanges rows into three two-element
    arrays: below_average, average and above_average.

    The object being serialized is a dict with the keys 'low_range' and
    'high_range'; either value may be None when no such row exists, in which
    case the bounds that depend on it are reported as None.
    """
    below_average = serializers.SerializerMethodField('get_below_average_range')
    average = serializers.SerializerMethodField('get_average_range')
    above_average = serializers.SerializerMethodField('get_above_average_range')

    def get_average_range(self, obj):
        """The average range spans the gap between the low and the high range."""
        low_range = obj['low_range']
        high_range = obj['high_range']
        lower_bound = low_range.high_value if low_range else None
        upper_bound = high_range.low_value if high_range else None
        return [lower_bound, upper_bound]

    def get_below_average_range(self, obj):
        """The below-average range is the stored low range."""
        return self._get_range(obj['low_range'])

    def get_above_average_range(self, obj):
        """The above-average range is the stored high range."""
        return self._get_range(obj['high_range'])

    def _get_range(self, metric_range):
        """Return [low_value, high_value] of a range row, or [None, None] if it is missing."""
        if not metric_range:
            return [None, None]
        return [metric_range.low_value, metric_range.high_value]
class CourseLearnerMetadataSerializer(serializers.Serializer):
    """
    Serializes learner metadata for a course.

    The object being serialized is a dict with two keys:

    * 'es_data': a dict holding the 'enrollment_modes' and 'segments'
      count mappings.
    * 'engagement_ranges': an iterable (normally a queryset) of
      ModuleEngagementMetricRanges rows for the course.
    """
    enrollment_modes = serializers.SerializerMethodField('get_enrollment_modes')
    segments = serializers.SerializerMethodField('get_segments')
    # TODO: enable during https://openedx.atlassian.net/browse/AN-6319
    # cohorts = serializers.SerializerMethodField('get_cohorts')
    engagement_ranges = serializers.SerializerMethodField('get_engagement_ranges')

    def get_enrollment_modes(self, obj):
        """Return the mapping of enrollment mode name to learner count."""
        return obj['es_data']['enrollment_modes']

    def get_segments(self, obj):
        """Return the mapping of segment name to learner count."""
        return obj['es_data']['segments']

    # TODO: enable during https://openedx.atlassian.net/browse/AN-6319
    # def get_cohorts(self, obj):
    #     return obj['es_data']['cohorts']

    def get_engagement_ranges(self, obj):
        """
        Build the engagement ranges mapping.

        The result holds a 'date_range' entry, taken from the first range
        row (empty values when there are no rows), plus one entry per
        aggregate entity type / event metric with its below_average,
        average and above_average ranges.
        """
        # Evaluate the rows exactly once. Filtering the queryset again for
        # every metric would cost two extra database queries per metric.
        range_rows = list(obj['engagement_ranges'])

        engagement_ranges = {
            'date_range': DateRangeSerializer(range_rows[0] if range_rows else None).data
        }

        # Index rows by (metric, range_type). setdefault keeps the first row
        # seen for a key, mirroring the "take element 0" lookup semantics.
        rows_by_key = {}
        for row in range_rows:
            rows_by_key.setdefault((row.metric, row.range_type), row)

        # go through each entity and event type combination and fill in the ranges
        for entity_type in engagement_entity_types.AGGREGATE_TYPES:
            for event in engagement_events.EVENTS[entity_type]:
                metric = '{0}_{1}'.format(entity_type, event)
                engagement_ranges[metric] = EnagementRangeMetricSerializer({
                    'low_range': rows_by_key.get((metric, 'low')),
                    'high_range': rows_by_key.get((metric, 'high')),
                }).data

        return engagement_ranges
......@@ -2,16 +2,14 @@ from django.conf.urls import patterns, url, include
from django.core.urlresolvers import reverse_lazy
from django.views.generic import RedirectView
USERNAME_PATTERN = r'(?P<username>.+)'
COURSE_ID_PATTERN = r'(?P<course_id>[^/+]+[/+][^/+]+[/+][^/]+)'
urlpatterns = patterns(
'',
url(r'^courses/', include('analytics_data_api.v0.urls.courses', namespace='courses')),
url(r'^problems/', include('analytics_data_api.v0.urls.problems', namespace='problems')),
url(r'^videos/', include('analytics_data_api.v0.urls.videos', namespace='videos')),
url('^learners/', include('analytics_data_api.v0.urls.learners', namespace='learners')),
url(r'^engagement_timelines/', include('analytics_data_api.v0.urls.engagement_timelines',
namespace='engagement_timelines')),
url('^', include('analytics_data_api.v0.urls.learners', namespace='learners')),
# pylint: disable=no-value-for-parameter
url(r'^authenticated/$', RedirectView.as_view(url=reverse_lazy('authenticated')), name='authenticated'),
......
from django.conf.urls import patterns, url
from analytics_data_api.v0.urls import COURSE_ID_PATTERN
from analytics_data_api.v0.views import courses as views
COURSE_ID_PATTERN = r'(?P<course_id>[^/+]+[/+][^/+]+[/+][^/]+)'
COURSE_URLS = [
('activity', views.CourseActivityWeeklyView, 'activity'),
('recent_activity', views.CourseActivityMostRecentWeekView, 'recent_activity'),
......
from django.conf.urls import patterns, url
from analytics_data_api.v0.views import engagement_timelines as views
from analytics_data_api.v0.urls import USERNAME_PATTERN
# Routes of the engagement timeline API, matched relative to the prefix under
# which this module is included.
urlpatterns = patterns(
'',
# The path segment captured by USERNAME_PATTERN selects the learner.
url(r'^{}/$'.format(USERNAME_PATTERN), views.EngagementTimelineView.as_view(), name='engagement_timelines'),
)
from django.conf.urls import patterns, url
from analytics_data_api.v0.urls import COURSE_ID_PATTERN
from analytics_data_api.v0.views import learners as views
from analytics_data_api.v0.urls import USERNAME_PATTERN
USERNAME_PATTERN = r'(?P<username>[\w.+-]+)'
# Routes of the learner API, matched relative to the prefix under which this
# module is included.
urlpatterns = patterns(
'',
# Learner list and single-learner detail.
url(r'^$', views.LearnerListView.as_view(), name='learners'),
url(r'^{}/$'.format(USERNAME_PATTERN), views.LearnerView.as_view(), name='learner'),
url(r'^learners/$', views.LearnerListView.as_view(), name='learners'),
url(r'^learners/{}/$'.format(USERNAME_PATTERN), views.LearnerView.as_view(), name='learner'),
# Engagement timeline of the learner captured by USERNAME_PATTERN.
url(r'^engagement_timelines/{}/$'.format(USERNAME_PATTERN),
views.EngagementTimelineView.as_view(), name='engagement_timelines'),
# Learner metadata of the course captured by COURSE_ID_PATTERN.
url(r'^course_learner_metadata/{}/$'.format(COURSE_ID_PATTERN),
views.CourseLearnerMetadata.as_view(), name='course_learner_metadata'),
)
......@@ -6,13 +6,14 @@ from analytics_data_api.v0.exceptions import (CourseNotSpecifiedError, CourseKey
class CourseViewMixin(object):
"""
Captures the course_id query arg and validates it.
Captures the course_id from the url and validates it.
"""
course_id = None
def get(self, request, *args, **kwargs):
self.course_id = request.QUERY_PARAMS.get('course_id', None)
self.course_id = self.kwargs.get('course_id', request.QUERY_PARAMS.get('course_id', None))
if not self.course_id:
raise CourseNotSpecifiedError()
try:
......
"""
API methods for module level data.
"""
from rest_framework import generics, status
from analytics_data_api.v0.exceptions import LearnerEngagementTimelineNotFoundError
from analytics_data_api.v0.models import ModuleEngagement
from analytics_data_api.v0.serializers import EngagementDaySerializer
from analytics_data_api.v0.views import CourseViewMixin
class EngagementTimelineView(CourseViewMixin, generics.ListAPIView):
"""
Get a particular learner's engagement timeline for a particular course. Days
without data will not be returned.
**Example Request**
GET /api/v0/engagement_timelines/{username}/?course_id={course_id}
**Response Values**
Returns the engagement timeline.
* days: Array of the learner's daily engagement timeline.
* problems_attempted: Number of unique problems attempted.
* problems_completed: Unique number of problems completed.
* discussions_contributed: Number of discussions participated in (e.g. forum posts)
* videos_viewed: Number of videos watched.
**Parameters**
You can specify course ID for which you want data.
course_id -- The course within which user data is requested.
"""
serializer_class = EngagementDaySerializer
# Username captured from the URL; set in get() before the queryset is built.
username = None
lookup_field = 'username'
# Wrap a successful list response so the serialized days sit under a 'days' key.
def list(self, request, *args, **kwargs):
response = super(EngagementTimelineView, self).list(request, *args, **kwargs)
if response.status_code == status.HTTP_200_OK:
response.data = {'days': response.data}
return response
# Remember the username from the URL kwargs, then defer to the parent get().
def get(self, request, *args, **kwargs):
self.username = self.kwargs.get('username')
return super(EngagementTimelineView, self).get(request, *args, **kwargs)
# Return the learner's timelines for the course; raises
# LearnerEngagementTimelineNotFoundError when there are none.
def get_queryset(self):
queryset = ModuleEngagement.objects.get_timelines(self.course_id, self.username)
if len(queryset) == 0:
raise LearnerEngagementTimelineNotFoundError(username=self.username, course_id=self.course_id)
return queryset
"""
API methods for module level data.
"""
from rest_framework import generics
from rest_framework import generics, status
from analytics_data_api.constants import learner
from analytics_data_api.constants import (
learner
)
from analytics_data_api.v0.exceptions import (
LearnerEngagementTimelineNotFoundError,
LearnerNotFoundError,
ParameterValueError,
)
from analytics_data_api.v0.models import RosterEntry
from analytics_data_api.v0.serializers import ElasticsearchDSLSearchSerializer, LearnerSerializer
from analytics_data_api.v0.models import (
ModuleEngagement,
ModuleEngagementMetricRanges,
RosterEntry
)
from analytics_data_api.v0.serializers import (
CourseLearnerMetadataSerializer,
ElasticsearchDSLSearchSerializer,
EngagementDaySerializer,
LearnerSerializer,
)
from analytics_data_api.v0.views import CourseViewMixin
from analytics_data_api.v0.views.utils import split_query_argument
......@@ -179,3 +191,102 @@ class LearnerListView(CourseViewMixin, generics.ListAPIView):
return RosterEntry.get_users_in_course(self.course_id, **params)
except ValueError as e:
raise ParameterValueError(e.message)
class EngagementTimelineView(CourseViewMixin, generics.ListAPIView):
    """
    Get a particular learner's engagement timeline for a particular course. Days
    without data will not be returned.

    **Example Request**

        GET /api/v0/engagement_timelines/{username}/?course_id={course_id}

    **Response Values**

        Returns the engagement timeline.

            * days: Array of the learner's daily engagement timeline.
                * problems_attempted: Number of unique problems attempted.
                * problems_completed: Unique number of problems completed.
                * discussions_contributed: Number of discussions participated in (e.g. forum posts)
                * videos_viewed: Number of videos watched.

    **Parameters**

        You can specify course ID for which you want data.

        course_id -- The course within which user data is requested.

    """
    serializer_class = EngagementDaySerializer
    # Username captured from the URL; set in get() before the queryset is built.
    username = None
    lookup_field = 'username'

    def list(self, request, *args, **kwargs):
        """Wrap a successful list response so the days sit under a 'days' key."""
        response = super(EngagementTimelineView, self).list(request, *args, **kwargs)
        if response.status_code == status.HTTP_200_OK:
            response.data = {'days': response.data}
        return response

    def get(self, request, *args, **kwargs):
        """Remember the username from the URL kwargs, then defer to the parent get()."""
        self.username = self.kwargs.get('username')
        return super(EngagementTimelineView, self).get(request, *args, **kwargs)

    def get_queryset(self):
        """
        Return the learner's engagement timelines for the course.

        Raises LearnerEngagementTimelineNotFoundError when the learner has
        no timeline data in the course.
        """
        queryset = ModuleEngagement.objects.get_timelines(self.course_id, self.username)
        if not queryset:
            raise LearnerEngagementTimelineNotFoundError(username=self.username, course_id=self.course_id)
        return queryset
class CourseLearnerMetadata(CourseViewMixin, generics.RetrieveAPIView):
    """
    Get metadata on learners within a course. Includes data on segments,
    cohorts, enrollment modes, and an engagement rubric.

    **Example Request**

        GET /api/v0/course_learner_metadata/{course_id}/

    **Response Values**

        Returns a JSON object with the following keys:

            * cohorts: An object mapping the names of cohorts in the course to
              the number of students belonging to those cohorts.
            * segments: An object mapping the names of segments in the course
              to the number of students belonging to those segments. The
              current set of segments are: "highly_engaged", "disengaging",
              "struggling", "inactive", and "unenrolled".
            * enrollment_modes: An object mapping the names of enrollment modes
              in the course to the number of students belonging to those
              enrollment modes. Examples include "honor" and "verified".
            * engagement_ranges: An object containing ranges of learner
              engagement with the courseware. Each range has 'below_average',
              'average', and 'above_average' keys which map to two-element
              arrays of which the first element is the lower bound (inclusive)
              and the second element is the upper bound (exclusive). It has
              the following keys:
                * date_range: The time duration for which this data applies
                * problems_attempted: engagement ranges for the number of
                  problems attempted in the date range.
                * problems_completed: engagement ranges for the number of
                  problems completed in the date range.
                * problem_attempts_per_completed: engagement ranges for the
                  number of problem attempts per completed problem in the date
                  range.
                * discussions_contributed: engagement ranges for the number of
                  discussions contributed in the date range.

    """
    serializer_class = CourseLearnerMetadataSerializer

    def get_object(self, queryset=None):
        """
        Gather the data the metadata serializer needs for this course.

        The response combines learner counts from Elasticsearch with
        engagement ranges from the relational database, so both are handed
        to the serializer together in a single dict.
        """
        learner_counts = RosterEntry.get_course_metadata(self.course_id)
        metric_ranges = ModuleEngagementMetricRanges.objects.filter(course_id=self.course_id)
        return {
            'es_data': learner_counts,
            'engagement_ranges': metric_ranges,
        }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment