Commit 24ce9709 by Daniel Friedman

Fix engagement timeline metrics

Now distinguishes between metrics which are aggregated by unique entity
id (such as problems or videos) and those which simply add up the total
number of interactions, regardless of which module the interactions were performed on.
parent f907d94e
......@@ -6,9 +6,3 @@ INDIVIDUAL_TYPES = [DISCUSSION, PROBLEM, VIDEO]
PROBLEMS = 'problems'
VIDEOS = 'videos'
AGGREGATE_TYPES = [DISCUSSION, PROBLEMS, VIDEOS]
# useful for aggregating ModuleEngagement to ModuleEngagementTimeline
SINGULAR_TO_PLURAL = {
PROBLEM: PROBLEMS,
VIDEO: VIDEOS,
}
......@@ -2,12 +2,12 @@ from analytics_data_api.constants import engagement_entity_types
ATTEMPTED = 'attempted'
COMPLETED = 'completed'
CONTRIBUTIONS = 'contributions'
CONTRIBUTED = 'contributed'
VIEWED = 'viewed'
# map entity types to events
EVENTS = {
engagement_entity_types.DISCUSSION: [CONTRIBUTIONS],
engagement_entity_types.DISCUSSION: [CONTRIBUTED],
engagement_entity_types.PROBLEM: [ATTEMPTED, COMPLETED],
engagement_entity_types.PROBLEMS: [ATTEMPTED, COMPLETED],
engagement_entity_types.VIDEO: [VIEWED],
......
from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED
class EngagementType(object):
    """
    Encapsulates:
        - The API consumer-facing display name for engagement types
        - The internal question of whether the metric should be counted in terms
          of the entity type or the raw number of events.

    Attributes:
        name (str): the display name of the metric, e.g. 'problems_attempted'.
        is_counted_by_entity (bool): True when the metric is counted per
            entity interacted with, False when it is counted as the raw
            number of events.
    """

    def __init__(self, entity_type, event_type):
        """
        Initializes an EngagementType for a particular entity and event type.

        Arguments:
            entity_type (str): the type of module interacted with
            event_type (str): the type of interaction on that entity

        Raises:
            ValueError: if the (entity_type, event_type) pair has no metric.
                This covers unknown entity types as well as known entity
                types paired with an event they do not support, so a
                constructed instance always has both attributes set.
        """
        # (entity type, event type) -> (display name, counted by entity?)
        metrics = {
            (PROBLEM, ATTEMPTED): ('problems_attempted', True),
            (PROBLEM, COMPLETED): ('problems_completed', True),
            (VIDEO, VIEWED): ('videos_viewed', True),
            # Note that the discussion contribution metric counts
            # total discussion contributions, not number of
            # discussions contributed to.
            (DISCUSSION, CONTRIBUTED): ('discussion_contributions', False),
        }
        metric = metrics.get((entity_type, event_type))
        if metric is None:
            raise ValueError(
                'No display name found for entity type "{entity_type}" and event type "{event_type}"'.format(
                    entity_type=entity_type,
                    event_type=event_type,
                )
            )
        self.name, self.is_counted_by_entity = metric
......@@ -2,11 +2,12 @@ from itertools import groupby
from django.conf import settings
from django.db import models
from django.db.models import Sum
from django.db.models import Count, Sum
# some fields (e.g. Float, Integer) are dynamic and your IDE may highlight them as unavailable
from elasticsearch_dsl import Date, DocType, Float, Integer, Q, String
from elasticsearch_dsl import Date, DocType, Float, Integer, Q, String # pylint: disable=no-name-in-module
from analytics_data_api.constants import country, engagement_entity_types, genders, learner
from analytics_data_api.constants import country, genders, learner
from analytics_data_api.constants.engagement_types import EngagementType
class CourseActivityWeekly(models.Model):
......@@ -394,24 +395,28 @@ class ModuleEngagementTimelineManager(models.Manager):
def get_timelines(self, course_id, username):
queryset = ModuleEngagement.objects.all().filter(course_id=course_id, username=username) \
.values('date', 'entity_type', 'event') \
.annotate(count=Sum('count')) \
.annotate(total_count=Sum('count')) \
.annotate(distinct_entity_count=Count('entity_id')) \
.order_by('date')
timelines = []
for key, group in groupby(queryset, lambda x: (x['date'])):
# Iterate over groups and create a single item with engagement data
item = {
u'date': key,
for date, engagements in groupby(queryset, lambda x: (x['date'])):
# Iterate over engagements for this day and create a single day with
# engagement data.
day = {
u'date': date,
}
for engagement in group:
entity_type = engagement_entity_types.SINGULAR_TO_PLURAL.get(engagement['entity_type'],
engagement['entity_type'])
engagement_type = '{}_{}'.format(entity_type, engagement['event'])
count = item.get(engagement_type, 0)
count += engagement['count']
item[engagement_type] = count
timelines.append(item)
for engagement in engagements:
engagement_type = EngagementType(engagement['entity_type'], engagement['event'])
if engagement_type.is_counted_by_entity:
count_delta = engagement['distinct_entity_count']
else:
count_delta = engagement['total_count']
day[engagement_type.name] = day.get(engagement_type.name, 0) + count_delta
timelines.append(day)
return timelines
......@@ -422,7 +427,7 @@ class ModuleEngagement(models.Model):
course_id = models.CharField(db_index=True, max_length=255)
username = models.CharField(max_length=255)
date = models.DateTimeField()
# This will be one of "problem", "video" or "forum"
# This will be one of "problem", "video" or "discussion"
entity_type = models.CharField(max_length=255)
# For problems this will be the usage key, for videos it will be the html encoded module ID,
# for discussions it will be the commentable_id
......
import datetime
import json
import ddt
from django.utils.http import urlquote
from django_dynamic_fixture import G
import pytz
from rest_framework import status
from analyticsdataserver.tests import TestCaseWithAuthentication
from analytics_data_api.constants import engagement_entity_types, engagement_events
from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED
from analytics_data_api.v0 import models
from analytics_data_api.v0.tests.views import DemoCourseMixin, VerifyCourseIdMixin
@ddt.ddt
class EngagementTimelineTests(DemoCourseMixin, VerifyCourseIdMixin, TestCaseWithAuthentication):
DEFAULT_USERNAME = 'ed_xavier'
path_template = '/api/v0/engagement_timelines/{}/?course_id={}'
def _create_engagement(self):
""" Create module engagement data for testing. """
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 1, 1, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=100)
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 1, 1, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.COMPLETED, count=12)
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.DISCUSSION,
entity_id='some-type-of-id', event=engagement_events.CONTRIBUTIONS, count=10)
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.VIDEO,
entity_id='some-type-of-id', event=engagement_events.VIEWED, count=44)
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=8)
def test_timeline(self):
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
self._create_engagement()
response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200)
def create_engagement(self, entity_type, event_type, entity_id, count, date=None):
"""Create a ModuleEngagement model"""
if date is None:
date = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
G(
models.ModuleEngagement,
course_id=self.course_id,
username=self.DEFAULT_USERNAME,
date=date,
entity_type=entity_type,
entity_id=entity_id,
event=event_type,
count=count,
)
expected = {
@ddt.data(
(PROBLEM, ATTEMPTED, 'problems_attempted', True),
(PROBLEM, COMPLETED, 'problems_completed', True),
(VIDEO, VIEWED, 'videos_viewed', True),
(DISCUSSION, CONTRIBUTED, 'discussion_contributions', False),
)
@ddt.unpack
def test_metric_aggregation(self, entity_type, event_type, metric_display_name, expect_id_aggregation):
"""
Verify that some metrics are counted by unique ID, while some are
counted by total interactions.
"""
self.create_engagement(entity_type, event_type, 'entity-id', count=5)
self.create_engagement(entity_type, event_type, 'entity-id', count=5)
expected_data = {
'days': [
{
'date': '2015-01-01',
'discussion_contributions': 0,
'problems_attempted': 100,
'problems_completed': 12,
'videos_viewed': 0
},
{
'date': '2015-01-02',
'discussion_contributions': 10,
'problems_attempted': 8,
'problems_attempted': 0,
'problems_completed': 0,
'videos_viewed': 44
},
'videos_viewed': 0,
}
]
}
self.assertEquals(response.data, expected)
if expect_id_aggregation:
expected_data['days'][0][metric_display_name] = 2
else:
expected_data['days'][0][metric_display_name] = 10
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200)
self.assertEquals(
response.data,
expected_data
)
def test_one(self):
def test_timeline(self):
"""
Smoke test the learner engagement timeline.
"""
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 5, 28, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=6923)
day_one = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
day_two = datetime.datetime(2015, 1, 2, tzinfo=pytz.utc)
self.create_engagement(PROBLEM, ATTEMPTED, 'id-1', count=100, date=day_one)
self.create_engagement(PROBLEM, COMPLETED, 'id-2', count=12, date=day_one)
self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-3', count=6, date=day_one)
self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-4', count=10, date=day_two)
self.create_engagement(VIDEO, VIEWED, 'id-5', count=44, date=day_two)
self.create_engagement(PROBLEM, ATTEMPTED, 'id-6', count=8, date=day_two)
self.create_engagement(PROBLEM, ATTEMPTED, 'id-7', count=4, date=day_two)
response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200)
expected = {
'days': [
{
'date': '2015-05-28',
'discussion_contributions': 0,
'problems_attempted': 6923,
'problems_completed': 0,
'date': '2015-01-01',
'discussion_contributions': 6,
'problems_attempted': 1,
'problems_completed': 1,
'videos_viewed': 0
},
{
'date': '2015-01-02',
'discussion_contributions': 10,
'problems_attempted': 2,
'problems_completed': 0,
'videos_viewed': 1
},
]
}
self.assertEquals(response.data, expected)
def test_day_gap(self):
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 5, 26, tzinfo=pytz.utc), entity_type=engagement_entity_types.VIDEO,
entity_id='some-type-of-id', event=engagement_events.VIEWED, count=1)
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 5, 28, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=6923)
first_day = datetime.datetime(2015, 5, 26, tzinfo=pytz.utc)
last_day = datetime.datetime(2015, 5, 28, tzinfo=pytz.utc)
self.create_engagement(VIDEO, VIEWED, 'id-1', count=1, date=first_day)
self.create_engagement(PROBLEM, ATTEMPTED, entity_id='id-2', count=1, date=last_day)
response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200)
expected = {
......@@ -102,7 +128,7 @@ class EngagementTimelineTests(DemoCourseMixin, VerifyCourseIdMixin, TestCaseWith
{
'date': '2015-05-28',
'discussion_contributions': 0,
'problems_attempted': 6923,
'problems_attempted': 1,
'problems_completed': 0,
'videos_viewed': 0
},
......
......@@ -286,8 +286,9 @@ class EngagementTimelineView(CourseViewMixin, generics.ListAPIView):
a maximum of 1.
* problems_completed: Number of unique problems the learner
answered correctly.
* discussions_contributed: Number of posts, responses, or
comments the learner contributed to course discussions.
* discussion_contributions: Number of times the learner
contributed to course discussions through posts, responses,
or comments.
* videos_viewed: Number of unique videos the learner played.
* problem_attempts_per_completed: Number of attempts per
correctly answered problem. If no problems were answered
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment