Commit 24ce9709 by Daniel Friedman

Fix engagement timeline metrics

Now distinguishes between metrics which are aggregated by unique entity
id (such as problems or videos) and those which simply add up the total
number of interactions, regardless of which module they were acted upon.
parent f907d94e
...@@ -6,9 +6,3 @@ INDIVIDUAL_TYPES = [DISCUSSION, PROBLEM, VIDEO] ...@@ -6,9 +6,3 @@ INDIVIDUAL_TYPES = [DISCUSSION, PROBLEM, VIDEO]
PROBLEMS = 'problems' PROBLEMS = 'problems'
VIDEOS = 'videos' VIDEOS = 'videos'
AGGREGATE_TYPES = [DISCUSSION, PROBLEMS, VIDEOS] AGGREGATE_TYPES = [DISCUSSION, PROBLEMS, VIDEOS]
# useful for aggregating ModuleEngagement to ModuleEngagementTimeline
SINGULAR_TO_PLURAL = {
PROBLEM: PROBLEMS,
VIDEO: VIDEOS,
}
...@@ -2,12 +2,12 @@ from analytics_data_api.constants import engagement_entity_types ...@@ -2,12 +2,12 @@ from analytics_data_api.constants import engagement_entity_types
ATTEMPTED = 'attempted' ATTEMPTED = 'attempted'
COMPLETED = 'completed' COMPLETED = 'completed'
CONTRIBUTIONS = 'contributions' CONTRIBUTED = 'contributed'
VIEWED = 'viewed' VIEWED = 'viewed'
# map entity types to events # map entity types to events
EVENTS = { EVENTS = {
engagement_entity_types.DISCUSSION: [CONTRIBUTIONS], engagement_entity_types.DISCUSSION: [CONTRIBUTED],
engagement_entity_types.PROBLEM: [ATTEMPTED, COMPLETED], engagement_entity_types.PROBLEM: [ATTEMPTED, COMPLETED],
engagement_entity_types.PROBLEMS: [ATTEMPTED, COMPLETED], engagement_entity_types.PROBLEMS: [ATTEMPTED, COMPLETED],
engagement_entity_types.VIDEO: [VIEWED], engagement_entity_types.VIDEO: [VIEWED],
......
from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED
class EngagementType(object):
    """
    Encapsulates:
        - The API consumer-facing display name for engagement types
        - The internal question of whether the metric should be counted in
          terms of the entity type or the raw number of events.

    Attributes:
        name (str): the display name of the metric, e.g. 'problems_attempted'.
        is_counted_by_entity (bool): True when the metric is counted in terms
            of the entity type, False when it is counted as the raw number of
            events.
    """

    def __init__(self, entity_type, event_type):
        """
        Initializes an EngagementType for a particular entity and event type.

        Arguments:
            entity_type (str): the type of module interacted with
            event_type (str): the type of interaction on that entity

        Raises:
            ValueError: if the (entity_type, event_type) pair does not
                correspond to a supported metric.
        """
        # Every supported (entity type, event type) pair, mapped to its
        # (display name, is_counted_by_entity) settings.  Looking up the pair
        # as a whole guarantees that a known entity type combined with an
        # unsupported event is rejected instead of silently producing an
        # instance with no attributes set.
        supported_types = {
            (PROBLEM, ATTEMPTED): ('problems_attempted', True),
            (PROBLEM, COMPLETED): ('problems_completed', True),
            (VIDEO, VIEWED): ('videos_viewed', True),
            # Note that the discussion contribution metric counts
            # total discussion contributions, not number of
            # discussions contributed to.
            (DISCUSSION, CONTRIBUTED): ('discussion_contributions', False),
        }

        try:
            settings_for_type = supported_types[(entity_type, event_type)]
        except (KeyError, TypeError):
            # TypeError covers unhashable arguments, which can never match a
            # supported pair either.
            raise ValueError(
                'No display name found for entity type "{entity_type}" and event type "{event_type}"'.format(
                    entity_type=entity_type,
                    event_type=event_type,
                )
            )

        self.name, self.is_counted_by_entity = settings_for_type
...@@ -2,11 +2,12 @@ from itertools import groupby ...@@ -2,11 +2,12 @@ from itertools import groupby
from django.conf import settings from django.conf import settings
from django.db import models from django.db import models
from django.db.models import Sum from django.db.models import Count, Sum
# some fields (e.g. Float, Integer) are dynamic and your IDE may highlight them as unavailable # some fields (e.g. Float, Integer) are dynamic and your IDE may highlight them as unavailable
from elasticsearch_dsl import Date, DocType, Float, Integer, Q, String from elasticsearch_dsl import Date, DocType, Float, Integer, Q, String # pylint: disable=no-name-in-module
from analytics_data_api.constants import country, engagement_entity_types, genders, learner from analytics_data_api.constants import country, genders, learner
from analytics_data_api.constants.engagement_types import EngagementType
class CourseActivityWeekly(models.Model): class CourseActivityWeekly(models.Model):
...@@ -394,24 +395,28 @@ class ModuleEngagementTimelineManager(models.Manager): ...@@ -394,24 +395,28 @@ class ModuleEngagementTimelineManager(models.Manager):
def get_timelines(self, course_id, username): def get_timelines(self, course_id, username):
queryset = ModuleEngagement.objects.all().filter(course_id=course_id, username=username) \ queryset = ModuleEngagement.objects.all().filter(course_id=course_id, username=username) \
.values('date', 'entity_type', 'event') \ .values('date', 'entity_type', 'event') \
.annotate(count=Sum('count')) \ .annotate(total_count=Sum('count')) \
.annotate(distinct_entity_count=Count('entity_id')) \
.order_by('date') .order_by('date')
timelines = [] timelines = []
for key, group in groupby(queryset, lambda x: (x['date'])): for date, engagements in groupby(queryset, lambda x: (x['date'])):
# Iterate over groups and create a single item with engagement data # Iterate over engagements for this day and create a single day with
item = { # engagement data.
u'date': key, day = {
u'date': date,
} }
for engagement in group: for engagement in engagements:
entity_type = engagement_entity_types.SINGULAR_TO_PLURAL.get(engagement['entity_type'], engagement_type = EngagementType(engagement['entity_type'], engagement['event'])
engagement['entity_type'])
engagement_type = '{}_{}'.format(entity_type, engagement['event']) if engagement_type.is_counted_by_entity:
count = item.get(engagement_type, 0) count_delta = engagement['distinct_entity_count']
count += engagement['count'] else:
item[engagement_type] = count count_delta = engagement['total_count']
timelines.append(item)
day[engagement_type.name] = day.get(engagement_type.name, 0) + count_delta
timelines.append(day)
return timelines return timelines
...@@ -422,7 +427,7 @@ class ModuleEngagement(models.Model): ...@@ -422,7 +427,7 @@ class ModuleEngagement(models.Model):
course_id = models.CharField(db_index=True, max_length=255) course_id = models.CharField(db_index=True, max_length=255)
username = models.CharField(max_length=255) username = models.CharField(max_length=255)
date = models.DateTimeField() date = models.DateTimeField()
# This will be one of "problem", "video" or "forum" # This will be one of "problem", "video" or "discussion"
entity_type = models.CharField(max_length=255) entity_type = models.CharField(max_length=255)
# For problems this will be the usage key, for videos it will be the html encoded module ID, # For problems this will be the usage key, for videos it will be the html encoded module ID,
# for forums it will be the commentable_id # for forums it will be the commentable_id
......
import datetime import datetime
import json import json
import ddt
from django.utils.http import urlquote from django.utils.http import urlquote
from django_dynamic_fixture import G from django_dynamic_fixture import G
import pytz import pytz
from rest_framework import status from rest_framework import status
from analyticsdataserver.tests import TestCaseWithAuthentication from analyticsdataserver.tests import TestCaseWithAuthentication
from analytics_data_api.constants import engagement_entity_types, engagement_events from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED
from analytics_data_api.v0 import models from analytics_data_api.v0 import models
from analytics_data_api.v0.tests.views import DemoCourseMixin, VerifyCourseIdMixin from analytics_data_api.v0.tests.views import DemoCourseMixin, VerifyCourseIdMixin
@ddt.ddt
class EngagementTimelineTests(DemoCourseMixin, VerifyCourseIdMixin, TestCaseWithAuthentication): class EngagementTimelineTests(DemoCourseMixin, VerifyCourseIdMixin, TestCaseWithAuthentication):
DEFAULT_USERNAME = 'ed_xavier' DEFAULT_USERNAME = 'ed_xavier'
path_template = '/api/v0/engagement_timelines/{}/?course_id={}' path_template = '/api/v0/engagement_timelines/{}/?course_id={}'
def _create_engagement(self): def create_engagement(self, entity_type, event_type, entity_id, count, date=None):
""" Create module engagement data for testing. """ """Create a ModuleEngagement model"""
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME, if date is None:
date=datetime.datetime(2015, 1, 1, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM, date = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=100) G(
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME, models.ModuleEngagement,
date=datetime.datetime(2015, 1, 1, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM, course_id=self.course_id,
entity_id='some-type-of-id', event=engagement_events.COMPLETED, count=12) username=self.DEFAULT_USERNAME,
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME, date=date,
date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.DISCUSSION, entity_type=entity_type,
entity_id='some-type-of-id', event=engagement_events.CONTRIBUTIONS, count=10) entity_id=entity_id,
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME, event=event_type,
date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.VIDEO, count=count,
entity_id='some-type-of-id', event=engagement_events.VIEWED, count=44) )
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=8)
def test_timeline(self):
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
self._create_engagement()
response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200)
expected = { @ddt.data(
(PROBLEM, ATTEMPTED, 'problems_attempted', True),
(PROBLEM, COMPLETED, 'problems_completed', True),
(VIDEO, VIEWED, 'videos_viewed', True),
(DISCUSSION, CONTRIBUTED, 'discussion_contributions', False),
)
@ddt.unpack
def test_metric_aggregation(self, entity_type, event_type, metric_display_name, expect_id_aggregation):
"""
Verify that some metrics are counted by unique ID, while some are
counted by total interactions.
"""
self.create_engagement(entity_type, event_type, 'entity-id', count=5)
self.create_engagement(entity_type, event_type, 'entity-id', count=5)
expected_data = {
'days': [ 'days': [
{ {
'date': '2015-01-01', 'date': '2015-01-01',
'discussion_contributions': 0, 'discussion_contributions': 0,
'problems_attempted': 100, 'problems_attempted': 0,
'problems_completed': 12,
'videos_viewed': 0
},
{
'date': '2015-01-02',
'discussion_contributions': 10,
'problems_attempted': 8,
'problems_completed': 0, 'problems_completed': 0,
'videos_viewed': 44 'videos_viewed': 0,
}, }
] ]
} }
self.assertEquals(response.data, expected) if expect_id_aggregation:
expected_data['days'][0][metric_display_name] = 2
else:
expected_data['days'][0][metric_display_name] = 10
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200)
self.assertEquals(
response.data,
expected_data
)
def test_one(self): def test_timeline(self):
"""
Smoke test the learner engagement timeline.
"""
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id)) path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME, day_one = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
date=datetime.datetime(2015, 5, 28, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM, day_two = datetime.datetime(2015, 1, 2, tzinfo=pytz.utc)
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=6923) self.create_engagement(PROBLEM, ATTEMPTED, 'id-1', count=100, date=day_one)
self.create_engagement(PROBLEM, COMPLETED, 'id-2', count=12, date=day_one)
self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-3', count=6, date=day_one)
self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-4', count=10, date=day_two)
self.create_engagement(VIDEO, VIEWED, 'id-5', count=44, date=day_two)
self.create_engagement(PROBLEM, ATTEMPTED, 'id-6', count=8, date=day_two)
self.create_engagement(PROBLEM, ATTEMPTED, 'id-7', count=4, date=day_two)
response = self.authenticated_get(path) response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200) self.assertEquals(response.status_code, 200)
expected = { expected = {
'days': [ 'days': [
{ {
'date': '2015-05-28', 'date': '2015-01-01',
'discussion_contributions': 0, 'discussion_contributions': 6,
'problems_attempted': 6923, 'problems_attempted': 1,
'problems_completed': 0, 'problems_completed': 1,
'videos_viewed': 0 'videos_viewed': 0
}, },
{
'date': '2015-01-02',
'discussion_contributions': 10,
'problems_attempted': 2,
'problems_completed': 0,
'videos_viewed': 1
},
] ]
} }
self.assertEquals(response.data, expected) self.assertEquals(response.data, expected)
def test_day_gap(self): def test_day_gap(self):
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id)) path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME, first_day = datetime.datetime(2015, 5, 26, tzinfo=pytz.utc)
date=datetime.datetime(2015, 5, 26, tzinfo=pytz.utc), entity_type=engagement_entity_types.VIDEO, last_day = datetime.datetime(2015, 5, 28, tzinfo=pytz.utc)
entity_id='some-type-of-id', event=engagement_events.VIEWED, count=1) self.create_engagement(VIDEO, VIEWED, 'id-1', count=1, date=first_day)
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME, self.create_engagement(PROBLEM, ATTEMPTED, entity_id='id-2', count=1, date=last_day)
date=datetime.datetime(2015, 5, 28, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=6923)
response = self.authenticated_get(path) response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200) self.assertEquals(response.status_code, 200)
expected = { expected = {
...@@ -102,7 +128,7 @@ class EngagementTimelineTests(DemoCourseMixin, VerifyCourseIdMixin, TestCaseWith ...@@ -102,7 +128,7 @@ class EngagementTimelineTests(DemoCourseMixin, VerifyCourseIdMixin, TestCaseWith
{ {
'date': '2015-05-28', 'date': '2015-05-28',
'discussion_contributions': 0, 'discussion_contributions': 0,
'problems_attempted': 6923, 'problems_attempted': 1,
'problems_completed': 0, 'problems_completed': 0,
'videos_viewed': 0 'videos_viewed': 0
}, },
......
...@@ -286,8 +286,9 @@ class EngagementTimelineView(CourseViewMixin, generics.ListAPIView): ...@@ -286,8 +286,9 @@ class EngagementTimelineView(CourseViewMixin, generics.ListAPIView):
a maximum of 1. a maximum of 1.
* problems_completed: Number of unique problems the learner * problems_completed: Number of unique problems the learner
answered correctly. answered correctly.
* discussions_contributed: Number of posts, responses, or * discussion_contributions: Number of times the learner
comments the learner contributed to course discussions. contributed to course discussions through posts, responses,
or comments.
* videos_viewed: Number of times any course video was played. * videos_viewed: Number of times any course video was played.
* problem_attempts_per_completed: Number of attempts per * problem_attempts_per_completed: Number of attempts per
correctly answered problem. If no problems were answered correctly answered problem. If no problems were answered
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment