Commit c606c295 by Gabe Mulley

Merge pull request #5002 from mulby/gabe/add-segmentio-hook

Add event tracking endpoint to be called by segment.io
parents b58b6882 7900e8d3
"""Handle events that were forwarded from the segment.io webhook integration"""
import datetime
import json
import logging
from django.conf import settings
from django.contrib.auth.models import User
from django.views.decorators.http import require_POST
from django_future.csrf import csrf_exempt
from eventtracking import tracker as eventtracker
from opaque_keys.edx.keys import CourseKey
from opaque_keys import InvalidKeyError
from util.json_request import expect_json, JsonResponse
from track import tracker
from track import shim
# Module-level logger; used below to record events that could not be processed.
log = logging.getLogger(__name__)
# The constants below are the values placed in the "message" field of the JSON responses built by this module.
# Returned with HTTP status 401 when the webhook secret is not configured or the "key" query parameter does not match.
ERROR_UNAUTHORIZED = 'Unauthorized'
# The two warnings are returned with the default status (200) and committed=False: the event is skipped, not rejected.
WARNING_IGNORED_CHANNEL = 'Channel ignored'
WARNING_IGNORED_ACTION = 'Action ignored'
# The remaining errors are returned through failure_response with its default status (400).
# NOTE(review): the text says "context", but the view reads "userId" from the root of the segment.io event.
ERROR_MISSING_USER_ID = 'Required user_id missing from context'
ERROR_USER_NOT_EXIST = 'Specified user does not exist'
ERROR_INVALID_USER_ID = 'Unable to parse userId as an integer'
ERROR_MISSING_EVENT_TYPE = 'The event_type field must be specified in the properties dictionary'
ERROR_MISSING_TIMESTAMP = 'Required timestamp field not found'
ERROR_MISSING_RECEIVED_AT = 'Required receivedAt field not found'
@require_POST
@expect_json
@csrf_exempt
def track_segmentio_event(request):
    """
    An endpoint for logging events using segment.io's webhook integration.

    segment.io provides a custom integration mechanism that initiates a request to a configurable URL every time an
    event is received by their system. This endpoint is designed to receive those requests and convert the events into
    standard tracking log entries.

    For now we limit the scope of handled events to track and screen events from mobile devices. In the future we could
    enable logging of other types of events, however, there is significant overlap with our non-segment.io based event
    tracking. Given that segment.io is a closed third-party solution we are limiting its required usage to just
    collecting events from mobile devices for the time being.

    Many of the root fields of a standard edX tracking event are read out of the "properties" dictionary provided by the
    segment.io event, which is, in turn, provided by the client that emitted the event.

    In order for an event to be logged the following preconditions must be met:

    * The "key" query string parameter must exactly match the django setting TRACKING_SEGMENTIO_WEBHOOK_SECRET. While
      the endpoint is public, we want to limit access to it to the segment.io servers only.
    * The value of the "channel" field of the event must be included in the list specified by the django setting
      TRACKING_SEGMENTIO_ALLOWED_CHANNELS. This is intended to restrict the set of events to specific channels. For
      example: just mobile devices.
    * The value of the "action" field of the event must be included in the list specified by the django setting
      TRACKING_SEGMENTIO_ALLOWED_ACTIONS. In order to make use of *all* of the features segment.io offers we would have
      to implement some sort of persistent storage of information contained in some actions (like identify). For now,
      we defer support of those actions and just support a limited set that can be handled without storing information
      in external state.
    * The value of the standard "userId" field of the event must be an integer that can be used to look up the user
      using the primary key of the User model.
    * Include an "event_type" field in the properties dictionary that indicates the edX event type. Note this can differ
      from the "event" field found in the root of a segment.io event. The "event" field at the root of the structure is
      intended to be human readable, the "event_type" field is expected to conform to the standard for naming events
      found in the edX data documentation.
    * Include the root level "timestamp" and "receivedAt" fields.

    Additionally the event can optionally:

    * Provide a "context" dictionary in the properties dictionary. This dictionary will be applied to the
      existing context on the server overriding any existing keys. This context dictionary should include a "course_id"
      field when the event is scoped to a particular course. The value of this field should be a valid course key. The
      context may contain other arbitrary data that will be logged with the event, for example: identification
      information for the device that emitted the event.
    * Provide a "page" parameter in the properties dictionary which indicates the page that was being displayed to the
      user or the mobile application screen that was visible to the user at the time the event was emitted.

    Returns: A JSON response containing a "committed" flag and, when the event was not logged, a "message" explaining
        why. The status is 401 when the secret does not match, 200 when the channel or action is deliberately ignored,
        400 when a required field is missing or invalid and 200 (committed) when the event was sent to the tracker.
    """

    # Validate the security token. We must use a query string parameter for this since we cannot customize the POST body
    # in the segment.io webhook configuration, we can only change the URL that they call, so we force this token to be
    # included in the URL and reject any requests that do not include it. This also assumes HTTPS is used to make the
    # connection between their server and ours.
    expected_secret = getattr(settings, 'TRACKING_SEGMENTIO_WEBHOOK_SECRET', None)
    provided_secret = request.GET.get('key')
    if not expected_secret or provided_secret != expected_secret:
        return failure_response(ERROR_UNAUTHORIZED, status=401)

    # The POST body will contain the JSON encoded event
    full_segment_event = request.json

    def logged_failure_response(*args, **kwargs):
        """Indicate a failure and log information about the event that will aid debugging efforts"""
        failed_response = failure_response(*args, **kwargs)
        log.warning('Unable to process event received from segment.io: %s', json.dumps(full_segment_event))
        return failed_response

    # Selectively listen to particular channels
    channel = full_segment_event.get('channel')
    allowed_channels = [c.lower() for c in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_CHANNELS', [])]
    if not channel or channel.lower() not in allowed_channels:
        return response(WARNING_IGNORED_CHANNEL, committed=False)

    # Ignore actions that are unsupported
    action = full_segment_event.get('action')
    allowed_actions = [a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_ACTIONS', [])]
    if not action or action.lower() not in allowed_actions:
        return response(WARNING_IGNORED_ACTION, committed=False)

    # We mostly care about the properties. Use "or {}" rather than a .get() default: the default is only applied when
    # the key is absent, so an explicit JSON null would otherwise leave us with None and crash on the lookups below.
    segment_event = full_segment_event.get('properties') or {}

    context = {}

    # Start with the context provided by segment.io in the "client" field if it exists
    segment_context = full_segment_event.get('context')
    if segment_context:
        context['client'] = segment_context

    # Overlay any context provided in the properties (again tolerating an explicit null)
    context.update(segment_event.get('context') or {})

    user_id = full_segment_event.get('userId')
    if not user_id:
        return logged_failure_response(ERROR_MISSING_USER_ID)

    # userId is assumed to be the primary key of the django User model
    try:
        user = User.objects.get(pk=user_id)
    except User.DoesNotExist:
        return logged_failure_response(ERROR_USER_NOT_EXIST)
    except (TypeError, ValueError):
        # ValueError: a string that is not an integer. TypeError: a JSON list/object that cannot be coerced at all.
        return logged_failure_response(ERROR_INVALID_USER_ID)
    else:
        context['user_id'] = user_id

    # course_id is expected to be provided in the context when applicable
    course_id = context.get('course_id')
    if course_id:
        try:
            course_key = CourseKey.from_string(course_id)
            context['org_id'] = course_key.org
        except InvalidKeyError:
            # An unparseable course_id is not fatal: the event is still logged, just without an org_id.
            log.warning(
                'unable to parse course_id "%s" from event: %s',
                course_id,
                json.dumps(full_segment_event),
                exc_info=True
            )

    # NOTE: a value that parse_iso8601_timestamp cannot parse raises ValueError, which is not handled here.
    if 'timestamp' in full_segment_event:
        time = parse_iso8601_timestamp(full_segment_event['timestamp'])
    else:
        return logged_failure_response(ERROR_MISSING_TIMESTAMP)

    if 'receivedAt' in full_segment_event:
        context['received_at'] = parse_iso8601_timestamp(full_segment_event['receivedAt'])
    else:
        return logged_failure_response(ERROR_MISSING_RECEIVED_AT)

    if 'event_type' in segment_event:
        event_type = segment_event['event_type']
    else:
        return logged_failure_response(ERROR_MISSING_EVENT_TYPE)

    with eventtracker.get_tracker().context('edx.segmentio', context):
        complete_context = eventtracker.get_tracker().resolve_context()
        event = {
            "username": user.username,
            "event_type": event_type,
            # Will be either "mobile", "browser" or "server". These names happen to be identical to the names we already
            # use so no mapping is necessary.
            "event_source": channel,
            # This timestamp is reported by the local clock on the device so it may be wildly incorrect.
            "time": time,
            "context": complete_context,
            "page": segment_event.get('page'),
            "host": complete_context.get('host', ''),
            "agent": '',
            "ip": segment_event.get('ip', ''),
            "event": segment_event.get('event', {}),
        }

    # Some duplicated fields are passed into event-tracking via the context by track.middleware.
    # Remove them from the event here since they are captured elsewhere.
    shim.remove_shim_context(event)

    tracker.send(event)

    return response()
def response(message=None, status=200, committed=True):
    """
    Build the JSON reply sent back by the segment.io event handler.

    The body always carries a "committed" flag and, when `message` is truthy, a "message" entry describing what
    action was taken while processing the request.

    Returns: A JsonResponse with the requested HTTP status.
    """
    payload = dict(committed=committed)
    if message:
        payload.update(message=message)

    return JsonResponse(payload, status=status)
def failure_response(message, status=400):
    """
    Build the reply used when a segment.io event could not be handled.

    This is `response` with the "committed" flag forced to False; `message` explains what went wrong.

    Returns: A JSON response carrying the message and the given HTTP status (400 unless overridden).
    """
    return response(message, status, False)
def parse_iso8601_timestamp(timestamp):
    """
    Parse an ISO8601 formatted UTC timestamp of the kind sent by segment.io.

    Both "2014-08-27T16:33:39.215Z" (with fractional seconds) and "2014-08-27T16:33:39Z" (without) are accepted.

    Returns: A naive datetime.datetime holding the parsed value.
    Raises: ValueError if the string matches neither form.
    """
    # Try the fractional form first since that is what segment.io normally emits.
    for timestamp_format in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
        try:
            return datetime.datetime.strptime(timestamp, timestamp_format)
        except ValueError:
            continue

    raise ValueError('Unable to parse %r as an ISO8601 UTC timestamp' % (timestamp,))
"""Ensure we can parse events sent to us from the segment.io webhook integration"""
from datetime import datetime
import json
from ddt import ddt, data
from freezegun import freeze_time
from mock import patch, sentinel
from django.contrib.auth.models import User
from django.test import TestCase
from django.test.client import RequestFactory
from django.test.utils import override_settings
from track.middleware import TrackMiddleware
from track.views import segmentio
# The clock is frozen at this instant for every test via the freeze_time class decorator.
EXPECTED_TIME = datetime(2013, 10, 3, 8, 24, 55)
# Installed as TRACKING_SEGMENTIO_WEBHOOK_SECRET; create_request sends it as the "key" query parameter by default.
SECRET = 'anything'
# Path used for the fake requests; also listed in TRACKING_IGNORE_URL_PATTERNS by the class-level override_settings.
ENDPOINT = '/segmentio/test/event'
# Default "userId" of the fake events and the primary key of the User rows the tests create.
USER_ID = 10
@ddt
@override_settings(
    TRACKING_SEGMENTIO_WEBHOOK_SECRET=SECRET,
    TRACKING_IGNORE_URL_PATTERNS=[ENDPOINT],
    TRACKING_SEGMENTIO_ALLOWED_ACTIONS=['Track', 'Screen'],
    TRACKING_SEGMENTIO_ALLOWED_CHANNELS=['mobile']
)
@freeze_time(EXPECTED_TIME)
class SegmentIOTrackingTestCase(TestCase):
    """Test processing of segment.io events"""

    def setUp(self):
        """Create a request factory and replace the tracker used by the view module with a mock"""
        self.request_factory = RequestFactory()

        patcher = patch('track.views.segmentio.tracker')
        self.mock_tracker = patcher.start()
        self.addCleanup(patcher.stop)

    def test_segmentio_tracking_get_request(self):
        """Only POST is accepted; a GET is answered with 405 and nothing is emitted"""
        request = self.request_factory.get(ENDPOINT)
        response = segmentio.track_segmentio_event(request)
        self.assertEqual(response.status_code, 405)
        self.assertFalse(self.mock_tracker.send.called)  # pylint: disable=maybe-no-member

    @override_settings(
        TRACKING_SEGMENTIO_WEBHOOK_SECRET=None
    )
    def test_segmentio_tracking_no_secret_config(self):
        """When no secret is configured on the server every request is unauthorized"""
        request = self.request_factory.post(ENDPOINT)
        response = segmentio.track_segmentio_event(request)
        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401)

    def assert_segmentio_uncommitted_response(self, response, expected_message, expected_status=400):
        """Assert that no event was emitted and an appropriate commit==false message was returned"""
        self.assertEqual(response.status_code, expected_status)
        parsed_content = json.loads(response.content)
        self.assertEqual(parsed_content, {'committed': False, 'message': expected_message})
        self.assertFalse(self.mock_tracker.send.called)  # pylint: disable=maybe-no-member

    def test_segmentio_tracking_no_secret_provided(self):
        """A request without the "key" query parameter is unauthorized"""
        request = self.request_factory.post(ENDPOINT)
        response = segmentio.track_segmentio_event(request)
        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401)

    def test_segmentio_tracking_secret_mismatch(self):
        """A request whose "key" does not equal the configured secret is unauthorized"""
        request = self.create_request(key='y')
        response = segmentio.track_segmentio_event(request)
        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401)

    def create_request(self, key=None, **kwargs):
        """Create a fake request that emulates a request from the segment.io servers to ours"""
        if key is None:
            key = SECRET

        return self.request_factory.post(ENDPOINT + "?key=" + key, **kwargs)

    @data('Identify', 'Group', 'Alias', 'Page', 'identify')
    def test_segmentio_ignore_actions(self, action):
        """Actions outside TRACKING_SEGMENTIO_ALLOWED_ACTIONS are skipped with a 200 response"""
        response = self.post_segmentio_event(action=action)
        self.assert_segmentio_uncommitted_response(response, segmentio.WARNING_IGNORED_ACTION, 200)

    def post_segmentio_event(self, **kwargs):
        """Post a fake segment.io event to the view that processes it"""
        request = self.create_request(
            data=self.create_segmentio_event_json(**kwargs),
            content_type='application/json'
        )
        return segmentio.track_segmentio_event(request)

    @data('server', 'browser', 'Browser')
    def test_segmentio_ignore_channels(self, channel):
        """Channels outside TRACKING_SEGMENTIO_ALLOWED_CHANNELS are skipped with a 200 response"""
        response = self.post_segmentio_event(channel=channel)
        self.assert_segmentio_uncommitted_response(response, segmentio.WARNING_IGNORED_CHANNEL, 200)

    def create_segmentio_event(self, **kwargs):
        """Populate a fake segment.io event with data of interest"""
        action = kwargs.get('action', 'Track')
        sample_event = {
            "userId": kwargs.get('user_id', USER_ID),
            "event": "Did something",
            "properties": {
                'event_type': kwargs.get('event_type', ''),
                'event': kwargs.get('event', {}),
                'context': {
                    'course_id': kwargs.get('course_id') or '',
                }
            },
            "channel": kwargs.get('channel', 'mobile'),
            "context": {
                "library": {
                    "name": "unknown",
                    "version": "unknown"
                }
            },
            "receivedAt": "2014-08-27T16:33:39.100Z",
            "timestamp": "2014-08-27T16:33:39.215Z",
            "type": action.lower(),
            "projectId": "u0j33yjkr8",
            "messageId": "qy52hwp4",
            "version": 2,
            "integrations": {},
            "options": {
                "library": "unknown",
                "providers": {}
            },
            "action": action
        }
        return sample_event

    def create_segmentio_event_json(self, **kwargs):
        """Return a json string containing a fake segment.io event"""
        return json.dumps(self.create_segmentio_event(**kwargs))

    def test_segmentio_tracking_missing_user_id(self):
        """An event whose userId is null is rejected before any user lookup happens"""
        response = self.post_segmentio_event(user_id=None)
        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_USER_ID, 400)

    def test_segmentio_tracking_no_user_for_user_id(self):
        """A userId that matches no User row is rejected"""
        response = self.post_segmentio_event(user_id=40)
        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_USER_NOT_EXIST, 400)

    def test_segmentio_tracking_invalid_user_id(self):
        """A userId that is not an integer is rejected"""
        response = self.post_segmentio_event(user_id='foobar')
        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_INVALID_USER_ID, 400)

    @data('foo/bar/baz', 'course-v1:foo+bar+baz')
    def test_segmentio_tracking(self, course_id):
        """A valid event is converted into exactly one tracking event with the expected fields"""
        middleware = TrackMiddleware()

        request = self.create_request(
            data=self.create_segmentio_event_json(
                event_type=str(sentinel.event_type),
                event={'foo': 'bar'},
                course_id=course_id
            ),
            content_type='application/json'
        )
        User.objects.create(pk=USER_ID, username=str(sentinel.username))

        middleware.process_request(request)
        # The middleware normally emits an event, make sure it doesn't in this case.
        self.assertFalse(self.mock_tracker.send.called)  # pylint: disable=maybe-no-member
        try:
            response = segmentio.track_segmentio_event(request)
            self.assertEqual(response.status_code, 200)

            expected_event = {
                'username': str(sentinel.username),
                'ip': '',
                'event_source': 'mobile',
                'event_type': str(sentinel.event_type),
                'event': {'foo': 'bar'},
                'agent': '',
                'page': None,
                'time': datetime.strptime("2014-08-27T16:33:39.215Z", "%Y-%m-%dT%H:%M:%S.%fZ"),
                'host': 'testserver',
                'context': {
                    'user_id': USER_ID,
                    'course_id': course_id,
                    'org_id': 'foo',
                    'path': ENDPOINT,
                    'client': {
                        'library': {
                            'name': 'unknown',
                            'version': 'unknown'
                        }
                    },
                    'received_at': datetime.strptime("2014-08-27T16:33:39.100Z", "%Y-%m-%dT%H:%M:%S.%fZ"),
                },
            }
        finally:
            # Always let the middleware unwind the context it entered in process_request.
            middleware.process_response(request, None)

        self.mock_tracker.send.assert_called_once_with(expected_event)  # pylint: disable=maybe-no-member

    def test_segmentio_tracking_invalid_course_id(self):
        """An unparseable course_id does not prevent the event from being emitted"""
        request = self.create_request(
            data=self.create_segmentio_event_json(course_id='invalid'),
            content_type='application/json'
        )
        User.objects.create(pk=USER_ID, username=str(sentinel.username))
        response = segmentio.track_segmentio_event(request)
        self.assertEqual(response.status_code, 200)
        self.assertTrue(self.mock_tracker.send.called)  # pylint: disable=maybe-no-member

    def test_segmentio_tracking_missing_event_type(self):
        """An event whose properties carry no event_type is rejected"""
        sample_event_raw = self.create_segmentio_event()
        sample_event_raw['properties'] = {}
        request = self.create_request(
            data=json.dumps(sample_event_raw),
            content_type='application/json'
        )
        User.objects.create(pk=USER_ID, username=str(sentinel.username))

        response = segmentio.track_segmentio_event(request)
        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_EVENT_TYPE, 400)

    def test_segmentio_tracking_missing_timestamp(self):
        """An event without the root "timestamp" field is rejected"""
        sample_event_raw = self.create_event_without_fields('timestamp')
        request = self.create_request(
            data=json.dumps(sample_event_raw),
            content_type='application/json'
        )
        User.objects.create(pk=USER_ID, username=str(sentinel.username))

        response = segmentio.track_segmentio_event(request)
        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_TIMESTAMP, 400)

    def create_event_without_fields(self, *fields):
        """Create a fake event and remove some fields from it"""
        event = self.create_segmentio_event()

        for field in fields:
            if field in event:
                del event[field]

        return event

    def test_segmentio_tracking_missing_received_at(self):
        """An event without the root "receivedAt" field is rejected"""
        sample_event_raw = self.create_event_without_fields('receivedAt')
        request = self.create_request(
            data=json.dumps(sample_event_raw),
            content_type='application/json'
        )
        User.objects.create(pk=USER_ID, username=str(sentinel.username))

        response = segmentio.track_segmentio_event(request)
        self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_RECEIVED_AT, 400)
...@@ -400,6 +400,10 @@ STUDENT_FILEUPLOAD_MAX_SIZE = ENV_TOKENS.get("STUDENT_FILEUPLOAD_MAX_SIZE", STUD ...@@ -400,6 +400,10 @@ STUDENT_FILEUPLOAD_MAX_SIZE = ENV_TOKENS.get("STUDENT_FILEUPLOAD_MAX_SIZE", STUD
# Event tracking # Event tracking
TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {})) TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {}))
EVENT_TRACKING_BACKENDS.update(AUTH_TOKENS.get("EVENT_TRACKING_BACKENDS", {})) EVENT_TRACKING_BACKENDS.update(AUTH_TOKENS.get("EVENT_TRACKING_BACKENDS", {}))
TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get("TRACKING_SEGMENTIO_WEBHOOK_SECRET", TRACKING_SEGMENTIO_WEBHOOK_SECRET)
TRACKING_SEGMENTIO_ALLOWED_ACTIONS = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_ACTIONS", TRACKING_SEGMENTIO_ALLOWED_ACTIONS)
TRACKING_SEGMENTIO_ALLOWED_CHANNELS = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_CHANNELS", TRACKING_SEGMENTIO_ALLOWED_CHANNELS)
# Student identity verification settings # Student identity verification settings
VERIFY_STUDENT = AUTH_TOKENS.get("VERIFY_STUDENT", VERIFY_STUDENT) VERIFY_STUDENT = AUTH_TOKENS.get("VERIFY_STUDENT", VERIFY_STUDENT)
......
...@@ -460,7 +460,7 @@ TRACKING_BACKENDS = { ...@@ -460,7 +460,7 @@ TRACKING_BACKENDS = {
# We're already logging events, and we don't want to capture user # We're already logging events, and we don't want to capture user
# names/passwords. Heartbeat events are likely not interesting. # names/passwords. Heartbeat events are likely not interesting.
TRACKING_IGNORE_URL_PATTERNS = [r'^/event', r'^/login', r'^/heartbeat'] TRACKING_IGNORE_URL_PATTERNS = [r'^/event', r'^/login', r'^/heartbeat', r'^/segmentio/event']
EVENT_TRACKING_ENABLED = True EVENT_TRACKING_ENABLED = True
EVENT_TRACKING_BACKENDS = { EVENT_TRACKING_BACKENDS = {
...@@ -492,6 +492,10 @@ if FEATURES.get('ENABLE_SQL_TRACKING_LOGS'): ...@@ -492,6 +492,10 @@ if FEATURES.get('ENABLE_SQL_TRACKING_LOGS'):
} }
}) })
TRACKING_SEGMENTIO_WEBHOOK_SECRET = None
TRACKING_SEGMENTIO_ALLOWED_ACTIONS = ['Track', 'Screen']
TRACKING_SEGMENTIO_ALLOWED_CHANNELS = ['mobile']
######################## GOOGLE ANALYTICS ########################### ######################## GOOGLE ANALYTICS ###########################
GOOGLE_ANALYTICS_ACCOUNT = None GOOGLE_ANALYTICS_ACCOUNT = None
GOOGLE_ANALYTICS_LINKEDIN = 'GOOGLE_ANALYTICS_LINKEDIN_DUMMY' GOOGLE_ANALYTICS_LINKEDIN = 'GOOGLE_ANALYTICS_LINKEDIN_DUMMY'
......
...@@ -28,6 +28,7 @@ urlpatterns = ('', # nopep8 ...@@ -28,6 +28,7 @@ urlpatterns = ('', # nopep8
url(r'^reject_name_change$', 'student.views.reject_name_change'), url(r'^reject_name_change$', 'student.views.reject_name_change'),
url(r'^pending_name_changes$', 'student.views.pending_name_changes'), url(r'^pending_name_changes$', 'student.views.pending_name_changes'),
url(r'^event$', 'track.views.user_track'), url(r'^event$', 'track.views.user_track'),
url(r'^segmentio/event$', 'track.views.segmentio.track_segmentio_event'),
url(r'^t/(?P<template>[^/]*)$', 'static_template_view.views.index'), # TODO: Is this used anymore? What is STATIC_GRAB? url(r'^t/(?P<template>[^/]*)$', 'static_template_view.views.index'), # TODO: Is this used anymore? What is STATIC_GRAB?
url(r'^accounts/login$', 'student.views.accounts_login', name="accounts_login"), url(r'^accounts/login$', 'student.views.accounts_login', name="accounts_login"),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment