Commit a6003991 by Nimisha Asthagiri

MA-157 Mobile analytics end-to-end.

Move app-generated context back into properties.
Ignore BI events through segment's webhook.
Ignore events without Data in Properties.
parent 3c999122
...@@ -24,6 +24,7 @@ WARNING_IGNORED_TYPE = 'Type ignored' ...@@ -24,6 +24,7 @@ WARNING_IGNORED_TYPE = 'Type ignored'
ERROR_MISSING_USER_ID = 'Required user_id missing from context' ERROR_MISSING_USER_ID = 'Required user_id missing from context'
ERROR_USER_NOT_EXIST = 'Specified user does not exist' ERROR_USER_NOT_EXIST = 'Specified user does not exist'
ERROR_INVALID_USER_ID = 'Unable to parse userId as an integer' ERROR_INVALID_USER_ID = 'Unable to parse userId as an integer'
ERROR_MISSING_DATA = 'The data field must be specified in the properties dictionary'
ERROR_MISSING_NAME = 'The name field must be specified in the properties dictionary' ERROR_MISSING_NAME = 'The name field must be specified in the properties dictionary'
ERROR_MISSING_TIMESTAMP = 'Required timestamp field not found' ERROR_MISSING_TIMESTAMP = 'Required timestamp field not found'
ERROR_MISSING_RECEIVED_AT = 'Required receivedAt field not found' ERROR_MISSING_RECEIVED_AT = 'Required receivedAt field not found'
...@@ -119,7 +120,7 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements ...@@ -119,7 +120,7 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements
full_segment_event = request.json full_segment_event = request.json
# We mostly care about the properties # We mostly care about the properties
segment_event = full_segment_event.get('properties', {}) segment_properties = full_segment_event.get('properties', {})
# Start with the context provided by segment.io in the "client" field if it exists # Start with the context provided by segment.io in the "client" field if it exists
# We should tightly control which fields actually get included in the event emitted. # We should tightly control which fields actually get included in the event emitted.
...@@ -136,32 +137,38 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements ...@@ -136,32 +137,38 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements
else: else:
context['event_source'] = event_source context['event_source'] = event_source
# Ignore types that are unsupported if 'name' not in segment_properties:
raise EventValidationError(ERROR_MISSING_NAME)
if 'data' not in segment_properties:
raise EventValidationError(ERROR_MISSING_DATA)
# Ignore event types and names that are unsupported
segment_event_type = full_segment_event.get('type') segment_event_type = full_segment_event.get('type')
segment_event_name = segment_properties['name']
allowed_types = [a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_TYPES', [])] allowed_types = [a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_TYPES', [])]
if not segment_event_type or segment_event_type.lower() not in allowed_types: disallowed_substring_names = [
a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES', [])
]
if (
not segment_event_type or
(segment_event_type.lower() not in allowed_types) or
any(disallowed_subs_name in segment_event_name.lower() for disallowed_subs_name in disallowed_substring_names)
):
raise EventValidationError(WARNING_IGNORED_TYPE) raise EventValidationError(WARNING_IGNORED_TYPE)
if segment_context: if segment_context:
# copy required fields from segment's context dict to our custom context dict
for context_field_name, segment_context_field_name in [
('course_id', 'course_id'),
('open_in_browser_url', 'open_in_browser_url'),
('agent', 'userAgent')
]:
if segment_context_field_name in segment_context:
context[context_field_name] = segment_context[segment_context_field_name]
# copy the entire segment's context dict as a sub-field of our custom context dict # copy the entire segment's context dict as a sub-field of our custom context dict
context['client'] = dict(segment_context) context['client'] = dict(segment_context)
context['agent'] = segment_context.get('userAgent', '')
# remove duplicate and unnecessary fields from our copy # remove duplicate and unnecessary fields from our copy
for field in ('traits', 'integrations', 'userAgent', 'course_id', 'open_in_browser_url'): for field in ('traits', 'integrations', 'userAgent'):
if field in context['client']: if field in context['client']:
del context['client'][field] del context['client'][field]
# Overlay any context provided in the properties # Overlay any context provided in the properties
context.update(segment_event.get('context', {})) context.update(segment_properties.get('context', {}))
user_id = full_segment_event.get('userId') user_id = full_segment_event.get('userId')
if not user_id: if not user_id:
...@@ -203,13 +210,10 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements ...@@ -203,13 +210,10 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements
else: else:
raise EventValidationError(ERROR_MISSING_RECEIVED_AT) raise EventValidationError(ERROR_MISSING_RECEIVED_AT)
if 'name' not in segment_event: context['ip'] = segment_properties.get('context', {}).get('ip', '')
raise EventValidationError(ERROR_MISSING_NAME)
context['ip'] = segment_event.get('context', {}).get('ip', '')
with tracker.get_tracker().context('edx.segmentio', context): with tracker.get_tracker().context('edx.segmentio', context):
tracker.emit(segment_event['name'], segment_event.get('data', {})) tracker.emit(segment_event_name, segment_properties.get('data', {}))
def parse_iso8601_timestamp(timestamp): def parse_iso8601_timestamp(timestamp):
......
...@@ -44,6 +44,7 @@ def expect_failure_with_message(message): ...@@ -44,6 +44,7 @@ def expect_failure_with_message(message):
TRACKING_SEGMENTIO_WEBHOOK_SECRET=SECRET, TRACKING_SEGMENTIO_WEBHOOK_SECRET=SECRET,
TRACKING_IGNORE_URL_PATTERNS=[ENDPOINT], TRACKING_IGNORE_URL_PATTERNS=[ENDPOINT],
TRACKING_SEGMENTIO_ALLOWED_TYPES=['track'], TRACKING_SEGMENTIO_ALLOWED_TYPES=['track'],
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES=['.bi.'],
TRACKING_SEGMENTIO_SOURCE_MAP={'test-app': 'mobile'}, TRACKING_SEGMENTIO_SOURCE_MAP={'test-app': 'mobile'},
EVENT_TRACKING_PROCESSORS=MOBILE_SHIM_PROCESSOR, EVENT_TRACKING_PROCESSORS=MOBILE_SHIM_PROCESSOR,
) )
...@@ -97,6 +98,11 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -97,6 +98,11 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
def test_segmentio_ignore_actions(self, action): def test_segmentio_ignore_actions(self, action):
self.post_segmentio_event(action=action) self.post_segmentio_event(action=action)
@data('edx.bi.some_name', 'EDX.BI.CAPITAL_NAME')
@expect_failure_with_message(segmentio.WARNING_IGNORED_TYPE)
def test_segmentio_ignore_names(self, name):
self.post_segmentio_event(name=name)
def post_segmentio_event(self, **kwargs): def post_segmentio_event(self, **kwargs):
"""Post a fake segment.io event to the view that processes it""" """Post a fake segment.io event to the view that processes it"""
request = self.create_request( request = self.create_request(
...@@ -114,6 +120,9 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -114,6 +120,9 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
"properties": { "properties": {
'name': kwargs.get('name', str(sentinel.name)), 'name': kwargs.get('name', str(sentinel.name)),
'data': kwargs.get('data', {}), 'data': kwargs.get('data', {}),
'context': {
'course_id': kwargs.get('course_id') or '',
}
}, },
"channel": 'server', "channel": 'server',
"context": { "context": {
...@@ -122,7 +131,6 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -122,7 +131,6 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
"version": "unknown" "version": "unknown"
}, },
'userAgent': str(sentinel.user_agent), 'userAgent': str(sentinel.user_agent),
'course_id': kwargs.get('course_id') or '',
}, },
"receivedAt": "2014-08-27T16:33:39.100Z", "receivedAt": "2014-08-27T16:33:39.100Z",
"timestamp": "2014-08-27T16:33:39.215Z", "timestamp": "2014-08-27T16:33:39.215Z",
...@@ -139,10 +147,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -139,10 +147,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
} }
if 'context' in kwargs: if 'context' in kwargs:
sample_event['context'].update(kwargs['context']) sample_event['properties']['context'].update(kwargs['context'])
if 'open_in_browser_url' in kwargs:
sample_event['context']['open_in_browser_url'] = kwargs['open_in_browser_url']
return sample_event return sample_event
...@@ -231,6 +236,18 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -231,6 +236,18 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
segmentio.track_segmentio_event(request) segmentio.track_segmentio_event(request)
@expect_failure_with_message(segmentio.ERROR_MISSING_DATA)
def test_missing_data(self):
sample_event_raw = self.create_segmentio_event()
del sample_event_raw['properties']['data']
request = self.create_request(
data=json.dumps(sample_event_raw),
content_type='application/json'
)
User.objects.create(pk=USER_ID, username=str(sentinel.username))
segmentio.track_segmentio_event(request)
@expect_failure_with_message(segmentio.ERROR_MISSING_TIMESTAMP) @expect_failure_with_message(segmentio.ERROR_MISSING_TIMESTAMP)
def test_missing_timestamp(self): def test_missing_timestamp(self):
sample_event_raw = self.create_event_without_fields('timestamp') sample_event_raw = self.create_event_without_fields('timestamp')
...@@ -305,8 +322,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -305,8 +322,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
data=self.create_segmentio_event_json( data=self.create_segmentio_event_json(
name=name, name=name,
data=input_payload, data=input_payload,
open_in_browser_url='https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity/2',
context={ context={
'open_in_browser_url': 'https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity/2',
'course_id': course_id, 'course_id': course_id,
'application': { 'application': {
'name': 'edx.mobileapp.android', 'name': 'edx.mobileapp.android',
...@@ -344,11 +361,11 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -344,11 +361,11 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
'name': 'test-app', 'name': 'test-app',
'version': 'unknown' 'version': 'unknown'
}, },
'application': { },
'name': 'edx.mobileapp.android', 'application': {
'version': '29', 'name': 'edx.mobileapp.android',
'component': 'videoplayer' 'version': '29',
} 'component': 'videoplayer'
}, },
'received_at': datetime.strptime("2014-08-27T16:33:39.100Z", "%Y-%m-%dT%H:%M:%S.%fZ"), 'received_at': datetime.strptime("2014-08-27T16:33:39.100Z", "%Y-%m-%dT%H:%M:%S.%fZ"),
}, },
......
...@@ -397,8 +397,15 @@ STUDENT_FILEUPLOAD_MAX_SIZE = ENV_TOKENS.get("STUDENT_FILEUPLOAD_MAX_SIZE", STUD ...@@ -397,8 +397,15 @@ STUDENT_FILEUPLOAD_MAX_SIZE = ENV_TOKENS.get("STUDENT_FILEUPLOAD_MAX_SIZE", STUD
# Event tracking # Event tracking
TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {})) TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {}))
EVENT_TRACKING_BACKENDS.update(AUTH_TOKENS.get("EVENT_TRACKING_BACKENDS", {})) EVENT_TRACKING_BACKENDS.update(AUTH_TOKENS.get("EVENT_TRACKING_BACKENDS", {}))
TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get("TRACKING_SEGMENTIO_WEBHOOK_SECRET", TRACKING_SEGMENTIO_WEBHOOK_SECRET) TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get(
"TRACKING_SEGMENTIO_WEBHOOK_SECRET",
TRACKING_SEGMENTIO_WEBHOOK_SECRET
)
TRACKING_SEGMENTIO_ALLOWED_TYPES = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_TYPES", TRACKING_SEGMENTIO_ALLOWED_TYPES) TRACKING_SEGMENTIO_ALLOWED_TYPES = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_TYPES", TRACKING_SEGMENTIO_ALLOWED_TYPES)
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES = ENV_TOKENS.get(
"TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES",
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES
)
TRACKING_SEGMENTIO_SOURCE_MAP = ENV_TOKENS.get("TRACKING_SEGMENTIO_SOURCE_MAP", TRACKING_SEGMENTIO_SOURCE_MAP) TRACKING_SEGMENTIO_SOURCE_MAP = ENV_TOKENS.get("TRACKING_SEGMENTIO_SOURCE_MAP", TRACKING_SEGMENTIO_SOURCE_MAP)
# Student identity verification settings # Student identity verification settings
......
...@@ -540,6 +540,7 @@ if FEATURES.get('ENABLE_SQL_TRACKING_LOGS'): ...@@ -540,6 +540,7 @@ if FEATURES.get('ENABLE_SQL_TRACKING_LOGS'):
TRACKING_SEGMENTIO_WEBHOOK_SECRET = None TRACKING_SEGMENTIO_WEBHOOK_SECRET = None
TRACKING_SEGMENTIO_ALLOWED_TYPES = ['track'] TRACKING_SEGMENTIO_ALLOWED_TYPES = ['track']
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES = ['.bi.']
TRACKING_SEGMENTIO_SOURCE_MAP = { TRACKING_SEGMENTIO_SOURCE_MAP = {
'analytics-android': 'mobile', 'analytics-android': 'mobile',
'analytics-ios': 'mobile', 'analytics-ios': 'mobile',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment