Commit cf719a40 by Piotr Mitros

Add referer and accept_language to tracking logs

3 line patch, together with a world of pain due to buggy code, broken abstractions in tests, etc.
parent d644da9e
import hmac """
This is a middleware layer which keeps a log of all requests made
to the server. It is responsible for removing security tokens and
similar from such events, and relaying them to the event tracking
framework.
"""
import hashlib import hashlib
import hmac
import json import json
import re
import logging import logging
import re
import sys
from django.conf import settings from django.conf import settings
...@@ -18,7 +27,11 @@ META_KEY_TO_CONTEXT_KEY = { ...@@ -18,7 +27,11 @@ META_KEY_TO_CONTEXT_KEY = {
'REMOTE_ADDR': 'ip', 'REMOTE_ADDR': 'ip',
'SERVER_NAME': 'host', 'SERVER_NAME': 'host',
'HTTP_USER_AGENT': 'agent', 'HTTP_USER_AGENT': 'agent',
'PATH_INFO': 'path' 'PATH_INFO': 'path',
# Not a typo. See:
# http://en.wikipedia.org/wiki/HTTP_referer#Origin_of_the_term_referer
'HTTP_REFERER': 'referer',
'HTTP_ACCEPT_LANGUAGE': 'accept_language',
} }
...@@ -70,7 +83,27 @@ class TrackMiddleware(object): ...@@ -70,7 +83,27 @@ class TrackMiddleware(object):
views.server_track(request, request.META['PATH_INFO'], event) views.server_track(request, request.META['PATH_INFO'], event)
except: except:
pass ## Why do we have the overly broad except?
##
## I added instrumentation so if we drop events on the
## floor, we at least know about it. However, we really
## should just return a 500 here: (1) This will translate
## to much more insidious user-facing bugs if we make any
## decisions based on incorrect data. (2) If the system
## is down, we should fail and fix it.
event = {'event-type': 'exception', 'exception': repr(sys.exc_info()[0])}
try:
views.server_track(request, request.META['PATH_INFO'], event)
except:
# At this point, things are really broken. We really
# should fail return a 500 to the user here. However,
# the interim decision is to just fail in order to be
# consistent with current policy, and expedite the PR.
# This version of the code makes no compromises
# relative to the code before, while a proper failure
# here would involve shifting compromises and
# discussion.
pass
def should_process_request(self, request): def should_process_request(self, request):
"""Don't track requests to the specified URL patterns""" """Don't track requests to the specified URL patterns"""
...@@ -139,6 +172,11 @@ class TrackMiddleware(object): ...@@ -139,6 +172,11 @@ class TrackMiddleware(object):
# django.contrib.sessions.backends.base._hash() but use MD5 # django.contrib.sessions.backends.base._hash() but use MD5
# instead of SHA1 so that the result has the same length (32) # instead of SHA1 so that the result has the same length (32)
# as the original session_key. # as the original session_key.
# TODO: Switch to SHA224, which is secure.
# If necessary, drop the last little bit of the hash to make it the same length.
# Using a known-insecure hash to shorten is silly.
# Also, why do we need same length?
key_salt = "common.djangoapps.track" + self.__class__.__name__ key_salt = "common.djangoapps.track" + self.__class__.__name__
key = hashlib.md5(key_salt + settings.SECRET_KEY).digest() key = hashlib.md5(key_salt + settings.SECRET_KEY).digest()
encrypted_session_key = hmac.new(key, msg=session_key, digestmod=hashlib.md5).hexdigest() encrypted_session_key = hmac.new(key, msg=session_key, digestmod=hashlib.md5).hexdigest()
......
...@@ -14,7 +14,9 @@ CONTEXT_FIELDS_TO_INCLUDE = [ ...@@ -14,7 +14,9 @@ CONTEXT_FIELDS_TO_INCLUDE = [
'session', 'session',
'ip', 'ip',
'agent', 'agent',
'host' 'host',
'referer',
'accept_language'
] ]
......
...@@ -31,6 +31,22 @@ class InMemoryBackend(object): ...@@ -31,6 +31,22 @@ class InMemoryBackend(object):
self.events.append(event) self.events.append(event)
def unicode_flatten(tree):
"""
Test cases have funny issues where some strings are unicode, and
some are not. This does not cause test failures, but causes test
output diffs to show many more difference than actually occur in the
data. This will convert everything to a common form.
"""
if isinstance(tree, basestring):
return unicode(tree)
elif isinstance(tree, list):
return map(unicode_flatten, list)
elif isinstance(tree, dict):
return dict([(unicode_flatten(key), unicode_flatten(value)) for key, value in tree.iteritems()])
return tree
@freeze_time(FROZEN_TIME) @freeze_time(FROZEN_TIME)
@override_settings( @override_settings(
EVENT_TRACKING_BACKENDS=IN_MEMORY_BACKEND_CONFIG EVENT_TRACKING_BACKENDS=IN_MEMORY_BACKEND_CONFIG
...@@ -67,3 +83,7 @@ class EventTrackingTestCase(TestCase): ...@@ -67,3 +83,7 @@ class EventTrackingTestCase(TestCase):
def assert_events_emitted(self): def assert_events_emitted(self):
"""Ensure at least one event has been emitted at this point in the test.""" """Ensure at least one event has been emitted at this point in the test."""
self.assertGreaterEqual(len(self.backend.events), 1) self.assertGreaterEqual(len(self.backend.events), 1)
def assertEqualUnicode(self, tree_a, tree_b):
"""Like assertEqual, but give nicer errors for unicode vs. non-unicode"""
self.assertEqual(unicode_flatten(tree_a), unicode_flatten(tree_b))
...@@ -53,6 +53,8 @@ class TrackMiddlewareTestCase(TestCase): ...@@ -53,6 +53,8 @@ class TrackMiddlewareTestCase(TestCase):
def test_default_request_context(self): def test_default_request_context(self):
context = self.get_context_for_path('/courses/') context = self.get_context_for_path('/courses/')
self.assertEquals(context, { self.assertEquals(context, {
'accept_language': '',
'referer': '',
'user_id': '', 'user_id': '',
'session': '', 'session': '',
'username': '', 'username': '',
......
...@@ -23,6 +23,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): ...@@ -23,6 +23,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
data = {sentinel.key: sentinel.value} data = {sentinel.key: sentinel.value}
context = { context = {
'accept_language': sentinel.accept_language,
'referer': sentinel.referer,
'username': sentinel.username, 'username': sentinel.username,
'session': sentinel.session, 'session': sentinel.session,
'ip': sentinel.ip, 'ip': sentinel.ip,
...@@ -40,6 +42,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): ...@@ -40,6 +42,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
emitted_event = self.get_event() emitted_event = self.get_event()
expected_event = { expected_event = {
'accept_language': sentinel.accept_language,
'referer': sentinel.referer,
'event_type': sentinel.name, 'event_type': sentinel.name,
'name': sentinel.name, 'name': sentinel.name,
'context': { 'context': {
...@@ -58,7 +62,7 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): ...@@ -58,7 +62,7 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
'page': None, 'page': None,
'session': sentinel.session, 'session': sentinel.session,
} }
self.assertEqual(expected_event, emitted_event) self.assertEqualUnicode(expected_event, emitted_event)
@override_settings( @override_settings(
EVENT_TRACKING_PROCESSORS=LEGACY_SHIM_PROCESSOR, EVENT_TRACKING_PROCESSORS=LEGACY_SHIM_PROCESSOR,
...@@ -69,6 +73,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): ...@@ -69,6 +73,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
emitted_event = self.get_event() emitted_event = self.get_event()
expected_event = { expected_event = {
'accept_language': '',
'referer': '',
'event_type': sentinel.name, 'event_type': sentinel.name,
'name': sentinel.name, 'name': sentinel.name,
'context': {}, 'context': {},
...@@ -82,4 +88,4 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): ...@@ -82,4 +88,4 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
'page': None, 'page': None,
'session': '', 'session': '',
} }
self.assertEqual(expected_event, emitted_event) self.assertEqualUnicode(expected_event, emitted_event)
...@@ -58,6 +58,8 @@ def user_track(request): ...@@ -58,6 +58,8 @@ def user_track(request):
"username": username, "username": username,
"session": context.get('session', ''), "session": context.get('session', ''),
"ip": _get_request_header(request, 'REMOTE_ADDR'), "ip": _get_request_header(request, 'REMOTE_ADDR'),
"referer": _get_request_header(request, 'HTTP_REFERER'),
"accept_language": _get_request_header(request, 'HTTP_ACCEPT_LANGUAGE'),
"event_source": "browser", "event_source": "browser",
"event_type": _get_request_value(request, 'event_type'), "event_type": _get_request_value(request, 'event_type'),
"event": _get_request_value(request, 'event'), "event": _get_request_value(request, 'event'),
...@@ -95,6 +97,8 @@ def server_track(request, event_type, event, page=None): ...@@ -95,6 +97,8 @@ def server_track(request, event_type, event, page=None):
event = { event = {
"username": username, "username": username,
"ip": _get_request_header(request, 'REMOTE_ADDR'), "ip": _get_request_header(request, 'REMOTE_ADDR'),
"referer": _get_request_header(request, 'HTTP_REFERER'),
"accept_language": _get_request_header(request, 'HTTP_ACCEPT_LANGUAGE'),
"event_source": "server", "event_source": "server",
"event_type": event_type, "event_type": event_type,
"event": event, "event": event,
......
...@@ -53,6 +53,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -53,6 +53,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
def setUp(self): def setUp(self):
super(SegmentIOTrackingTestCase, self).setUp() super(SegmentIOTrackingTestCase, self).setUp()
self.maxDiff = None # pylint: disable=invalid-name
self.request_factory = RequestFactory() self.request_factory = RequestFactory()
def test_get_request(self): def test_get_request(self):
...@@ -189,6 +190,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -189,6 +190,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
self.assertEquals(response.status_code, 200) self.assertEquals(response.status_code, 200)
expected_event = { expected_event = {
'accept_language': '',
'referer': '',
'username': str(sentinel.username), 'username': str(sentinel.username),
'ip': '', 'ip': '',
'session': '', 'session': '',
...@@ -207,7 +210,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -207,7 +210,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
}, },
'user_id': USER_ID, 'user_id': USER_ID,
'course_id': course_id, 'course_id': course_id,
'org_id': 'foo', 'org_id': u'foo',
'path': ENDPOINT, 'path': ENDPOINT,
'client': { 'client': {
'library': { 'library': {
...@@ -224,7 +227,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -224,7 +227,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
finally: finally:
middleware.process_response(request, None) middleware.process_response(request, None)
self.assertEquals(self.get_event(), expected_event) self.assertEqualUnicode(self.get_event(), expected_event)
def test_invalid_course_id(self): def test_invalid_course_id(self):
request = self.create_request( request = self.create_request(
...@@ -352,6 +355,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -352,6 +355,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
self.assertEquals(response.status_code, 200) self.assertEquals(response.status_code, 200)
expected_event_without_payload = { expected_event_without_payload = {
'accept_language': '',
'referer': '',
'username': str(sentinel.username), 'username': str(sentinel.username),
'ip': '', 'ip': '',
'session': '', 'session': '',
...@@ -397,5 +402,5 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): ...@@ -397,5 +402,5 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
actual_event = dict(self.get_event()) actual_event = dict(self.get_event())
payload = json.loads(actual_event.pop('event')) payload = json.loads(actual_event.pop('event'))
self.assertEquals(actual_event, expected_event_without_payload) self.assertEqualUnicode(actual_event, expected_event_without_payload)
self.assertEquals(payload, expected_payload) self.assertEqualUnicode(payload, expected_payload)
...@@ -41,6 +41,8 @@ class TestTrackViews(TestCase): ...@@ -41,6 +41,8 @@ class TestTrackViews(TestCase):
views.user_track(request) views.user_track(request)
expected_event = { expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous', 'username': 'anonymous',
'session': sentinel.session, 'session': sentinel.session,
'ip': '127.0.0.1', 'ip': '127.0.0.1',
...@@ -65,6 +67,8 @@ class TestTrackViews(TestCase): ...@@ -65,6 +67,8 @@ class TestTrackViews(TestCase):
views.user_track(request) views.user_track(request)
expected_event = { expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous', 'username': 'anonymous',
'session': sentinel.session, 'session': sentinel.session,
'ip': '127.0.0.1', 'ip': '127.0.0.1',
...@@ -95,6 +99,8 @@ class TestTrackViews(TestCase): ...@@ -95,6 +99,8 @@ class TestTrackViews(TestCase):
views.user_track(request) views.user_track(request)
expected_event = { expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous', 'username': 'anonymous',
'session': '', 'session': '',
'ip': '127.0.0.1', 'ip': '127.0.0.1',
...@@ -123,6 +129,8 @@ class TestTrackViews(TestCase): ...@@ -123,6 +129,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}') views.server_track(request, str(sentinel.event_type), '{}')
expected_event = { expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous', 'username': 'anonymous',
'ip': '127.0.0.1', 'ip': '127.0.0.1',
'event_source': 'server', 'event_source': 'server',
...@@ -147,6 +155,8 @@ class TestTrackViews(TestCase): ...@@ -147,6 +155,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}') views.server_track(request, str(sentinel.event_type), '{}')
expected_event = { expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous', 'username': 'anonymous',
'ip': '127.0.0.1', 'ip': '127.0.0.1',
'event_source': 'server', 'event_source': 'server',
...@@ -180,6 +190,8 @@ class TestTrackViews(TestCase): ...@@ -180,6 +190,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}') views.server_track(request, str(sentinel.event_type), '{}')
expected_event = { expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous', 'username': 'anonymous',
'ip': '127.0.0.1', 'ip': '127.0.0.1',
'event_source': 'server', 'event_source': 'server',
...@@ -207,6 +219,8 @@ class TestTrackViews(TestCase): ...@@ -207,6 +219,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}') views.server_track(request, str(sentinel.event_type), '{}')
expected_event = { expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous', 'username': 'anonymous',
'ip': '', 'ip': '',
'event_source': 'server', 'event_source': 'server',
...@@ -223,6 +237,8 @@ class TestTrackViews(TestCase): ...@@ -223,6 +237,8 @@ class TestTrackViews(TestCase):
@freeze_time(expected_time) @freeze_time(expected_time)
def test_task_track(self): def test_task_track(self):
request_info = { request_info = {
'accept_language': '',
'referer': '',
'username': 'anonymous', 'username': 'anonymous',
'ip': '127.0.0.1', 'ip': '127.0.0.1',
'agent': 'agent', 'agent': 'agent',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment