Commit eb2d8c4e by Piotr Mitros

Merge pull request #6867 from edx/pmitros/add-referer

Add referer and accept_language to tracking logs
parents 5bcb3a5e cf719a40
import hmac
"""
This is a middleware layer which keeps a log of all requests made
to the server. It is responsible for removing security tokens and
similar from such events, and relaying them to the event tracking
framework.
"""
import hashlib
import hmac
import json
import re
import logging
import re
import sys
from django.conf import settings
......@@ -18,7 +27,11 @@ META_KEY_TO_CONTEXT_KEY = {
'REMOTE_ADDR': 'ip',
'SERVER_NAME': 'host',
'HTTP_USER_AGENT': 'agent',
'PATH_INFO': 'path'
'PATH_INFO': 'path',
# Not a typo. See:
# http://en.wikipedia.org/wiki/HTTP_referer#Origin_of_the_term_referer
'HTTP_REFERER': 'referer',
'HTTP_ACCEPT_LANGUAGE': 'accept_language',
}
......@@ -70,7 +83,27 @@ class TrackMiddleware(object):
views.server_track(request, request.META['PATH_INFO'], event)
except:
pass
## Why do we have the overly broad except?
##
## I added instrumentation so if we drop events on the
## floor, we at least know about it. However, we really
## should just return a 500 here: (1) This will translate
## to much more insidious user-facing bugs if we make any
## decisions based on incorrect data. (2) If the system
## is down, we should fail and fix it.
event = {'event-type': 'exception', 'exception': repr(sys.exc_info()[0])}
try:
views.server_track(request, request.META['PATH_INFO'], event)
except:
# At this point, things are really broken. We really
# should fail return a 500 to the user here. However,
# the interim decision is to just fail in order to be
# consistent with current policy, and expedite the PR.
# This version of the code makes no compromises
# relative to the code before, while a proper failure
# here would involve shifting compromises and
# discussion.
pass
def should_process_request(self, request):
"""Don't track requests to the specified URL patterns"""
......@@ -139,6 +172,11 @@ class TrackMiddleware(object):
# django.contrib.sessions.backends.base._hash() but use MD5
# instead of SHA1 so that the result has the same length (32)
# as the original session_key.
# TODO: Switch to SHA224, which is secure.
# If necessary, drop the last little bit of the hash to make it the same length.
# Using a known-insecure hash to shorten is silly.
# Also, why do we need same length?
key_salt = "common.djangoapps.track" + self.__class__.__name__
key = hashlib.md5(key_salt + settings.SECRET_KEY).digest()
encrypted_session_key = hmac.new(key, msg=session_key, digestmod=hashlib.md5).hexdigest()
......
......@@ -14,7 +14,9 @@ CONTEXT_FIELDS_TO_INCLUDE = [
'session',
'ip',
'agent',
'host'
'host',
'referer',
'accept_language'
]
......
......@@ -31,6 +31,22 @@ class InMemoryBackend(object):
self.events.append(event)
def unicode_flatten(tree):
"""
Test cases have funny issues where some strings are unicode, and
some are not. This does not cause test failures, but causes test
output diffs to show many more difference than actually occur in the
data. This will convert everything to a common form.
"""
if isinstance(tree, basestring):
return unicode(tree)
elif isinstance(tree, list):
return map(unicode_flatten, list)
elif isinstance(tree, dict):
return dict([(unicode_flatten(key), unicode_flatten(value)) for key, value in tree.iteritems()])
return tree
@freeze_time(FROZEN_TIME)
@override_settings(
EVENT_TRACKING_BACKENDS=IN_MEMORY_BACKEND_CONFIG
......@@ -67,3 +83,7 @@ class EventTrackingTestCase(TestCase):
def assert_events_emitted(self):
"""Ensure at least one event has been emitted at this point in the test."""
self.assertGreaterEqual(len(self.backend.events), 1)
def assertEqualUnicode(self, tree_a, tree_b):
"""Like assertEqual, but give nicer errors for unicode vs. non-unicode"""
self.assertEqual(unicode_flatten(tree_a), unicode_flatten(tree_b))
......@@ -53,6 +53,8 @@ class TrackMiddlewareTestCase(TestCase):
def test_default_request_context(self):
context = self.get_context_for_path('/courses/')
self.assertEquals(context, {
'accept_language': '',
'referer': '',
'user_id': '',
'session': '',
'username': '',
......
......@@ -23,6 +23,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
data = {sentinel.key: sentinel.value}
context = {
'accept_language': sentinel.accept_language,
'referer': sentinel.referer,
'username': sentinel.username,
'session': sentinel.session,
'ip': sentinel.ip,
......@@ -40,6 +42,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
emitted_event = self.get_event()
expected_event = {
'accept_language': sentinel.accept_language,
'referer': sentinel.referer,
'event_type': sentinel.name,
'name': sentinel.name,
'context': {
......@@ -58,7 +62,7 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
'page': None,
'session': sentinel.session,
}
self.assertEqual(expected_event, emitted_event)
self.assertEqualUnicode(expected_event, emitted_event)
@override_settings(
EVENT_TRACKING_PROCESSORS=LEGACY_SHIM_PROCESSOR,
......@@ -69,6 +73,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
emitted_event = self.get_event()
expected_event = {
'accept_language': '',
'referer': '',
'event_type': sentinel.name,
'name': sentinel.name,
'context': {},
......@@ -82,4 +88,4 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
'page': None,
'session': '',
}
self.assertEqual(expected_event, emitted_event)
self.assertEqualUnicode(expected_event, emitted_event)
......@@ -58,6 +58,8 @@ def user_track(request):
"username": username,
"session": context.get('session', ''),
"ip": _get_request_header(request, 'REMOTE_ADDR'),
"referer": _get_request_header(request, 'HTTP_REFERER'),
"accept_language": _get_request_header(request, 'HTTP_ACCEPT_LANGUAGE'),
"event_source": "browser",
"event_type": _get_request_value(request, 'event_type'),
"event": _get_request_value(request, 'event'),
......@@ -95,6 +97,8 @@ def server_track(request, event_type, event, page=None):
event = {
"username": username,
"ip": _get_request_header(request, 'REMOTE_ADDR'),
"referer": _get_request_header(request, 'HTTP_REFERER'),
"accept_language": _get_request_header(request, 'HTTP_ACCEPT_LANGUAGE'),
"event_source": "server",
"event_type": event_type,
"event": event,
......
......@@ -53,6 +53,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
def setUp(self):
super(SegmentIOTrackingTestCase, self).setUp()
self.maxDiff = None # pylint: disable=invalid-name
self.request_factory = RequestFactory()
def test_get_request(self):
......@@ -189,6 +190,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
self.assertEquals(response.status_code, 200)
expected_event = {
'accept_language': '',
'referer': '',
'username': str(sentinel.username),
'ip': '',
'session': '',
......@@ -207,7 +210,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
},
'user_id': USER_ID,
'course_id': course_id,
'org_id': 'foo',
'org_id': u'foo',
'path': ENDPOINT,
'client': {
'library': {
......@@ -224,7 +227,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
finally:
middleware.process_response(request, None)
self.assertEquals(self.get_event(), expected_event)
self.assertEqualUnicode(self.get_event(), expected_event)
def test_invalid_course_id(self):
request = self.create_request(
......@@ -352,6 +355,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
self.assertEquals(response.status_code, 200)
expected_event_without_payload = {
'accept_language': '',
'referer': '',
'username': str(sentinel.username),
'ip': '',
'session': '',
......@@ -397,5 +402,5 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
actual_event = dict(self.get_event())
payload = json.loads(actual_event.pop('event'))
self.assertEquals(actual_event, expected_event_without_payload)
self.assertEquals(payload, expected_payload)
self.assertEqualUnicode(actual_event, expected_event_without_payload)
self.assertEqualUnicode(payload, expected_payload)
......@@ -41,6 +41,8 @@ class TestTrackViews(TestCase):
views.user_track(request)
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'session': sentinel.session,
'ip': '127.0.0.1',
......@@ -65,6 +67,8 @@ class TestTrackViews(TestCase):
views.user_track(request)
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'session': sentinel.session,
'ip': '127.0.0.1',
......@@ -95,6 +99,8 @@ class TestTrackViews(TestCase):
views.user_track(request)
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'session': '',
'ip': '127.0.0.1',
......@@ -123,6 +129,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}')
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'ip': '127.0.0.1',
'event_source': 'server',
......@@ -147,6 +155,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}')
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'ip': '127.0.0.1',
'event_source': 'server',
......@@ -180,6 +190,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}')
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'ip': '127.0.0.1',
'event_source': 'server',
......@@ -207,6 +219,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}')
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'ip': '',
'event_source': 'server',
......@@ -223,6 +237,8 @@ class TestTrackViews(TestCase):
@freeze_time(expected_time)
def test_task_track(self):
request_info = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'ip': '127.0.0.1',
'agent': 'agent',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment