Commit 7bdc762e by Syed Hasan raza

Merge pull request #11662 from edx/shr/bug/AN-6713-decoding-errors

Add latin1 decoding to HTTP Headers
parents ca936a7f 0d6e0ac7
...@@ -140,12 +140,11 @@ class TrackMiddleware(object): ...@@ -140,12 +140,11 @@ class TrackMiddleware(object):
'ip': self.get_request_ip_address(request), 'ip': self.get_request_ip_address(request),
} }
for header_name, context_key in META_KEY_TO_CONTEXT_KEY.iteritems(): for header_name, context_key in META_KEY_TO_CONTEXT_KEY.iteritems():
context[context_key] = request.META.get(header_name, '') # HTTP headers may contain Latin1 characters. Decoding using Latin1 encoding here
# avoids encountering UnicodeDecodeError exceptions when these header strings are
# output to tracking logs.
context[context_key] = request.META.get(header_name, '').decode('latin1')
# HTTP_USER_AGENT may contain information that includes latin1 characters.
# Decoding it with the latin1 scheme prevents a UnicodeDecodeError from being raised when
# json.dumps is used for tracking purposes.
context['agent'] = context['agent'].decode('latin1')
# Google Analytics uses the clientId to keep track of unique visitors. A GA cookie looks like # Google Analytics uses the clientId to keep track of unique visitors. A GA cookie looks like
# this: _ga=GA1.2.1033501218.1368477899. The clientId is this part: 1033501218.1368477899. # this: _ga=GA1.2.1033501218.1368477899. The clientId is this part: 1033501218.1368477899.
google_analytics_cookie = request.COOKIES.get('_ga') google_analytics_cookie = request.COOKIES.get('_ga')
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Tests for tracking middleware.""" """Tests for tracking middleware."""
import ddt
from mock import patch from mock import patch
from mock import sentinel from mock import sentinel
...@@ -13,7 +14,9 @@ from eventtracking import tracker ...@@ -13,7 +14,9 @@ from eventtracking import tracker
from track.middleware import TrackMiddleware from track.middleware import TrackMiddleware
@ddt.ddt
class TrackMiddlewareTestCase(TestCase): class TrackMiddlewareTestCase(TestCase):
""" Class for checking tracking requests """
def setUp(self): def setUp(self):
super(TrackMiddlewareTestCase, self).setUp() super(TrackMiddlewareTestCase, self).setUp()
...@@ -29,17 +32,25 @@ class TrackMiddlewareTestCase(TestCase): ...@@ -29,17 +32,25 @@ class TrackMiddlewareTestCase(TestCase):
self.track_middleware.process_request(request) self.track_middleware.process_request(request)
self.assertTrue(self.mock_server_track.called) self.assertTrue(self.mock_server_track.called)
def test_request_with_latin1_characters(self): @ddt.unpack
@ddt.data(
('HTTP_USER_AGENT', 'agent'),
('PATH_INFO', 'path'),
('HTTP_REFERER', 'referer'),
('HTTP_ACCEPT_LANGUAGE', 'accept_language'),
)
def test_request_with_latin1_characters(self, meta_key, context_key):
""" """
When HTTP_USER_AGENT in request.META contains latin1 characters. When HTTP headers contain latin1 characters.
""" """
request = self.request_factory.get('/somewhere') request = self.request_factory.get('/somewhere')
request.META['HTTP_USER_AGENT'] = 'test latin1 \xd3 \xe9 \xf1' # pylint: disable=no-member # pylint: disable=no-member
request.META[meta_key] = 'test latin1 \xd3 \xe9 \xf1' # pylint: disable=no-member
context = self.get_context_for_request(request) context = self.get_context_for_request(request)
# The bytes in the string on the right are utf8 encoded in the source file, so we decode them to construct # The bytes in the string on the right are utf8 encoded in the source file, so we decode them to construct
# a valid unicode string. # a valid unicode string.
self.assertEqual(context['agent'], 'test latin1 Ó é ñ'.decode('utf8')) self.assertEqual(context[context_key], 'test latin1 Ó é ñ'.decode('utf8'))
def test_default_filters_do_not_render_view(self): def test_default_filters_do_not_render_view(self):
for url in ['/event', '/event/1', '/login', '/heartbeat']: for url in ['/event', '/event/1', '/login', '/heartbeat']:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment