middleware.py 7.67 KB
Newer Older
1 2 3 4 5 6 7 8
"""
This is a middleware layer which keeps a log of all requests made
to the server. It is responsible for removing security tokens and
similar from such events, and relaying them to the event tracking
framework.
"""


9
import hashlib
10
import hmac
11
import json
12
import logging
13 14
import re
import sys
15 16

from django.conf import settings
17

18 19 20 21 22
from track import views
from track import contexts
from eventtracking import tracker


23 24 25
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Name under which the per-request context is registered with the tracker.
CONTEXT_NAME = 'edx.request'

# Maps keys of ``request.META`` to the keys they are stored under in the
# tracking context.
META_KEY_TO_CONTEXT_KEY = {
    'REMOTE_ADDR': 'ip',
    'SERVER_NAME': 'host',
    'HTTP_USER_AGENT': 'agent',
    'PATH_INFO': 'path',
    # Not a typo. See:
    # http://en.wikipedia.org/wiki/HTTP_referer#Origin_of_the_term_referer
    'HTTP_REFERER': 'referer',
    'HTTP_ACCEPT_LANGUAGE': 'accept_language',
}


class TrackMiddleware(object):
    """
    Tracks all requests made, as well as setting up context for other server
    emitted events.

    For every request whose path is not excluded by
    ``settings.TRACKING_IGNORE_URL_PATTERNS``, the censored GET and POST
    parameters are serialized to JSON and passed to ``views.server_track``.
    """

    # Removes passwords from the tracking logs
    # WARNING: This list needs to be changed whenever we change
    # password handling functionality.
    #
    # As of the time of this comment, only 'password' is used
    # The rest are there for future extension.
    #
    # Passwords should never be sent as GET requests, but
    # this can happen due to older browser bugs. We censor
    # this too.
    #
    # We should manually confirm no passwords make it into log
    # files when we change this.
    CENSORED_PARAMETERS = (
        'password', 'newpassword', 'new_password',
        'oldpassword', 'old_password',
    )

    # Placeholder written in place of every censored parameter value.
    CENSORED_VALUE = '*' * 8

    # The serialized event is cut to this many characters before it is
    # tracked, so a long payload is recorded as truncated JSON text.
    MAX_EVENT_LENGTH = 512

    def process_request(self, request):
        """
        Enter the tracking context for `request` and record the request.

        The GET and POST parameters are censored, serialized to JSON,
        truncated to MAX_EVENT_LENGTH characters and passed to
        ``views.server_track``. Requests rejected by
        `should_process_request` still get a tracking context but are not
        recorded.

        Always returns None. Errors are never propagated to the caller:
        a failure is itself reported as an 'exception' event, and if that
        fails too it is only logged.
        """
        try:
            self.enter_request_context(request)

            if not self.should_process_request(request):
                return

            post_params = self._censor_parameters(request.POST)
            get_params = self._censor_parameters(request.GET)
            event = {
                'GET': get_params,
                'POST': post_params,
            }

            # TODO: Confirm no large file uploads
            event = json.dumps(event)[:self.MAX_EVENT_LENGTH]

            views.server_track(request, request.META['PATH_INFO'], event)
        except Exception:  # pylint: disable=broad-except
            ## Why do we have the overly broad except?
            ##
            ## I added instrumentation so if we drop events on the
            ## floor, we at least know about it. However, we really
            ## should just return a 500 here: (1) This will translate
            ## to much more insidious user-facing bugs if we make any
            ## decisions based on incorrect data.  (2) If the system
            ## is down, we should fail and fix it.
            ##
            ## Only ``Exception`` is caught, so interpreter-level signals
            ## such as SystemExit and KeyboardInterrupt still propagate.
            event = {'event-type': 'exception', 'exception': repr(sys.exc_info()[0])}
            try:
                views.server_track(request, request.META['PATH_INFO'], event)
            except Exception:  # pylint: disable=broad-except
                # At this point, things are really broken. We really
                # should fail return a 500 to the user here.  However,
                # the interim decision is to just fail in order to be
                # consistent with current policy, and expedite the PR.
                # The failure is logged so that the dropped event is at
                # least visible somewhere.
                log.exception(
                    "Unable to record the tracking failure event for %s",
                    event['exception']
                )

    def _censor_parameters(self, parameters):
        """
        Return a plain dict copy of `parameters` with sensitive values masked.

        Every key listed in CENSORED_PARAMETERS that is present has its value
        replaced by CENSORED_VALUE. `parameters` itself is not modified.
        """
        censored = dict(parameters)
        for name in self.CENSORED_PARAMETERS:
            if name in censored:
                censored[name] = self.CENSORED_VALUE
        return censored

    def should_process_request(self, request):
        """
        Don't track requests to the specified URL patterns.

        Returns False when the request path matches (from its start) any
        regular expression in ``settings.TRACKING_IGNORE_URL_PATTERNS``,
        True otherwise. A missing setting means nothing is ignored.
        """
        path = request.META['PATH_INFO']

        ignored_url_patterns = getattr(settings, 'TRACKING_IGNORE_URL_PATTERNS', [])
        # Note we are explicitly relying on python's internal caching of
        # compiled regular expressions here.
        return not any(re.match(pattern, path) for pattern in ignored_url_patterns)

    def enter_request_context(self, request):
        """
        Extract information from the request and add it to the tracking
        context.

        The following fields are injected into the context:

        * session - A keyed hash of the Django session key that identifies the user's session.
        * user_id - The numeric ID for the logged in user.
        * username - The username of the logged in user.
        * ip - The IP address of the client.
        * host - The "SERVER_NAME" header, which should be the name of the server running this code.
        * agent - The client browser identification string.
        * path - The path part of the requested URL.
        * referer - The "HTTP_REFERER" header.
        * accept_language - The "HTTP_ACCEPT_LANGUAGE" header.
        * client_id - The unique key used by Google Analytics to identify a user

        Headers that are absent from the request are stored as empty strings.
        The context is additionally updated with whatever
        ``contexts.course_context_from_url`` returns for the absolute URL of
        the request.
        """
        context = {
            'session': self.get_session_key(request),
            'user_id': self.get_user_primary_key(request),
            'username': self.get_username(request),
        }
        # ``items()`` rather than ``iteritems()``: same result, and it is
        # available on both Python 2 and Python 3.
        for header_name, context_key in META_KEY_TO_CONTEXT_KEY.items():
            context[context_key] = request.META.get(header_name, '')

        # Google Analytics uses the clientId to keep track of unique visitors. A GA cookie looks like
        # this: _ga=GA1.2.1033501218.1368477899. The clientId is this part: 1033501218.1368477899.
        google_analytics_cookie = request.COOKIES.get('_ga')
        if google_analytics_cookie is None:
            context['client_id'] = None
        else:
            context['client_id'] = '.'.join(google_analytics_cookie.split('.')[2:])

        context.update(contexts.course_context_from_url(request.build_absolute_uri()))

        tracker.get_tracker().enter_context(
            CONTEXT_NAME,
            context
        )

    def get_session_key(self, request):
        """
        Gets the Django session key from the request and returns its keyed
        hash (see `encrypt_session_key`), or an empty string if the request
        has no session.
        """
        try:
            return self.encrypt_session_key(request.session.session_key)
        except AttributeError:
            return ''

    @staticmethod
    def _to_bytes(value):
        """Return `value` as a byte string, UTF-8 encoding it if it is text."""
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    def encrypt_session_key(self, session_key):
        """
        Maps a Django session key to another 32-character hex value.

        Despite the name this is a one-way keyed hash (HMAC-MD5), not
        reversible encryption. An empty or None `session_key` yields ''.
        """
        if not session_key:
            return ''

        # Follow the model of django.utils.crypto.salted_hmac() and
        # django.contrib.sessions.backends.base._hash() but use MD5
        # instead of SHA1 so that the result has the same length (32)
        # as the original session_key.

        # TODO: Switch to SHA224, which is secure.
        # If necessary, drop the last little bit of the hash to make it the same length.
        # Using a known-insecure hash to shorten is silly.
        # Also, why do we need same length?
        key_salt = "common.djangoapps.track" + self.__class__.__name__
        # hashlib and hmac operate on bytes; coercing explicitly keeps the
        # digest unchanged for ASCII input while also accepting text values.
        key_material = self._to_bytes(key_salt) + self._to_bytes(settings.SECRET_KEY)
        key = hashlib.md5(key_material).digest()
        encrypted_session_key = hmac.new(
            key, msg=self._to_bytes(session_key), digestmod=hashlib.md5
        ).hexdigest()
        return encrypted_session_key

    def get_user_primary_key(self, request):
        """Gets the primary key of the logged in Django user, or '' if the request has no user."""
        try:
            return request.user.pk
        except AttributeError:
            return ''

    def get_username(self, request):
        """Gets the username of the logged in Django user, or '' if the request has no user."""
        try:
            return request.user.username
        except AttributeError:
            return ''

    def process_response(self, _request, response):
        """
        Exit the context if it exists.

        Any error raised while leaving the context is deliberately ignored
        so that it cannot affect the response, which is returned unchanged.
        """
        try:
            tracker.get_tracker().exit_context(CONTEXT_NAME)
        except Exception:  # pylint: disable=broad-except
            pass

        return response