Commit fbc243db by Carlos Andrés Rocha

Add event tracking with configurable backends

parent 2ddaf908
...@@ -5,8 +5,11 @@ These are notable changes in edx-platform. This is a rolling list of changes, ...@@ -5,8 +5,11 @@ These are notable changes in edx-platform. This is a rolling list of changes,
in roughly chronological order, most recent first. Add your entries at or near in roughly chronological order, most recent first. Add your entries at or near
the top. Include a label indicating the component affected. the top. Include a label indicating the component affected.
Common: Added configurable backends for tracking events. Tracking events using
the python logging module is the default backend. Support for MongoDB and a
Django database is also available.
Blades: Added Learning Tools Interoperability (LTI) blade. Now LTI components Blades: Added Learning Tools Interoperability (LTI) blade. Now LTI components
can be included to courses. can be included to courses.
LMS: Added alphabetical sorting of forum categories and subcategories. LMS: Added alphabetical sorting of forum categories and subcategories.
...@@ -287,4 +290,3 @@ Common: Updated CodeJail. ...@@ -287,4 +290,3 @@ Common: Updated CodeJail.
Common: Allow setting of authentication session cookie name. Common: Allow setting of authentication session cookie name.
LMS: Option to email students when enroll/un-enroll them. LMS: Option to email students when enroll/un-enroll them.
...@@ -161,3 +161,6 @@ BROKER_URL = "{0}://{1}:{2}@{3}/{4}".format(CELERY_BROKER_TRANSPORT, ...@@ -161,3 +161,6 @@ BROKER_URL = "{0}://{1}:{2}@{3}/{4}".format(CELERY_BROKER_TRANSPORT,
CELERY_BROKER_PASSWORD, CELERY_BROKER_PASSWORD,
CELERY_BROKER_HOSTNAME, CELERY_BROKER_HOSTNAME,
CELERY_BROKER_VHOST) CELERY_BROKER_VHOST)
# Event tracking
TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {}))
...@@ -218,9 +218,6 @@ USE_L10N = True ...@@ -218,9 +218,6 @@ USE_L10N = True
# Localization strings (e.g. django.po) are under this directory # Localization strings (e.g. django.po) are under this directory
LOCALE_PATHS = (REPO_ROOT + '/conf/locale',) # mitx/conf/locale/ LOCALE_PATHS = (REPO_ROOT + '/conf/locale',) # mitx/conf/locale/
# Tracking
TRACK_MAX_EVENT = 10000
# Messages # Messages
MESSAGE_STORAGE = 'django.contrib.messages.storage.session.SessionStorage' MESSAGE_STORAGE = 'django.contrib.messages.storage.session.SessionStorage'
...@@ -391,3 +388,16 @@ MKTG_URL_LINK_MAP = { ...@@ -391,3 +388,16 @@ MKTG_URL_LINK_MAP = {
} }
COURSES_WITH_UNSAFE_CODE = [] COURSES_WITH_UNSAFE_CODE = []
############################## EVENT TRACKING #################################
TRACK_MAX_EVENT = 10000
TRACKING_BACKENDS = {
'logger': {
'ENGINE': 'track.backends.logger.LoggerBackend',
'OPTIONS': {
'name': 'tracking'
}
}
}
"""
Event tracking backend module.
Contains the base class for event trackers, and implementation of some
backends.
"""
from __future__ import absolute_import
import abc
# pylint: disable=unused-argument
class BaseBackend(object):
"""
Abstract Base Class for event tracking backends.
"""
__metaclass__ = abc.ABCMeta
def __init__(self, **kwargs):
pass
@abc.abstractmethod
def send(self, event):
"""Send event to tracker."""
pass
"""
Event tracker backend that saves events to a Django database.
"""
# TODO: this module is very specific to the event schema, and is only
# brought here for legacy support. It should be updated when the
# schema changes or eventually deprecated.
from __future__ import absolute_import
import logging
import dateutil
from django.db import models
from track.backends import BaseBackend
log = logging.getLogger('track.backends.django')
LOGFIELDS = [
'username',
'ip',
'event_source',
'event_type',
'event',
'agent',
'page',
'time',
'host'
]
class TrackingLog(models.Model):
"""Defines the fields that are stored in the tracking log database."""
dtcreated = models.DateTimeField('creation date', auto_now_add=True)
username = models.CharField(max_length=32, blank=True)
ip = models.CharField(max_length=32, blank=True)
event_source = models.CharField(max_length=32)
event_type = models.CharField(max_length=512, blank=True)
event = models.TextField(blank=True)
agent = models.CharField(max_length=256, blank=True)
page = models.CharField(max_length=512, blank=True, null=True)
time = models.DateTimeField('event time')
host = models.CharField(max_length=64, blank=True)
class Meta:
app_label = 'track'
db_table = 'track_trackinglog'
def __unicode__(self):
fmt = (
u"[{self.time}] {self.username}@{self.ip}: "
u"{self.event_source}| {self.event_type} | "
u"{self.page} | {self.event}"
)
return fmt.format(self=self)
class DjangoBackend(BaseBackend):
"""Event tracker backend that saves to a Django database"""
def __init__(self, name='default', **options):
"""
Configure database used by the backend.
:Parameters:
- `name` is the name of the database as specified in the project
settings.
"""
super(DjangoBackend, self).__init__(**options)
self.name = name
def send(self, event):
event['time'] = dateutil.parser.parse(event['time'])
field_values = {x: event.get(x, '') for x in LOGFIELDS}
tldat = TrackingLog(**field_values)
try:
tldat.save(using=self.name)
except Exception as e: # pylint: disable=broad-except
log.exception(e)
"""Event tracker backend that saves events to a python logger."""
from __future__ import absolute_import
import logging
import json
from django.conf import settings
from track.backends import BaseBackend
log = logging.getLogger('track.backends.logger')
class LoggerBackend(BaseBackend):
"""Event tracker backend that uses a python logger.
Events are logged to the INFO level as JSON strings.
"""
def __init__(self, name, **kwargs):
"""Event tracker backend that uses a python logger.
:Parameters:
- `name`: identifier of the logger, which should have
been configured using the default python mechanisms.
"""
super(LoggerBackend, self).__init__(**kwargs)
self.event_logger = logging.getLogger(name)
def send(self, event):
event_str = json.dumps(event)
# TODO: remove trucation of the serialized event, either at a
# higher level during the emittion of the event, or by
# providing warnings when the events exceed certain size.
event_str = event_str[:settings.TRACK_MAX_EVENT]
self.event_logger.info(event_str)
"""MongoDB event tracker backend."""
from __future__ import absolute_import
import logging
import pymongo
from pymongo import MongoClient
from pymongo.errors import PyMongoError
from track.backends import BaseBackend
log = logging.getLogger('track.backends.mongodb')
class MongoBackend(BaseBackend):
"""Class for a MongoDB event tracker Backend"""
def __init__(self, **kwargs):
"""
Connect to a MongoDB.
:Parameters:
- `host`: hostname
- `port`: port
- `user`: collection username
- `password`: collection user password
- `database`: name of the database
- `collection`: name of the collection
- `extra`: parameters to pymongo.MongoClient not listed above
"""
super(MongoBackend, self).__init__(**kwargs)
# Extract connection parameters from kwargs
host = kwargs.get('host', 'localhost')
port = kwargs.get('port', 27017)
user = kwargs.get('user', '')
password = kwargs.get('password', '')
db_name = kwargs.get('database', 'track')
collection_name = kwargs.get('collection', 'events')
# Other mongo connection arguments
extra = kwargs.get('extra', {})
# By default disable write acknowledgments, reducing the time
# blocking during an insert
extra['w'] = extra.get('w', 0)
# Make timezone aware by default
extra['tz_aware'] = extra.get('tz_aware', True)
# Connect to database and get collection
self.connection = MongoClient(
host=host,
port=port,
**extra
)
self.collection = self.connection[db_name][collection_name]
if user or password:
self.collection.database.authenticate(user, password)
self._create_indexes()
def _create_indexes(self):
# WARNING: The collection will be locked during the index
# creation. If the collection has a large number of
# documents in it, the operation can take a long time.
# TODO: The creation of indexes can be moved to a Django
# management command or equivalent. There is also an option to
# run the indexing on the background, without locking.
self.collection.ensure_index([('time', pymongo.DESCENDING)])
self.collection.ensure_index('event_type')
def send(self, event):
try:
self.collection.insert(event, manipulate=False)
except PyMongoError:
msg = 'Error inserting to MongoDB event tracker backend'
log.exception(msg)
from __future__ import absolute_import
from django.test import TestCase
from track.backends.django import DjangoBackend, TrackingLog
class TestDjangoBackend(TestCase):
def setUp(self):
self.backend = DjangoBackend()
def test_django_backend(self):
event = {
'username': 'test',
'time': '2013-01-01T12:01:00-05:00'
}
self.backend.send(event)
results = list(TrackingLog.objects.all())
self.assertEqual(len(results), 1)
self.assertEqual(results[0].username, 'test')
# Check if time is stored in UTC
self.assertEqual(str(results[0].time), '2013-01-01 17:01:00+00:00')
from __future__ import absolute_import
import json
import logging
from django.test import TestCase
from track.backends.logger import LoggerBackend
class TestLoggerBackend(TestCase):
def setUp(self):
self.handler = MockLoggingHandler()
self.handler.setLevel(logging.INFO)
logger_name = 'track.backends.logger.test'
logger = logging.getLogger(logger_name)
logger.addHandler(self.handler)
self.backend = LoggerBackend(name=logger_name)
def test_logger_backend(self):
self.handler.reset()
# Send a couple of events and check if they were recorded
# by the logger. The events are serialized to JSON.
event = {'test': True}
event_as_json = json.dumps(event)
self.backend.send(event)
self.backend.send(event)
self.assertEqual(
self.handler.messages['info'],
[event_as_json, event_as_json]
)
class MockLoggingHandler(logging.Handler):
"""
Mock logging handler.
Stores records in a dictionry of lists by level.
"""
def __init__(self, *args, **kwargs):
super(MockLoggingHandler, self).__init__(*args, **kwargs)
self.messages = None
self.reset()
def emit(self, record):
level = record.levelname.lower()
message = record.getMessage()
self.messages[level].append(message)
def reset(self):
self.messages = {
'debug': [],
'info': [],
'warning': [],
'error': [],
'critical': [],
}
from __future__ import absolute_import
from uuid import uuid4
import pymongo
from django.test import TestCase
from track.backends.mongodb import MongoBackend
class TestMongoBackend(TestCase):
def setUp(self):
# Use a random database name to prevent problems with tests running
# simultenousely against the same mongo instance
database = '_track_backends_mongodb_{0}'.format(uuid4().hex)
collection = '_test'
self.connection = pymongo.MongoClient()
self.database = self.connection[database]
self.collection = self.database[collection]
# During tests, wait until mongo acknowledged the write
write_concern = 1
self.backend = MongoBackend(
database=database,
collection=collection,
w=write_concern
)
def test_mongo_backend(self):
self.backend.send({'test': 1})
self.backend.send({'test': 2})
# Get all the objects in the db ignoring _id
results = list(self.collection.find({}, {'_id': False}))
self.assertEqual(len(results), 2)
self.assertEqual(results, [{'test': 1}, {'test': 2}])
def tearDown(self):
self.connection.drop_database(self.database)
from django.db import models from track.backends.django import TrackingLog
class TrackingLog(models.Model):
"""Defines the fields that are stored in the tracking log database"""
dtcreated = models.DateTimeField('creation date', auto_now_add=True)
username = models.CharField(max_length=32, blank=True)
ip = models.CharField(max_length=32, blank=True)
event_source = models.CharField(max_length=32)
event_type = models.CharField(max_length=512, blank=True)
event = models.TextField(blank=True)
agent = models.CharField(max_length=256, blank=True)
page = models.CharField(max_length=512, blank=True, null=True)
time = models.DateTimeField('event time')
host = models.CharField(max_length=64, blank=True)
def __unicode__(self):
fmt = (
u"[{self.time}] {self.username}@{self.ip}: "
u"{self.event_source}| {self.event_type} | "
u"{self.page} | {self.event}"
)
return fmt.format(self=self)
from django.conf import settings
from django.test import TestCase
from django.test.utils import override_settings
import track.tracker as tracker
from track.backends import BaseBackend
SIMPLE_SETTINGS = {
'default': {
'ENGINE': 'track.tests.test_tracker.DummyBackend',
'OPTIONS': {
'flag': True
}
}
}
MULTI_SETTINGS = {
'first': {
'ENGINE': 'track.tests.test_tracker.DummyBackend',
},
'second': {
'ENGINE': 'track.tests.test_tracker.DummyBackend',
}
}
class TestTrackerInstantiation(TestCase):
"""Test that a helper function can instantiate backends from their name."""
def setUp(self):
# pylint: disable=protected-access
self.get_backend = tracker._instantiate_backend_from_name
def test_instatiate_backend(self):
name = 'track.tests.test_tracker.DummyBackend'
options = {'flag': True}
backend = self.get_backend(name, options)
self.assertIsInstance(backend, DummyBackend)
self.assertTrue(backend.flag)
def test_instatiate_backends_with_invalid_values(self):
def get_invalid_backend(name, parameters):
return self.get_backend(name, parameters)
options = {}
name = 'track.backends.logger'
self.assertRaises(ValueError, get_invalid_backend, name, options)
name = 'track.backends.logger.Foo'
self.assertRaises(ValueError, get_invalid_backend, name, options)
name = 'this.package.does.not.exists'
self.assertRaises(ValueError, get_invalid_backend, name, options)
name = 'unittest.TestCase'
self.assertRaises(ValueError, get_invalid_backend, name, options)
class TestTrackerDjangoInstantiation(TestCase):
"""Test if backends are initialized properly from Django settings."""
@override_settings(TRACKING_BACKENDS=SIMPLE_SETTINGS)
def test_django_simple_settings(self):
"""Test configuration of a simple backend"""
backends = self._reload_backends()
self.assertEqual(len(backends), 1)
tracker.send({})
self.assertEqual(backends.values()[0].count, 1)
@override_settings(TRACKING_BACKENDS=MULTI_SETTINGS)
def test_django_multi_settings(self):
"""Test if multiple backends can be configured properly."""
backends = self._reload_backends().values()
self.assertEqual(len(backends), 2)
event_count = 10
for _ in xrange(event_count):
tracker.send({})
self.assertEqual(backends[0].count, event_count)
self.assertEqual(backends[1].count, event_count)
@override_settings(TRACKING_BACKENDS=MULTI_SETTINGS)
def test_django_remove_settings(self):
"""Test if a backend can be remove by setting it to None."""
settings.TRACKING_BACKENDS.update({'second': None})
backends = self._reload_backends()
self.assertEqual(len(backends), 1)
def _reload_backends(self):
# pylint: disable=protected-access
# Reset backends
tracker._initialize_backends_from_django_settings()
return tracker.backends
class DummyBackend(BaseBackend):
def __init__(self, **options):
super(DummyBackend, self).__init__(**options)
self.flag = options.get('flag', False)
self.count = 0
# pylint: disable=unused-argument
def send(self, event):
self.count += 1
"""
Module that tracks analytics events by sending them to different
configurable backends.
The backends can be configured using Django settings as the example
below::
TRACKING_BACKENDS = {
'tracker_name': {
'ENGINE': 'class.name.for.backend',
'OPTIONS': {
'host': ... ,
'port': ... ,
...
}
}
}
"""
import inspect
from importlib import import_module
from django.conf import settings
from track.backends import BaseBackend
__all__ = ['send']
backends = {}
def _initialize_backends_from_django_settings():
"""
Initialize the event tracking backends according to the
configuration in django settings
"""
backends.clear()
config = getattr(settings, 'TRACKING_BACKENDS', {})
for name, values in config.iteritems():
# Ignore empty values to turn-off default tracker backends
if values:
engine = values['ENGINE']
options = values.get('OPTIONS', {})
backends[name] = _instantiate_backend_from_name(engine, options)
def _instantiate_backend_from_name(name, options):
"""
Instantiate an event tracker backend from the full module path to
the backend class. Useful when setting backends from configuration
files.
"""
# Parse backend name
try:
parts = name.split('.')
module_name = '.'.join(parts[:-1])
class_name = parts[-1]
except IndexError:
raise ValueError('Invalid event track backend %s' % name)
# Get and verify the backend class
try:
module = import_module(module_name)
cls = getattr(module, class_name)
if not inspect.isclass(cls) or not issubclass(cls, BaseBackend):
raise TypeError
except (ValueError, AttributeError, TypeError, ImportError):
raise ValueError('Cannot find event track backend %s' % name)
backend = cls(**options)
return backend
def send(event):
"""
Send an event object to all the initialized backends.
"""
for backend in backends.itervalues():
backend.send(event)
_initialize_backends_from_django_settings()
import json import json
import logging import logging
import pytz
import datetime import datetime
import dateutil.parser import dateutil.parser
import pytz
from pytz import UTC
from django.contrib.auth.decorators import login_required from django.contrib.auth.decorators import login_required
from django.http import HttpResponse from django.http import HttpResponse
from django.shortcuts import redirect from django.shortcuts import redirect
from django.conf import settings from django.conf import settings
from mitxmako.shortcuts import render_to_response
from django_future.csrf import ensure_csrf_cookie from django_future.csrf import ensure_csrf_cookie
from track.models import TrackingLog
from pytz import UTC
log = logging.getLogger("tracking") from mitxmako.shortcuts import render_to_response
LOGFIELDS = ['username', 'ip', 'event_source', 'event_type', 'event', 'agent', 'page', 'time', 'host'] from track import tracker
from track.models import TrackingLog
def log_event(event): def log_event(event):
"""Write tracking event to log file, and optionally to TrackingLog model.""" """Capture a event by sending it to the register trackers"""
event_str = json.dumps(event) tracker.send(event)
log.info(event_str[:settings.TRACK_MAX_EVENT])
if settings.MITX_FEATURES.get('ENABLE_SQL_TRACKING_LOGS'):
event['time'] = dateutil.parser.parse(event['time'])
tldat = TrackingLog(**dict((x, event[x]) for x in LOGFIELDS))
try:
tldat.save()
except Exception as err:
log.exception(err)
def user_track(request): def user_track(request):
...@@ -69,6 +61,7 @@ def user_track(request): ...@@ -69,6 +61,7 @@ def user_track(request):
} }
log_event(event) log_event(event)
return HttpResponse('success') return HttpResponse('success')
...@@ -96,8 +89,9 @@ def server_track(request, event_type, event, page=None): ...@@ -96,8 +89,9 @@ def server_track(request, event_type, event, page=None):
"host": request.META['SERVER_NAME'], "host": request.META['SERVER_NAME'],
} }
if event_type.startswith("/event_logs") and request.user.is_staff: # don't log if event_type.startswith("/event_logs") and request.user.is_staff:
return return # don't log
log_event(event) log_event(event)
......
...@@ -254,3 +254,6 @@ BROKER_URL = "{0}://{1}:{2}@{3}/{4}".format(CELERY_BROKER_TRANSPORT, ...@@ -254,3 +254,6 @@ BROKER_URL = "{0}://{1}:{2}@{3}/{4}".format(CELERY_BROKER_TRANSPORT,
CELERY_BROKER_PASSWORD, CELERY_BROKER_PASSWORD,
CELERY_BROKER_HOSTNAME, CELERY_BROKER_HOSTNAME,
CELERY_BROKER_VHOST) CELERY_BROKER_VHOST)
# Event tracking
TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {}))
...@@ -277,10 +277,6 @@ RSS_TIMEOUT = 600 ...@@ -277,10 +277,6 @@ RSS_TIMEOUT = 600
STATIC_GRAB = False STATIC_GRAB = False
DEV_CONTENT = True DEV_CONTENT = True
# FIXME: Should we be doing this truncation?
TRACK_MAX_EVENT = 10000
DEBUG_TRACK_LOG = False
MITX_ROOT_URL = '' MITX_ROOT_URL = ''
LOGIN_REDIRECT_URL = MITX_ROOT_URL + '/accounts/login' LOGIN_REDIRECT_URL = MITX_ROOT_URL + '/accounts/login'
...@@ -312,6 +308,32 @@ COURSE_SETTINGS = { ...@@ -312,6 +308,32 @@ COURSE_SETTINGS = {
# TODO (vshnayder): Will probably need to change as we get real access control in. # TODO (vshnayder): Will probably need to change as we get real access control in.
LMS_MIGRATION_ALLOWED_IPS = [] LMS_MIGRATION_ALLOWED_IPS = []
############################## EVENT TRACKING #################################
# FIXME: Should we be doing this truncation?
TRACK_MAX_EVENT = 10000
DEBUG_TRACK_LOG = False
TRACKING_BACKENDS = {
'logger': {
'ENGINE': 'track.backends.logger.LoggerBackend',
'OPTIONS': {
'name': 'tracking'
}
}
}
# Backawrds compatibility with ENABLE_SQL_TRACKING_LOGS feature flag.
# In the future, adding the backend to TRACKING_BACKENDS enough.
if MITX_FEATURES.get('ENABLE_SQL_TRACKING_LOGS'):
TRACKING_BACKENDS.update({
'sql': {
'ENGINE': 'track.backends.django.DjangoBackend'
}
})
######################## subdomain specific settings ########################### ######################## subdomain specific settings ###########################
COURSE_LISTINGS = {} COURSE_LISTINGS = {}
SUBDOMAIN_BRANDING = {} SUBDOMAIN_BRANDING = {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment