Commit fbc243db by Carlos Andrés Rocha

Add event tracking with configurable backends

parent 2ddaf908
......@@ -5,8 +5,11 @@ These are notable changes in edx-platform. This is a rolling list of changes,
in roughly chronological order, most recent first. Add your entries at or near
the top. Include a label indicating the component affected.
Common: Added configurable backends for tracking events. Tracking events using
the python logging module is the default backend. Support for MongoDB and a
Django database is also available.
Blades: Added Learning Tools Interoperability (LTI) blade. Now LTI components
Blades: Added Learning Tools Interoperability (LTI) blade. Now LTI components
can be included to courses.
LMS: Added alphabetical sorting of forum categories and subcategories.
......@@ -287,4 +290,3 @@ Common: Updated CodeJail.
Common: Allow setting of authentication session cookie name.
LMS: Option to email students when enroll/un-enroll them.
......@@ -161,3 +161,6 @@ BROKER_URL = "{0}://{1}:{2}@{3}/{4}".format(CELERY_BROKER_TRANSPORT,
CELERY_BROKER_PASSWORD,
CELERY_BROKER_HOSTNAME,
CELERY_BROKER_VHOST)
# Event tracking
TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {}))
......@@ -218,9 +218,6 @@ USE_L10N = True
# Localization strings (e.g. django.po) are under this directory
LOCALE_PATHS = (REPO_ROOT + '/conf/locale',) # mitx/conf/locale/
# Tracking
TRACK_MAX_EVENT = 10000
# Messages
MESSAGE_STORAGE = 'django.contrib.messages.storage.session.SessionStorage'
......@@ -391,3 +388,16 @@ MKTG_URL_LINK_MAP = {
}
COURSES_WITH_UNSAFE_CODE = []
############################## EVENT TRACKING #################################
TRACK_MAX_EVENT = 10000
TRACKING_BACKENDS = {
'logger': {
'ENGINE': 'track.backends.logger.LoggerBackend',
'OPTIONS': {
'name': 'tracking'
}
}
}
"""
Event tracking backend module.
Contains the base class for event trackers, and implementation of some
backends.
"""
from __future__ import absolute_import
import abc
# pylint: disable=unused-argument
class BaseBackend(object):
"""
Abstract Base Class for event tracking backends.
"""
__metaclass__ = abc.ABCMeta
def __init__(self, **kwargs):
pass
@abc.abstractmethod
def send(self, event):
"""Send event to tracker."""
pass
"""
Event tracker backend that saves events to a Django database.
"""
# TODO: this module is very specific to the event schema, and is only
# brought here for legacy support. It should be updated when the
# schema changes or eventually deprecated.
from __future__ import absolute_import
import logging
import dateutil
from django.db import models
from track.backends import BaseBackend
log = logging.getLogger('track.backends.django')
LOGFIELDS = [
'username',
'ip',
'event_source',
'event_type',
'event',
'agent',
'page',
'time',
'host'
]
class TrackingLog(models.Model):
"""Defines the fields that are stored in the tracking log database."""
dtcreated = models.DateTimeField('creation date', auto_now_add=True)
username = models.CharField(max_length=32, blank=True)
ip = models.CharField(max_length=32, blank=True)
event_source = models.CharField(max_length=32)
event_type = models.CharField(max_length=512, blank=True)
event = models.TextField(blank=True)
agent = models.CharField(max_length=256, blank=True)
page = models.CharField(max_length=512, blank=True, null=True)
time = models.DateTimeField('event time')
host = models.CharField(max_length=64, blank=True)
class Meta:
app_label = 'track'
db_table = 'track_trackinglog'
def __unicode__(self):
fmt = (
u"[{self.time}] {self.username}@{self.ip}: "
u"{self.event_source}| {self.event_type} | "
u"{self.page} | {self.event}"
)
return fmt.format(self=self)
class DjangoBackend(BaseBackend):
"""Event tracker backend that saves to a Django database"""
def __init__(self, name='default', **options):
"""
Configure database used by the backend.
:Parameters:
- `name` is the name of the database as specified in the project
settings.
"""
super(DjangoBackend, self).__init__(**options)
self.name = name
def send(self, event):
event['time'] = dateutil.parser.parse(event['time'])
field_values = {x: event.get(x, '') for x in LOGFIELDS}
tldat = TrackingLog(**field_values)
try:
tldat.save(using=self.name)
except Exception as e: # pylint: disable=broad-except
log.exception(e)
"""Event tracker backend that saves events to a python logger."""
from __future__ import absolute_import
import logging
import json
from django.conf import settings
from track.backends import BaseBackend
log = logging.getLogger('track.backends.logger')
class LoggerBackend(BaseBackend):
"""Event tracker backend that uses a python logger.
Events are logged to the INFO level as JSON strings.
"""
def __init__(self, name, **kwargs):
"""Event tracker backend that uses a python logger.
:Parameters:
- `name`: identifier of the logger, which should have
been configured using the default python mechanisms.
"""
super(LoggerBackend, self).__init__(**kwargs)
self.event_logger = logging.getLogger(name)
def send(self, event):
event_str = json.dumps(event)
# TODO: remove trucation of the serialized event, either at a
# higher level during the emittion of the event, or by
# providing warnings when the events exceed certain size.
event_str = event_str[:settings.TRACK_MAX_EVENT]
self.event_logger.info(event_str)
"""MongoDB event tracker backend."""
from __future__ import absolute_import
import logging
import pymongo
from pymongo import MongoClient
from pymongo.errors import PyMongoError
from track.backends import BaseBackend
log = logging.getLogger('track.backends.mongodb')
class MongoBackend(BaseBackend):
"""Class for a MongoDB event tracker Backend"""
def __init__(self, **kwargs):
"""
Connect to a MongoDB.
:Parameters:
- `host`: hostname
- `port`: port
- `user`: collection username
- `password`: collection user password
- `database`: name of the database
- `collection`: name of the collection
- `extra`: parameters to pymongo.MongoClient not listed above
"""
super(MongoBackend, self).__init__(**kwargs)
# Extract connection parameters from kwargs
host = kwargs.get('host', 'localhost')
port = kwargs.get('port', 27017)
user = kwargs.get('user', '')
password = kwargs.get('password', '')
db_name = kwargs.get('database', 'track')
collection_name = kwargs.get('collection', 'events')
# Other mongo connection arguments
extra = kwargs.get('extra', {})
# By default disable write acknowledgments, reducing the time
# blocking during an insert
extra['w'] = extra.get('w', 0)
# Make timezone aware by default
extra['tz_aware'] = extra.get('tz_aware', True)
# Connect to database and get collection
self.connection = MongoClient(
host=host,
port=port,
**extra
)
self.collection = self.connection[db_name][collection_name]
if user or password:
self.collection.database.authenticate(user, password)
self._create_indexes()
def _create_indexes(self):
# WARNING: The collection will be locked during the index
# creation. If the collection has a large number of
# documents in it, the operation can take a long time.
# TODO: The creation of indexes can be moved to a Django
# management command or equivalent. There is also an option to
# run the indexing on the background, without locking.
self.collection.ensure_index([('time', pymongo.DESCENDING)])
self.collection.ensure_index('event_type')
def send(self, event):
try:
self.collection.insert(event, manipulate=False)
except PyMongoError:
msg = 'Error inserting to MongoDB event tracker backend'
log.exception(msg)
from __future__ import absolute_import
from django.test import TestCase
from track.backends.django import DjangoBackend, TrackingLog
class TestDjangoBackend(TestCase):
def setUp(self):
self.backend = DjangoBackend()
def test_django_backend(self):
event = {
'username': 'test',
'time': '2013-01-01T12:01:00-05:00'
}
self.backend.send(event)
results = list(TrackingLog.objects.all())
self.assertEqual(len(results), 1)
self.assertEqual(results[0].username, 'test')
# Check if time is stored in UTC
self.assertEqual(str(results[0].time), '2013-01-01 17:01:00+00:00')
from __future__ import absolute_import
import json
import logging
from django.test import TestCase
from track.backends.logger import LoggerBackend
class TestLoggerBackend(TestCase):
def setUp(self):
self.handler = MockLoggingHandler()
self.handler.setLevel(logging.INFO)
logger_name = 'track.backends.logger.test'
logger = logging.getLogger(logger_name)
logger.addHandler(self.handler)
self.backend = LoggerBackend(name=logger_name)
def test_logger_backend(self):
self.handler.reset()
# Send a couple of events and check if they were recorded
# by the logger. The events are serialized to JSON.
event = {'test': True}
event_as_json = json.dumps(event)
self.backend.send(event)
self.backend.send(event)
self.assertEqual(
self.handler.messages['info'],
[event_as_json, event_as_json]
)
class MockLoggingHandler(logging.Handler):
"""
Mock logging handler.
Stores records in a dictionry of lists by level.
"""
def __init__(self, *args, **kwargs):
super(MockLoggingHandler, self).__init__(*args, **kwargs)
self.messages = None
self.reset()
def emit(self, record):
level = record.levelname.lower()
message = record.getMessage()
self.messages[level].append(message)
def reset(self):
self.messages = {
'debug': [],
'info': [],
'warning': [],
'error': [],
'critical': [],
}
from __future__ import absolute_import
from uuid import uuid4
import pymongo
from django.test import TestCase
from track.backends.mongodb import MongoBackend
class TestMongoBackend(TestCase):
def setUp(self):
# Use a random database name to prevent problems with tests running
# simultenousely against the same mongo instance
database = '_track_backends_mongodb_{0}'.format(uuid4().hex)
collection = '_test'
self.connection = pymongo.MongoClient()
self.database = self.connection[database]
self.collection = self.database[collection]
# During tests, wait until mongo acknowledged the write
write_concern = 1
self.backend = MongoBackend(
database=database,
collection=collection,
w=write_concern
)
def test_mongo_backend(self):
self.backend.send({'test': 1})
self.backend.send({'test': 2})
# Get all the objects in the db ignoring _id
results = list(self.collection.find({}, {'_id': False}))
self.assertEqual(len(results), 2)
self.assertEqual(results, [{'test': 1}, {'test': 2}])
def tearDown(self):
self.connection.drop_database(self.database)
from django.db import models
class TrackingLog(models.Model):
"""Defines the fields that are stored in the tracking log database"""
dtcreated = models.DateTimeField('creation date', auto_now_add=True)
username = models.CharField(max_length=32, blank=True)
ip = models.CharField(max_length=32, blank=True)
event_source = models.CharField(max_length=32)
event_type = models.CharField(max_length=512, blank=True)
event = models.TextField(blank=True)
agent = models.CharField(max_length=256, blank=True)
page = models.CharField(max_length=512, blank=True, null=True)
time = models.DateTimeField('event time')
host = models.CharField(max_length=64, blank=True)
def __unicode__(self):
fmt = (
u"[{self.time}] {self.username}@{self.ip}: "
u"{self.event_source}| {self.event_type} | "
u"{self.page} | {self.event}"
)
return fmt.format(self=self)
from track.backends.django import TrackingLog
from django.conf import settings
from django.test import TestCase
from django.test.utils import override_settings
import track.tracker as tracker
from track.backends import BaseBackend
SIMPLE_SETTINGS = {
'default': {
'ENGINE': 'track.tests.test_tracker.DummyBackend',
'OPTIONS': {
'flag': True
}
}
}
MULTI_SETTINGS = {
'first': {
'ENGINE': 'track.tests.test_tracker.DummyBackend',
},
'second': {
'ENGINE': 'track.tests.test_tracker.DummyBackend',
}
}
class TestTrackerInstantiation(TestCase):
"""Test that a helper function can instantiate backends from their name."""
def setUp(self):
# pylint: disable=protected-access
self.get_backend = tracker._instantiate_backend_from_name
def test_instatiate_backend(self):
name = 'track.tests.test_tracker.DummyBackend'
options = {'flag': True}
backend = self.get_backend(name, options)
self.assertIsInstance(backend, DummyBackend)
self.assertTrue(backend.flag)
def test_instatiate_backends_with_invalid_values(self):
def get_invalid_backend(name, parameters):
return self.get_backend(name, parameters)
options = {}
name = 'track.backends.logger'
self.assertRaises(ValueError, get_invalid_backend, name, options)
name = 'track.backends.logger.Foo'
self.assertRaises(ValueError, get_invalid_backend, name, options)
name = 'this.package.does.not.exists'
self.assertRaises(ValueError, get_invalid_backend, name, options)
name = 'unittest.TestCase'
self.assertRaises(ValueError, get_invalid_backend, name, options)
class TestTrackerDjangoInstantiation(TestCase):
"""Test if backends are initialized properly from Django settings."""
@override_settings(TRACKING_BACKENDS=SIMPLE_SETTINGS)
def test_django_simple_settings(self):
"""Test configuration of a simple backend"""
backends = self._reload_backends()
self.assertEqual(len(backends), 1)
tracker.send({})
self.assertEqual(backends.values()[0].count, 1)
@override_settings(TRACKING_BACKENDS=MULTI_SETTINGS)
def test_django_multi_settings(self):
"""Test if multiple backends can be configured properly."""
backends = self._reload_backends().values()
self.assertEqual(len(backends), 2)
event_count = 10
for _ in xrange(event_count):
tracker.send({})
self.assertEqual(backends[0].count, event_count)
self.assertEqual(backends[1].count, event_count)
@override_settings(TRACKING_BACKENDS=MULTI_SETTINGS)
def test_django_remove_settings(self):
"""Test if a backend can be remove by setting it to None."""
settings.TRACKING_BACKENDS.update({'second': None})
backends = self._reload_backends()
self.assertEqual(len(backends), 1)
def _reload_backends(self):
# pylint: disable=protected-access
# Reset backends
tracker._initialize_backends_from_django_settings()
return tracker.backends
class DummyBackend(BaseBackend):
def __init__(self, **options):
super(DummyBackend, self).__init__(**options)
self.flag = options.get('flag', False)
self.count = 0
# pylint: disable=unused-argument
def send(self, event):
self.count += 1
"""
Module that tracks analytics events by sending them to different
configurable backends.
The backends can be configured using Django settings as the example
below::
TRACKING_BACKENDS = {
'tracker_name': {
'ENGINE': 'class.name.for.backend',
'OPTIONS': {
'host': ... ,
'port': ... ,
...
}
}
}
"""
import inspect
from importlib import import_module
from django.conf import settings
from track.backends import BaseBackend
__all__ = ['send']
backends = {}
def _initialize_backends_from_django_settings():
"""
Initialize the event tracking backends according to the
configuration in django settings
"""
backends.clear()
config = getattr(settings, 'TRACKING_BACKENDS', {})
for name, values in config.iteritems():
# Ignore empty values to turn-off default tracker backends
if values:
engine = values['ENGINE']
options = values.get('OPTIONS', {})
backends[name] = _instantiate_backend_from_name(engine, options)
def _instantiate_backend_from_name(name, options):
"""
Instantiate an event tracker backend from the full module path to
the backend class. Useful when setting backends from configuration
files.
"""
# Parse backend name
try:
parts = name.split('.')
module_name = '.'.join(parts[:-1])
class_name = parts[-1]
except IndexError:
raise ValueError('Invalid event track backend %s' % name)
# Get and verify the backend class
try:
module = import_module(module_name)
cls = getattr(module, class_name)
if not inspect.isclass(cls) or not issubclass(cls, BaseBackend):
raise TypeError
except (ValueError, AttributeError, TypeError, ImportError):
raise ValueError('Cannot find event track backend %s' % name)
backend = cls(**options)
return backend
def send(event):
"""
Send an event object to all the initialized backends.
"""
for backend in backends.itervalues():
backend.send(event)
_initialize_backends_from_django_settings()
import json
import logging
import pytz
import datetime
import dateutil.parser
import pytz
from pytz import UTC
from django.contrib.auth.decorators import login_required
from django.http import HttpResponse
from django.shortcuts import redirect
from django.conf import settings
from mitxmako.shortcuts import render_to_response
from django_future.csrf import ensure_csrf_cookie
from track.models import TrackingLog
from pytz import UTC
log = logging.getLogger("tracking")
from mitxmako.shortcuts import render_to_response
LOGFIELDS = ['username', 'ip', 'event_source', 'event_type', 'event', 'agent', 'page', 'time', 'host']
from track import tracker
from track.models import TrackingLog
def log_event(event):
"""Write tracking event to log file, and optionally to TrackingLog model."""
event_str = json.dumps(event)
log.info(event_str[:settings.TRACK_MAX_EVENT])
if settings.MITX_FEATURES.get('ENABLE_SQL_TRACKING_LOGS'):
event['time'] = dateutil.parser.parse(event['time'])
tldat = TrackingLog(**dict((x, event[x]) for x in LOGFIELDS))
try:
tldat.save()
except Exception as err:
log.exception(err)
"""Capture a event by sending it to the register trackers"""
tracker.send(event)
def user_track(request):
......@@ -69,6 +61,7 @@ def user_track(request):
}
log_event(event)
return HttpResponse('success')
......@@ -96,8 +89,9 @@ def server_track(request, event_type, event, page=None):
"host": request.META['SERVER_NAME'],
}
if event_type.startswith("/event_logs") and request.user.is_staff: # don't log
return
if event_type.startswith("/event_logs") and request.user.is_staff:
return # don't log
log_event(event)
......
......@@ -254,3 +254,6 @@ BROKER_URL = "{0}://{1}:{2}@{3}/{4}".format(CELERY_BROKER_TRANSPORT,
CELERY_BROKER_PASSWORD,
CELERY_BROKER_HOSTNAME,
CELERY_BROKER_VHOST)
# Event tracking
TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {}))
......@@ -277,10 +277,6 @@ RSS_TIMEOUT = 600
STATIC_GRAB = False
DEV_CONTENT = True
# FIXME: Should we be doing this truncation?
TRACK_MAX_EVENT = 10000
DEBUG_TRACK_LOG = False
MITX_ROOT_URL = ''
LOGIN_REDIRECT_URL = MITX_ROOT_URL + '/accounts/login'
......@@ -312,6 +308,32 @@ COURSE_SETTINGS = {
# TODO (vshnayder): Will probably need to change as we get real access control in.
LMS_MIGRATION_ALLOWED_IPS = []
############################## EVENT TRACKING #################################
# FIXME: Should we be doing this truncation?
TRACK_MAX_EVENT = 10000
DEBUG_TRACK_LOG = False
TRACKING_BACKENDS = {
'logger': {
'ENGINE': 'track.backends.logger.LoggerBackend',
'OPTIONS': {
'name': 'tracking'
}
}
}
# Backawrds compatibility with ENABLE_SQL_TRACKING_LOGS feature flag.
# In the future, adding the backend to TRACKING_BACKENDS enough.
if MITX_FEATURES.get('ENABLE_SQL_TRACKING_LOGS'):
TRACKING_BACKENDS.update({
'sql': {
'ENGINE': 'track.backends.django.DjangoBackend'
}
})
######################## subdomain specific settings ###########################
COURSE_LISTINGS = {}
SUBDOMAIN_BRANDING = {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment