"""Map new event context values to old top-level field values. Ensures events can be parsed by legacy parsers."""

import json
import logging

from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey


log = logging.getLogger(__name__)

# Context fields that LegacyFieldMappingProcessor copies to the top level of every event that carries a
# context; remove_shim_context() then strips the same names from the context.
CONTEXT_FIELDS_TO_INCLUDE = [
    'username',
    'session',
    'ip',
    'agent',
    'host',
]


class LegacyFieldMappingProcessor(object):
    """Ensures all required fields are included in emitted events.

    Calling an instance rewrites the given event dictionary in place so that values which new-style events
    keep under ``context`` (or under ``data`` / ``timestamp``) also appear under the top-level keys that
    legacy parsers read.
    """

    def __call__(self, event):
        """
        Rewrite ``event`` in place into the legacy top-level layout.

        Steps, in order:

        * If the event has a ``context``, every field named in ``CONTEXT_FIELDS_TO_INCLUDE`` is moved to the
          top level (missing ones become ``''``) and ``remove_shim_context`` is applied to the event.
        * ``data`` is renamed to ``event``; an event without ``data`` gets an empty dict.
        * ``time`` is taken from ``context['timestamp']`` when present, otherwise from the top-level
          ``timestamp``. Both source keys are removed. ``time`` is not set when neither exists.
        * ``event_type``, ``event_source`` and ``page`` are moved out of the context, defaulting to the event's
          ``name`` (or ``''``), ``'server'`` and ``None`` respectively.

        Arguments:
            event (dict): the event to modify.

        Returns:
            None. The event is modified in place.
        """
        # Captured before any mutation; this is the same dict object as event['context'] when one exists.
        context = event.get('context', {})
        if 'context' in event:
            for field in CONTEXT_FIELDS_TO_INCLUDE:
                self.move_from_context(field, event)
            remove_shim_context(event)

        # Legacy parsers expect the payload under 'event' rather than 'data'.
        event['event'] = event.pop('data', {})

        # A timestamp stored in the context takes precedence over a top-level one.
        if 'timestamp' in context:
            event['time'] = context.pop('timestamp')
        elif 'timestamp' in event:
            event['time'] = event['timestamp']

        # The top-level 'timestamp' key is never kept, whichever source supplied 'time'.
        event.pop('timestamp', None)

        self.move_from_context('event_type', event, event.get('name', ''))
        self.move_from_context('event_source', event, 'server')
        self.move_from_context('page', event, None)

    def move_from_context(self, field, event, default_value=''):
        """
        Move a field from the context to the top level of the event.

        Arguments:
            field (str): name of the key to move.
            event (dict): the event to modify in place.
            default_value: value stored at ``event[field]`` when the event has no context or the context
                does not contain ``field``.

        Any existing ``event[field]`` is overwritten in both cases.
        """
        context = event.get('context', {})
        event[field] = context.pop(field, default_value)


def remove_shim_context(event):
    """
    Strip shim-related fields from the event's context, in place.

    Removes every field named in ``CONTEXT_FIELDS_TO_INCLUDE`` plus ``client_id`` from ``event['context']``.
    Fields that are not in the context are ignored, and an event without a ``context`` key is left untouched.

    Arguments:
        event (dict): the event whose context should be cleaned.
    """
    if 'context' not in event:
        return

    context = event['context']
    # These fields are present elsewhere in the event at this point
    context_fields_to_remove = set(CONTEXT_FIELDS_TO_INCLUDE)
    # This field is only used for Segment.io web analytics and does not concern researchers
    context_fields_to_remove.add('client_id')
    for field in context_fields_to_remove:
        context.pop(field, None)


# Maps the ``name`` of a new-format video event to the legacy ``event_type`` string that
# VideoEventProcessor assigns to it. Events whose name is not a key here are left untouched by that processor.
NAME_TO_EVENT_TYPE_MAP = {
    'edx.video.played': 'play_video',
    'edx.video.paused': 'pause_video',
    'edx.video.stopped': 'stop_video',
    'edx.video.loaded': 'load_video',
    'edx.video.transcript.shown': 'show_transcript',
    'edx.video.transcript.hidden': 'hide_transcript',
}


class VideoEventProcessor(object):
    """
    Converts new format video events into the legacy video event format.

    Mobile devices cannot actually emit events that exactly match their counterparts emitted by the LMS javascript
    video player. Rather than attempting to make them do that, we insert a shim here that takes the events they
    *can* easily emit and converts them into the legacy format.

    TODO: Remove this shim and perform the conversion as part of some batch canonicalization process.

    """

    def __call__(self, event):
        """
        Convert ``event`` in place if it is a recognised new-format video event.

        Events with no ``name``, or with a name that is not a key of ``NAME_TO_EVENT_TYPE_MAP``, are left
        untouched. Otherwise:

        * ``event_type`` is set to the mapped legacy type.
        * If the event has an ``event`` payload, ``module_id`` is replaced by ``id`` (the usage key's
          ``html_id()``) and ``current_time`` is renamed to ``currentTime``. The payload is then stored back
          as a JSON string.
        * If the event has a ``context`` containing ``open_in_browser_url``, that entry is removed and the
          part of the URL before its last ``/`` becomes the top-level ``page``.

        Arguments:
            event (dict): the event to modify.

        Returns:
            None. The event is modified in place.
        """
        name = event.get('name')
        if not name:
            return

        if name not in NAME_TO_EVENT_TYPE_MAP:
            return

        event['event_type'] = NAME_TO_EVENT_TYPE_MAP[name]

        if 'event' not in event:
            return

        payload = event['event']

        if 'module_id' in payload:
            module_id = payload['module_id']
            try:
                usage_key = UsageKey.from_string(module_id)
            except InvalidKeyError:
                # An unparseable module_id is logged and dropped; the event is still converted, without 'id'.
                log.warning('Unable to parse module_id "%s"', module_id, exc_info=True)
            else:
                payload['id'] = usage_key.html_id()

            # The legacy payload never carries 'module_id', whether or not it could be parsed.
            del payload['module_id']

        if 'current_time' in payload:
            payload['currentTime'] = payload.pop('current_time')

        # From here on the payload is a JSON string, not a dict.
        event['event'] = json.dumps(payload)

        if 'context' not in event:
            return

        context = event['context']

        if 'open_in_browser_url' in context:
            # Keep everything before the last '/'; a URL with no '/' yields an empty page.
            page, _sep, _tail = context.pop('open_in_browser_url').rpartition('/')
            event['page'] = page