Commit f6c8db32 by Brian Wilson

Refactor course enrollment calculations.

Omit days with no change in user status.  Add tests for reduce.  Rename time-related functions.
parent c1164ce5
...@@ -12,7 +12,7 @@ Example command lines: ...@@ -12,7 +12,7 @@ Example command lines:
(local using s3) (local using s3)
python course_enroll.py --local-scheduler CourseEnrollmentTotalsPerDay python course_enroll.py --local-scheduler CourseEnrollmentTotalsPerDay
--name mytest --src s3://edx-analytics-test-data/data --include 'tracking*' --name mytest --src s3://edx-analytics-test-data/data --include 'tracking*'
--dest s3://edx-analytics-scratch/output --dest s3://edx-analytics-scratch/output
""" """
...@@ -26,172 +26,93 @@ import edx.analytics.util.eventlog as eventlog ...@@ -26,172 +26,93 @@ import edx.analytics.util.eventlog as eventlog
from edx.analytics.tasks.pathutil import get_target_for_url, PathSetTask from edx.analytics.tasks.pathutil import get_target_for_url, PathSetTask
################################ def get_explicit_enrollment_output(line):
# Task Map-Reduce definitions """
################################ Generates output values for explicit enrollment events.
Output format: (course_id, user_id), (timestamp, action_value)
class BaseCourseEnrollmentEventsPerDay(luigi.hadoop.JobTask): where action_value = 1 (enrolled) or -1 (unenrolled)
"""Calculates daily change in enrollment for a user in a course, given raw event log input.""" and timestamp is in ISO format, with resolution to the second.
def get_implicit_enrollment_output(self, item): Returns None if there is no valid enrollment event on the line.
""" """
Generates output values for implicit enrollment events. # Before parsing, check that the line contains something that
# suggests it's an enrollment event.
if 'edx.course.enrollment' not in line:
return None
Output format: (course_id, username), (datetime, action_value) # try to parse the line into a dict:
item = eventlog.parse_eventlog_item(line)
if item is None:
# The line didn't parse. For this specific purpose,
# we can assume that all enrollment-related lines would parse,
# and these non-parsing lines would get skipped anyway.
return None
where action_value = 1 (enrolled) or -1 (unenrolled) # get event type, and check that it exists:
event_type = item.get('event_type')
if event_type is None:
eventlog.log_item("encountered event with no event_type", item)
return None
Returns None if the enrollment event on the line is not valid. # convert the type to a value:
""" if event_type == 'edx.course.enrollment.activated':
action_value = 1
elif event_type == 'edx.course.enrollment.deactivated':
action_value = -1
else:
# not an enrollment event...
return None
event_data = eventlog.get_event_data(item) # get the timestamp:
if event_data is None: datetime = eventlog.get_datetime(item)
# Assume it's already logged (and with more specifics). if datetime is None:
return None eventlog.log_item("encountered event with bad datetime", item)
return None
# The args are part of the POST request. timestamp = eventlog.get_timestamp(datetime)
# The course_id is stored in a list, so just take the first value:
post_args = event_data['POST']
if 'course_id' not in post_args: # Enrollment parameters like course_id and user_id may be stored
eventlog.log_item("encountered event with no course_id in post args", item) # in the context and also in the data. Pick the data. For
return None # course_id, we expect the values to be the same. However, for
# user_id, the values may not be the same. This is because the
course_id = post_args['course_id'][0] # context contains the name and id of the user making a particular
if len(course_id) == 0: # request, but it is not necessarily the id of the user being
eventlog.log_item("encountered event with zero-length course_id in post args", item) # enrolled. For example, Studio provides authors with the ability
return None # to add staff to their courses, and these staff get enrolled in
# the course while the author is listed in the context.
# This is a hack, due to a bug in luigi/hadoop.py:
# In JobTask.writer(), it calls "\t".join(map(str, flatten(output))) # Get the event data:
# which returns a UnicodeEncodeError when output contains non-ascii characters. event_data = eventlog.get_event_data(item)
# For now, just log and skip such course_ids. Create a separate story in future if event_data is None:
# to make sure that Luigi handles non-ascii characters in general. # Assume it's already logged (and with more specifics).
try: return None
str(course_id)
except:
eventlog.log_item("encountered event with non-ascii course_id in post args", item)
return None
# The value of action is expected to be 'enroll' or 'unenroll', but is
# stored in a list. We just take the first value (but log if there are more).
if 'enrollment_action' not in post_args:
eventlog.log_item("encountered event with no enrollment_action in post args", item)
return None
actions = post_args['enrollment_action']
if len(actions) != 1:
eventlog.log_item("encountered event with multiple enrollment_actions in post args", item, "WARNING")
action = actions[0]
if action == 'enroll':
action_value = 1
elif action == 'unenroll':
action_value = -1
else:
eventlog.log_item("encountered event with unrecognized value for enrollment_action in post args", item, "WARNING")
return None
# get additional data: timestamp and username:
timestamp = eventlog.get_timestamp(item)
if timestamp is None:
# bad format?
eventlog.log_item("encountered event with bad timestamp", item)
return None
if 'username' not in item:
# bad format?
eventlog.log_item("encountered implicit enrollment event with no username", item, "WARNING")
return None
username = item['username']
return (course_id, username), (eventlog.get_datetime_string(timestamp), action_value)
def get_explicit_enrollment_output(self, item, event_type):
"""
Generates output values for explicit enrollment events.
Output format: (course_id, username), (datetime, action_value) # Get the course_id from the data, and validate.
course_id = event_data['course_id']
if not eventlog.is_valid_course_id(course_id):
eventlog.log_item("encountered explicit enrollment event with bogus course_id", item)
return None
where action_value = 1 (enrolled) or -1 (unenrolled) # Get the user_id from the data:
if 'user_id' not in event_data:
eventlog.log_item("encountered explicit enrollment event with no user_id", item)
return None
user_id = event_data['user_id']
Returns None if the enrollment event on the line is not valid. # For now, ignore the enrollment 'mode' (e.g. 'honor').
"""
# convert the type to a value:
if event_type == 'edx.course.enrollment.activated':
action_value = 1
elif event_type == 'edx.course.enrollment.deactivated':
action_value = -1
# Data is stored in the context, but it's also in the data.
# Pick one.
event_data = eventlog.get_event_data(item)
if event_data is None:
# Assume it's already logged (and with more specifics).
return None
course_id = event_data['course_id']
# for now, ignore the enrollment 'mode' (e.g. 'honor')
# get additional data:
timestamp = eventlog.get_timestamp(item)
if timestamp is None:
# bad format?
eventlog.log_item("encountered event with bad timestamp", item)
return None
# there is also a user_id in the event_data, but who knows if
# it's the same as the username? But for old events, we don't have
# such a user_id, and I don't think we're planning on loading such a mapping.
if 'username' not in item:
# bad format?
eventlog.log_item("encountered explicit enrollment event with no username", item)
return None
username = item['username']
return (course_id, username), (eventlog.get_datetime_string(timestamp), action_value)
def get_enrollment_event(self, line):
"""
Generates output values for explicit enrollment events.
Output format: (course_id, username), (datetime, action_value) return (course_id, user_id), (timestamp, action_value)
where action_value = 1 (enrolled) or -1 (unenrolled)
Returns None if there is no enrollment event on the line. ################################
""" # Task Map-Reduce definitions
# Before parsing, check that the line contains something that ################################
# suggests it's an enrollment event.
if 'edx.course.enrollment' not in line and '/change_enrollment' not in line:
return None class BaseCourseEnrollmentEventsPerDay(luigi.hadoop.JobTask):
"""Calculates daily change in enrollment for a user in a course, given raw event log input."""
# try to parse the line into a dict:
item = eventlog.parse_eventlog_item(line)
if item is None:
# The line didn't parse. For this specific purpose,
# we can assume that all enrollment-related lines would parse,
# and these non-parsing lines would get skipped anyway.
return None
# get event type, and check that it exists:
event_type = item.get('event_type')
if event_type is None:
eventlog.log_item("encountered event with no event_type", item)
return None
# check if it is an 'explicit' enrollment event:
if (event_type == 'edx.course.enrollment.activated' or
event_type == 'edx.course.enrollment.deactivated'):
return self.get_explicit_enrollment_output(item, event_type)
# check if it is an 'implicit' enrollment event:
if event_type == '/change_enrollment':
return self.get_implicit_enrollment_output(item)
# Not an enrollment event...
return None
def mapper(self, line): def mapper(self, line):
""" """
...@@ -205,7 +126,7 @@ class BaseCourseEnrollmentEventsPerDay(luigi.hadoop.JobTask): ...@@ -205,7 +126,7 @@ class BaseCourseEnrollmentEventsPerDay(luigi.hadoop.JobTask):
edX/DemoX/Demo_Course dummyuser 2013-09-10 -1 edX/DemoX/Demo_Course dummyuser 2013-09-10 -1
""" """
parsed_tuple = self.get_enrollment_event(line) parsed_tuple = get_explicit_enrollment_output(line)
if parsed_tuple is not None: if parsed_tuple is not None:
# sys.stderr.write("Found tuple in mapper: " + str(parsed_tuple) + '\n') # sys.stderr.write("Found tuple in mapper: " + str(parsed_tuple) + '\n')
yield parsed_tuple yield parsed_tuple
...@@ -241,30 +162,45 @@ class BaseCourseEnrollmentEventsPerDay(luigi.hadoop.JobTask): ...@@ -241,30 +162,45 @@ class BaseCourseEnrollmentEventsPerDay(luigi.hadoop.JobTask):
""" """
# sys.stderr.write("Found key in reducer: " + str(key) + '\n') # sys.stderr.write("Found key in reducer: " + str(key) + '\n')
course_id, username = key course_id, username = key
# Sort input values (by timestamp) to easily detect the end of a day.
sorted_values = sorted(values) sorted_values = sorted(values)
prev_date = None
prev_change = 0
net_change = 0
for (datetime, change_value) in sorted_values: # Convert timestamps to dates, so we can group them by day.
# get the day's date from the event timestamp: fn = eventlog.get_datestamp_from_timestamp
this_date = eventlog.get_date_from_datetime(datetime) values = [(fn(timestamp), value) for timestamp, value in sorted_values]
# if the date is different, then output the previous date:
if this_date != prev_date and prev_date is not None:
# sys.stderr.write("outputting date and value: " + str(prev_date) + " " + str(net_change) + '\n')
yield (course_id, prev_date), net_change
net_change = 0
# sys.stderr.write("accumulating date and value: " + str(this_date) + " " + str(change_value) + '\n') # Add a stop item to ensure we process the last entry.
# accumulate the new numbers: values = values + [(None, None)]
prev_date = this_date
if change_value != prev_change:
net_change += change_value
prev_change = change_value
if prev_date is not None: # The enrollment state for each student: {1 : enrolled, -1: unenrolled}
yield (course_id, prev_date), net_change # Assume students start in an unknown state, so that whatever happens on
# the first day will get output.
state, prev_state = 0, 0
prev_date = None
import sys
for (this_date, action) in values:
# Before we process a new date, report the state if it has
# changed from the previously reported, if any.
if this_date != prev_date and prev_date is not None:
if state != prev_state:
# sys.stderr.write("outputting date and value: " + str(prev_date) + " " + str(state) + '\n')
prev_state = state
yield (course_id, prev_date), state
# sys.stderr.write("accumulating date and value: " + str(this_date) + " " + str(action) + '\n')
# Consecutive changes of the same kind don't affect the state.
if action != state:
state = action
# If this is the first entry, then we need to infer what
# the previous state was before the first entry arrives.
# For this, we take the opposite of the first entry.
if prev_date is None:
prev_state = -1 if action == 1 else 1
prev_date = this_date
class BaseCourseEnrollmentChangesPerDay(luigi.hadoop.JobTask): class BaseCourseEnrollmentChangesPerDay(luigi.hadoop.JobTask):
"""Calculates daily changes in enrollment, given per-user net changes by date.""" """Calculates daily changes in enrollment, given per-user net changes by date."""
...@@ -300,10 +236,10 @@ class BaseCourseEnrollmentChangesPerDay(luigi.hadoop.JobTask): ...@@ -300,10 +236,10 @@ class BaseCourseEnrollmentChangesPerDay(luigi.hadoop.JobTask):
Output value: sum(changes) Output value: sum(changes)
""" """
# sys.stderr.write("Found key in second reducer: " + str(key) + '\n') # sys.stderr.write("Found key in second reducer: " + str(key) + '\n')
sum_value = 0 count = 0
for value in values: for value in values:
sum_value += int(value) count += int(value)
yield key, sum_value yield key, count
class BaseCourseEnrollmentTotalsPerDay(luigi.hadoop.JobTask): class BaseCourseEnrollmentTotalsPerDay(luigi.hadoop.JobTask):
...@@ -453,9 +389,10 @@ class FirstCourseEnrollmentEventsPerDay(BaseCourseEnrollmentEventsPerDay): ...@@ -453,9 +389,10 @@ class FirstCourseEnrollmentEventsPerDay(BaseCourseEnrollmentEventsPerDay):
# sys.stderr.write("Found key in reducer: " + str(key) + '\n') # sys.stderr.write("Found key in reducer: " + str(key) + '\n')
course_id, username = key course_id, username = key
sorted_values = sorted(values) sorted_values = sorted(values)
for (datetime, change_value) in sorted_values: for (timestamp, change_value) in sorted_values:
# get the day's date from the event timestamp: # get the day's date from the event's timestamp:
this_date = eventlog.get_date_from_datetime(datetime) this_date = eventlog.get_datestamp_from_timestamp(timestamp)
# if it's an enrollment, output it and we're done.
if change_value > 0: if change_value > 0:
yield (course_id, this_date), change_value yield (course_id, this_date), change_value
return return
...@@ -483,40 +420,12 @@ class FirstCourseEnrollmentChangesPerDay(BaseCourseEnrollmentChangesPerDay): ...@@ -483,40 +420,12 @@ class FirstCourseEnrollmentChangesPerDay(BaseCourseEnrollmentChangesPerDay):
import edx.analytics.util import edx.analytics.util
return [boto, edx.analytics.util, cjson] return [boto, edx.analytics.util, cjson]
class FirstCourseEnrollmentTotalsPerDay(BaseCourseEnrollmentTotalsPerDay):
name = luigi.Parameter()
src = luigi.Parameter()
dest = luigi.Parameter()
include = luigi.Parameter(is_list=True, default=('*',))
run_locally = luigi.BooleanParameter()
def requires(self):
return FirstCourseEnrollmentChangesPerDay(self.name, self.src, self.dest, self.include, self.run_locally)
def output(self):
# generate a single output file
output_name = 'first_course_enrollment_totals_per_day_{name}'.format(name=self.name)
return get_target_for_url(self.dest, output_name, self.run_locally)
def extra_modules(self):
import boto
import cjson
import edx.analytics.util
return [boto, edx.analytics.util, cjson]
################################ ################################
# Running tasks # Running tasks
################################ ################################
def main(): def main():
import argparse
import boto
import cjson
luigi.hadoop.attach(boto, argparse, cjson)
luigi.run() luigi.run()
......
...@@ -72,7 +72,7 @@ class PathSetTask(luigi.Task): ...@@ -72,7 +72,7 @@ class PathSetTask(luigi.Task):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(PathSetTask, self).__init__(*args, **kwargs) super(PathSetTask, self).__init__(*args, **kwargs)
self.s3 = boto.connect_s3() self.s3 = None
def requires(self): def requires(self):
if self.src.startswith('s3'): if self.src.startswith('s3'):
...@@ -103,6 +103,10 @@ class PathSetTask(luigi.Task): ...@@ -103,6 +103,10 @@ class PathSetTask(luigi.Task):
def _generate_sources(self): def _generate_sources(self):
bucket_name, root = get_s3_bucket_key_names(self.src) bucket_name, root = get_s3_bucket_key_names(self.src)
# connect lazily, only if necessary:
if self.s3 is None:
self.s3 = boto.connect_s3()
bucket = self.s3.get_bucket(bucket_name) bucket = self.s3.get_bucket(bucket_name)
keys = (s.key for s in bucket.list(root) if s.size > 0) keys = (s.key for s in bucket.list(root) if s.size > 0)
......
"""
Tests for tasks that collect enrollment events.
"""
import unittest
from edx.analytics.tasks.course_enroll import (
BaseCourseEnrollmentEventsPerDay,
BaseCourseEnrollmentChangesPerDay,
BaseCourseEnrollmentTotalsPerDay,
)
from datetime import datetime
class CourseEnrollEventReduceTest(unittest.TestCase):
"""
Tests to verify that event log parsing works correctly.
"""
def setUp(self):
self.task = BaseCourseEnrollmentEventsPerDay()
self.key = ('course', 'user')
def _get_reducer_output(self, values):
"""Run reducer with provided values hardcoded key."""
return list(self.task.reducer(self.key, values))
def test_no_events(self):
self.assertEquals(self._get_reducer_output([]), [])
def test_single_enrollment(self):
self.assertEquals(self._get_reducer_output(
[
('2013-01-01T00:00:01', 1),
]),
[
(('course', '2013-01-01'), 1),
])
def test_single_unenrollment(self):
self.assertEquals(self._get_reducer_output(
[
('2013-01-01T00:00:01', -1),
]),
[
(('course', '2013-01-01'), -1),
])
def test_multiple_events_on_same_day(self):
# run first with no output expected:
self.assertEquals(self._get_reducer_output(
[
('2013-01-01T00:00:01', 1),
('2013-01-01T00:00:02', -1),
('2013-01-01T00:00:03', 1),
('2013-01-01T00:00:04', -1),
]),
[
])
# then run with output expected:
self.assertEquals(self._get_reducer_output(
[
('2013-01-01T00:00:01', 1),
('2013-01-01T00:00:02', -1),
('2013-01-01T00:00:03', -1),
('2013-01-01T00:00:04', 1),
]),
[
(('course', '2013-01-01'), 1),
])
def test_multiple_events_out_of_order(self):
# Make sure that events are sorted by the reducer.
self.assertEquals(self._get_reducer_output(
[
('2013-01-01T00:00:04', -1),
('2013-01-01T00:00:03', 1),
('2013-01-01T00:00:01', 1),
('2013-01-01T00:00:02', -1),
]),
[
])
def test_multiple_enroll_events_on_same_day(self):
self.assertEquals(self._get_reducer_output(
[
('2013-01-01T00:00:01', 1),
('2013-01-01T00:00:02', 1),
('2013-01-01T00:00:03', 1),
('2013-01-01T00:00:04', 1),
]),
[
(('course', '2013-01-01'), 1),
])
def test_multiple_unenroll_events_on_same_day(self):
self.assertEquals(self._get_reducer_output(
[
('2013-01-01T00:00:01', -1),
('2013-01-01T00:00:02', -1),
('2013-01-01T00:00:03', -1),
('2013-01-01T00:00:04', -1),
]),
[
(('course', '2013-01-01'), -1),
])
def test_multiple_enroll_events_on_many_days(self):
self.assertEquals(self._get_reducer_output(
[
('2013-01-01T00:00:01', 1),
('2013-01-01T00:00:02', 1),
('2013-01-02T00:00:03', 1),
('2013-01-02T00:00:04', 1),
('2013-01-04T00:00:05', 1),
]),
[
(('course', '2013-01-01'), 1),
])
def test_multiple_events_on_many_days(self):
# Run with an arbitrary list of events.
self.assertEquals(self._get_reducer_output(
[
('2013-01-01T1', 1),
('2013-01-01T2', -1),
('2013-01-01T3', 1),
('2013-01-01T4', -1),
('2013-01-02', 1),
('2013-01-03', 1),
('2013-01-04T1', 1),
('2013-01-04T2', -1),
('2013-01-05', -1),
('2013-01-06', -1),
('2013-01-07', 1),
('2013-01-08T1', 1),
('2013-01-08T2', 1),
('2013-01-09T1', -1),
('2013-01-09T2', -1),
]),
[
(('course', '2013-01-02'), 1),
(('course', '2013-01-04'), -1),
(('course', '2013-01-07'), 1),
(('course', '2013-01-09'), -1),
])
class CourseEnrollChangesReduceTest(unittest.TestCase):
"""
Verify that BaseCourseEnrollmentChangesPerDay.reduce() works correctly.
"""
def setUp(self):
self.task = BaseCourseEnrollmentChangesPerDay()
self.key = ('course', '2013-01-01')
def _get_reducer_output(self, values):
"""Run reducer with provided values hardcoded key."""
return list(self.task.reducer(self.key, values))
def test_no_user_counts(self):
self.assertEquals(self._get_reducer_output([]), [(self.key, 0)])
def test_single_user_count(self):
self.assertEquals(self._get_reducer_output([1]), [(self.key, 1)])
def test_multiple_user_count(self):
inputs = [1, 1, 1, -1, 1]
self.assertEquals(self._get_reducer_output(inputs), [(self.key, 3)])
class CourseEnrollTotalsReduceTest(unittest.TestCase):
"""
Verify that BaseCourseEnrollmentTotalsPerDay.reduce() works correctly.
"""
def setUp(self):
self.task = BaseCourseEnrollmentTotalsPerDay()
self.key = 'course'
def _get_reducer_output(self, values):
"""Run reducer with provided values hardcoded key."""
return list(self.task.reducer(self.key, values))
def test_no_user_counts(self):
self.assertEquals(self._get_reducer_output([]), [])
def test_single_user_count(self):
self.assertEquals(self._get_reducer_output(
[
('2013-01-01', 5),
]),
[
(self.key, '2013-01-01', 5),
])
def test_multiple_user_count(self):
self.assertEquals(self._get_reducer_output(
[
('2013-01-01', 5),
('2013-01-02', 8),
('2013-01-03', 4),
('2013-01-04', 9),
]),
[
(self.key, '2013-01-01', 5),
(self.key, '2013-01-02', 13),
(self.key, '2013-01-03', 17),
(self.key, '2013-01-04', 26),
])
...@@ -8,26 +8,44 @@ import re ...@@ -8,26 +8,44 @@ import re
PATTERN_JSON = re.compile(r'^.*?(\{.*\})\s*$') PATTERN_JSON = re.compile(r'^.*?(\{.*\})\s*$')
# borrowed from modulestore/parsers.py:
ALLOWED_ID_CHARS = r'[a-zA-Z0-9_\-~.:]'
PATTERN_COURSEID = re.compile(r'^' + ALLOWED_ID_CHARS + r'+$')
def get_datetime_string(timestamp): def is_valid_course_id(course_id):
return timestamp.strftime('%Y-%m-%dT%H:%M:%S') """
Determines if a course_id from an event log is possibly legitimate.
Applies two tests:
def get_date_string(timestamp): * Course Id can be split into org/coursename/runname using '/' as delimiter.
return timestamp.strftime('%Y-%m-%d') * Components of id contain only "allowed" characters as defined in modulestore/parsers.py.
Note this will need to be updated as split-mongo changes are rolled out
def get_date_from_datetime(datetime_string): that permit a broader set of id values.
return datetime_string.split('T')[0] """
components = course_id.split('/')
if len(components) != 3:
return False
return all(PATTERN_COURSEID.match(component) for component in components)
def json_decode(line): def json_decode(line):
"""Wrapper to decode JSON string in implementation-independent way.""" """Wrapper to decode JSON string in an implementation-independent way."""
return cjson.decode(line) return cjson.decode(line)
def parse_eventlog_item(line, nested=False): def parse_eventlog_item(line, nested=False):
""" Parse a tracking log input line as JSON to create a dict representation.""" """
Parse a tracking log input line as JSON to create a dict representation.
Arguments:
* line: the eventlog text
* nested: boolean flag permitting this to be called recursively.
Apparently some eventlog entries are pure JSON, while others are
JSON that are prepended by a timestamp.
"""
try: try:
parsed = json_decode(line) parsed = json_decode(line)
except: except:
...@@ -48,11 +66,32 @@ def parse_eventlog_item(line, nested=False): ...@@ -48,11 +66,32 @@ def parse_eventlog_item(line, nested=False):
def log_item(msg, item, level='ERROR'): def log_item(msg, item, level='ERROR'):
"""Writes a message about an eventlog item."""
sys.stderr.write("{level}: {msg}: {item}\n".format(msg=msg, item=item, level=level)) sys.stderr.write("{level}: {msg}: {item}\n".format(msg=msg, item=item, level=level))
def get_timestamp(item): # Time-related terminology:
# * datetime: a datetime object.
# * timestamp: a string, with date and time (to second), in ISO format.
# * datestamp: a string with only date information, in ISO format.
def get_timestamp(datetime):
"""Returns a string with the datetime value of the provided datetime object."""
return datetime.strftime('%Y-%m-%dT%H:%M:%S')
def get_datestamp(datetime):
"""Returns a string with the date value of the provided datetime object."""
return datetime.strftime('%Y-%m-%d')
def get_datestamp_from_timestamp(timestamp):
"""Returns a string with the date value of the provided ISO datetime string."""
return timestamp.split('T')[0]
def get_datetime(item):
"""Returns a datetime object from an event item, if present."""
try: try:
timestamp = item['time'] timestamp = item['time']
removed_ms = timestamp.split('.')[0] removed_ms = timestamp.split('.')[0]
...@@ -62,6 +101,11 @@ def get_timestamp(item): ...@@ -62,6 +101,11 @@ def get_timestamp(item):
def get_event_data(item): def get_event_data(item):
"""
Returns event data from an event log item as a dict object.
Returns None if not found.
"""
event_value = item.get('event') event_value = item.get('event')
if event_value is None: if event_value is None:
...@@ -69,7 +113,7 @@ def get_event_data(item): ...@@ -69,7 +113,7 @@ def get_event_data(item):
return None return None
if isinstance(event_value, basestring): if isinstance(event_value, basestring):
# If the value is a string, try to parse as JSON into a dict:. # If the value is a string, try to parse as JSON into a dict.
try: try:
event_value = json_decode(event_value) event_value = json_decode(event_value)
except: except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment