edx / edx-analytics-data-api · Commits
Commit f724d3de authored Jan 29, 2014 by Brian Wilson

    Add test coverage for course_enroll and eventlog. Fix pylint/pep8 in PR.

parent f6c8db32

Showing 7 changed files with 261 additions and 148 deletions (+261, -148):
    edx/analytics/tasks/course_enroll.py              +0    -0
    edx/analytics/tasks/pathutil.py                   +10   -32
    edx/analytics/tasks/s3.py                         +6    -44
    edx/analytics/tasks/s3_util.py                    +68   -0
    edx/analytics/tasks/tests/test_course_enroll.py   +96   -53
    edx/analytics/util/eventlog.py                    +20   -17
    edx/analytics/util/tests/test_eventlog.py         +61   -2
edx/analytics/tasks/course_enroll.py @ f724d3de

(diff collapsed; contents not shown)
edx/analytics/tasks/pathutil.py @ f724d3de

@@ -17,15 +17,7 @@ import luigi.s3
 import luigi.hdfs
 import luigi.format
 
-def get_s3_bucket_key_names(url):
-    """Extract bucket_name and root from S3 URL."""
-    parts = urlparse(url)
-    return (parts.netloc.strip('/'), parts.path.strip('/'))
-
-
-def join_as_s3_url(bucket, root, path):
-    """Construct a URL for accessing S3, given its components."""
-    return 's3://{bucket}/{root}/{path}'.format(bucket=bucket, root=root, path=path)
+from s3_util import join_as_s3_url, generate_s3_sources
 
 
 class LocalPathTask(luigi.ExternalTask):
@@ -68,15 +60,22 @@ class PathSetTask(luigi.Task):
     """
     src = luigi.Parameter()
     include = luigi.Parameter(is_list=True, default=('*',))
+    # TODO: modify this to get default values from a configuration file,
+    # and use that to determine whether running in a cluster or locally.
+    # It will be decoupled from the use of S3PathTask/HDFSPathTask.
+    # Instead, these will be distinguished by different protocol names.
     run_locally = luigi.BooleanParameter()
 
     def __init__(self, *args, **kwargs):
         super(PathSetTask, self).__init__(*args, **kwargs)
-        self.s3 = None
+        self.s3_conn = None
 
     def requires(self):
         if self.src.startswith('s3'):
-            for bucket, root, path in self._generate_sources():
+            # connect lazily as needed:
+            if self.s3_conn is None:
+                self.s3_conn = boto.connect_s3()
+            for bucket, root, path in generate_s3_sources(self.s3_conn, self.src, self.include):
                 source = join_as_s3_url(bucket, root, path)
                 if self.run_locally:
                     yield luigi.s3.S3PathTask(source)
@@ -100,27 +99,6 @@ class PathSetTask(luigi.Task):
     def output(self):
         return [task.output() for task in self.requires()]
 
-    def _generate_sources(self):
-        bucket_name, root = get_s3_bucket_key_names(self.src)
-        # connect lazily, only if necessary:
-        if self.s3 is None:
-            self.s3 = boto.connect_s3()
-        bucket = self.s3.get_bucket(bucket_name)
-        keys = (s.key for s in bucket.list(root) if s.size > 0)
-        # remove root
-        paths = (k.lstrip(root).strip('/') for k in keys)
-        paths = self._filter_matches(paths)
-        return ((bucket.name, root, path) for path in paths)
-
-    def _filter_matches(self, names):
-        patterns = self.include
-        fn = lambda n: any(fnmatch(n, p) for p in patterns)
-        return (n for n in names if fn(n))
-
 
 def get_target_for_url(dest, output_name, run_locally=False):
     """
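The net effect of the pathutil.py changes is that PathSetTask no longer parses URLs or lists buckets itself: it delegates to the shared s3_util helpers and connects to S3 lazily, so purely local runs never open a connection. A minimal sketch of the same pattern outside of luigi, assuming the full package path edx.analytics.tasks.s3_util and made-up bucket/pattern values:

    import boto

    from edx.analytics.tasks.s3_util import generate_s3_sources, join_as_s3_url

    src = 's3://example-bucket/logs'    # hypothetical source URL
    include = ('*.log', '*.gz')         # hypothetical include patterns
    s3_conn = None

    if src.startswith('s3'):
        # Connect lazily, only once an S3 source is actually needed.
        if s3_conn is None:
            s3_conn = boto.connect_s3()
        for bucket, root, path in generate_s3_sources(s3_conn, src, include):
            print join_as_s3_url(bucket, root, path)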
edx/analytics/tasks/s3.py @ f724d3de

 import os.path
-from fnmatch import fnmatch
-from urlparse import urlparse
 
 import boto
 import luigi
 import luigi.s3
 
+from s3_util import join_as_s3_url, get_s3_bucket_key_names, generate_s3_sources, get_s3_key
+
 
 class S3Copy(luigi.Task):
     """
@@ -47,8 +46,8 @@ class S3Copy(luigi.Task):
         if not dst.exists():
             return False
 
-        src_key = self._get_s3_key(src.path)
-        dst_key = self._get_s3_key(dst.path)
+        src_key = get_s3_key(self.s3, src.path)
+        dst_key = get_s3_key(self.s3, dst.path)
 
         if dst_key.size != src_key.size:
             return False
@@ -63,19 +62,13 @@ class S3Copy(luigi.Task):
         src_url = self.input().path
         dst_url = self.output().path
 
-        src_key = self._get_s3_key(src_url)
+        src_key = get_s3_key(self.s3, src_url)
         dst_bucket_name, dst_key_name = get_s3_bucket_key_names(dst_url)
 
-        # The copy overwrittes the destination. The task checks if
+        # The copy overwrites the destination. The task checks if
         # that is necessary during the `complete()` call.
         src_key.copy(dst_bucket_name, dst_key_name)
 
-    def _get_s3_key(self, url):
-        bucket_name, key_name = get_s3_bucket_key_names(url)
-        bucket = self.s3.get_bucket(bucket_name)
-        key = bucket.get_key(key_name)
-        return key
-
 
 class S3Sync(luigi.Task):
     """
@@ -112,7 +105,7 @@ class S3Sync(luigi.Task):
         return [boto]
 
     def requires(self):
-        for bucket, root, path in self._generate_sources():
+        for bucket, root, path in generate_s3_sources(self.s3, self.source, self.include):
             source = join_as_s3_url(bucket, root, path)
             destination = os.path.join(self.destination, path)
             yield S3Copy(source, destination)
@@ -121,34 +114,3 @@ class S3Sync(luigi.Task):
         for task in self.requires():
             yield task.output()
-
-    def _generate_sources(self):
-        bucket_name, root = get_s3_bucket_key_names(self.source)
-        bucket = self.s3.get_bucket(bucket_name)
-        keys = (s.key for s in bucket.list(root) if s.size > 0)
-        # Make paths relative by removing root
-        paths = (k.lstrip(root).strip('/') for k in keys)
-        # Filter only paths that match the include patterns
-        paths = self._filter_matches(paths)
-        return ((bucket.name, root, path) for path in paths)
-
-    def _filter_matches(self, names):
-        patterns = self.include
-        # Return only key names that match any of the include patterns
-        fn = lambda n: any(fnmatch(n, p) for p in patterns)
-        return (n for n in names if fn(n))
-
-
-def get_s3_bucket_key_names(url):
-    """Extract the bucket and key names from a S3 URL"""
-    parts = urlparse(url)
-    return (parts.netloc.strip('/'), parts.path.strip('/'))
-
-
-def join_as_s3_url(bucket, root, path):
-    """Combine bucket name, root path and relative path into a S3 URL"""
-    return 's3://{0}/{1}/{2}'.format(bucket, root, path)
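With the private _get_s3_key method gone, S3Copy and S3Sync share the module-level get_s3_key helper, and the up-to-date check boils down to comparing key sizes. A rough sketch of that check with plain boto, using hypothetical URLs:

    import boto

    from edx.analytics.tasks.s3_util import get_s3_key

    s3_conn = boto.connect_s3()
    src_key = get_s3_key(s3_conn, 's3://example-bucket/logs/a.log')    # hypothetical
    dst_key = get_s3_key(s3_conn, 's3://example-backup/logs/a.log')    # hypothetical

    # Mirrors the idea behind S3Copy.complete(): the copy is treated as done
    # when the destination key exists and its size matches the source.
    up_to_date = dst_key is not None and dst_key.size == src_key.size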
edx/analytics/tasks/s3_util.py @ f724d3de (new file, mode 100644)

"""
Utility methods for interacting with S3 via boto.
"""
from fnmatch import fnmatch
from urlparse import urlparse

import boto


def get_s3_bucket_key_names(url):
    """Extract the bucket and key names from a S3 URL"""
    parts = urlparse(url)
    return (parts.netloc.strip('/'), parts.path.strip('/'))


def join_as_s3_url(bucket, root, path):
    """Combine bucket name, root path and relative path into a S3 URL"""
    return 's3://{0}/{1}/{2}'.format(bucket, root, path)


def get_s3_key(s3_conn, url):
    """Returns an S3 key for use in further boto actions."""
    bucket_name, key_name = get_s3_bucket_key_names(url)
    bucket = s3_conn.get_bucket(bucket_name)
    key = bucket.get_key(key_name)
    return key


def generate_s3_sources(s3_conn, source, patterns):
    """
    Returns a list of S3 sources that match filters.

    Args:
      s3_conn: a boto connection to S3.
      source: a url to S3.
      patterns: a list of strings, each of which defines a pattern to match.

    Yields:
      (bucket, root, path) tuples for each matching file on S3,
      where `bucket` and `root` are derived from the source url,
      and `path` is a matching path relative to the `source`.

    Does not include zero-length files.
    """
    bucket_name, root = get_s3_bucket_key_names(source)

    bucket = s3_conn.get_bucket(bucket_name)

    # Skip keys that have zero size.  This allows directories
    # to be skipped, but also skips legitimate files that are
    # also zero-length.
    keys = (s.key for s in bucket.list(root) if s.size > 0)

    # Make paths relative by removing root
    paths = (k[len(root):].lstrip('/') for k in keys)

    # Filter only paths that match the include patterns
    paths = _filter_matches(patterns, paths)

    return ((bucket.name, root, path) for path in paths)


def _filter_matches(patterns, names):
    """Return only key names that match any of the include patterns."""
    fn = lambda n: any(fnmatch(n, p) for p in patterns)
    return (n for n in names if fn(n))
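A quick round-trip through the new helpers; the bucket layout and patterns below are invented for illustration:

    import boto

    from edx.analytics.tasks.s3_util import (
        get_s3_bucket_key_names, join_as_s3_url, generate_s3_sources
    )

    # Split an S3 URL into (bucket, key) parts, then join them back together.
    bucket, root = get_s3_bucket_key_names('s3://example-bucket/data/2014')
    assert (bucket, root) == ('example-bucket', 'data/2014')
    assert join_as_s3_url(bucket, root, 'tracking.log') == \
        's3://example-bucket/data/2014/tracking.log'

    # Enumerate non-empty keys under the root that match fnmatch-style patterns.
    s3_conn = boto.connect_s3()
    for name, root, path in generate_s3_sources(s3_conn, 's3://example-bucket/data/2014', ['*.log']):
        print join_as_s3_url(name, root, path)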
edx/analytics/tasks/tests/test_course_enroll.py @ f724d3de

@@ -3,24 +3,111 @@ Tests for tasks that collect enrollment events.
 """
 import unittest
+import json
 
 from edx.analytics.tasks.course_enroll import (
-    BaseCourseEnrollmentEventsPerDay,
-    BaseCourseEnrollmentChangesPerDay,
-    BaseCourseEnrollmentTotalsPerDay,
+    CourseEnrollmentEventsPerDayMixin,
+    CourseEnrollmentChangesPerDayMixin,
 )
-from datetime import datetime
+
+
+class CourseEnrollEventMapTest(unittest.TestCase):
+    """
+    Tests to verify that event log parsing by mapper works correctly.
+    """
+    def setUp(self):
+        self.task = CourseEnrollmentEventsPerDayMixin()
+        self.course_id = "MITx/8.02x/2013_Spring"
+        self.user_id = 21
+        self.timestamp = "2013-12-17T15:38:32"
+
+    def _create_event_log_line(self, **kwargs):
+        """Create an event log with test values, as a JSON string."""
+        return json.dumps(self._create_event_dict(**kwargs))
+
+    def _create_event_dict(self, **kwargs):
+        """Create an event log with test values, as a dict."""
+        # Define default values for event log entry.
+        org_id = self.course_id.split('/')[0]
+        event_dict = {
+            "username": "test_user",
+            "host": "test_host",
+            "event_source": "server",
+            "event_type": "edx.course.enrollment.activated",
+            "context": {
+                "course_id": self.course_id,
+                "org_id": org_id,
+                "user_id": self.user_id,
+            },
+            "time": "{}.805444+00:00".format(self.timestamp),
+            "ip": "127.0.0.1",
+            "event": {
+                "course_id": self.course_id,
+                "user_id": self.user_id,
+                "mode": "honor",
+            },
+            "agent": "blah, blah, blah",
+            "page": None
+        }
+        event_dict.update(**kwargs)
+        return event_dict
+
+    def assert_no_output_for(self, line):
+        self.assertEquals(list(self.task.mapper(line)), [])
+
+    def test_non_enrollment_event(self):
+        line = 'this is garbage'
+        self.assert_no_output_for(line)
+
+    def test_unparseable_enrollment_event(self):
+        line = 'this is garbage but contains edx.course.enrollment'
+        self.assert_no_output_for(line)
+
+    def test_missing_event_type(self):
+        event_dict = self._create_event_dict()
+        del event_dict['event_type']
+        line = json.dumps(event_dict)
+        self.assert_no_output_for(line)
+
+    def test_nonenroll_event_type(self):
+        line = self._create_event_log_line(event_type='edx.course.enrollment.unknown')
+        self.assert_no_output_for(line)
+
+    def test_bad_datetime(self):
+        line = self._create_event_log_line(time='this is a bogus time')
+        self.assert_no_output_for(line)
+
+    def test_bad_event_data(self):
+        line = self._create_event_log_line(event=["not an event"])
+        self.assert_no_output_for(line)
+
+    def test_illegal_course_id(self):
+        line = self._create_event_log_line(event={"course_id": ";;;;bad/id/val", "user_id": self.user_id})
+        self.assert_no_output_for(line)
+
+    def test_missing_user_id(self):
+        line = self._create_event_log_line(event={"course_id": self.course_id})
+        self.assert_no_output_for(line)
+
+    def test_good_enroll_event(self):
+        line = self._create_event_log_line()
+        event = list(self.task.mapper(line))
+        expected = [((self.course_id, self.user_id), (self.timestamp, 1))]
+        self.assertEquals(event, expected)
+
+    def test_good_unenroll_event(self):
+        line = self._create_event_log_line(event_type='edx.course.enrollment.deactivated')
+        event = list(self.task.mapper(line))
+        expected = [((self.course_id, self.user_id), (self.timestamp, -1))]
+        self.assertEquals(event, expected)
 
 
 class CourseEnrollEventReduceTest(unittest.TestCase):
     """
-    Tests to verify that event log parsing works correctly.
+    Tests to verify that events-per-day-per-user reducer works correctly.
     """
     def setUp(self):
-        self.task = BaseCourseEnrollmentEventsPerDay()
+        self.task = CourseEnrollmentEventsPerDayMixin()
         self.key = ('course', 'user')
 
     def _get_reducer_output(self, values):
@@ -71,7 +158,6 @@ class CourseEnrollEventReduceTest(unittest.TestCase):
             (('course', '2013-01-01'), 1),
         ])
 
-
     def test_multiple_events_out_of_order(self):
         # Make sure that events are sorted by the reducer.
         self.assertEquals(self._get_reducer_output(
@@ -96,7 +182,6 @@ class CourseEnrollEventReduceTest(unittest.TestCase):
             (('course', '2013-01-01'), 1),
         ])
 
-
     def test_multiple_unenroll_events_on_same_day(self):
         self.assertEquals(self._get_reducer_output([
@@ -122,7 +207,6 @@ class CourseEnrollEventReduceTest(unittest.TestCase):
             (('course', '2013-01-01'), 1),
         ])
 
-
    def test_multiple_events_on_many_days(self):
         # Run with an arbitrary list of events.
         self.assertEquals(self._get_reducer_output(
@@ -153,10 +237,10 @@ class CourseEnrollEventReduceTest(unittest.TestCase):
 
 class CourseEnrollChangesReduceTest(unittest.TestCase):
     """
-    Verify that BaseCourseEnrollmentChangesPerDay.reduce() works correctly.
+    Verify that CourseEnrollmentChangesPerDayMixin.reduce() works correctly.
     """
     def setUp(self):
-        self.task = BaseCourseEnrollmentChangesPerDay()
+        self.task = CourseEnrollmentChangesPerDayMixin()
         self.key = ('course', '2013-01-01')
 
     def _get_reducer_output(self, values):
@@ -172,44 +256,3 @@ class CourseEnrollChangesReduceTest(unittest.TestCase):
 
     def test_multiple_user_count(self):
         inputs = [1, 1, 1, -1, 1]
         self.assertEquals(self._get_reducer_output(inputs), [(self.key, 3)])
-
-
-class CourseEnrollTotalsReduceTest(unittest.TestCase):
-    """
-    Verify that BaseCourseEnrollmentTotalsPerDay.reduce() works correctly.
-    """
-    def setUp(self):
-        self.task = BaseCourseEnrollmentTotalsPerDay()
-        self.key = 'course'
-
-    def _get_reducer_output(self, values):
-        """Run reducer with provided values hardcoded key."""
-        return list(self.task.reducer(self.key, values))
-
-    def test_no_user_counts(self):
-        self.assertEquals(self._get_reducer_output([]), [])
-
-    def test_single_user_count(self):
-        self.assertEquals(self._get_reducer_output([
-            ('2013-01-01', 5),
-        ]), [
-            (self.key, '2013-01-01', 5),
-        ])
-
-    def test_multiple_user_count(self):
-        self.assertEquals(self._get_reducer_output([
-            ('2013-01-01', 5),
-            ('2013-01-02', 8),
-            ('2013-01-03', 4),
-            ('2013-01-04', 9),
-        ]), [
-            (self.key, '2013-01-01', 5),
-            (self.key, '2013-01-02', 13),
-            (self.key, '2013-01-03', 17),
-            (self.key, '2013-01-04', 26),
-        ])
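Because the tests exercise the mixin classes directly, no Hadoop or luigi scheduler is needed to run them. One way to drive the suite with the stock unittest runner (the runner code below is illustrative, not part of this commit):

    import unittest

    from edx.analytics.tasks.tests import test_course_enroll

    # Load every TestCase defined in the module and run it.
    suite = unittest.defaultTestLoader.loadTestsFromModule(test_course_enroll)
    unittest.TextTestRunner(verbosity=2).run(suite)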
edx/analytics/util/eventlog.py @ f724d3de

@@ -12,6 +12,7 @@ PATTERN_JSON = re.compile(r'^.*?(\{.*\})\s*$')
 
 ALLOWED_ID_CHARS = r'[a-zA-Z0-9_\-~.:]'
 PATTERN_COURSEID = re.compile(r'^' + ALLOWED_ID_CHARS + r'+$')
+
 
 def is_valid_course_id(course_id):
     """
     Determines if a course_id from an event log is possibly legitimate.
@@ -24,6 +25,7 @@ def is_valid_course_id(course_id):
     Note this will need to be updated as split-mongo changes are rolled out
     that permit a broader set of id values.
     """
+    # TODO: [split-mongo] verify after course_id name changes.
     components = course_id.split('/')
     if len(components) != 3:
         return False
@@ -32,6 +34,7 @@ def is_valid_course_id(course_id):
 
 def json_decode(line):
     """Wrapper to decode JSON string in an implementation-independent way."""
+    # TODO: Verify correctness of cjson
     return cjson.decode(line)
@@ -48,25 +51,26 @@ def parse_eventlog_item(line, nested=False):
     """
     try:
         parsed = json_decode(line)
-    except:
+    except Exception:
        if not nested:
            json_match = PATTERN_JSON.match(line)
            if json_match:
                return parse_eventlog_item(json_match.group(1), nested=True)
 
-        # Seem to be truncated in input data at 10000 for some log files, 2043 for others...
-        # First filter out common ones:
-        # if 'save_problem_check' not in line:
-        #     sys.stderr.write("ERROR: encountered event with bad json: length = {len} start={start}\n".format(len=len(line), start=line[:40]))
-        # Even that leaves too many to log.
-        # TODO: Might be good going forward to collect stats on the length of truncation and the counts for
-        # different event "names" (normalized event_type values).
+        # TODO: There are too many to be logged.  It might be useful
+        # at some point to collect stats on the length of truncation
+        # and the counts for different event "names" (normalized
+        # event_type values).
+        # Note that empirically some seem to be truncated in input
+        # data at 10000 characters, 2043 for others...
         return None
     return parsed
 
 
 def log_item(msg, item, level='ERROR'):
     """Writes a message about an eventlog item."""
     # TODO: replace this with real logging.
     sys.stderr.write("{level}: {msg}: {item}\n".format(msg=msg, item=item, level=level))
@@ -75,28 +79,28 @@ def log_item(msg, item, level='ERROR'):
 # * timestamp: a string, with date and time (to second), in ISO format.
 # * datestamp: a string with only date information, in ISO format.
 
-def get_timestamp(datetime):
+def datetime_to_timestamp(datetime_obj):
     """Returns a string with the datetime value of the provided datetime object."""
-    return datetime.strftime('%Y-%m-%dT%H:%M:%S')
+    return datetime_obj.strftime('%Y-%m-%dT%H:%M:%S')
 
 
-def get_datestamp(datetime):
+def datetime_to_datestamp(datetime_obj):
     """Returns a string with the date value of the provided datetime object."""
-    return datetime.strftime('%Y-%m-%d')
+    return datetime_obj.strftime('%Y-%m-%d')
 
 
-def get_datestamp_from_timestamp(timestamp):
+def timestamp_to_datestamp(timestamp):
     """Returns a string with the date value of the provided ISO datetime string."""
     return timestamp.split('T')[0]
 
 
-def get_datetime(item):
+def get_event_time(item):
     """Returns a datetime object from an event item, if present."""
     try:
         timestamp = item['time']
         removed_ms = timestamp.split('.')[0]
         return datetime.datetime.strptime(removed_ms, '%Y-%m-%dT%H:%M:%S')
-    except:
+    except Exception:
         return None
@@ -116,7 +120,7 @@ def get_event_data(item):
         # If the value is a string, try to parse as JSON into a dict.
         try:
             event_value = json_decode(event_value)
-        except:
+        except Exception:
             log_item("encountered event with unparsable event value", item)
             return None
@@ -126,4 +130,3 @@ def get_event_data(item):
     else:
         log_item("encountered event data with unrecognized type", item)
         return None
-
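The rewritten comment block lives in the fallback path of parse_eventlog_item: when a line fails to decode as pure JSON, PATTERN_JSON tries to peel a JSON object off the end of the line before giving up. A small illustration with a made-up syslog-style prefix:

    from edx.analytics.util import eventlog

    # A tracking-log line with a hypothetical syslog-style prefix prepended.
    line = 'Dec 17 15:38:32 host1 tracking: {"event_type": "edx.course.enrollment.activated"}'

    # The first json_decode attempt fails, PATTERN_JSON matches the trailing
    # {...}, and the nested re-parse succeeds.
    item = eventlog.parse_eventlog_item(line)
    assert item == {'event_type': 'edx.course.enrollment.activated'}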
edx/analytics/util/tests/test_eventlog.py @ f724d3de

@@ -7,9 +7,28 @@ import unittest
 
 import edx.analytics.util.eventlog as eventlog
 
 
-class EventLogTest(unittest.TestCase):
+class CourseIdTest(unittest.TestCase):
     """
-    Tests to verify that event log parsing works correctly.
+    Verify that course_id filtering works correctly.
     """
+    def test_normal_course_id(self):
+        course_id = "org/course_id/course_run"
+        self.assertTrue(eventlog.is_valid_course_id(course_id))
+
+    def test_course_id_without_components(self):
+        course_id = "org:course_id:course_run"
+        self.assertFalse(eventlog.is_valid_course_id(course_id))
+
+    def test_course_id_with_nonascii(self):
+        course_id = u"org/course\ufffd_id/course_run"
+        self.assertFalse(eventlog.is_valid_course_id(course_id))
+
+
+class ParseEventLogTest(unittest.TestCase):
+    """
+    Verify that event log parsing works correctly.
+    """
     def test_parse_valid_eventlog_item(self):
@@ -34,3 +53,43 @@ class EventLogTest(unittest.TestCase):
         self.assertEquals(result['username'], u'b\ufffdb')
+
+
+class TimestampTest(unittest.TestCase):
+    """Verify timestamp-related functions."""
+    def test_datestamp_from_timestamp(self):
+        timestamp = "2013-12-17T15:38:32"
+        self.assertEquals(eventlog.timestamp_to_datestamp(timestamp), "2013-12-17")
+
+    def test_missing_datetime(self):
+        item = {"something else": "not an event"}
+        self.assertIsNone(eventlog.get_event_time(item))
+
+    def test_good_datetime(self):
+        item = {"time": "2013-12-17T15:38:32.805444+00:00"}
+        dt_value = eventlog.get_event_time(item)
+        self.assertEquals(eventlog.datetime_to_timestamp(dt_value), "2013-12-17T15:38:32")
+        self.assertEquals(eventlog.datetime_to_datestamp(dt_value), "2013-12-17")
+
+
+class GetEventDataTest(unittest.TestCase):
+    """Verify that get_event_data works as expected."""
+    def test_missing_event_data(self):
+        item = {"something else": "not an event"}
+        self.assertIsNone(eventlog.get_event_data(item))
+
+    def test_get_bad_string_event_data(self):
+        item = {"event": "a string but not JSON"}
+        self.assertIsNone(eventlog.get_event_data(item))
+
+    def test_get_json_string_event_data(self):
+        item = {"event": '{ "a string": "that is JSON"}'}
+        self.assertEquals(eventlog.get_event_data(item), {"a string": "that is JSON"})
+
+    def test_event_data_with_unknown_type(self):
+        item = {"event": ["a list", "of strings"]}
+        self.assertIsNone(eventlog.get_event_data(item))
+
+    def test_get_dict_event_data(self):
+        item = {"event": {"a dict": "that has strings"}}
+        self.assertEquals(eventlog.get_event_data(item), {"a dict": "that has strings"})