Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-analytics-pipeline
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-analytics-pipeline
Commits
9c367a65
Commit
9c367a65
authored
Jun 19, 2017
by
Andrew Zafft
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/zafft/DE-77' into zafft/big-query-load-events
parents
95865b2f
fd36b56b
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
249 additions
and
38 deletions
+249
-38
edx/analytics/tasks/common/pathutil.py
+12
-4
edx/analytics/tasks/warehouse/load_internal_reporting_events.py
+85
-32
edx/analytics/tasks/warehouse/tests/test_load_internal_reporting_events.py
+152
-2
No files found.
edx/analytics/tasks/common/pathutil.py
View file @
9c367a65
...
...
@@ -297,10 +297,18 @@ class EventLogSelectionMixin(EventLogSelectionDownstreamMixin):
def get_event_time(self, event):
    """Returns time information from event if present, else returns None.

    The event is probed for the keys 'time', 'timestamp' and 'requestTime',
    in that order, and the value stored under the first key found is
    returned unchanged.

    Arguments:
        event: mapping holding the decoded event.

    Returns:
        The value of the first matching key, or None (after logging a
        message) when the event carries none of the keys.
    """
    for time_key in ('time', 'timestamp', 'requestTime'):
        if time_key in event:
            return event[time_key]

    # Hand the event to the logger as an argument: building the message with
    # "+" raises TypeError whenever the event is a dict rather than a string.
    log.info("Error found no usable time value for event %s", event)
    return None
def
get_map_input_file
(
self
):
"""Get the name of the input file from Hadoop."""
...
...
edx/analytics/tasks/warehouse/load_internal_reporting_events.py
View file @
9c367a65
This diff is collapsed.
Click to expand it.
edx/analytics/tasks/warehouse/tests/test_load_internal_reporting_events.py
View file @
9c367a65
"""Test processing of events for loading into Hive, etc."""
import
unittest
import
json
from
ddt
import
ddt
,
data
import
luigi
...
...
@@ -82,7 +83,7 @@ class TrackingEventRecordTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin,
if
not
date
:
date
=
self
.
DEFAULT_DATE
self
.
task
=
TrackingEventRecordDataTask
(
date
=
luigi
.
Date
Parameter
()
.
parse
(
date
),
interval
=
luigi
.
DateInterval
Parameter
()
.
parse
(
date
),
output_root
=
'/fake/output'
,
)
self
.
task
.
init_local
()
...
...
@@ -266,7 +267,7 @@ class SegmentEventRecordTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin,
if
not
date
:
date
=
self
.
DEFAULT_DATE
self
.
task
=
SegmentEventRecordDataTask
(
date
=
luigi
.
Date
Parameter
()
.
parse
(
date
),
interval
=
luigi
.
DateInterval
Parameter
()
.
parse
(
date
),
output_root
=
'/fake/output'
,
)
self
.
task
.
init_local
()
...
...
@@ -301,6 +302,14 @@ class SegmentEventRecordTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin,
self
.
assertNotEquals
(
timestamp
,
None
)
@data(
    {'sentAt': '2013-12-17T15:38:32.805444Z'},
)
def test_validtimestamps(self, kwargs):
    """An event with a well-formed 'sentAt' must produce a record whose timestamp is not None."""
    record = self._get_event_record_from_mapper(kwargs)
    self.assertNotEquals(record.timestamp, None)
@data
(
{
'sentAt'
:
'2016-07-26 05:11:37 a.m. +000A'
},
)
def
test_unparsable_timestamps
(
self
,
kwargs
):
...
...
@@ -351,3 +360,144 @@ class SegmentEventRecordTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin,
expected_key
,
expected_value
)
@ddt
class SegmentEventRecordTaskMapTimeMappingTest(InitializeOpaqueKeysMixin, MapperTestMixin, unittest.TestCase):
    """Tests how SegmentEventRecordDataTask picks the arrival time of a Segment event."""

    DEFAULT_USER_ID = 10
    DEFAULT_TIMESTAMP = "2013-12-17T15:38:32"
    DEFAULT_DATE = "2013-12-17"
    DEFAULT_ANONYMOUS_ID = "abcdef12-3456-789a-bcde-f0123456789a"
    DEFAULT_PROJECT = "segment_test"

    def setUp(self):
        """Build the Android 'screen' event template and create the task under test."""
        super(SegmentEventRecordTaskMapTimeMappingTest, self).setUp()

        self.initialize_ids()
        self.event_templates = {
            'android_screen': {
                "messageId": "fake_message_id",
                "type": "screen",
                "channel": "server",
                "context": {
                    "app": {
                        "build": 82,
                        "name": "edX",
                        "namespace": "org.edx.mobile",
                        "version": "2.3.0",
                    },
                    "traits": {
                        "anonymousId": self.DEFAULT_ANONYMOUS_ID
                    },
                    "library": {
                        "name": "analytics-android",
                        "version": "3.4.0",
                    },
                    "os": {
                        "name": "Android",
                        "version": "5.1.1",
                    },
                    "timezone": "America/New_York",
                    "screen": {
                        "density": 3.5,
                        "width": 1440,
                        "height": 2560,
                    },
                    "userAgent": "Dalvik/2.1.0 (Linux; U; Android 5.1.1; SAMSUNG-SM-N920A Build/LMY47X)",
                    "locale": "en-US",
                    "device": {
                        "id": "fake_device_id",
                        "manufacturer": "samsung",
                        "model": "SAMSUNG-SM-N920A",
                        "name": "noblelteatt",
                        "advertisingId": "fake_advertising_id",
                        "adTrackingEnabled": False,
                        "type": "android",
                    },
                    "network": {
                        "wifi": True,
                        "carrier": "AT&T",
                        "bluetooth": False,
                        "cellular": False,
                    },
                    "ip": "98.236.220.148"
                },
                "anonymousId": self.DEFAULT_ANONYMOUS_ID,
                "integrations": {
                    "All": True,
                    "Google Analytics": False,
                },
                "category": "",
                "name": "Launch",
                "properties": {
                    "data": {
                    },
                    "device-orientation": "portrait",
                    "navigation-mode": "full",
                    "context": {
                        "app_name": "edx.mobileapp.android",
                    },
                    "category": "screen",
                    "label": "Launch",
                },
                "writeKey": "dummy_write_key",
                "projectId": self.DEFAULT_PROJECT,
                "timestamp": "{0}.796Z".format(self.DEFAULT_TIMESTAMP),
                "sentAt": "{0}.000Z".format(self.DEFAULT_TIMESTAMP),
                "originalTimestamp": "{0}-0400".format(self.DEFAULT_TIMESTAMP),
                "version": 2,
            }
        }
        self.default_event_template = 'android_screen'
        self.create_task()

    def create_task(self, date=None):  # pylint: disable=arguments-differ
        """Allow arguments to be passed to the task constructor."""
        if not date:
            date = self.DEFAULT_DATE
        self.task = SegmentEventRecordDataTask(
            interval=luigi.DateIntervalParameter().parse(date),
            output_root='/fake/output',
        )
        self.task.init_local()

    def _get_event_record_from_mapper(self, kwargs):
        """Returns an EventRecord constructed from mapper output."""
        line = self.create_event_log_line(**kwargs)
        mapper_output = tuple(self.task.mapper(line))
        # The mapper is expected to emit exactly one (key, value) pair.
        self.assertEqual(len(mapper_output), 1)
        row = mapper_output[0]
        self.assertEqual(len(row), 2)
        _actual_key, actual_value = row
        return EventRecord.from_tsv(actual_value)

    @data(
        {'notRequestTime': "2013-12-17T15:38:32.805444Z"},
    )
    def test_error_on_missing_timestamps(self, kwargs):
        """An event carrying only an unrelated time key has no arrival time."""
        line = self.create_event_log_line(**kwargs)
        line_json = json.loads(line)
        timestamp = self.task.get_event_arrival_time(line_json)
        self.assertIsNone(timestamp)

    @data(
        {'requestTime': "2014-12-01T15:38:32.805444Z"},
    )
    def test_defaulting_to_requestTime_timestamps(self, kwargs):
        """With only 'requestTime' supplied, its value is returned unchanged."""
        line = self.create_event_log_line(**kwargs)
        line_json = json.loads(line)
        timestamp = self.task.get_event_arrival_time(line_json)
        self.assertEqual(timestamp, "2014-12-01T15:38:32.805444Z")

    # This test used to share its name with the one above, so it replaced it in
    # the class namespace and the requestTime-only case was never executed.
    @data(
        {'receivedAt': "2013-12-17T15:38:32.805444Z", 'requestTime': "2014-12-18T15:38:32.805444Z"},
    )
    def test_preferring_receivedAt_over_requestTime_timestamps(self, kwargs):
        """With both 'receivedAt' and 'requestTime' supplied, the 'receivedAt' instant is the result."""
        line = self.create_event_log_line(**kwargs)
        line_json = json.loads(line)
        timestamp = self.task.get_event_arrival_time(line_json)
        self.assertEqual(timestamp, "2013-12-17T15:38:32.805444+00:00")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment