Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
E
edx-analytics-pipeline
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
edx
edx-analytics-pipeline
Commits
39dbbe00
Commit
39dbbe00
authored
May 18, 2017
by
Hassan
Committed by
GitHub
May 18, 2017
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #396 from edx/hassan/acceptance-tests-validation
Fixed acceptance tests validation.
parents
795694d5
fda68608
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
103 additions
and
60 deletions
+103
-60
edx/analytics/tasks/tests/acceptance/__init__.py
+15
-0
edx/analytics/tasks/tests/acceptance/fixtures/output/acceptance_expected_d_user.csv
+2
-2
edx/analytics/tasks/tests/acceptance/fixtures/output/expected_financial_report.csv
+0
-0
edx/analytics/tasks/tests/acceptance/test_course_catalog.py
+16
-7
edx/analytics/tasks/tests/acceptance/test_financial_reports.py
+16
-16
edx/analytics/tasks/tests/acceptance/test_internal_reporting_certificate.py
+17
-11
edx/analytics/tasks/tests/acceptance/test_internal_reporting_country.py
+5
-4
edx/analytics/tasks/tests/acceptance/test_internal_reporting_user.py
+19
-13
edx/analytics/tasks/tests/acceptance/test_lms_courseware_link_clicked.py
+13
-7
No files found.
edx/analytics/tasks/tests/acceptance/__init__.py
View file @
39dbbe00
...
@@ -5,6 +5,7 @@ import logging
...
@@ -5,6 +5,7 @@ import logging
import
os
import
os
import
shutil
import
shutil
import
unittest
import
unittest
import
csv
from
luigi.s3
import
S3Client
from
luigi.s3
import
S3Client
import
pandas
import
pandas
...
@@ -112,6 +113,20 @@ def modify_target_for_local_server(target):
...
@@ -112,6 +113,20 @@ def modify_target_for_local_server(target):
return
target
return
target
def
coerce_columns_to_string
(
row
):
# Vertica response includes datatypes in some columns i-e. datetime, Decimal etc. so convert
# them into string before comparison with expected output.
return
[
str
(
x
)
for
x
in
row
]
def
read_csv_fixture_as_list
(
fixture_file_path
):
with
open
(
fixture_file_path
)
as
fixture_file
:
reader
=
csv
.
reader
(
fixture_file
)
next
(
reader
)
# skip header
fixture_data
=
list
(
reader
)
return
fixture_data
class
AcceptanceTestCase
(
unittest
.
TestCase
):
class
AcceptanceTestCase
(
unittest
.
TestCase
):
acceptance
=
1
acceptance
=
1
...
...
edx/analytics/tasks/tests/acceptance/fixtures/output/acceptance_expected_d_user.csv
View file @
39dbbe00
...
@@ -2,4 +2,4 @@ user_id,user_year_of_birth,user_level_of_education,user_gender,user_email,user_u
...
@@ -2,4 +2,4 @@ user_id,user_year_of_birth,user_level_of_education,user_gender,user_email,user_u
1,1984,a,m,honor@example.com,honor,2014-06-27 16:02:38,UNKNOWN
1,1984,a,m,honor@example.com,honor,2014-06-27 16:02:38,UNKNOWN
2,1975,b,m,audit@example.com,audit,2014-06-27 16:02:39,IE
2,1975,b,m,audit@example.com,audit,2014-06-27 16:02:39,IE
3,2000,b,,verified@example.com,verified,2014-06-27 16:02:41,UNKNOWN
3,2000,b,,verified@example.com,verified,2014-06-27 16:02:41,UNKNOWN
4,2000,,,staff@example.com,staff,2014-06-27 16:02:43,TH
4,2000,,None,staff@example.com,staff,2014-06-27 16:02:43,TH
\ No newline at end of file
\ No newline at end of file
edx/analytics/tasks/tests/acceptance/fixtures/output/expected_financial_report.csv
View file @
39dbbe00
This diff is collapsed.
Click to expand it.
edx/analytics/tasks/tests/acceptance/test_course_catalog.py
View file @
39dbbe00
...
@@ -4,6 +4,7 @@ End to end test of the course catalog tasks.
...
@@ -4,6 +4,7 @@ End to end test of the course catalog tasks.
import
logging
import
logging
import
os
import
os
import
datetime
import
pandas
import
pandas
...
@@ -50,16 +51,24 @@ class CourseSubjectsAcceptanceTest(BaseCourseCatalogAcceptanceTest):
...
@@ -50,16 +51,24 @@ class CourseSubjectsAcceptanceTest(BaseCourseCatalogAcceptanceTest):
def
validate_output
(
self
):
def
validate_output
(
self
):
"""Validates the output, comparing it to a csv of all the expected output from this workflow."""
"""Validates the output, comparing it to a csv of all the expected output from this workflow."""
columns
=
[
'row_number'
,
'course_id'
,
'date'
,
'subject_uri'
,
'subject_title'
,
'subject_language'
]
with
self
.
vertica
.
cursor
()
as
cursor
:
with
self
.
vertica
.
cursor
()
as
cursor
:
expected_output_csv
=
os
.
path
.
join
(
self
.
data_dir
,
'output'
,
'expected_subjects_for_acceptance.csv'
)
expected_output_csv
=
os
.
path
.
join
(
self
.
data_dir
,
'output'
,
'expected_subjects_for_acceptance.csv'
)
expected
=
pandas
.
read_csv
(
expected_output_csv
,
parse_dates
=
True
)
def
convert_date
(
date_string
):
"""Convert date string to a date object."""
return
datetime
.
datetime
.
strptime
(
date_string
,
'
%
Y-
%
m-
%
d'
)
.
date
()
expected
=
pandas
.
read_csv
(
expected_output_csv
,
converters
=
{
'date'
:
convert_date
})
cursor
.
execute
(
"SELECT * FROM {schema}.d_course_subjects;"
.
format
(
schema
=
self
.
vertica
.
schema_name
))
cursor
.
execute
(
"SELECT * FROM {schema}.d_course_subjects;"
.
format
(
schema
=
self
.
vertica
.
schema_name
))
database_subjects
=
cursor
.
fetchall
()
database_subjects
=
cursor
.
fetchall
()
subjects
=
pandas
.
DataFrame
(
database_subjects
,
columns
=
[
'row_number'
,
'course_id'
,
'date'
,
'subject_uri'
,
subjects
=
pandas
.
DataFrame
(
database_subjects
,
columns
=
columns
)
'subject_title'
,
'subject_language'
])
for
frame
in
(
subjects
,
expected
):
frame
.
sort
([
'row_number'
],
inplace
=
True
,
ascending
=
[
True
])
frame
.
reset_index
(
drop
=
True
,
inplace
=
True
)
try
:
# A ValueError will be thrown if the column names don't match or the two data frames are not square.
self
.
assert_data_frames_equal
(
subjects
,
expected
)
self
.
assertTrue
(
all
(
subjects
==
expected
))
except
ValueError
:
self
.
fail
(
"Expected and returned data frames have different shapes or labels."
)
edx/analytics/tasks/tests/acceptance/test_financial_reports.py
View file @
39dbbe00
...
@@ -10,7 +10,10 @@ import luigi
...
@@ -10,7 +10,10 @@ import luigi
import
pandas
import
pandas
from
pandas.util.testing
import
assert_frame_equal
,
assert_series_equal
from
pandas.util.testing
import
assert_frame_equal
,
assert_series_equal
from
edx.analytics.tasks.tests.acceptance
import
AcceptanceTestCase
,
when_vertica_available
,
when_vertica_not_available
from
edx.analytics.tasks.tests.acceptance
import
(
AcceptanceTestCase
,
when_vertica_available
,
when_vertica_not_available
,
coerce_columns_to_string
,
read_csv_fixture_as_list
)
from
edx.analytics.tasks.util.url
import
url_path_join
from
edx.analytics.tasks.util.url
import
url_path_join
from
edx.analytics.tasks.warehouse.financial.reconcile
import
LoadInternalReportingOrderTransactionsToWarehouse
from
edx.analytics.tasks.warehouse.financial.reconcile
import
LoadInternalReportingOrderTransactionsToWarehouse
...
@@ -58,27 +61,24 @@ class FinancialReportsAcceptanceTest(AcceptanceTestCase):
...
@@ -58,27 +61,24 @@ class FinancialReportsAcceptanceTest(AcceptanceTestCase):
with
self
.
vertica
.
cursor
()
as
cursor
:
with
self
.
vertica
.
cursor
()
as
cursor
:
expected_output_csv
=
os
.
path
.
join
(
self
.
data_dir
,
'output'
,
'expected_financial_report.csv'
)
expected_output_csv
=
os
.
path
.
join
(
self
.
data_dir
,
'output'
,
'expected_financial_report.csv'
)
expected
=
pandas
.
read_csv
(
expected_output_csv
,
parse_dates
=
True
)
expected_output_data
=
read_csv_fixture_as_list
(
expected_output_csv
)
expected
=
pandas
.
DataFrame
(
expected_output_data
,
columns
=
columns
)
cursor
.
execute
(
"SELECT {columns} FROM {schema}.f_orderitem_transactions"
.
format
(
cursor
.
execute
(
"SELECT {columns} FROM {schema}.f_orderitem_transactions"
.
format
(
columns
=
','
.
join
(
columns
),
columns
=
','
.
join
(
columns
),
schema
=
self
.
vertica
.
schema_name
schema
=
self
.
vertica
.
schema_name
))
))
response
=
cursor
.
fetchall
()
response
=
cursor
.
fetchall
()
f_orderitem_transactions
=
pandas
.
DataFrame
(
response
,
columns
=
columns
)
f_orderitem_transactions
=
pandas
.
DataFrame
(
map
(
coerce_columns_to_string
,
response
),
columns
=
columns
)
try
:
# A ValueError will be thrown if the column names don't match or the two data frames are not square.
self
.
assertTrue
(
all
(
f_orderitem_transactions
==
expected
))
for
frame
in
(
f_orderitem_transactions
,
expected
):
except
ValueError
:
frame
.
sort
([
'payment_ref_id'
,
'transaction_type'
],
inplace
=
True
,
ascending
=
[
True
,
False
])
buf
=
StringIO
()
frame
.
reset_index
(
drop
=
True
,
inplace
=
True
)
f_orderitem_transactions
.
to_csv
(
buf
)
print
'Actual:'
self
.
assert_data_frames_equal
(
f_orderitem_transactions
,
expected
)
print
buf
.
getvalue
()
buf
.
seek
(
0
)
expected
.
to_csv
(
buf
)
print
'Expected:'
print
buf
.
getvalue
()
self
.
fail
(
"Expected and returned data frames have different shapes or labels."
)
@when_vertica_not_available
@when_vertica_not_available
def
test_end_to_end_without_vertica
(
self
):
def
test_end_to_end_without_vertica
(
self
):
...
...
edx/analytics/tasks/tests/acceptance/test_internal_reporting_certificate.py
View file @
39dbbe00
...
@@ -7,7 +7,9 @@ import os
...
@@ -7,7 +7,9 @@ import os
import
pandas
import
pandas
from
edx.analytics.tasks.tests.acceptance
import
AcceptanceTestCase
,
when_vertica_available
from
edx.analytics.tasks.tests.acceptance
import
(
AcceptanceTestCase
,
when_vertica_available
,
coerce_columns_to_string
,
read_csv_fixture_as_list
)
log
=
logging
.
getLogger
(
__name__
)
log
=
logging
.
getLogger
(
__name__
)
...
@@ -33,18 +35,22 @@ class InternalReportingCertificateLoadAcceptanceTest(AcceptanceTestCase):
...
@@ -33,18 +35,22 @@ class InternalReportingCertificateLoadAcceptanceTest(AcceptanceTestCase):
def
validate_output
(
self
):
def
validate_output
(
self
):
"""Validates the output, comparing it to a csv of all the expected output from this workflow."""
"""Validates the output, comparing it to a csv of all the expected output from this workflow."""
columns
=
[
'user_id'
,
'course_id'
,
'is_certified'
,
'certificate_mode'
,
'final_grade'
,
'has_passed'
,
'created_date'
,
'modified_date'
]
with
self
.
vertica
.
cursor
()
as
cursor
:
with
self
.
vertica
.
cursor
()
as
cursor
:
expected_output_csv
=
os
.
path
.
join
(
self
.
data_dir
,
'output'
,
'acceptance_expected_d_user_course_certificate.csv'
)
expected_output_csv
=
os
.
path
.
join
(
self
.
data_dir
,
'output'
,
'acceptance_expected_d_user_course_certificate.csv'
)
expected
=
pandas
.
read_csv
(
expected_output_csv
,
parse_dates
=
True
)
expected_output_data
=
read_csv_fixture_as_list
(
expected_output_csv
)
expected
=
pandas
.
DataFrame
(
expected_output_data
,
columns
=
columns
)
cursor
.
execute
(
"SELECT * FROM {schema}.d_user_course_certificate"
.
format
(
schema
=
self
.
vertica
.
schema_name
))
cursor
.
execute
(
"SELECT * FROM {schema}.d_user_course_certificate"
.
format
(
schema
=
self
.
vertica
.
schema_name
))
response
=
cursor
.
fetchall
()
response
=
cursor
.
fetchall
()
d_user_course_certificate
=
pandas
.
DataFrame
(
response
,
columns
=
[
d_user_course_certificate
=
pandas
.
DataFrame
(
map
(
coerce_columns_to_string
,
response
),
columns
=
columns
)
'user_id'
,
'course_id'
,
'is_certified'
,
'certificate_mode'
,
'final_grade'
,
'has_passed'
,
'created_date'
,
'modified_date'
,
for
frame
in
(
d_user_course_certificate
,
expected
):
])
frame
.
sort
([
'user_id'
],
inplace
=
True
,
ascending
=
[
True
])
frame
.
reset_index
(
drop
=
True
,
inplace
=
True
)
try
:
# A ValueError will be thrown if the column names don't match or the two data frames are not square.
self
.
assertTrue
(
all
(
d_user_course_certificate
==
expected
))
self
.
assert_data_frames_equal
(
d_user_course_certificate
,
expected
)
except
ValueError
:
self
.
fail
(
"Expected and returned data frames have different shapes or labels."
)
edx/analytics/tasks/tests/acceptance/test_internal_reporting_country.py
View file @
39dbbe00
...
@@ -45,7 +45,8 @@ class InternalReportingCountryLoadAcceptanceTest(AcceptanceTestCase):
...
@@ -45,7 +45,8 @@ class InternalReportingCountryLoadAcceptanceTest(AcceptanceTestCase):
response
=
cursor
.
fetchall
()
response
=
cursor
.
fetchall
()
d_country
=
pandas
.
DataFrame
(
response
,
columns
=
[
'country_name'
,
'user_last_location_country_code'
])
d_country
=
pandas
.
DataFrame
(
response
,
columns
=
[
'country_name'
,
'user_last_location_country_code'
])
try
:
# A ValueError will be thrown if the column names don't match or the two data frames are not square.
for
frame
in
(
d_country
,
expected
):
self
.
assertTrue
(
all
(
d_country
==
expected
))
frame
.
sort
([
'country_name'
],
inplace
=
True
,
ascending
=
[
True
])
except
ValueError
:
frame
.
reset_index
(
drop
=
True
,
inplace
=
True
)
self
.
fail
(
"Expected and returned data frames have different shapes or labels."
)
self
.
assert_data_frames_equal
(
d_country
,
expected
)
edx/analytics/tasks/tests/acceptance/test_internal_reporting_user.py
View file @
39dbbe00
...
@@ -8,7 +8,9 @@ import os
...
@@ -8,7 +8,9 @@ import os
import
pandas
import
pandas
from
edx.analytics.tasks.tests.acceptance
import
AcceptanceTestCase
,
when_vertica_available
from
edx.analytics.tasks.tests.acceptance
import
(
AcceptanceTestCase
,
when_vertica_available
,
coerce_columns_to_string
,
read_csv_fixture_as_list
)
log
=
logging
.
getLogger
(
__name__
)
log
=
logging
.
getLogger
(
__name__
)
...
@@ -18,8 +20,8 @@ class InternalReportingUserLoadAcceptanceTest(AcceptanceTestCase):
...
@@ -18,8 +20,8 @@ class InternalReportingUserLoadAcceptanceTest(AcceptanceTestCase):
"""End-to-end test of the workflow to load the internal reporting warehouse's user table."""
"""End-to-end test of the workflow to load the internal reporting warehouse's user table."""
INPUT_FILE
=
'location_by_course_tracking.log'
INPUT_FILE
=
'location_by_course_tracking.log'
INTERVAL
=
'2014-07-21-2014-07-2
1
'
INTERVAL
=
'2014-07-21-2014-07-2
2
'
DATE
=
'2014-07-2
1
'
DATE
=
'2014-07-2
2
'
def
setUp
(
self
):
def
setUp
(
self
):
super
(
InternalReportingUserLoadAcceptanceTest
,
self
)
.
setUp
()
super
(
InternalReportingUserLoadAcceptanceTest
,
self
)
.
setUp
()
...
@@ -51,18 +53,22 @@ class InternalReportingUserLoadAcceptanceTest(AcceptanceTestCase):
...
@@ -51,18 +53,22 @@ class InternalReportingUserLoadAcceptanceTest(AcceptanceTestCase):
def
validate_output
(
self
):
def
validate_output
(
self
):
"""Validates the output, comparing it to a csv of all the expected output from this workflow."""
"""Validates the output, comparing it to a csv of all the expected output from this workflow."""
columns
=
[
'user_id'
,
'user_year_of_birth'
,
'user_level_of_education'
,
'user_gender'
,
'user_email'
,
'user_username'
,
'user_account_creation_time'
,
'user_last_location_country_code'
]
with
self
.
vertica
.
cursor
()
as
cursor
:
with
self
.
vertica
.
cursor
()
as
cursor
:
expected_output_csv
=
os
.
path
.
join
(
self
.
data_dir
,
'output'
,
'acceptance_expected_d_user.csv'
)
expected_output_csv
=
os
.
path
.
join
(
self
.
data_dir
,
'output'
,
'acceptance_expected_d_user.csv'
)
expected
=
pandas
.
read_csv
(
expected_output_csv
,
parse_dates
=
True
)
expected_output_data
=
read_csv_fixture_as_list
(
expected_output_csv
)
expected
=
pandas
.
DataFrame
(
expected_output_data
,
columns
=
columns
)
cursor
.
execute
(
"SELECT * FROM {schema}.d_user"
.
format
(
schema
=
self
.
vertica
.
schema_name
))
cursor
.
execute
(
"SELECT * FROM {schema}.d_user"
.
format
(
schema
=
self
.
vertica
.
schema_name
))
response
=
cursor
.
fetchall
()
response
=
cursor
.
fetchall
()
d_user
=
pandas
.
DataFrame
(
response
,
columns
=
[
'user_id'
,
'user_year_of_birth'
,
'user_level_of_education'
,
d_user
=
pandas
.
DataFrame
(
map
(
coerce_columns_to_string
,
response
),
columns
=
columns
)
'user_gender'
,
'user_email'
,
'user_username'
,
'user_account_creation_time'
,
for
frame
in
(
d_user
,
expected
):
'user_last_location_country_code'
])
frame
.
sort
([
'user_id'
],
inplace
=
True
,
ascending
=
[
True
])
frame
.
reset_index
(
drop
=
True
,
inplace
=
True
)
try
:
# A ValueError will be thrown if the column names don't match or the two data frames are not square.
self
.
assertTrue
(
all
(
d_user
==
expected
))
self
.
assert_data_frames_equal
(
d_user
,
expected
)
except
ValueError
:
self
.
fail
(
"Expected and returned data frames have different shapes or labels."
)
edx/analytics/tasks/tests/acceptance/test_lms_courseware_link_clicked.py
View file @
39dbbe00
...
@@ -3,7 +3,7 @@ End-to-end test of the workflow to load the warehouse's lms_courseware_link_clic
...
@@ -3,7 +3,7 @@ End-to-end test of the workflow to load the warehouse's lms_courseware_link_clic
"""
"""
from
datetime
import
dat
e
import
datetim
e
import
os
import
os
import
logging
import
logging
...
@@ -23,7 +23,7 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase):
...
@@ -23,7 +23,7 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase):
"""
"""
INPUT_FILE
=
'lms_courseware_link_clicked_acceptance_tracking.log'
INPUT_FILE
=
'lms_courseware_link_clicked_acceptance_tracking.log'
DATE
=
date
(
2016
,
6
,
13
)
DATE
=
date
time
.
date
(
2016
,
6
,
13
)
@when_vertica_available
@when_vertica_available
def
test_lms_courseware_link_clicked
(
self
):
def
test_lms_courseware_link_clicked
(
self
):
...
@@ -46,7 +46,12 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase):
...
@@ -46,7 +46,12 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase):
'output'
,
'output'
,
'acceptance_expected_lms_courseware_link_clicked_events.csv'
'acceptance_expected_lms_courseware_link_clicked_events.csv'
)
)
expected
=
pandas
.
read_csv
(
expected_output_csv
,
parse_dates
=
True
)
def
convert_date
(
date_string
):
"""Convert date string to a date object."""
return
datetime
.
datetime
.
strptime
(
date_string
,
'
%
Y-
%
m-
%
d'
)
.
date
()
expected
=
pandas
.
read_csv
(
expected_output_csv
,
converters
=
{
'event_date'
:
convert_date
})
cursor
.
execute
(
cursor
.
execute
(
"SELECT * FROM {schema}.lms_courseware_link_clicked_events ORDER BY course_id, event_date"
"SELECT * FROM {schema}.lms_courseware_link_clicked_events ORDER BY course_id, event_date"
...
@@ -65,7 +70,8 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase):
...
@@ -65,7 +70,8 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase):
]
]
)
)
try
:
# A ValueError will be thrown if the column names don't match or the two data frames are not square.
for
frame
in
(
lms_courseware_link_clicked_events
,
expected
):
self
.
assertTrue
(
all
(
lms_courseware_link_clicked_events
==
expected
))
frame
.
sort
([
'record_number'
],
inplace
=
True
,
ascending
=
[
True
])
except
ValueError
:
frame
.
reset_index
(
drop
=
True
,
inplace
=
True
)
self
.
fail
(
"Expected and returned data frames have different shapes or labels."
)
self
.
assert_data_frames_equal
(
lms_courseware_link_clicked_events
,
expected
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment