Commit 39dbbe00 by Hassan Committed by GitHub

Merge pull request #396 from edx/hassan/acceptance-tests-validation

Fixed acceptance tests validation.
parents 795694d5 fda68608
...@@ -5,6 +5,7 @@ import logging ...@@ -5,6 +5,7 @@ import logging
import os import os
import shutil import shutil
import unittest import unittest
import csv
from luigi.s3 import S3Client from luigi.s3 import S3Client
import pandas import pandas
...@@ -112,6 +113,20 @@ def modify_target_for_local_server(target): ...@@ -112,6 +113,20 @@ def modify_target_for_local_server(target):
return target return target
def coerce_columns_to_string(row):
    """Return the values of ``row`` as a list of their ``str()`` representations.

    Rows fetched from Vertica can hold typed values (e.g. datetime, Decimal), whereas
    the expected output is read from CSV as plain strings, so every value is turned
    into a string before the two are compared.
    """
    return list(map(str, row))
def read_csv_fixture_as_list(fixture_file_path):
    """Load a CSV fixture file and return its data rows, without the header row.

    Args:
        fixture_file_path: path of the CSV fixture file to read.

    Returns:
        A list with one entry per data row; each entry is the list of string fields
        produced by ``csv.reader``.  A fixture that contains no rows at all (an empty
        file) yields an empty list.
    """
    with open(fixture_file_path) as fixture_file:
        reader = csv.reader(fixture_file)
        # Discard the header row.  Passing a default keeps an empty fixture from
        # raising StopIteration here.
        next(reader, None)
        return list(reader)
class AcceptanceTestCase(unittest.TestCase): class AcceptanceTestCase(unittest.TestCase):
acceptance = 1 acceptance = 1
......
...@@ -2,4 +2,4 @@ user_id,user_year_of_birth,user_level_of_education,user_gender,user_email,user_u ...@@ -2,4 +2,4 @@ user_id,user_year_of_birth,user_level_of_education,user_gender,user_email,user_u
1,1984,a,m,honor@example.com,honor,2014-06-27 16:02:38,UNKNOWN 1,1984,a,m,honor@example.com,honor,2014-06-27 16:02:38,UNKNOWN
2,1975,b,m,audit@example.com,audit,2014-06-27 16:02:39,IE 2,1975,b,m,audit@example.com,audit,2014-06-27 16:02:39,IE
3,2000,b,,verified@example.com,verified,2014-06-27 16:02:41,UNKNOWN 3,2000,b,,verified@example.com,verified,2014-06-27 16:02:41,UNKNOWN
4,2000,,,staff@example.com,staff,2014-06-27 16:02:43,TH 4,2000,,None,staff@example.com,staff,2014-06-27 16:02:43,TH
\ No newline at end of file \ No newline at end of file
...@@ -4,6 +4,7 @@ End to end test of the course catalog tasks. ...@@ -4,6 +4,7 @@ End to end test of the course catalog tasks.
import logging import logging
import os import os
import datetime
import pandas import pandas
...@@ -50,16 +51,24 @@ class CourseSubjectsAcceptanceTest(BaseCourseCatalogAcceptanceTest): ...@@ -50,16 +51,24 @@ class CourseSubjectsAcceptanceTest(BaseCourseCatalogAcceptanceTest):
def validate_output(self): def validate_output(self):
"""Validates the output, comparing it to a csv of all the expected output from this workflow.""" """Validates the output, comparing it to a csv of all the expected output from this workflow."""
columns = ['row_number', 'course_id', 'date', 'subject_uri', 'subject_title', 'subject_language']
with self.vertica.cursor() as cursor: with self.vertica.cursor() as cursor:
expected_output_csv = os.path.join(self.data_dir, 'output', 'expected_subjects_for_acceptance.csv') expected_output_csv = os.path.join(self.data_dir, 'output', 'expected_subjects_for_acceptance.csv')
expected = pandas.read_csv(expected_output_csv, parse_dates=True)
def convert_date(date_string):
    """Parse a ``YYYY-MM-DD`` string and return the matching ``datetime.date``."""
    parsed = datetime.datetime.strptime(date_string, '%Y-%m-%d')
    return parsed.date()
expected = pandas.read_csv(expected_output_csv, converters={'date': convert_date})
cursor.execute("SELECT * FROM {schema}.d_course_subjects;".format(schema=self.vertica.schema_name)) cursor.execute("SELECT * FROM {schema}.d_course_subjects;".format(schema=self.vertica.schema_name))
database_subjects = cursor.fetchall() database_subjects = cursor.fetchall()
subjects = pandas.DataFrame(database_subjects, columns=['row_number', 'course_id', 'date', 'subject_uri', subjects = pandas.DataFrame(database_subjects, columns=columns)
'subject_title', 'subject_language'])
for frame in (subjects, expected):
frame.sort(['row_number'], inplace=True, ascending=[True])
frame.reset_index(drop=True, inplace=True)
try: # A ValueError will be thrown if the column names don't match or the two data frames are not square. self.assert_data_frames_equal(subjects, expected)
self.assertTrue(all(subjects == expected))
except ValueError:
self.fail("Expected and returned data frames have different shapes or labels.")
...@@ -10,7 +10,10 @@ import luigi ...@@ -10,7 +10,10 @@ import luigi
import pandas import pandas
from pandas.util.testing import assert_frame_equal, assert_series_equal from pandas.util.testing import assert_frame_equal, assert_series_equal
from edx.analytics.tasks.tests.acceptance import AcceptanceTestCase, when_vertica_available, when_vertica_not_available from edx.analytics.tasks.tests.acceptance import (
AcceptanceTestCase, when_vertica_available, when_vertica_not_available, coerce_columns_to_string,
read_csv_fixture_as_list
)
from edx.analytics.tasks.util.url import url_path_join from edx.analytics.tasks.util.url import url_path_join
from edx.analytics.tasks.warehouse.financial.reconcile import LoadInternalReportingOrderTransactionsToWarehouse from edx.analytics.tasks.warehouse.financial.reconcile import LoadInternalReportingOrderTransactionsToWarehouse
...@@ -58,27 +61,24 @@ class FinancialReportsAcceptanceTest(AcceptanceTestCase): ...@@ -58,27 +61,24 @@ class FinancialReportsAcceptanceTest(AcceptanceTestCase):
with self.vertica.cursor() as cursor: with self.vertica.cursor() as cursor:
expected_output_csv = os.path.join(self.data_dir, 'output', 'expected_financial_report.csv') expected_output_csv = os.path.join(self.data_dir, 'output', 'expected_financial_report.csv')
expected = pandas.read_csv(expected_output_csv, parse_dates=True)
expected_output_data = read_csv_fixture_as_list(expected_output_csv)
expected = pandas.DataFrame(expected_output_data, columns=columns)
cursor.execute("SELECT {columns} FROM {schema}.f_orderitem_transactions".format( cursor.execute("SELECT {columns} FROM {schema}.f_orderitem_transactions".format(
columns=','.join(columns), columns=','.join(columns),
schema=self.vertica.schema_name schema=self.vertica.schema_name
)) ))
response = cursor.fetchall() response = cursor.fetchall()
f_orderitem_transactions = pandas.DataFrame(response, columns=columns)
f_orderitem_transactions = pandas.DataFrame(map(coerce_columns_to_string, response), columns=columns)
try: # A ValueError will be thrown if the column names don't match or the two data frames are not square.
self.assertTrue(all(f_orderitem_transactions == expected)) for frame in (f_orderitem_transactions, expected):
except ValueError: frame.sort(['payment_ref_id', 'transaction_type'], inplace=True, ascending=[True, False])
buf = StringIO() frame.reset_index(drop=True, inplace=True)
f_orderitem_transactions.to_csv(buf)
print 'Actual:' self.assert_data_frames_equal(f_orderitem_transactions, expected)
print buf.getvalue()
buf.seek(0)
expected.to_csv(buf)
print 'Expected:'
print buf.getvalue()
self.fail("Expected and returned data frames have different shapes or labels.")
@when_vertica_not_available @when_vertica_not_available
def test_end_to_end_without_vertica(self): def test_end_to_end_without_vertica(self):
......
...@@ -7,7 +7,9 @@ import os ...@@ -7,7 +7,9 @@ import os
import pandas import pandas
from edx.analytics.tasks.tests.acceptance import AcceptanceTestCase, when_vertica_available from edx.analytics.tasks.tests.acceptance import (
AcceptanceTestCase, when_vertica_available, coerce_columns_to_string, read_csv_fixture_as_list
)
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -33,18 +35,22 @@ class InternalReportingCertificateLoadAcceptanceTest(AcceptanceTestCase): ...@@ -33,18 +35,22 @@ class InternalReportingCertificateLoadAcceptanceTest(AcceptanceTestCase):
def validate_output(self): def validate_output(self):
"""Validates the output, comparing it to a csv of all the expected output from this workflow.""" """Validates the output, comparing it to a csv of all the expected output from this workflow."""
columns = ['user_id', 'course_id', 'is_certified', 'certificate_mode', 'final_grade', 'has_passed',
'created_date', 'modified_date']
with self.vertica.cursor() as cursor: with self.vertica.cursor() as cursor:
expected_output_csv = os.path.join(self.data_dir, 'output', 'acceptance_expected_d_user_course_certificate.csv') expected_output_csv = os.path.join(self.data_dir, 'output', 'acceptance_expected_d_user_course_certificate.csv')
expected = pandas.read_csv(expected_output_csv, parse_dates=True)
expected_output_data = read_csv_fixture_as_list(expected_output_csv)
expected = pandas.DataFrame(expected_output_data, columns=columns)
cursor.execute("SELECT * FROM {schema}.d_user_course_certificate".format(schema=self.vertica.schema_name)) cursor.execute("SELECT * FROM {schema}.d_user_course_certificate".format(schema=self.vertica.schema_name))
response = cursor.fetchall() response = cursor.fetchall()
d_user_course_certificate = pandas.DataFrame(response, columns=[ d_user_course_certificate = pandas.DataFrame(map(coerce_columns_to_string, response), columns=columns)
'user_id', 'course_id', 'is_certified', 'certificate_mode',
'final_grade', 'has_passed', 'created_date', 'modified_date', for frame in (d_user_course_certificate, expected):
]) frame.sort(['user_id'], inplace=True, ascending=[True])
frame.reset_index(drop=True, inplace=True)
try: # A ValueError will be thrown if the column names don't match or the two data frames are not square.
self.assertTrue(all(d_user_course_certificate == expected)) self.assert_data_frames_equal(d_user_course_certificate, expected)
except ValueError:
self.fail("Expected and returned data frames have different shapes or labels.")
...@@ -45,7 +45,8 @@ class InternalReportingCountryLoadAcceptanceTest(AcceptanceTestCase): ...@@ -45,7 +45,8 @@ class InternalReportingCountryLoadAcceptanceTest(AcceptanceTestCase):
response = cursor.fetchall() response = cursor.fetchall()
d_country = pandas.DataFrame(response, columns=['country_name', 'user_last_location_country_code']) d_country = pandas.DataFrame(response, columns=['country_name', 'user_last_location_country_code'])
try: # A ValueError will be thrown if the column names don't match or the two data frames are not square. for frame in (d_country, expected):
self.assertTrue(all(d_country == expected)) frame.sort(['country_name'], inplace=True, ascending=[True])
except ValueError: frame.reset_index(drop=True, inplace=True)
self.fail("Expected and returned data frames have different shapes or labels.")
self.assert_data_frames_equal(d_country, expected)
...@@ -8,7 +8,9 @@ import os ...@@ -8,7 +8,9 @@ import os
import pandas import pandas
from edx.analytics.tasks.tests.acceptance import AcceptanceTestCase, when_vertica_available from edx.analytics.tasks.tests.acceptance import (
AcceptanceTestCase, when_vertica_available, coerce_columns_to_string, read_csv_fixture_as_list
)
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -18,8 +20,8 @@ class InternalReportingUserLoadAcceptanceTest(AcceptanceTestCase): ...@@ -18,8 +20,8 @@ class InternalReportingUserLoadAcceptanceTest(AcceptanceTestCase):
"""End-to-end test of the workflow to load the internal reporting warehouse's user table.""" """End-to-end test of the workflow to load the internal reporting warehouse's user table."""
INPUT_FILE = 'location_by_course_tracking.log' INPUT_FILE = 'location_by_course_tracking.log'
INTERVAL = '2014-07-21-2014-07-21' INTERVAL = '2014-07-21-2014-07-22'
DATE = '2014-07-21' DATE = '2014-07-22'
def setUp(self): def setUp(self):
super(InternalReportingUserLoadAcceptanceTest, self).setUp() super(InternalReportingUserLoadAcceptanceTest, self).setUp()
...@@ -51,18 +53,22 @@ class InternalReportingUserLoadAcceptanceTest(AcceptanceTestCase): ...@@ -51,18 +53,22 @@ class InternalReportingUserLoadAcceptanceTest(AcceptanceTestCase):
def validate_output(self): def validate_output(self):
"""Validates the output, comparing it to a csv of all the expected output from this workflow.""" """Validates the output, comparing it to a csv of all the expected output from this workflow."""
columns = ['user_id', 'user_year_of_birth', 'user_level_of_education', 'user_gender', 'user_email',
'user_username', 'user_account_creation_time', 'user_last_location_country_code']
with self.vertica.cursor() as cursor: with self.vertica.cursor() as cursor:
expected_output_csv = os.path.join(self.data_dir, 'output', 'acceptance_expected_d_user.csv') expected_output_csv = os.path.join(self.data_dir, 'output', 'acceptance_expected_d_user.csv')
expected = pandas.read_csv(expected_output_csv, parse_dates=True)
expected_output_data = read_csv_fixture_as_list(expected_output_csv)
expected = pandas.DataFrame(expected_output_data, columns=columns)
cursor.execute("SELECT * FROM {schema}.d_user".format(schema=self.vertica.schema_name)) cursor.execute("SELECT * FROM {schema}.d_user".format(schema=self.vertica.schema_name))
response = cursor.fetchall() response = cursor.fetchall()
d_user = pandas.DataFrame(response, columns=['user_id', 'user_year_of_birth', 'user_level_of_education', d_user = pandas.DataFrame(map(coerce_columns_to_string, response), columns=columns)
'user_gender', 'user_email', 'user_username',
'user_account_creation_time', for frame in (d_user, expected):
'user_last_location_country_code']) frame.sort(['user_id'], inplace=True, ascending=[True])
frame.reset_index(drop=True, inplace=True)
try: # A ValueError will be thrown if the column names don't match or the two data frames are not square.
self.assertTrue(all(d_user == expected)) self.assert_data_frames_equal(d_user, expected)
except ValueError:
self.fail("Expected and returned data frames have different shapes or labels.")
...@@ -3,7 +3,7 @@ End-to-end test of the workflow to load the warehouse's lms_courseware_link_clic ...@@ -3,7 +3,7 @@ End-to-end test of the workflow to load the warehouse's lms_courseware_link_clic
""" """
from datetime import date import datetime
import os import os
import logging import logging
...@@ -23,7 +23,7 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase): ...@@ -23,7 +23,7 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase):
""" """
INPUT_FILE = 'lms_courseware_link_clicked_acceptance_tracking.log' INPUT_FILE = 'lms_courseware_link_clicked_acceptance_tracking.log'
DATE = date(2016, 6, 13) DATE = datetime.date(2016, 6, 13)
@when_vertica_available @when_vertica_available
def test_lms_courseware_link_clicked(self): def test_lms_courseware_link_clicked(self):
...@@ -46,7 +46,12 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase): ...@@ -46,7 +46,12 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase):
'output', 'output',
'acceptance_expected_lms_courseware_link_clicked_events.csv' 'acceptance_expected_lms_courseware_link_clicked_events.csv'
) )
expected = pandas.read_csv(expected_output_csv, parse_dates=True)
def convert_date(date_string):
    """Parse a ``YYYY-MM-DD`` string and return the matching ``datetime.date``."""
    parsed = datetime.datetime.strptime(date_string, '%Y-%m-%d')
    return parsed.date()
expected = pandas.read_csv(expected_output_csv, converters={'event_date': convert_date})
cursor.execute( cursor.execute(
"SELECT * FROM {schema}.lms_courseware_link_clicked_events ORDER BY course_id, event_date" "SELECT * FROM {schema}.lms_courseware_link_clicked_events ORDER BY course_id, event_date"
...@@ -65,7 +70,8 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase): ...@@ -65,7 +70,8 @@ class LmsCoursewareLinkClickedAcceptanceTest(AcceptanceTestCase):
] ]
) )
try: # A ValueError will be thrown if the column names don't match or the two data frames are not square. for frame in (lms_courseware_link_clicked_events, expected):
self.assertTrue(all(lms_courseware_link_clicked_events == expected)) frame.sort(['record_number'], inplace=True, ascending=[True])
except ValueError: frame.reset_index(drop=True, inplace=True)
self.fail("Expected and returned data frames have different shapes or labels.")
self.assert_data_frames_equal(lms_courseware_link_clicked_events, expected)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment