Commit 6f6c7605 by Brian Wilson, committed by Andrew Zafft

Fix isort and pep8 errors.

There were two cases of "E731 do not assign a lambda expression, use a
def", but I just added E731 to the exclusion list rather than changing
them.
parent baac6d29
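For reference, E731 fires when a lambda is assigned directly to a name; pycodestyle wants an equivalent def instead. A minimal sketch of the pattern (the name `noop_counter` is hypothetical, not one of the two cases in this repo):

    # Flagged as E731: binding a lambda to a name gives it no useful
    # __name__ for tracebacks and gains nothing over a def.
    noop_counter = lambda x: None

    # The def form pycodestyle prefers: same behavior, a real function name.
    def noop_counter(x):
        return None

One of the repo's two cases, the DEFAULT_HADOOP_COUNTER_FUNC lambda, appears as context in a hunk further down; with E731 on the ignore list, both can stay as-is.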
@@ -28,6 +28,7 @@ except ImportError:
 RETRY_LIMIT = 500
 WAIT_DURATION = 5

+
 def wait_for_job(job, check_error_result=True):
     counter = 0
     while True:
@@ -138,6 +139,7 @@ class BigQueryTarget(luigi.Target):
         return len(query.rows) == 1

+
 class BigQueryLoadDownstreamMixin(OverwriteOutputMixin):
     dataset_id = luigi.Parameter()
...
@@ -346,7 +346,7 @@ class MysqlInsertTask(MysqlInsertTaskMixin, luigi.Task):
                 # commit only if both operations completed successfully.
                 connection.commit()
-            except:
+            except Exception:
                 connection.rollback()
                 raise
             finally:
...
@@ -287,7 +287,7 @@ class EventLogSelectionMixin(EventLogSelectionDownstreamMixin):
         date_string = event_time.split("T")[0]

         if date_string < self.lower_bound_date_string or date_string >= self.upper_bound_date_string:
-            ## self.incr_counter('Event', 'Discard Outside Date Interval', 1)
+            # Slow: self.incr_counter('Event', 'Discard Outside Date Interval', 1)
             return None

         return event, date_string
...
@@ -471,7 +471,6 @@ ENABLE;""".format(schema=self.schema, table=self.table, column=column, expressio
         else:
             raise

-
    def run(self):
        """
        Inserts data generated by the copy command into target table.
...
@@ -11,8 +11,10 @@ from itertools import groupby
 from operator import itemgetter

 import luigi
+from luigi.hive import HiveQueryTask

 from edx.analytics.tasks.common.mapreduce import MapReduceJobTask, MapReduceJobTaskMixin, MultiOutputMapReduceJobTask
+from edx.analytics.tasks.common.mysql_load import MysqlInsertTask
 from edx.analytics.tasks.common.pathutil import EventLogSelectionDownstreamMixin, EventLogSelectionMixin
 from edx.analytics.tasks.insights.calendar_task import CalendarTableTask
 from edx.analytics.tasks.insights.database_imports import (
@@ -21,19 +23,12 @@ from edx.analytics.tasks.insights.database_imports import (
 from edx.analytics.tasks.insights.enrollments import CourseEnrollmentPartitionTask
 from edx.analytics.tasks.util import eventlog
 from edx.analytics.tasks.util.hive import (
-    BareHiveTableTask,
-    HivePartition,
-    HiveTableFromQueryTask,
-    HiveTableTask,
-    WarehouseMixin,
-    HivePartitionTask,
+    BareHiveTableTask, HivePartition, HivePartitionTask, HiveTableFromQueryTask, HiveTableTask, WarehouseMixin,
     hive_database_name
 )
 from edx.analytics.tasks.util.overwrite import OverwriteOutputMixin
+from edx.analytics.tasks.util.record import DateField, IntegerField, Record, StringField
 from edx.analytics.tasks.util.url import get_target_from_url, url_path_join
-from edx.analytics.tasks.common.mysql_load import MysqlInsertTask
-from edx.analytics.tasks.util.record import Record, StringField, IntegerField, DateField
-from luigi.hive import HiveQueryTask

 log = logging.getLogger(__name__)
@@ -46,7 +41,7 @@ class StudentEngagementIntervalTypeRecord(Record):
     """
     end_date = DateField(description='End date of the interval being analyzed.')
-    course_id = StringField( nullable=False, length=255, description='Identifier of course run.')
+    course_id = StringField(nullable=False, length=255, description='Identifier of course run.')
     username = StringField(
         nullable=False,
         length=255,
...
@@ -18,10 +18,12 @@ log = logging.getLogger(__name__)
 class DatabaseImportMixin(SqoopImportMixin):
     """Provides parameters for accessing RDBMS databases and determining date to assign to Hive partition."""
+
     import_date = luigi.DateParameter(
         default=None,
         description='Date to assign to Hive partition. Default is today\'s date, UTC.',
     )
+
     def __init__(self, *args, **kwargs):
         super(DatabaseImportMixin, self).__init__(*args, **kwargs)
...
@@ -884,6 +884,7 @@ class EnrollmentByGenderMysqlTask(OverwriteHiveAndMysqlDownstreamMixin, CourseEn
             ('course_id', 'date'),
         ]

+
 class EnrollmentByBirthYearRecord(Record):
     """Summarizes a course's enrollments by birth year and date."""
     date = StringField(length=255, nullable=False, description='Enrollment date.')
@@ -1045,6 +1046,7 @@ class EnrollmentByEducationLevelTableTask(BareHiveTableTask):  # pragma: no cove
     def columns(self):
         return EnrollmentByEducationLevelRecord.get_hive_schema()

+
 class EnrollmentByEducationLevelPartitionTask(HivePartitionTask):  # pragma: no cover
     """Creates storage partition for the `course_enrollment_education_level_daily` Hive table."""
@@ -1145,7 +1147,8 @@ class EnrollmentByEducationLevelDataTask(CourseEnrollmentDownstreamMixin, Overwr
 class EnrollmentByEducationLevelMysqlTask(
     OverwriteHiveAndMysqlDownstreamMixin,
     CourseEnrollmentDownstreamMixin,
-    MysqlInsertTask):
+    MysqlInsertTask
+):
     """
     Breakdown of enrollments by education level as reported by the user.
@@ -1532,7 +1535,8 @@ class CourseMetaSummaryEnrollmentDataTask(
     OverwriteHiveAndMysqlDownstreamMixin,
     CourseSummaryEnrollmentDownstreamMixin,
     LoadInternalReportingCourseCatalogMixin,
-    OverwriteAwareHiveQueryDataTask):  # pragma: no cover
+    OverwriteAwareHiveQueryDataTask
+):  # pragma: no cover
     """
     Aggregates data from the various course_enrollment tables into `course_meta_summary_enrollment` Hive table.
@@ -1654,7 +1658,8 @@ class CourseMetaSummaryEnrollmentDataTask(
 class CourseMetaSummaryEnrollmentIntoMysql(
     OverwriteHiveAndMysqlDownstreamMixin,
     CourseSummaryEnrollmentDownstreamMixin,
-    MysqlInsertTask):
+    MysqlInsertTask
+):
     """
     Creates the course_meta_summary_enrollment sql table.
...
@@ -33,7 +33,6 @@ except ImportError:
     numpy = None  # pylint: disable=invalid-name

-
 log = logging.getLogger(__name__)
@@ -181,7 +180,7 @@ class ModuleEngagementDataTask(EventLogSelectionMixin, OverwriteOutputMixin, Map
         elif event_type == 'play_video':
             entity_type = 'video'
             user_actions.append('viewed')
-            entity_id = event_data.get('id', '').strip()  # we have seen id values with leading newlines
+            entity_id = event_data.get('id', '').strip()  # We have seen id values with leading newlines.
         elif event_type.startswith('edx.forum.'):
             entity_type = 'discussion'
             if event_type.endswith('.created'):
...
@@ -6,9 +6,10 @@ from edx.analytics.tasks.insights.enrollments import CourseGradeByModeDataTask,
 class TestCourseGradeByModeDataTask(TestCase):

     def test_requires(self):
-        """The CourseGradeByModeDataTask should require the CourseGradeByModePartitionTask
-        and the ImportPersistentCourseGradeTask."""
+        # The CourseGradeByModeDataTask should require the CourseGradeByModePartitionTask
+        # and the ImportPersistentCourseGradeTask.
         a_date = datetime(2017, 1, 1)
         the_warehouse_path = '/tmp/foo'
         data_task = CourseGradeByModeDataTask(date=a_date, warehouse_path=the_warehouse_path)
...
@@ -41,7 +41,7 @@ class UserVideoViewingTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin, Te
            },
            "time": "{0}+00:00".format(self.DEFAULT_TIMESTAMP),
            "ip": "127.0.0.1",
-           "event": '{"id": "%s", "currentTime": 23.4398, "code": "87389iouhdfh", "duration": %s}' %(
+           "event": '{"id": "%s", "currentTime": 23.4398, "code": "87389iouhdfh", "duration": %s}' % (
                self.video_id,
                self.video_duration
            ),
@@ -60,7 +60,7 @@ class UserVideoViewingTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin, Te
            },
            "time": "{0}+00:00".format(self.DEFAULT_TIMESTAMP),
            "ip": "127.0.0.1",
-           "event": '{"id": "%s", "currentTime": 28, "code": "87389iouhdfh", "duration": %s}' %(
+           "event": '{"id": "%s", "currentTime": 28, "code": "87389iouhdfh", "duration": %s}' % (
                self.video_id,
                self.video_duration
            ),
@@ -79,7 +79,7 @@ class UserVideoViewingTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin, Te
            },
            "time": "{0}+00:00".format(self.DEFAULT_TIMESTAMP),
            "ip": "127.0.0.1",
-           "event": '{"id": "%s", "currentTime": 100, "code": "87389iouhdfh", "duration": %s}' %(
+           "event": '{"id": "%s", "currentTime": 100, "code": "87389iouhdfh", "duration": %s}' % (
                self.video_id,
                self.video_duration
            ),
@@ -98,7 +98,7 @@ class UserVideoViewingTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin, Te
            },
            "time": "{0}+00:00".format(self.DEFAULT_TIMESTAMP),
            "ip": "127.0.0.1",
-           "event": '{"id": "%s", "old_time": 14, "new_time": 10, "code": "87389iouhdfh", "duration": %s}' %(
+           "event": '{"id": "%s", "old_time": 14, "new_time": 10, "code": "87389iouhdfh", "duration": %s}' % (
                self.video_id,
                self.video_duration
            ),
...
@@ -42,7 +42,6 @@ class UserActivityTask(OverwriteOutputMixin, WarehouseMixin, EventLogSelectionMi
     output_root = None

-
     def mapper(self, line):
         value = self.get_event_and_date_string(line)
         if value is None:
...
@@ -44,8 +44,12 @@ OVERRIDE_CONFIGURATION_FILE = 'override.cfg'
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--additional-config', help='additional configuration file to be loaded after default/override',
-                        default=None, action='append')
+    parser.add_argument(
+        '--additional-config',
+        help='additional configuration file to be loaded after default/override',
+        default=None,
+        action='append'
+    )
     arguments, _extra_args = parser.parse_known_args()

     # We get a cleaned command-line arguments list, free of the arguments *we* care about, since Luigi will throw
@@ -76,8 +80,7 @@ def main():
     else:
         log.debug('Configuration file \'%s\' does not exist!', additional_config)

-
-    # Tell luigi what dependencies to pass to the Hadoop nodes
+    # Tell luigi what dependencies to pass to the Hadoop nodes:
     # - edx.analytics.tasks is used to load the pipeline code, since we cannot trust all will be loaded automatically.
     # - boto is used for all direct interactions with s3.
     # - cjson is used for all parsing event logs.
@@ -113,7 +116,7 @@ def get_cleaned_command_line_args():
         if v == '--additional-config':
             # Clear out the flag, and clear out the value attached to it.
             modified_arg_list[i] = None
-            modified_arg_list[i+1] = None
+            modified_arg_list[i + 1] = None

     return list(filter(lambda x: x is not None, modified_arg_list))
...
@@ -20,6 +20,7 @@ REMOTE_LOG_DIR = '/var/log/analytics-tasks'
 REMOTE_CONFIG_DIR_BASE = 'config'
 REMOTE_CODE_DIR_BASE = 'repo'

+
 def main():
     """Parse arguments and run the remote task."""
     parser = argparse.ArgumentParser()
...
@@ -28,7 +28,7 @@ class DatabaseService(object):
         with closing(conn.cursor()) as cur:
             try:
                 yield cur
-            except:
+            except Exception:
                 conn.rollback()
                 raise
             else:
...
@@ -34,7 +34,7 @@ class VerticaService(object):
         with closing(conn.cursor()) as cur:
             try:
                 yield cur
-            except:
+            except Exception:
                 conn.rollback()
                 raise
             else:
...
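Several hunks in this commit make the same narrowing fix: a bare `except:` catches BaseException, including KeyboardInterrupt and SystemExit, while `except Exception:` limits the handler to ordinary errors (pycodestyle flags the bare form as E722). A minimal sketch of the difference, using hypothetical names rather than repo code:

    def guarded(work, cleanup):
        try:
            return work()
        except Exception:  # ordinary errors: clean up, then re-raise
            cleanup()
            raise
        # A bare `except:` here would also run cleanup() on
        # KeyboardInterrupt or SystemExit before re-raising them.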
@@ -73,7 +73,7 @@ def analyze_log_file(filename):
         parser = LogFileParser(log_file, message_pattern=MESSAGE_START_PATTERN, message_factory=create_log_message)
         try:
             return analyze_log(parser)
-        except:
+        except Exception:
             sys.stderr.write('Exception on line {0}\n'.format(parser.line_number))
             raise
@@ -188,9 +188,9 @@ def analyze_hadoop_job(starting_message, parser):
     while message:
         message = parser.next_message()
-        if ('Job complete:' in message.content or
-                'completed successfully' in message.content or 'Ended Job = ' in message.content):
-            if 'Job complete:' in message.content or 'completed successfully' in message.content:
+        job_complete = ('Job complete:' in message.content or 'completed successfully' in message.content)
+        if job_complete or 'Ended Job = ' in message.content:
+            if job_complete:
                 move_measure = analyze_output_move(parser)
                 if move_measure:
                     yield move_measure
@@ -254,5 +254,6 @@ def sqoop_parameter_parser(raw_params):
     if table_param_match:
         return {'table': table_param_match.group('name')}

+
 if __name__ == '__main__':
     analyze()
@@ -150,6 +150,7 @@ def create_directory(output_dir):
         elif exc.errno != errno.EEXIST or os.path.isdir(output_dir):
             raise

+
 # These event_type values are known to have the possibility that the
 # user_id in context be different from the user_id in event payload.
 # In these cases, the context user_id represents the user performing the
...
@@ -17,6 +17,7 @@ key_cache = {}  # pylint: disable=invalid-name
 DEFAULT_HADOOP_COUNTER_FUNC = lambda x: None

+
 def get_key_from_target(key_file_target):
     """Get the contents of the key file pointed to by the target"""
...
@@ -24,7 +24,7 @@ def copy_file_to_file(src_file, output_file, progress=None):
             if progress:
                 try:
                     progress(len(transfer_buffer))
-                except:  # pylint: disable=bare-except
+                except Exception:  # pylint: disable=bare-except
                     pass
         else:
             break
...
@@ -419,6 +419,7 @@ def find_email_context(text, log_context=DEFAULT_LOG_CONTEXT):
     """Development: Find context phrases that might indicate the presence of an email address nearby."""
     return find_all_matches(EMAIL_CONTEXT, text, "EMAIL_CONTEXT", log_context)

+
 # Find names.
 NAME_CONTEXT = re.compile(
     r'\b(hi|hello|sincerely|yours truly|Dear|Mr|Ms|Mrs|regards|cordially|best wishes|cheers|my name)\b',
...
@@ -375,7 +375,7 @@ class Record(object):
         schema = []
         for field_name, field_obj in cls.get_fields().items():
             mode = 'NULLABLE' if field_obj.nullable else 'REQUIRED'
-            description=getattr(field_obj, 'description', None)
+            description = getattr(field_obj, 'description', None)
             schema.append(SchemaField(field_name, field_obj.bigquery_type, description=description, mode=mode))
         return schema
...
@@ -662,8 +662,8 @@ class CourseDataTask(BaseCourseRunMetadataTask):
             marketing_url=course_run.get('marketing_url'),
             min_effort=course_run.get('min_effort'),
             max_effort=course_run.get('max_effort'),
-            announcement_time = DateTimeField().deserialize_from_string(course_run.get('announcement')),
-            reporting_type = course_run.get('reporting_type'),
+            announcement_time=DateTimeField().deserialize_from_string(course_run.get('announcement')),
+            reporting_type=course_run.get('reporting_type'),
         )
         output_file.write(record.to_separated_values(sep=u'\t'))
         output_file.write('\n')
...
@@ -871,7 +871,6 @@ class TrackingEventRecordDataTask(EventLogSelectionMixin, BaseEventRecordDataTas
         project_name = self.PROJECT_NAME
         event_dict = {}

         self.add_calculated_event_entry(event_dict, 'input_file', self.get_map_input_file())
-
         self.add_calculated_event_entry(event_dict, 'event_type', event_type)
@@ -1233,7 +1232,6 @@ class SegmentEventRecordDataTask(SegmentEventLogSelectionMixin, BaseEventRecordD
         self.add_calculated_event_entry(event_dict, 'event_source', event_source)
         self.add_calculated_event_entry(event_dict, 'event_category', event_category)

         event_mapping = self.get_event_mapping()
-
         self.add_event_info(event_dict, event_mapping, event)
...
@@ -102,7 +102,7 @@ class TestCourseSubjects(TestCase):
     def test_course_no_subjects(self):
         """With a course with no subjects, we expect a row with NULLs."""
-        course_with_no_subjects = [{"course_runs": [ {"key": "foo"} ], "subjects": [{}]}]
+        course_with_no_subjects = [{"course_runs": [{"key": "foo"}], "subjects": [{}]}]
         data = self.run_task(course_with_no_subjects)
         # We expect an entry in the list of courses, since there is a course in the catalog.
         self.assertEquals(data.shape[0], 1)
@@ -120,7 +120,7 @@ class TestCourseSubjects(TestCase):
         """With a course with one subject, we expect to see that subject."""
         input_data = [
             {
-                "course_runs": [ {"key": "foo"} ],
+                "course_runs": [{"key": "foo"}],
                 "subjects": [{"slug": "testing", "name": "Testing"}]
             }
         ]
@@ -141,7 +141,7 @@ class TestCourseSubjects(TestCase):
         """With a course with two subjects, we expect to see both of those subjects."""
         input_data = [
             {
-                "course_runs": [ {"key": "foo"} ],
+                "course_runs": [{"key": "foo"}],
                 "subjects": [
                     {"slug": "testing", "name": "Testing"},
                     {"slug": "bar", "name": "Bar"},
@@ -174,7 +174,7 @@ class TestCourseSubjects(TestCase):
         """With multiple courses, we expect to see subject information for all of them."""
         input_data = [
             {
-                "course_runs": [ {"key": "foo"}, {"key": "bar"} ],
+                "course_runs": [{"key": "foo"}, {"key": "bar"}],
                 "subjects": [{"slug": "testing", "name": "Testing"}]
             },
         ]
@@ -202,11 +202,11 @@ class TestCourseSubjects(TestCase):
         """With multiple courses, we expect to see subject information for all of them."""
         input_data = [
             {
-                "course_runs": [ {"key": "foo"} ],
+                "course_runs": [{"key": "foo"}],
                 "subjects": [{"slug": "testing", "name": "Testing"}]
             },
             {
-                "course_runs": [ {"key": "bar"} ],
+                "course_runs": [{"key": "bar"}],
                 "subjects": [{"slug": "testing", "name": "Testing"}]
             }
         ]
@@ -241,7 +241,7 @@ class TestCourseSubjects(TestCase):
                 "subjects": [{"slug": "testing", "name": "Testing"}]
             },
             {
-                "course_runs": [ {"key": "bar"} ],
+                "course_runs": [{"key": "bar"}],
                 "subjects": [{"slug": "testing"}]
             }
         ]
...
@@ -91,6 +91,7 @@ class BaseTrackingEventRecordTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMi
         )
         self.task.init_local()

+
 @ddt
 class TrackingEventRecordTaskMapTest(BaseTrackingEventRecordTaskMapTest, unittest.TestCase):
     """Test class for emission of tracking log events in EventRecord format."""
@@ -170,6 +171,7 @@ class TrackingEventRecordTaskMapTest(BaseTrackingEventRecordTaskMapTest, unittes
         expected_value = EventRecord(**expected_dict).to_separated_values()
         self.assert_single_map_output(event, expected_key, expected_value)

+
 @ddt
 class TrackingJsonEventRecordTaskMapTest(BaseTrackingEventRecordTaskMapTest, unittest.TestCase):
     """Test class for emission of tracking log events in JsonEventRecord format."""
...
@@ -99,5 +99,5 @@ mapreduce.engine =
 emu = edx.analytics.tasks.common.mapreduce:EmulatedMapReduceJobRunner

 [pycodestyle]
-ignore=E501
+ignore=E501,E731
 max_line_length=119