Commit 6f6c7605 by Brian Wilson, committed by Andrew Zafft

Fix isort and pep8 errors.

There were two cases of "E731 do not assign a lambda expression, use a
def", but I just added E731 to the exclusion list rather than changing
them.
parent baac6d29
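For reference, E731 fires when a lambda is assigned directly to a name; pycodestyle wants an equivalent def instead. A minimal sketch of the pattern (the name `noop_counter` is hypothetical, not one of the two cases in this repo):

    # Flagged as E731: binding a lambda to a name gives it no useful
    # __name__ for tracebacks and gains nothing over a def.
    noop_counter = lambda x: None

    # The def form pycodestyle prefers: same behavior, a real function name.
    def noop_counter(x):
        return None

One of the repo's two cases, the DEFAULT_HADOOP_COUNTER_FUNC lambda, appears as context in a hunk further down; with E731 on the ignore list, both can stay as-is.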
@@ -28,6 +28,7 @@ except ImportError:
 RETRY_LIMIT = 500
 WAIT_DURATION = 5

+
 def wait_for_job(job, check_error_result=True):
     counter = 0
     while True:
@@ -138,6 +139,7 @@ class BigQueryTarget(luigi.Target):
         return len(query.rows) == 1

+
 class BigQueryLoadDownstreamMixin(OverwriteOutputMixin):
     dataset_id = luigi.Parameter()
...
@@ -346,7 +346,7 @@ class MysqlInsertTask(MysqlInsertTaskMixin, luigi.Task):
                 # commit only if both operations completed successfully.
                 connection.commit()
-            except:
+            except Exception:
                 connection.rollback()
                 raise
             finally:
...
@@ -287,7 +287,7 @@ class EventLogSelectionMixin(EventLogSelectionDownstreamMixin):
         date_string = event_time.split("T")[0]

         if date_string < self.lower_bound_date_string or date_string >= self.upper_bound_date_string:
-            ## self.incr_counter('Event', 'Discard Outside Date Interval', 1)
+            # Slow: self.incr_counter('Event', 'Discard Outside Date Interval', 1)
             return None

         return event, date_string
...
@@ -471,7 +471,6 @@ ENABLE;""".format(schema=self.schema, table=self.table, column=column, expressio
         else:
             raise

-
    def run(self):
        """
        Inserts data generated by the copy command into target table.
...
@@ -11,8 +11,10 @@ from itertools import groupby
 from operator import itemgetter

 import luigi
+from luigi.hive import HiveQueryTask

 from edx.analytics.tasks.common.mapreduce import MapReduceJobTask, MapReduceJobTaskMixin, MultiOutputMapReduceJobTask
+from edx.analytics.tasks.common.mysql_load import MysqlInsertTask
 from edx.analytics.tasks.common.pathutil import EventLogSelectionDownstreamMixin, EventLogSelectionMixin
 from edx.analytics.tasks.insights.calendar_task import CalendarTableTask
 from edx.analytics.tasks.insights.database_imports import (
@@ -21,19 +23,12 @@ from edx.analytics.tasks.insights.database_imports import (
 from edx.analytics.tasks.insights.enrollments import CourseEnrollmentPartitionTask
 from edx.analytics.tasks.util import eventlog
 from edx.analytics.tasks.util.hive import (
-    BareHiveTableTask,
-    HivePartition,
-    HiveTableFromQueryTask,
-    HiveTableTask,
-    WarehouseMixin,
-    HivePartitionTask,
+    BareHiveTableTask, HivePartition, HivePartitionTask, HiveTableFromQueryTask, HiveTableTask, WarehouseMixin,
     hive_database_name
 )
 from edx.analytics.tasks.util.overwrite import OverwriteOutputMixin
+from edx.analytics.tasks.util.record import DateField, IntegerField, Record, StringField
 from edx.analytics.tasks.util.url import get_target_from_url, url_path_join
-from edx.analytics.tasks.common.mysql_load import MysqlInsertTask
-from edx.analytics.tasks.util.record import Record, StringField, IntegerField, DateField
-from luigi.hive import HiveQueryTask

 log = logging.getLogger(__name__)
@@ -46,7 +41,7 @@ class StudentEngagementIntervalTypeRecord(Record):
     """
     end_date = DateField(description='End date of the interval being analyzed.')
-    course_id = StringField( nullable=False, length=255, description='Identifier of course run.')
+    course_id = StringField(nullable=False, length=255, description='Identifier of course run.')
     username = StringField(
         nullable=False,
         length=255,
...
@@ -18,10 +18,12 @@ log = logging.getLogger(__name__)
 class DatabaseImportMixin(SqoopImportMixin):
     """Provides parameters for accessing RDBMS databases and determining date to assign to Hive partition."""
+
     import_date = luigi.DateParameter(
         default=None,
         description='Date to assign to Hive partition. Default is today\'s date, UTC.',
     )
+
     def __init__(self, *args, **kwargs):
         super(DatabaseImportMixin, self).__init__(*args, **kwargs)
...
@@ -884,6 +884,7 @@ class EnrollmentByGenderMysqlTask(OverwriteHiveAndMysqlDownstreamMixin, CourseEn
             ('course_id', 'date'),
         ]

+
 class EnrollmentByBirthYearRecord(Record):
     """Summarizes a course's enrollments by birth year and date."""
     date = StringField(length=255, nullable=False, description='Enrollment date.')
@@ -1045,6 +1046,7 @@ class EnrollmentByEducationLevelTableTask(BareHiveTableTask):  # pragma: no cove
     def columns(self):
         return EnrollmentByEducationLevelRecord.get_hive_schema()

+
 class EnrollmentByEducationLevelPartitionTask(HivePartitionTask):  # pragma: no cover
     """Creates storage partition for the `course_enrollment_education_level_daily` Hive table."""
@@ -1145,7 +1147,8 @@ class EnrollmentByEducationLevelDataTask(CourseEnrollmentDownstreamMixin, Overwr
 class EnrollmentByEducationLevelMysqlTask(
     OverwriteHiveAndMysqlDownstreamMixin,
     CourseEnrollmentDownstreamMixin,
-    MysqlInsertTask):
+    MysqlInsertTask
+):
     """
     Breakdown of enrollments by education level as reported by the user.
@@ -1532,7 +1535,8 @@ class CourseMetaSummaryEnrollmentDataTask(
     OverwriteHiveAndMysqlDownstreamMixin,
     CourseSummaryEnrollmentDownstreamMixin,
     LoadInternalReportingCourseCatalogMixin,
-    OverwriteAwareHiveQueryDataTask):  # pragma: no cover
+    OverwriteAwareHiveQueryDataTask
+):  # pragma: no cover
     """
     Aggregates data from the various course_enrollment tables into `course_meta_summary_enrollment` Hive table.
@@ -1654,7 +1658,8 @@ class CourseMetaSummaryEnrollmentDataTask(
 class CourseMetaSummaryEnrollmentIntoMysql(
     OverwriteHiveAndMysqlDownstreamMixin,
     CourseSummaryEnrollmentDownstreamMixin,
-    MysqlInsertTask):
+    MysqlInsertTask
+):
     """
     Creates the course_meta_summary_enrollment sql table.
...
@@ -33,7 +33,6 @@ except ImportError:
     numpy = None  # pylint: disable=invalid-name

-
 log = logging.getLogger(__name__)
@@ -181,7 +180,7 @@ class ModuleEngagementDataTask(EventLogSelectionMixin, OverwriteOutputMixin, Map
         elif event_type == 'play_video':
             entity_type = 'video'
             user_actions.append('viewed')
-            entity_id = event_data.get('id', '').strip()  # we have seen id values with leading newlines
+            entity_id = event_data.get('id', '').strip()  # We have seen id values with leading newlines.
         elif event_type.startswith('edx.forum.'):
             entity_type = 'discussion'
             if event_type.endswith('.created'):
...
@@ -6,9 +6,10 @@ from edx.analytics.tasks.insights.enrollments import CourseGradeByModeDataTask,
 class TestCourseGradeByModeDataTask(TestCase):

     def test_requires(self):
-        """The CourseGradeByModeDataTask should require the CourseGradeByModePartitionTask
-        and the ImportPersistentCourseGradeTask."""
+        # The CourseGradeByModeDataTask should require the CourseGradeByModePartitionTask
+        # and the ImportPersistentCourseGradeTask.
         a_date = datetime(2017, 1, 1)
         the_warehouse_path = '/tmp/foo'
         data_task = CourseGradeByModeDataTask(date=a_date, warehouse_path=the_warehouse_path)
...
@@ -41,7 +41,7 @@ class UserVideoViewingTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin, Te
            },
            "time": "{0}+00:00".format(self.DEFAULT_TIMESTAMP),
            "ip": "127.0.0.1",
-           "event": '{"id": "%s", "currentTime": 23.4398, "code": "87389iouhdfh", "duration": %s}' %(
+           "event": '{"id": "%s", "currentTime": 23.4398, "code": "87389iouhdfh", "duration": %s}' % (
                self.video_id,
                self.video_duration
            ),
@@ -60,7 +60,7 @@ class UserVideoViewingTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin, Te
            },
            "time": "{0}+00:00".format(self.DEFAULT_TIMESTAMP),
            "ip": "127.0.0.1",
-           "event": '{"id": "%s", "currentTime": 28, "code": "87389iouhdfh", "duration": %s}' %(
+           "event": '{"id": "%s", "currentTime": 28, "code": "87389iouhdfh", "duration": %s}' % (
                self.video_id,
                self.video_duration
            ),
@@ -79,7 +79,7 @@ class UserVideoViewingTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin, Te
            },
            "time": "{0}+00:00".format(self.DEFAULT_TIMESTAMP),
            "ip": "127.0.0.1",
-           "event": '{"id": "%s", "currentTime": 100, "code": "87389iouhdfh", "duration": %s}' %(
+           "event": '{"id": "%s", "currentTime": 100, "code": "87389iouhdfh", "duration": %s}' % (
                self.video_id,
                self.video_duration
            ),
@@ -98,7 +98,7 @@ class UserVideoViewingTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMixin, Te
            },
            "time": "{0}+00:00".format(self.DEFAULT_TIMESTAMP),
            "ip": "127.0.0.1",
-           "event": '{"id": "%s", "old_time": 14, "new_time": 10, "code": "87389iouhdfh", "duration": %s}' %(
+           "event": '{"id": "%s", "old_time": 14, "new_time": 10, "code": "87389iouhdfh", "duration": %s}' % (
                self.video_id,
                self.video_duration
            ),
...
@@ -42,7 +42,6 @@ class UserActivityTask(OverwriteOutputMixin, WarehouseMixin, EventLogSelectionMi
     output_root = None

-
     def mapper(self, line):
         value = self.get_event_and_date_string(line)
         if value is None:
...
@@ -44,8 +44,12 @@ OVERRIDE_CONFIGURATION_FILE = 'override.cfg'
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--additional-config', help='additional configuration file to be loaded after default/override',
-                        default=None, action='append')
+    parser.add_argument(
+        '--additional-config',
+        help='additional configuration file to be loaded after default/override',
+        default=None,
+        action='append'
+    )
     arguments, _extra_args = parser.parse_known_args()

     # We get a cleaned command-line arguments list, free of the arguments *we* care about, since Luigi will throw
@@ -76,8 +80,7 @@ def main():
     else:
         log.debug('Configuration file \'%s\' does not exist!', additional_config)

-
-    # Tell luigi what dependencies to pass to the Hadoop nodes
+    # Tell luigi what dependencies to pass to the Hadoop nodes:
     # - edx.analytics.tasks is used to load the pipeline code, since we cannot trust all will be loaded automatically.
     # - boto is used for all direct interactions with s3.
     # - cjson is used for all parsing event logs.
@@ -113,7 +116,7 @@ def get_cleaned_command_line_args():
         if v == '--additional-config':
             # Clear out the flag, and clear out the value attached to it.
             modified_arg_list[i] = None
-            modified_arg_list[i+1] = None
+            modified_arg_list[i + 1] = None

     return list(filter(lambda x: x is not None, modified_arg_list))
...
@@ -20,6 +20,7 @@ REMOTE_LOG_DIR = '/var/log/analytics-tasks'
 REMOTE_CONFIG_DIR_BASE = 'config'
 REMOTE_CODE_DIR_BASE = 'repo'

+
 def main():
     """Parse arguments and run the remote task."""
     parser = argparse.ArgumentParser()
...
@@ -28,7 +28,7 @@ class DatabaseService(object):
         with closing(conn.cursor()) as cur:
             try:
                 yield cur
-            except:
+            except Exception:
                 conn.rollback()
                 raise
             else:
...
@@ -34,7 +34,7 @@ class VerticaService(object):
         with closing(conn.cursor()) as cur:
             try:
                 yield cur
-            except:
+            except Exception:
                 conn.rollback()
                 raise
             else:
...
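Several hunks in this commit make the same narrowing fix: a bare `except:` catches BaseException, including KeyboardInterrupt and SystemExit, while `except Exception:` limits the handler to ordinary errors (pycodestyle flags the bare form as E722). A minimal sketch of the difference, using hypothetical names rather than repo code:

    def guarded(work, cleanup):
        try:
            return work()
        except Exception:  # ordinary errors: clean up, then re-raise
            cleanup()
            raise
        # A bare `except:` here would also run cleanup() on
        # KeyboardInterrupt or SystemExit before re-raising them.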
@@ -73,7 +73,7 @@ def analyze_log_file(filename):
         parser = LogFileParser(log_file, message_pattern=MESSAGE_START_PATTERN, message_factory=create_log_message)
         try:
             return analyze_log(parser)
-        except:
+        except Exception:
             sys.stderr.write('Exception on line {0}\n'.format(parser.line_number))
             raise
@@ -188,9 +188,9 @@ def analyze_hadoop_job(starting_message, parser):
     while message:
         message = parser.next_message()
-        if ('Job complete:' in message.content or
-                'completed successfully' in message.content or 'Ended Job = ' in message.content):
-            if 'Job complete:' in message.content or 'completed successfully' in message.content:
+        job_complete = ('Job complete:' in message.content or 'completed successfully' in message.content)
+        if job_complete or 'Ended Job = ' in message.content:
+            if job_complete:
                 move_measure = analyze_output_move(parser)
                 if move_measure:
                     yield move_measure
@@ -254,5 +254,6 @@ def sqoop_parameter_parser(raw_params):
     if table_param_match:
         return {'table': table_param_match.group('name')}

+
 if __name__ == '__main__':
     analyze()
@@ -150,6 +150,7 @@ def create_directory(output_dir):
         elif exc.errno != errno.EEXIST or os.path.isdir(output_dir):
             raise

+
 # These event_type values are known to have the possibility that the
 # user_id in context be different from the user_id in event payload.
 # In these cases, the context user_id represents the user performing the
...
@@ -17,6 +17,7 @@ key_cache = {}  # pylint: disable=invalid-name
 DEFAULT_HADOOP_COUNTER_FUNC = lambda x: None

+
 def get_key_from_target(key_file_target):
     """Get the contents of the key file pointed to by the target"""
...
@@ -24,7 +24,7 @@ def copy_file_to_file(src_file, output_file, progress=None):
             if progress:
                 try:
                     progress(len(transfer_buffer))
-                except:  # pylint: disable=bare-except
+                except Exception:  # pylint: disable=bare-except
                     pass
         else:
             break
...
@@ -419,6 +419,7 @@ def find_email_context(text, log_context=DEFAULT_LOG_CONTEXT):
     """Development: Find context phrases that might indicate the presence of an email address nearby."""
     return find_all_matches(EMAIL_CONTEXT, text, "EMAIL_CONTEXT", log_context)

+
 # Find names.
 NAME_CONTEXT = re.compile(
     r'\b(hi|hello|sincerely|yours truly|Dear|Mr|Ms|Mrs|regards|cordially|best wishes|cheers|my name)\b',
...
@@ -375,7 +375,7 @@ class Record(object):
         schema = []
         for field_name, field_obj in cls.get_fields().items():
             mode = 'NULLABLE' if field_obj.nullable else 'REQUIRED'
-            description=getattr(field_obj, 'description', None)
+            description = getattr(field_obj, 'description', None)
             schema.append(SchemaField(field_name, field_obj.bigquery_type, description=description, mode=mode))
         return schema
...
@@ -662,8 +662,8 @@ class CourseDataTask(BaseCourseRunMetadataTask):
             marketing_url=course_run.get('marketing_url'),
             min_effort=course_run.get('min_effort'),
             max_effort=course_run.get('max_effort'),
-            announcement_time = DateTimeField().deserialize_from_string(course_run.get('announcement')),
-            reporting_type = course_run.get('reporting_type'),
+            announcement_time=DateTimeField().deserialize_from_string(course_run.get('announcement')),
+            reporting_type=course_run.get('reporting_type'),
         )
         output_file.write(record.to_separated_values(sep=u'\t'))
         output_file.write('\n')
...
@@ -871,7 +871,6 @@ class TrackingEventRecordDataTask(EventLogSelectionMixin, BaseEventRecordDataTas
         project_name = self.PROJECT_NAME
         event_dict = {}

         self.add_calculated_event_entry(event_dict, 'input_file', self.get_map_input_file())
-
         self.add_calculated_event_entry(event_dict, 'event_type', event_type)
@@ -1233,7 +1232,6 @@ class SegmentEventRecordDataTask(SegmentEventLogSelectionMixin, BaseEventRecordD
         self.add_calculated_event_entry(event_dict, 'event_source', event_source)
         self.add_calculated_event_entry(event_dict, 'event_category', event_category)

         event_mapping = self.get_event_mapping()
-
         self.add_event_info(event_dict, event_mapping, event)
...
@@ -102,7 +102,7 @@ class TestCourseSubjects(TestCase):
     def test_course_no_subjects(self):
         """With a course with no subjects, we expect a row with NULLs."""
-        course_with_no_subjects = [{"course_runs": [ {"key": "foo"} ], "subjects": [{}]}]
+        course_with_no_subjects = [{"course_runs": [{"key": "foo"}], "subjects": [{}]}]
         data = self.run_task(course_with_no_subjects)
         # We expect an entry in the list of courses, since there is a course in the catalog.
         self.assertEquals(data.shape[0], 1)
@@ -120,7 +120,7 @@ class TestCourseSubjects(TestCase):
         """With a course with one subject, we expect to see that subject."""
         input_data = [
             {
-                "course_runs": [ {"key": "foo"} ],
+                "course_runs": [{"key": "foo"}],
                 "subjects": [{"slug": "testing", "name": "Testing"}]
             }
         ]
@@ -141,7 +141,7 @@ class TestCourseSubjects(TestCase):
         """With a course with two subjects, we expect to see both of those subjects."""
         input_data = [
             {
-                "course_runs": [ {"key": "foo"} ],
+                "course_runs": [{"key": "foo"}],
                 "subjects": [
                     {"slug": "testing", "name": "Testing"},
                     {"slug": "bar", "name": "Bar"},
@@ -174,7 +174,7 @@ class TestCourseSubjects(TestCase):
         """With multiple courses, we expect to see subject information for all of them."""
         input_data = [
             {
-                "course_runs": [ {"key": "foo"}, {"key": "bar"} ],
+                "course_runs": [{"key": "foo"}, {"key": "bar"}],
                 "subjects": [{"slug": "testing", "name": "Testing"}]
             },
         ]
@@ -202,11 +202,11 @@ class TestCourseSubjects(TestCase):
         """With multiple courses, we expect to see subject information for all of them."""
         input_data = [
             {
-                "course_runs": [ {"key": "foo"} ],
+                "course_runs": [{"key": "foo"}],
                 "subjects": [{"slug": "testing", "name": "Testing"}]
             },
             {
-                "course_runs": [ {"key": "bar"} ],
+                "course_runs": [{"key": "bar"}],
                 "subjects": [{"slug": "testing", "name": "Testing"}]
             }
         ]
@@ -241,7 +241,7 @@ class TestCourseSubjects(TestCase):
                 "subjects": [{"slug": "testing", "name": "Testing"}]
             },
             {
-                "course_runs": [ {"key": "bar"} ],
+                "course_runs": [{"key": "bar"}],
                 "subjects": [{"slug": "testing"}]
             }
         ]
...
@@ -91,6 +91,7 @@ class BaseTrackingEventRecordTaskMapTest(InitializeOpaqueKeysMixin, MapperTestMi
         )
         self.task.init_local()

+
 @ddt
 class TrackingEventRecordTaskMapTest(BaseTrackingEventRecordTaskMapTest, unittest.TestCase):
     """Test class for emission of tracking log events in EventRecord format."""
@@ -170,6 +171,7 @@ class TrackingEventRecordTaskMapTest(BaseTrackingEventRecordTaskMapTest, unittes
         expected_value = EventRecord(**expected_dict).to_separated_values()
         self.assert_single_map_output(event, expected_key, expected_value)

+
 @ddt
 class TrackingJsonEventRecordTaskMapTest(BaseTrackingEventRecordTaskMapTest, unittest.TestCase):
     """Test class for emission of tracking log events in JsonEventRecord format."""
...
@@ -99,5 +99,5 @@ mapreduce.engine =
 emu = edx.analytics.tasks.common.mapreduce:EmulatedMapReduceJobRunner

 [pycodestyle]
-ignore=E501
+ignore=E501,E731
 max_line_length=119