Add answer distribution acceptance test

- Refactor acceptance tests
- Update version of Luigi

Change-Id: I93f660a3ae9d0987bc9935984c6d72fa14641d80

Add answer distribution acceptance test
- Refactor acceptance tests
- Update version of Luigi

Change-Id: I93f660a3ae9d0987bc9935984c6d72fa14641d80
3b87fd6a · Carlos Andrés Rocha · af4e49dd · 3b87fd6a · 3b87fd6a · 3b87fd6a
Commit 3b87fd6a authored May 13, 2014 by Carlos Andrés Rocha
9 changed files
--- a/Makefile
+++ b/Makefile
@@ -18,10 +18,10 @@ system-requirements:
 	sudo apt-get install -y -q libmysqlclient-dev

 requirements:
-	$(PIP_INSTALL) -r requirements/default.txt
+	$(PIP_INSTALL) -U -r requirements/default.txt

 test-requirements: requirements
-	$(PIP_INSTALL) -r requirements/test.txt
+	$(PIP_INSTALL) -U -r requirements/test.txt

 test: test-requirements
 	# TODO: when we have better coverage, modify this to actually fail when coverage is too low.
@@ -29,7 +29,7 @@ test: test-requirements
 	python -m coverage run --rcfile=./.coveragerc -m nose -A 'not acceptance'

 test-acceptance: test-requirements
-	python -m coverage run --rcfile=./.coveragerc -m nose --nocapture -A acceptance
+	python -m coverage run --rcfile=./.coveragerc -m nose --nocapture -A acceptance $(ONLY_TESTS)

 coverage: test
 	python -m coverage html
@@ -67,7 +67,7 @@ jenkins-acceptance:
 	$(META_BIN)/pip install awscli
 	$(META_BIN)/aws s3 rm --recursive $(call get_config,tasks_output_url)$(call get_config,identifier) || true

-	$(EXPORTER_BIN)/$(PIP_INSTALL) -r $$WORKSPACE/analytics-exporter/requirements.txt
+	$(EXPORTER_BIN)/$(PIP_INSTALL) -U -r $$WORKSPACE/analytics-exporter/requirements.txt
 	$(EXPORTER_BIN)/$(PIP_INSTALL) -e $$WORKSPACE/analytics-exporter/

 	. $(TASKS_BIN)/activate && $(MAKE) install test-acceptance
--- a/edx/analytics/tasks/tests/acceptance/__init__.py
+++ b/edx/analytics/tasks/tests/acceptance/__init__.py
+import boto
+import json
+import logging
+import os
+import subprocess
+import sys
+if sys.version_info[:2] <= (2, 6):
+    import unittest2 as unittest
+else:
+    import unittest
+
+
+log = logging.getLogger(__name__)
+
+
+class AcceptanceTestCase(unittest.TestCase):
+    """
+    Common base class for acceptance tests.
+
+    Every test gets an S3 connection, the parsed acceptance test configuration and the
+    path of the ``fixtures`` directory that sits next to this module.
+    """
+
+    # Attribute that the ``-A acceptance`` nose selector matches on.
+    acceptance = 1
+    NUM_MAPPERS = 4
+    NUM_REDUCERS = 2
+
+    def setUp(self):
+        """
+        Populate ``s3_conn``, ``config`` and ``data_dir``.
+
+        ACCEPTANCE_TEST_CONFIG may hold either the path of a JSON file or a JSON document
+        itself. When the variable is not set, ``config`` is an empty dictionary.
+        """
+        self.s3_conn = boto.connect_s3()
+
+        raw_config = os.getenv('ACCEPTANCE_TEST_CONFIG')
+        if raw_config is None:
+            # Nothing was configured; subclasses decide which keys they require.
+            self.config = {}
+        else:
+            try:
+                with open(raw_config, 'r') as config_file:
+                    self.config = json.load(config_file)
+            except (IOError, TypeError):
+                # The value is not a readable file, so parse it as the JSON document.
+                self.config = json.loads(raw_config)
+
+        module_dir = os.path.dirname(__file__)
+        self.data_dir = os.path.join(module_dir, 'fixtures')
+
+    def call_subprocess(self, command):
+        """Log ``command`` and run it; a non-zero exit status raises CalledProcessError."""
+        message = 'Running subprocess {0}'.format(command)
+        log.info(message)
+        subprocess.check_call(command)
--- a/edx/analytics/tasks/tests/acceptance/fixtures/input/answer_dist_acceptance_tracking.log
+++ b/edx/analytics/tasks/tests/acceptance/fixtures/input/answer_dist_acceptance_tracking.log
--- a/edx/analytics/tasks/tests/fixtures/input/load_courseware_studentmodule.sql
+++ b/edx/analytics/tasks/tests/fixtures/input/load_courseware_studentmodule.sql
--- a/edx/analytics/tasks/tests/fixtures/output/edX-E929-2014_T1-courseware_studentmodule-acceptance-analytics.sql.sorted
+++ b/edx/analytics/tasks/tests/fixtures/output/edX-E929-2014_T1-courseware_studentmodule-acceptance-analytics.sql.sorted
--- a/edx/analytics/tasks/tests/acceptance/test_answer_dist.py
+++ b/edx/analytics/tasks/tests/acceptance/test_answer_dist.py
+"""
+End to end test of answer distribution.
+"""
+
+import os
+import logging
+
+from luigi.s3 import S3Client, S3Target
+
+from edx.analytics.tasks.tests.acceptance import AcceptanceTestCase
+from edx.analytics.tasks.url import url_path_join
+
+
+log = logging.getLogger(__name__)
+
+
+class AnswerDistributionAcceptanceTest(AcceptanceTestCase):
+    """
+    Run the answer distribution task end to end and sanity check what it writes.
+
+    The test uploads a tracking log fixture to S3, launches
+    AnswerDistributionOneFilePerCourseTask through the program named by the REMOTE_TASK
+    environment variable, and then inspects the files found under the output root.
+    """
+
+    INPUT_FILE = 'answer_dist_acceptance_tracking.log'
+    INPUT_FORMAT = 'oddjob.ManifestTextInputFormat'
+    # Overrides the value declared on AcceptanceTestCase.
+    NUM_REDUCERS = 1
+    # Zero-based index of the count column in each comma separated output row.
+    COUNT_COLUMN = 3
+
+    def setUp(self):
+        """Derive the S3 locations used by the test and upload the input fixture."""
+        super(AnswerDistributionAcceptanceTest, self).setUp()
+
+        # unittest assertions are used instead of bare ``assert`` statements because they
+        # are not stripped under ``python -O`` and they name the missing key on failure.
+        self.assertIn('tasks_output_url', self.config)
+        self.assertIn('oddjob_jar', self.config)
+
+        url = self.config['tasks_output_url']
+        identifier = self.config.get('identifier', '')
+
+        self.test_root = url_path_join(url, identifier, 'answer_distribution')
+        self.test_src = url_path_join(self.test_root, 'src')
+        self.test_out = url_path_join(self.test_root, 'out')
+
+        self.oddjob_jar = self.config['oddjob_jar']
+
+        self.s3 = S3Client()
+
+        self.upload_data()
+
+    def upload_data(self):
+        """Copy the tracking log fixture from the local fixtures directory to ``test_src``."""
+        src = os.path.join(self.data_dir, 'input', self.INPUT_FILE)
+        dst = url_path_join(self.test_src, self.INPUT_FILE)
+
+        # Upload test data file
+        self.s3.put(src, dst)
+
+    def test_answer_distribution(self):
+        """Launch the task and then validate the files it produced."""
+        self.launch_task()
+        self.validate_output()
+
+    def launch_task(self):
+        """Invoke the REMOTE_TASK program with the arguments for the answer distribution task."""
+        remote_task = os.getenv('REMOTE_TASK')
+        # Fail with a readable message instead of the TypeError that subprocess raises
+        # when the first element of the command is None.
+        self.assertIsNotNone(remote_task, 'The REMOTE_TASK environment variable is not set')
+
+        command = [
+            remote_task,
+            '--job-flow-name', self.config.get('job_flow_name'),
+            '--branch', self.config.get('tasks_branch'),
+            '--repo', self.config.get('tasks_repo'),
+            '--remote-name', self.config.get('identifier'),
+            '--wait',
+            '--log-path', self.config.get('tasks_log_path'),
+            '--user', self.config.get('connection_user'),
+            'AnswerDistributionOneFilePerCourseTask',
+            '--local-scheduler',
+            '--src', self.test_src,
+            '--dest', url_path_join(self.test_root, 'dst'),
+            '--name', 'test',
+            '--output-root', self.test_out,
+            # NOTE(review): no shell is involved here, so the double quotes reach the
+            # program literally; presumably they are consumed further downstream - confirm.
+            '--include', '"*"',
+            '--manifest', url_path_join(self.test_root, 'manifest.txt'),
+            '--base-input-format', self.INPUT_FORMAT,
+            '--lib-jar', self.oddjob_jar,
+            '--n-reduce-tasks', str(self.NUM_REDUCERS),
+        ]
+
+        self.call_subprocess(command)
+
+    def get_count(self, line):
+        """Return the integer found in the count column of one output row."""
+        return int(line.split(',')[self.COUNT_COLUMN])
+
+    def validate_output(self):
+        """Check that each of the two expected outputs has rows and a positive count."""
+        outputs = [url_path_join(self.test_out, path) for path in self.s3.list(self.test_out)]
+
+        # There are 2 courses in the test data
+        self.assertEqual(len(outputs), 2)
+
+        for output in outputs:
+            with S3Target(output).open() as output_file:
+                lines = list(output_file)[1:]  # Skip header
+
+            # Check that the results have data
+            self.assertGreater(len(lines), 0, '{0} has no data rows'.format(output))
+
+            # Check that at least one of the count columns is non zero
+            self.assertTrue(
+                any(self.get_count(line) > 0 for line in lines),
+                '{0} has no row with a positive count'.format(output)
+            )
--- a/edx/analytics/tasks/tests/test_acceptance.py
+++ b/edx/analytics/tasks/tests/test_acceptance.py
@@ -13,42 +13,35 @@ import tempfile
 import textwrap
 import shutil
 import subprocess
-import urlparse

-import boto
 import oursql

 from edx.analytics.tasks.url import get_target_from_url
 from edx.analytics.tasks.url import url_path_join
-from edx.analytics.tasks.tests import unittest
+from edx.analytics.tasks.tests.acceptance import AcceptanceTestCase


 log = logging.getLogger(__name__)


-class ExportAcceptanceTest(unittest.TestCase):
+class ExportAcceptanceTest(AcceptanceTestCase):
    """Validate the research data export pipeline for a single course and organization."""

    acceptance = 1

    ENVIRONMENT = 'acceptance'
    TABLE = 'courseware_studentmodule'
-    NUM_MAPPERS = 4
-    NUM_REDUCERS = 2
    COURSE_ID = 'edX/E929/2014_T1'

    def setUp(self):
+        super(ExportAcceptanceTest, self).setUp()
+
        # These variables will be set later
        self.temporary_dir = None
-        self.data_dir = None
        self.external_files_dir = None
        self.working_dir = None
        self.credentials = None

-        self.s3_conn = boto.connect_s3()
-
-        self.config = json.loads(os.getenv('ACCEPTANCE_TEST_CONFIG'))
-
        self.task_output_root = url_path_join(
            self.config.get('tasks_output_url'), self.config.get('identifier'))

@@ -75,7 +68,6 @@ class ExportAcceptanceTest(unittest.TestCase):
        self.temporary_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.temporary_dir)

-        self.data_dir = os.path.join(os.path.dirname(__file__), 'fixtures')
        self.external_files_dir = os.path.join(self.temporary_dir, 'external')
        self.working_dir = os.path.join(self.temporary_dir, 'work')

@@ -157,6 +149,7 @@ class ExportAcceptanceTest(unittest.TestCase):

                        cursor.execute(line)

+
    def run_export_task(self):
        """
        Preconditions: Populated courseware_studentmodule table in the MySQL database.

--- a/requirements/default.txt
+++ b/requirements/default.txt
@@ -3,6 +3,7 @@ argparse==1.2.1
 boto==2.22.1
 filechunkio==1.5
 html5lib==1.0b3
+luigi==1.0.16
 numpy==1.8.0
 oursql==0.9.3.1
 pandas==0.13.0
@@ -13,4 +14,3 @@ python-gnupg==0.3.6
 pyyaml==3.10
 stevedore==0.14.1
 tornado==3.1.1
-git+https://github.com/spotify/luigi.git@a33756c781b9bf7e51384f0eb19d6a25050ef136#egg=luigi
--- a/share/task.yml
+++ b/share/task.yml
@@ -88,6 +88,10 @@
      command: >
        virtualenv {{ working_venv_dir }}

+    - name: update pip
+      command: >
+        {{ working_venv_dir }}/bin/pip install -U pip
+
    - name: virtualenv initialized
      shell: >
        . {{ working_venv_dir }}/bin/activate && make install