Commit 3b87fd6a by Carlos Andrés Rocha

Add answer distribution acceptance test

- Refactor acceptance tests
- Update version of Luigi

Change-Id: I93f660a3ae9d0987bc9935984c6d72fa14641d80
parent af4e49dd
......@@ -18,10 +18,10 @@ system-requirements:
sudo apt-get install -y -q libmysqlclient-dev
requirements:
$(PIP_INSTALL) -r requirements/default.txt
$(PIP_INSTALL) -U -r requirements/default.txt
test-requirements: requirements
$(PIP_INSTALL) -r requirements/test.txt
$(PIP_INSTALL) -U -r requirements/test.txt
test: test-requirements
# TODO: when we have better coverage, modify this to actually fail when coverage is too low.
......@@ -29,7 +29,7 @@ test: test-requirements
python -m coverage run --rcfile=./.coveragerc -m nose -A 'not acceptance'
test-acceptance: test-requirements
python -m coverage run --rcfile=./.coveragerc -m nose --nocapture -A acceptance
python -m coverage run --rcfile=./.coveragerc -m nose --nocapture -A acceptance $(ONLY_TESTS)
coverage: test
python -m coverage html
......@@ -67,7 +67,7 @@ jenkins-acceptance:
$(META_BIN)/pip install awscli
$(META_BIN)/aws s3 rm --recursive $(call get_config,tasks_output_url)$(call get_config,identifier) || true
$(EXPORTER_BIN)/$(PIP_INSTALL) -r $$WORKSPACE/analytics-exporter/requirements.txt
$(EXPORTER_BIN)/$(PIP_INSTALL) -U -r $$WORKSPACE/analytics-exporter/requirements.txt
$(EXPORTER_BIN)/$(PIP_INSTALL) -e $$WORKSPACE/analytics-exporter/
. $(TASKS_BIN)/activate && $(MAKE) install test-acceptance
import boto
import json
import logging
import os
import subprocess
import sys
if sys.version_info[:2] <= (2, 6):
import unittest2 as unittest
else:
import unittest
log = logging.getLogger(__name__)
class AcceptanceTestCase(unittest.TestCase):
    """Base class for acceptance tests.

    Sets up an S3 connection, loads the test configuration named by the
    ACCEPTANCE_TEST_CONFIG environment variable and locates the fixtures
    directory that sits next to this module.
    """

    # Attribute the test runner can select on (the Makefile runs nose with
    # `-A acceptance` / `-A 'not acceptance'`).
    acceptance = 1

    # Default mapper/reducer counts; subclasses may override them.
    NUM_MAPPERS = 4
    NUM_REDUCERS = 2

    def setUp(self):
        """Connect to S3, load the configuration and resolve the fixtures path."""
        self.s3_conn = boto.connect_s3()
        self.config = self._load_config(os.getenv('ACCEPTANCE_TEST_CONFIG'))
        self.data_dir = os.path.join(os.path.dirname(__file__), 'fixtures')

    @staticmethod
    def _load_config(config_source):
        """Return the configuration dictionary described by `config_source`.

        `config_source` may be:

        - None (variable not set): an empty dictionary is returned.
        - The path of a readable file containing JSON: its parsed content is returned.
        - A JSON document: it is parsed and returned.

        Raises ValueError when the value is neither a readable file nor valid
        JSON, or when the file it names does not contain valid JSON.
        """
        # Handle the unset case explicitly instead of relying on open() and
        # json.loads() both raising TypeError for None.
        if config_source is None:
            return {}

        try:
            with open(config_source, 'r') as config_file:
                return json.load(config_file)
        except (IOError, TypeError):
            # Not something we can open as a file: treat the value itself as JSON.
            try:
                return json.loads(config_source)
            except ValueError as err:
                raise ValueError(
                    'ACCEPTANCE_TEST_CONFIG is neither a readable file nor valid JSON: {0}'.format(err)
                )

    def call_subprocess(self, command):
        """Execute a subprocess and log the command before running it.

        Raises subprocess.CalledProcessError if the command exits with a
        non-zero status.
        """
        log.info('Running subprocess {0}'.format(command))
        subprocess.check_call(command)
This source diff could not be displayed because it is too large. You can view the blob instead.
"""
End to end test of answer distribution.
"""
import os
import logging
from luigi.s3 import S3Client, S3Target
from edx.analytics.tasks.tests.acceptance import AcceptanceTestCase
from edx.analytics.tasks.url import url_path_join
log = logging.getLogger(__name__)
class AnswerDistributionAcceptanceTest(AcceptanceTestCase):
    """Upload a tracking log fixture to S3, run the answer distribution task and check its output."""

    INPUT_FILE = 'answer_dist_acceptance_tracking.log'
    INPUT_FORMAT = 'oddjob.ManifestTextInputFormat'
    NUM_REDUCERS = 1

    def setUp(self):
        """Validate the configuration, derive the S3 locations used by the test and upload the input."""
        super(AnswerDistributionAcceptanceTest, self).setUp()

        # Use unittest assertions rather than bare `assert` statements: they
        # are not stripped under `python -O` and report which key is missing.
        for required_key in ('tasks_output_url', 'oddjob_jar'):
            self.assertIn(
                required_key, self.config,
                'Missing required acceptance test configuration key: {0}'.format(required_key)
            )

        url = self.config['tasks_output_url']
        identifier = self.config.get('identifier', '')

        self.test_root = url_path_join(url, identifier, 'answer_distribution')
        self.test_src = url_path_join(self.test_root, 'src')
        self.test_out = url_path_join(self.test_root, 'out')

        self.oddjob_jar = self.config['oddjob_jar']

        self.s3 = S3Client()

        self.upload_data()

    def upload_data(self):
        """Copy the local fixture INPUT_FILE into the test's S3 source location."""
        src = os.path.join(self.data_dir, 'input', self.INPUT_FILE)
        dst = url_path_join(self.test_src, self.INPUT_FILE)

        # Upload test data file
        self.s3.put(src, dst)

    def test_answer_distribution(self):
        """Run the task, then verify what it wrote."""
        self.launch_task()
        self.validate_output()

    def launch_task(self):
        """Run AnswerDistributionOneFilePerCourseTask through the REMOTE_TASK executable.

        The executable path comes from the REMOTE_TASK environment variable and
        the remaining options from the test configuration.
        """
        command = [
            os.getenv('REMOTE_TASK'),
            '--job-flow-name', self.config.get('job_flow_name'),
            '--branch', self.config.get('tasks_branch'),
            '--repo', self.config.get('tasks_repo'),
            '--remote-name', self.config.get('identifier'),
            '--wait',
            '--log-path', self.config.get('tasks_log_path'),
            '--user', self.config.get('connection_user'),
            'AnswerDistributionOneFilePerCourseTask',
            '--local-scheduler',
            '--src', self.test_src,
            '--dest', url_path_join(self.test_root, 'dst'),
            '--name', 'test',
            '--output-root', self.test_out,
            # NOTE(review): the double quotes are part of the argument because
            # the command is passed as a list; presumably they protect the
            # glob from a shell further down the line -- confirm.
            '--include', '"*"',
            '--manifest', url_path_join(self.test_root, 'manifest.txt'),
            '--base-input-format', self.INPUT_FORMAT,
            '--lib-jar', self.oddjob_jar,
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
        ]

        self.call_subprocess(command)

    @staticmethod
    def _get_count(line):
        """Return the fourth comma-separated field of `line` as an integer.

        NOTE(review): this is a plain split, so it assumes the first fields
        contain no quoted commas -- confirm against the output format.
        """
        return int(line.split(',')[3])

    def validate_output(self):
        """Check that one non-empty output file per course was written to the output root."""
        output_urls = [url_path_join(self.test_out, path) for path in self.s3.list(self.test_out)]

        # There are 2 courses in the test data
        self.assertEqual(len(output_urls), 2)

        # Check that the results have data
        for output_url in output_urls:
            with S3Target(output_url).open() as output_file:
                rows = list(output_file)[1:]  # Skip header

            self.assertTrue(rows, 'No data rows found in {0}'.format(output_url))

            # Check that at least one of the count columns is non zero
            self.assertTrue(
                any(self._get_count(row) > 0 for row in rows),
                'All counts are zero in {0}'.format(output_url)
            )
......@@ -13,42 +13,35 @@ import tempfile
import textwrap
import shutil
import subprocess
import urlparse
import boto
import oursql
from edx.analytics.tasks.url import get_target_from_url
from edx.analytics.tasks.url import url_path_join
from edx.analytics.tasks.tests import unittest
from edx.analytics.tasks.tests.acceptance import AcceptanceTestCase
log = logging.getLogger(__name__)
class ExportAcceptanceTest(unittest.TestCase):
class ExportAcceptanceTest(AcceptanceTestCase):
"""Validate the research data export pipeline for a single course and organization."""
acceptance = 1
ENVIRONMENT = 'acceptance'
TABLE = 'courseware_studentmodule'
NUM_MAPPERS = 4
NUM_REDUCERS = 2
COURSE_ID = 'edX/E929/2014_T1'
def setUp(self):
super(ExportAcceptanceTest, self).setUp()
# These variables will be set later
self.temporary_dir = None
self.data_dir = None
self.external_files_dir = None
self.working_dir = None
self.credentials = None
self.s3_conn = boto.connect_s3()
self.config = json.loads(os.getenv('ACCEPTANCE_TEST_CONFIG'))
self.task_output_root = url_path_join(
self.config.get('tasks_output_url'), self.config.get('identifier'))
......@@ -75,7 +68,6 @@ class ExportAcceptanceTest(unittest.TestCase):
self.temporary_dir = tempfile.mkdtemp()
self.addCleanup(shutil.rmtree, self.temporary_dir)
self.data_dir = os.path.join(os.path.dirname(__file__), 'fixtures')
self.external_files_dir = os.path.join(self.temporary_dir, 'external')
self.working_dir = os.path.join(self.temporary_dir, 'work')
......@@ -157,6 +149,7 @@ class ExportAcceptanceTest(unittest.TestCase):
cursor.execute(line)
def run_export_task(self):
"""
Preconditions: Populated courseware_studentmodule table in the MySQL database.
......
......@@ -3,6 +3,7 @@ argparse==1.2.1
boto==2.22.1
filechunkio==1.5
html5lib==1.0b3
luigi==1.0.16
numpy==1.8.0
oursql==0.9.3.1
pandas==0.13.0
......@@ -13,4 +14,3 @@ python-gnupg==0.3.6
pyyaml==3.10
stevedore==0.14.1
tornado==3.1.1
git+https://github.com/spotify/luigi.git@a33756c781b9bf7e51384f0eb19d6a25050ef136#egg=luigi
......@@ -88,6 +88,10 @@
command: >
virtualenv {{ working_venv_dir }}
- name: update pip
command: >
{{ working_venv_dir }}/bin/pip install -U pip
- name: virtualenv initialized
shell: >
. {{ working_venv_dir }}/bin/activate && make install
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment