Commit 89eaad7a by Gabe Mulley

Support ssh connections to arbitrary hosts

parent 973a0a17
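The playbook below switches its "hosts" patterns from "mr_{{ name }}_master" to "{{ name }}", so runs are no longer pinned to a map-reduce master group and can address any host the inventory reaches over ssh. In support of that, ansible and its templating dependencies land in requirements/base.txt (pulled in by default.txt via "-r base.txt"), and a new bootstrap Make target installs them.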
@@ -54,3 +54,4 @@ override.cfg
 .DS_Store
 *.trace
+.eggs
@@ -15,6 +15,11 @@ uninstall:
 install: requirements uninstall
 	python setup.py install --force

+bootstrap: uninstall
+	$(PIP_INSTALL) -U -r requirements/pre.txt
+	$(PIP_INSTALL) -U -r requirements/base.txt
+	python setup.py install --force
+
 develop: requirements
 	python setup.py develop
@@ -24,6 +29,7 @@ system-requirements:
 	sudo apt-get install -y -q libmysqlclient-dev libatlas3gf-base libpq-dev python-dev libffi-dev libssl-dev libxml2-dev libxslt1-dev

 requirements:
+	$(PIP_INSTALL) -U -r requirements/pre.txt
 	$(PIP_INSTALL) -U -r requirements/default.txt

 test-requirements: requirements
......
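For orientation, the targets in the hunks above chain as follows: bootstrap first uninstalls any existing copy, then installs pre.txt and the new base.txt before reinstalling the package, while develop relies on the requirements target (pre.txt plus default.txt). A usage sketch, assuming a checkout of this repository:

    # Reinstall the pipeline together with the new ansible/ssh dependencies.
    make bootstrap
    # Editable install for local development against default.txt.
    make develop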
[hadoop]
version = apache1
python-executable = /usr/bin/python2.7

[hdfs]
......

[hive]
release = apache
database = default
warehouse_path = hdfs://localhost:9000/edx-analytics-pipeline/warehouse/

[database-export]
database = reports
credentials = /edx/etc/edx-analytics-pipeline/output.json

[database-import]
database = edxapp
credentials = /edx/etc/edx-analytics-pipeline/input.json
destination = hdfs://localhost:9000/edx-analytics-pipeline/warehouse/

[map-reduce]
engine = hadoop
marker = hdfs://localhost:9000/edx-analytics-pipeline/marker/

[event-logs]
pattern = .*tracking.log.*
expand_interval = 2 days
source = hdfs://localhost:9000/data/

[event-export]
output_root = hdfs://localhost:9000/edx-analytics-pipeline/event-export/output/
environment = simple
config = hdfs://localhost:9000/edx-analytics-pipeline/event_export/config.yaml
gpg_key_dir = hdfs://localhost:9000/edx-analytics-pipeline/event_export/gpg-keys/
gpg_master_key = master@key.org
required_path_text = FakeServerGroup

[manifest]
threshold = 500
input_format = org.edx.hadoop.input.ManifestTextInputFormat
lib_jar = hdfs://localhost:9000/edx-analytics-pipeline/packages/edx-analytics-hadoop-util.jar
path = hdfs://localhost:9000/edx-analytics-pipeline/manifest/

[user-activity]
output_root = hdfs://localhost:9000/edx-analytics-pipeline/activity/

[enrollments]
interval_start = 2013-11-01

[enrollment-reports]
src = hdfs://localhost:9000/data/
destination = hdfs://localhost:9000/edx-analytics-pipeline/enrollment_reports/output/
offsets = hdfs://localhost:9000/edx-analytics-pipeline/enrollment_reports/offsets.tsv
blacklist = hdfs://localhost:9000/edx-analytics-pipeline/enrollment_reports/course_blacklist.tsv
history = hdfs://localhost:9000/edx-analytics-pipeline/enrollment_reports/enrollment_history.tsv

[geolocation]
geolocation_data = hdfs://localhost:9000/edx-analytics-pipeline/geo.dat

[calendar]
interval = 2012-01-01-2020-01-01

# Settings that can be used when running inside of a docker container

[hive]
release = apache
database = default
warehouse_path = hdfs://hadoop:9000/edx-analytics-pipeline/warehouse/

[database-export]
database = to_database
credentials = hdfs://hadoop:9000/edx-analytics-pipeline/output/local.json

[database-import]
database = from_database
credentials = hdfs://hadoop:9000/edx-analytics-pipeline/input/credentials.json
destination = hdfs://hadoop:9000/edx-analytics-pipeline/warehouse/

[map-reduce]
engine = hadoop
marker = hdfs://hadoop:9000/edx-analytics-pipeline/marker/

[event-logs]
pattern = .*tracking.log-(?P<date>\d{8}).*\.gz
expand_interval = 2 days
source = hdfs://hadoop:9000/data/

[event-export]
output_root = hdfs://hadoop:9000/edx-analytics-pipeline/event-export/output/
environment = unittest
config = hdfs://hadoop:9000/edx-analytics-pipeline/event_export/config.yaml
gpg_key_dir = hdfs://hadoop:9000/edx-analytics-pipeline/event_export/gpg-keys/
gpg_master_key = master@key.org
required_path_text = FakeServerGroup

[manifest]
threshold = 500
input_format = org.edx.hadoop.input.ManifestTextInputFormat
lib_jar = hdfs://hadoop:9000/edx-analytics-pipeline/packages/edx-analytics-hadoop-util.jar
path = hdfs://hadoop:9000/edx-analytics-pipeline/manifest/

[user-activity]
output_root = hdfs://hadoop:9000/edx-analytics-pipeline/activity/

[enrollments]
blacklist_date = 2014-08-18
blacklist_path = hdfs://hadoop:9000/edx-analytics-pipeline/blacklist/

[enrollment-reports]
src = hdfs://hadoop:9000/edx-analytics-pipeline/data/
destination = hdfs://hadoop:9000/edx-analytics-pipeline/enrollment_reports/output/
offsets = hdfs://hadoop:9000/edx-analytics-pipeline/enrollment_reports/offsets.tsv
blacklist = hdfs://hadoop:9000/edx-analytics-pipeline/enrollment_reports/course_blacklist.tsv
history = hdfs://hadoop:9000/edx-analytics-pipeline/enrollment_reports/enrollment_history.tsv

[geolocation]
geolocation_data = hdfs://hadoop:9000/edx-analytics-pipeline/geo.dat

Jinja2==2.7.3 # BSD
MarkupSafe==0.23 # BSD
PyYAML==3.10 # MIT License
ecdsa==0.11 # MIT
httplib2==0.9 # MIT
paramiko==1.14.0 # LGPL
pycrypto==2.6.1 # public domain
ansible==1.4.5 # GPL v3 License
boto==2.22.1 # MIT

Jinja2==2.7.3 # BSD
MarkupSafe==0.23 # BSD
PyYAML==3.10 # MIT License
ansible==1.4.5 # GPL v3 License

-r base.txt
argparse==1.2.1 # Python Software Foundation License
boto==2.22.1 # MIT
ciso8601==1.0.1 # MIT
ecdsa==0.11 # MIT
filechunkio==1.5 # MIT
html5lib==1.0b3 # MIT
httplib2==0.9 # MIT
isoweek==1.3.0 # BSD
mechanize==0.2.5 # BSD
mysql-connector-python==1.2.2 # GPL v2 with FOSS License Exception
numpy==1.8.0 # BSD
pandas==0.13.0 # BSD
paramiko==1.14.0 # LGPL
pbr==0.5.23 # Apache
pip==1.5.6 # MIT // AN-4322
pycrypto==2.6.1 # public domain
pygeoip==0.3.1 # LGPL
pymongo==2.7.2 # Apache 2.0
python-cjson==1.0.5 # LGPL
......
pip==1.5.6 # MIT // AN-4322
---
 - name: Configure luigi
-  hosts: "mr_{{ name }}_master"
+  hosts: "{{ name }}"
   gather_facts: False
   sudo: True
   vars:
     write_luigi_config: "yes"
   roles:
-    - luigi
+    - role: luigi
+      when: write_luigi_config|bool

 - name: Run a task
-  hosts: "mr_{{ name }}_master"
+  hosts: "{{ name }}"
   gather_facts: False
   vars:
......
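With the host pattern loosened to "{{ name }}", the same plays can drive any ssh-reachable machine named in the inventory. A minimal invocation sketch (the playbook file name, host, and remote user are illustrative, not taken from this commit):

    # Hypothetical run: the trailing comma makes -i a literal one-host inventory,
    # and -e passes the host's name into the "hosts:" pattern above.
    ansible-playbook -i 'analytics.example.com,' -u hadoop \
        -e name=analytics.example.com task.yml

Because the luigi role is now guarded by "when: write_luigi_config|bool", a caller that manages its own luigi configuration can skip it with -e write_luigi_config=no.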