dockerize the analyticstack

parent 21c19b48
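# To build this Dockerfile (a sketch following the pattern of the other
# analytics images in this change; the image tag below is illustrative):
#
# From the root of configuration:
#
#   docker build -f docker/build/analytics_api/Dockerfile -t edxops/analytics_api .
#
# This allows the dockerfile to update /edx/app/edx_ansible/edx_ansible
# with the currently checked-out configuration repo.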
FROM edxops/xenial-common:latest
MAINTAINER edxops
RUN apt-get update
ARG ANALYTICS_API_VERSION=master
ENV PYTHONUNBUFFERED=1
ADD . /edx/app/edx_ansible/edx_ansible
COPY docker/build/analytics_api/ansible_overrides.yml /
WORKDIR /edx/app/edx_ansible/edx_ansible/docker/plays
RUN /edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook analytics_api.yml -i '127.0.0.1,' -c local -t "install:base,install:system-requirements,install:configuration,install:app-requirements,install:code" -e@/ansible_overrides.yml
WORKDIR /edx/app/
CMD ["/edx/app/supervisor/venvs/supervisor/bin/supervisord", "-n", "--configuration", "/edx/app/supervisor/supervisord.conf"]
EXPOSE 443 80
RUN /edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook analytics_api.yml \
-i '127.0.0.1,' -c local \
-t "install:base,install:system-requirements,install:configuration,install:app-requirements,install:code,devstack" \
-e "ANALYTICS_API_VERSION=$ANALYTICS_API_VERSION" \
-e "analytics_api_gunicorn_host=0.0.0.0"
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["/docker-command.sh"]
EXPOSE 8100
../../plays/analytics_api.yml
\ No newline at end of file
---
DOCKER_TLD: "edx"
ANALYTICS_API_DATABASES:
# rw user
default:
ENGINE: 'django.db.backends.mysql'
NAME: '{{ ANALYTICS_API_DEFAULT_DB_NAME }}'
USER: 'api001'
PASSWORD: 'password'
HOST: 'db.{{ DOCKER_TLD }}'
PORT: '3306'
# read-only user
reports:
ENGINE: 'django.db.backends.mysql'
NAME: '{{ ANALYTICS_API_REPORTS_DB_NAME }}'
USER: 'reports001'
PASSWORD: 'password'
HOST: "db.{{ DOCKER_TLD }}"
PORT: '3306'
# To build this Dockerfile:
#
# From the root of configuration:
#
# docker build -f docker/build/analytics-pipeline/Dockerfile .
#
# This allows the dockerfile to update /edx/app/edx_ansible/edx_ansible
# with the currently checked-out configuration repo.
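#
# The ANALYTICS_PIPELINE_VERSION build argument below can be overridden at
# build time; for example (the image tag is illustrative):
#
#   docker build -f docker/build/analytics-pipeline/Dockerfile \
#     --build-arg ANALYTICS_PIPELINE_VERSION=master -t edxops/analytics_pipeline .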
FROM edxops/precise-common:latest
MAINTAINER edxops
ARG ANALYTICS_PIPELINE_VERSION=master
ENV PYTHONUNBUFFERED=1
ADD . /edx/app/edx_ansible/edx_ansible
WORKDIR /edx/app/edx_ansible/edx_ansible/docker/plays
RUN /edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook analytics_pipeline.yml \
-c local -i '127.0.0.1,' \
-e "ANALYTICS_PIPELINE_VERSION=$ANALYTICS_PIPELINE_VERSION"
USER hadoop
WORKDIR /edx/app/analytics_pipeline/analytics_pipeline/
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["/bin/echo", "OK"]
FROM edxops/xenial-common:latest
MAINTAINER edxops
ARG INSIGHTS_VERSION=master
ENV PYTHONUNBUFFERED=1
ADD . /edx/app/edx_ansible/edx_ansible
COPY docker/build/insights/ansible_overrides.yml /
WORKDIR /edx/app/edx_ansible/edx_ansible/docker/plays
RUN /edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook insights.yml \
-i '127.0.0.1,' -c local \
-t "install:base,install:system-requirements,install:configuration,install:app-requirements,install:code" \
-e@/ansible_overrides.yml
CMD ["/edx/app/supervisor/venvs/supervisor/bin/supervisord", "-n", "--configuration", "/edx/app/supervisor/supervisord.conf"]
EXPOSE 8110 18110
-t "install:base,install:system-requirements,install:configuration,install:app-requirements,install:code,devstack:install" \
-e "INSIGHTS_VERSION=$INSIGHTS_VERSION" \
-e "insights_gunicorn_host=0.0.0.0"
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["/docker-command.sh"]
EXPOSE 8110
---
DOCKER_TLD: "edx"
INSIGHTS_LMS_BASE: "http://lms.{{ DOCKER_TLD }}:8000"
INSIGHTS_CMS_BASE: "http://cms.{{ DOCKER_TLD }}:8010"
INSIGHTS_BASE_URL: "http://insights.{{ DOCKER_TLD }}:8110"
INSIGHTS_MEMCACHE:
- "memcache.{{ DOCKER_TLD }}:11211"
ANALYTICS_API_ENDPOINT: "http://analyticsapi.{{ DOCKER_TLD }}:8100/api/v0"
INSIGHTS_DATABASES:
# rw user
default:
ENGINE: 'django.db.backends.mysql'
NAME: '{{ INSIGHTS_DATABASE_NAME }}'
USER: 'rosencrantz'
PASSWORD: 'secret'
HOST: "db.{{ DOCKER_TLD }}"
PORT: '3306'
# To build this Dockerfile:
#
# From the root of configuration:
#
# docker build -f docker/build/luigid/Dockerfile .
#
# This allows the dockerfile to update /edx/app/edx_ansible/edx_ansible
# with the currently checked-out configuration repo.
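#
# Once built, the central scheduler can be run with its web UI exposed, for
# example (the image name is illustrative):
#
#   docker run -p 9100:9100 edxops/luigid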
FROM edxops/xenial-common:latest
MAINTAINER edxops
ENV REPO_OWNER=edx
ADD . /edx/app/edx_ansible/edx_ansible
WORKDIR /edx/app/edx_ansible/edx_ansible/docker/plays
RUN sudo /edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook luigid.yml \
-c local -i '127.0.0.1,' \
-t 'install' \
--extra-vars="COMMON_GIT_PATH=$REPO_OWNER"
USER luigid
EXPOSE 9100
CMD ["/edx/app/luigid/venvs/luigid/bin/luigid", "--address=0.0.0.0", "--port=9100", "--state-path=/var/tmp/luigi-state.pickle"]
......@@ -5,7 +5,67 @@
vars:
serial_count: 1
serial: "{{ serial_count }}"
pre_tasks:
- name: rsyslog started
service:
name: rsyslog
state: started
tags:
- devstack:start
- name: databases ready
wait_for:
host: "{{ item.HOST }}"
port: "{{ item.PORT }}"
delay: 2
with_items: "{{ ANALYTICS_API_DATABASES.values() }}"
tags:
- devstack:start
- name: databases created
mysql_db:
login_host: "{{ item.HOST }}"
login_port: "{{ item.PORT }}"
login_user: "{{ COMMON_MYSQL_MIGRATE_USER }}"
login_password: "{{ COMMON_MYSQL_MIGRATE_PASS }}"
db: "{{ item.NAME }}"
state: present
encoding: utf8
with_items: "{{ ANALYTICS_API_DATABASES.values() }}"
tags:
- devstack:start
- name: default django user created
mysql_user:
login_host: "{{ item.HOST }}"
login_port: "{{ item.PORT }}"
login_user: "{{ COMMON_MYSQL_MIGRATE_USER }}"
login_password: "{{ COMMON_MYSQL_MIGRATE_PASS }}"
name: "{{ item.USER }}"
password: "{{ item.PASSWORD }}"
priv: '{{ item.NAME }}.*:ALL/{{ ANALYTICS_API_DATABASES.reports.NAME }}.*:SELECT'
host: '%'
with_items:
- "{{ ANALYTICS_API_DATABASES.default }}"
tags:
- devstack:start
- name: read-only reporting user created
mysql_user:
login_host: "{{ item.HOST }}"
login_port: "{{ item.PORT }}"
login_user: "{{ COMMON_MYSQL_MIGRATE_USER }}"
login_password: "{{ COMMON_MYSQL_MIGRATE_PASS }}"
name: "{{ item.USER }}"
name: "{{ item.USER }}"
password: "{{ item.PASSWORD }}"
priv: '{{ item.NAME }}.*:SELECT'
host: '%'
with_items:
- "{{ ANALYTICS_API_DATABASES.reports }}"
tags:
- devstack:start
roles:
- common_vars
- docker
- analytics_api
- analytics_api
\ No newline at end of file
- name: Deploy dependencies needed to run edx-analytics-pipeline
hosts: all
sudo: True
gather_facts: True
vars:
serial_count: 1
serial: "{{ serial_count }}"
roles:
- analytics_pipeline
tasks:
- name: store database credentials for analytics pipeline
copy:
content: "{{ item.value | to_json }}"
dest: "{{ analytics_pipeline_config_dir }}/{{ item.key }}.json"
mode: "0644"
owner: "{{ analytics_pipeline_user }}"
group: "{{ analytics_pipeline_user }}"
with_dict: ANALYTICS_PIPELINE_DATABASES
tags:
- devstack:start
- name: use the YARN map-reduce execution framework
hadoop_configuration:
property: mapreduce.framework.name
value: yarn
path: "{{ HADOOP_COMMON_CONF_DIR }}/mapred-site.xml"
tags:
- devstack:start
- name: set the resource manager hostname
hadoop_configuration:
property: yarn.resourcemanager.hostname
value: "{{ HADOOP_COMMON_RESOURCE_MANAGER_HOST }}"
path: "{{ HADOOP_COMMON_CONF_DIR }}/yarn-site.xml"
tags:
- devstack:start
- name: set the default distributed filesystem
hadoop_configuration:
property: fs.defaultFS
value: "{{ HADOOP_DEFAULT_FS }}"
path: "{{ HADOOP_COMMON_CONF_DIR }}/core-site.xml"
tags:
- devstack:start
# - name: refresh registered entrypoints
# shell: ". {{ analytics_pipeline_venv_dir }}/bin/activate && make develop-local"
# args:
# chdir: "{{ analytics_pipeline_code_dir }}"
# environment: analytics_pipeline_install_env
# ignore_errors: yes
# tags:
# - devstack:start
......@@ -5,6 +5,96 @@
vars:
serial_count: 1
serial: "{{ serial_count }}"
pre_tasks:
- name: rsyslog started
service:
name: rsyslog
state: started
tags:
- devstack:start
- name: databases ready
wait_for:
host: "{{ item.HOST }}"
port: "{{ item.PORT }}"
delay: 2
with_items: "{{ INSIGHTS_DATABASES.values() }}"
tags:
- devstack:start
- name: databases created
mysql_db:
login_host: "{{ item.HOST }}"
login_port: "{{ item.PORT }}"
login_user: "{{ COMMON_MYSQL_MIGRATE_USER }}"
login_password: "{{ COMMON_MYSQL_MIGRATE_PASS }}"
db: "{{ item.NAME }}"
state: present
encoding: utf8
with_items: "{{ INSIGHTS_DATABASES.values() }}"
tags:
- devstack:start
- name: default django user created
mysql_user:
login_host: "{{ item.HOST }}"
login_port: "{{ item.PORT }}"
login_user: "{{ COMMON_MYSQL_MIGRATE_USER }}"
login_password: "{{ COMMON_MYSQL_MIGRATE_PASS }}"
name: "{{ item.USER }}"
password: "{{ item.PASSWORD }}"
priv: '{{ item.NAME }}.*:ALL'
host: '%'
with_items:
- "{{ INSIGHTS_DATABASES.default }}"
tags:
- devstack:start
roles:
- docker
- insights
tasks:
# We need to allow the www-data user to be able to write dynamically compiled
# static files to the common data dir.
- name: insights can write to the data dir
file:
path: "{{ COMMON_DATA_DIR }}/{{ insights_service_name }}"
state: directory
mode: "0775"
tags:
- devstack:start
- name: set configuration vars
lineinfile:
dest: "{{ COMMON_CFG_DIR }}/{{ insights_service_name }}.yml"
regexp: '^{{ item.key }}:'
line: '{{ item.key }}: {{ item.value }}'
with_items:
- key: DEBUG
value: "true"
- key: ENABLE_INSECURE_STATIC_FILES
value: "true"
tags:
- devstack:start
# Since the docker container mounts the user's working directory as a volume, it likely doesn't contain the
# "node_modules" directory that is needed to run bower
- name: install node dependencies
npm:
executable: "{{ insights_nodeenv_bin }}/npm"
path: "{{ insights_code_dir }}"
production: yes
environment: "{{ insights_environment }}"
tags:
- devstack:start
# Similarly we have to pull the bower deps into the user's working directory. Note that we have to do this as root
# since permissions get all wonky when you mount a host directory into a docker container.
- name: install bower dependencies
shell: ". {{ insights_venv_dir }}/bin/activate && . {{ insights_nodeenv_bin }}/activate && {{ insights_node_bin }}/bower install --production --config.interactive=false --allow-root"
args:
chdir: "{{ insights_code_dir }}"
tags:
- devstack:start
- name: Deploy Insights
- name: Deploy Luigi Central Scheduler
hosts: all
sudo: True
gather_facts: True
......@@ -6,6 +6,4 @@
serial_count: 1
serial: "{{ serial_count }}"
roles:
- common_vars
- docker
- insights
- luigid
\ No newline at end of file
- name: Deploy all analytics services to a single node
hosts: all
become: True
gather_facts: True
vars:
migrate_db: "yes"
disable_edx_services: false
ENABLE_DATADOG: False
ENABLE_SPLUNKFORWARDER: False
ENABLE_NEWRELIC: False
roles:
- aws
- mysql
- edxlocal
- memcache
- analytics_api
- analytics_pipeline
- role: nginx
nginx_sites:
- insights
- insights
#!/usr/bin/python
# -*- coding: utf-8 -*-
DOCUMENTATION = '''
---
module: hadoop_configuration
short_description: Manages settings in hadoop configuration files
description:
- Manages property settings in Hadoop XML configuration files (e.g. core-site.xml, yarn-site.xml).
- The property is added if it is missing, or updated in place if its value differs.
options:
property:
description:
- The property to set the value of.
required: true
value:
description:
- The value for the property.
required: true
path:
description:
- The path to the configuration file to modify. The file is created if it doesn't already exist.
required: true
'''
EXAMPLES = '''
- name: set default FS
hadoop_configuration: property=fs.defaultFS value=hdfs://namenode:8020/ path=/etc/hadoop/core-site.xml
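# The same module call in YAML block syntax (equivalent arguments, shown for
# illustration):
- name: set default FS (block form)
  hadoop_configuration:
    property: fs.defaultFS
    value: hdfs://namenode:8020/
    path: /etc/hadoop/core-site.xml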
'''
import xml.etree.ElementTree as et
def main():
module = AnsibleModule(
argument_spec = dict(
property = dict(required=True),
value = dict(required=True),
path = dict(required=True),
)
)
params = module.params
property_name = params['property']
value = params['value']
path = params['path']
try:
tree = et.parse(path)
root = tree.getroot()
except (IOError, et.ParseError):
# The file is missing or not valid XML; start a fresh configuration document.
root = et.Element('configuration')
tree = et.ElementTree(root)
found_property = False
for property_element in root.findall('property'):
name_element = property_element.find('name')
value_element = property_element.find('value')
if name_element is not None and property_name == name_element.text:
found_property = True
if value_element is not None:
if value_element.text == value:
module.exit_json(changed=False)
else:
value_element.text = value
break
if not found_property:
property_element = et.SubElement(root, 'property')
name_element = et.SubElement(property_element, 'name')
name_element.text = property_name
value_element = et.SubElement(property_element, 'value')
value_element.text = value
tree.write(path, xml_declaration=True, encoding='utf-8')
module.exit_json(changed=True)
# import module snippets
from ansible.module_utils.basic import *
main()
......@@ -43,7 +43,7 @@ ANALYTICS_API_DATABASES:
ANALYTICS_API_VERSION: "master"
# Default dummy user, override this!!
ANALYTICS_API_USERS:
"dummy-api-user": "changeme"
"dummy-api-user": "{{ ANALYTICS_API_AUTH_TOKEN }}"
ANALYTICS_API_SECRET_KEY: 'Your secret key here'
ANALYTICS_API_TIME_ZONE: 'UTC'
......@@ -52,7 +52,7 @@ ANALYTICS_API_EMAIL_HOST: 'localhost'
ANALYTICS_API_EMAIL_HOST_USER: 'mail_user'
ANALYTICS_API_EMAIL_HOST_PASSWORD: 'mail_password'
ANALYTICS_API_EMAIL_PORT: 587
ANALYTICS_API_AUTH_TOKEN: 'put-your-api-token-here'
ANALYTICS_API_AUTH_TOKEN: 'changeme'
ANALYTICS_API_ELASTICSEARCH_LEARNERS_HOST: 'localhost'
......@@ -148,6 +148,7 @@ analytics_api_environment:
analytics_api_service_name: "analytics_api"
analytics_api_user: "{{ analytics_api_service_name }}"
analytics_api_home: "{{ COMMON_APP_DIR }}/{{ analytics_api_service_name }}"
analytics_api_venv_dir: "{{ analytics_api_home }}/venvs/{{ analytics_api_service_name }}"
analytics_api_code_dir: "{{ analytics_api_home }}/{{ analytics_api_service_name }}"
analytics_api_conf_dir: "{{ analytics_api_home }}"
......@@ -156,6 +157,7 @@ analytics_api_gunicorn_port: "8100"
analytics_api_gunicorn_timeout: "300"
analytics_api_django_settings: "production"
analytics_api_manage: "{{ analytics_api_code_dir }}/manage.py"
analytics_api_log_dir: "{{ COMMON_LOG_DIR }}/{{ analytics_api_service_name }}"
......
......@@ -73,6 +73,7 @@
tags:
- migrate
- migrate:db
- devstack:start
- name: run collectstatic
shell: "{{ analytics_api_home }}/venvs/{{ analytics_api_service_name }}/bin/python manage.py collectstatic --noinput"
......@@ -94,6 +95,7 @@
tags:
- manage
- manage:app-users
- devstack:start
- name: write out the supervisor wrapper
template:
......@@ -169,3 +171,15 @@
become_user: "{{ supervisor_service_user }}"
tags:
- manage:start
- name: write docker scripts
template:
src: "edx/app/analytics_api/{{ item }}.j2"
dest: "/{{ item }}"
mode: 0744
with_items:
- "docker-entrypoint.sh"
- "docker-command.sh"
tags:
- devstack
- devstack:install
#!/usr/bin/env bash
# {{ ansible_managed }}
until [ -f {{ COMMON_LOG_DIR }}/analytics-api/edx.log ]; do
sleep 1
done
exec tail -f {{ COMMON_LOG_DIR }}/analytics-api/edx.log
\ No newline at end of file
#!/usr/bin/env bash
# {{ ansible_managed }}
set -e
cat >/ansible_overrides.yml <<EOL
---
migrate_db: 'yes'
DEVSTACK_HOST: ${DEVSTACK_HOST:-open.edx}
ANALYTICS_API_ELASTICSEARCH_LEARNERS_HOST: ${ANALYTICS_API_ELASTICSEARCH_LEARNERS_HOST:-fulltextindex}
COMMON_MYSQL_MIGRATE_USER: "${COMMON_MYSQL_MIGRATE_USER:-root}"
COMMON_MYSQL_MIGRATE_PASS: "${COMMON_MYSQL_MIGRATE_PASS:-}"
ANALYTICS_API_DATABASES:
# rw user
default:
ENGINE: 'django.db.backends.mysql'
NAME: 'analytics-api'
USER: 'api001'
PASSWORD: 'password'
HOST: '${ANALYTICS_API_DATABASE_HOST:-resultstore}'
PORT: '${ANALYTICS_API_DATABASE_PORT:-3306}'
# read-only user
reports:
ENGINE: 'django.db.backends.mysql'
NAME: 'reports'
USER: 'reports001'
PASSWORD: 'password'
HOST: '${ANALYTICS_API_DATABASE_HOST:-resultstore}'
PORT: '${ANALYTICS_API_DATABASE_PORT:-3306}'
EOL
echo "Initializing the runtime environment"
/edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook analytics_api.yml \
-i '127.0.0.1,' -c local \
-t "install:app-configuration,devstack:start" \
--extra-vars="@/ansible_overrides.yml"
/edx/app/supervisor/venvs/supervisor/bin/supervisord --configuration /edx/app/supervisor/supervisord.conf
exec "$@"
\ No newline at end of file
......@@ -10,27 +10,26 @@
##
# Defaults for role analytics_pipeline
#
ANALYTICS_PIPELINE_GIT_IDENTITY: !!null
ANALYTICS_PIPELINE_VERSION: master
ANALYTICS_PIPELINE_OUTPUT_DATABASE_USER: pipeline001
ANALYTICS_PIPELINE_OUTPUT_DATABASE_PASSWORD: password
ANALYTICS_PIPELINE_OUTPUT_DATABASE_HOST: localhost
ANALYTICS_PIPELINE_OUTPUT_DATABASE_PORT: 3306
ANALYTICS_PIPELINE_WHEEL_URL: http://edx-wheelhouse.s3-website-us-east-1.amazonaws.com/Ubuntu/precise
ANALYTICS_PIPELINE_PYTHON_VERSION: 2.7
ANALYTICS_PIPELINE_PYTHON: "/usr/bin/python{{ ANALYTICS_PIPELINE_PYTHON_VERSION }}"
ANALYTICS_PIPELINE_OUTPUT_DATABASE_NAME: "{{ ANALYTICS_API_REPORTS_DB_NAME }}"
ANALYTICS_PIPELINE_OUTPUT_DATABASE:
username: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE_USER }}"
password: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE_PASSWORD }}"
host: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE_HOST }}"
port: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE_PORT }}"
ANALYTICS_PIPELINE_REPOS:
- PROTOCOL: "{{ COMMON_GIT_PROTOCOL }}"
DOMAIN: "{{ COMMON_GIT_MIRROR }}"
PATH: "{{ COMMON_GIT_PATH }}"
REPO: edx-analytics-pipeline.git
VERSION: "{{ ANALYTICS_PIPELINE_VERSION }}"
DESTINATION: "{{ analytics_pipeline_code_dir }}"
SSH_KEY: "{{ ANALYTICS_PIPELINE_GIT_IDENTITY }}"
ANALYTICS_PIPELINE_INPUT_DATABASE:
username: "{{ COMMON_MYSQL_READ_ONLY_USER }}"
password: "{{ COMMON_MYSQL_READ_ONLY_PASS }}"
host: localhost
port: 3306
ANALYTICS_PIPELINE_CONFIG_DIR: "{{ COMMON_CFG_DIR }}/edx-analytics-pipeline"
ANALYTICS_PIPELINE_HDFS_DATA_DIR: "hdfs://localhost:9000/data"
ANALYTICS_PIPELINE_SERVICE_CONFIG: {}
ANALYTICS_PIPELINE_DATABASES: {}
HADOOP_COMMON_RESOURCE_MANAGER_HOST: resourcemanager
HADOOP_DEFAULT_FS: hdfs://namenode:8020
ANALYTICS_PIPELINE_LUIGI_HADOOP_VERSION: cdh4
ANALYTICS_PIPELINE_LUIGI_HADOOP_COMMAND: "{{ HADOOP_COMMON_HOME }}/bin/hadoop"
......@@ -46,6 +45,19 @@ analytics_pipeline_util_library:
repo: https://github.com/edx/edx-analytics-hadoop-util
version: master
analytics_pipeline_service_name: analytics_pipeline
analytics_pipeline_venv_dir: "{{ analytics_pipeline_home }}/venvs/{{ analytics_pipeline_service_name }}"
analytics_pipeline_user: "{{ analytics_pipeline_service_name }}"
analytics_pipeline_home: "{{ COMMON_APP_DIR }}/{{ analytics_pipeline_service_name }}"
analytics_pipeline_code_dir: "{{ analytics_pipeline_home }}/{{ analytics_pipeline_service_name }}"
analytics_pipeline_log_dir: "{{ COMMON_LOG_DIR }}/{{ analytics_pipeline_service_name }}"
analytics_pipeline_config_dir: "{{ COMMON_CFG_DIR }}/{{ analytics_pipeline_service_name }}"
analytics_pipeline_install_env:
WHEEL_URL: "{{ ANALYTICS_PIPELINE_WHEEL_URL }}"
WHEEL_PYVER: "{{ ANALYTICS_PIPELINE_PYTHON_VERSION }}"
#
# OS packages
#
......
{
"connection_user": "hadoop",
"credentials_file_url": "/edx/etc/edx-analytics-pipeline/output.json",
"credentials_file_url": "/edx/etc/analytics_pipeline/output.json",
"exporter_output_bucket": "",
"geolocation_data": "/var/tmp/geolocation-data.mmdb",
"hive_user": "hadoop",
"host": "localhost",
"identifier": "local-devstack",
"manifest_input_format": "org.edx.hadoop.input.ManifestTextInputFormat",
"oddjob_jar": "hdfs://localhost:9000/edx-analytics-pipeline/packages/edx-analytics-hadoop-util.jar",
"oddjob_jar": "hdfs:///edx-analytics-pipeline/packages/edx-analytics-hadoop-util.jar",
"tasks_branch": "origin/HEAD",
"tasks_log_path": "/tmp/acceptance/",
"tasks_output_url": "hdfs://localhost:9000/acceptance-test-output/",
"tasks_output_url": "hdfs:///acceptance-test-output/",
"tasks_repo": "/edx/app/analytics_pipeline/analytics_pipeline",
"vertica_creds_url": "",
"wheel_url": "https://edx-wheelhouse.s3-website-us-east-1.amazonaws.com/Ubuntu/precise"
......
......@@ -12,6 +12,15 @@
dependencies:
- common
- hadoop_master
- hadoop_common
- hive
- sqoop
- role: edx_service
edx_service_name: "{{ analytics_pipeline_service_name }}"
edx_service_config: "{{ ANALYTICS_PIPELINE_SERVICE_CONFIG }}"
edx_service_repos: "{{ ANALYTICS_PIPELINE_REPOS }}"
edx_service_user: "{{ analytics_pipeline_user }}"
edx_service_home: "{{ analytics_pipeline_home }}"
edx_service_packages:
debian: "{{ analytics_pipeline_debian_pkgs }}"
redhat: "{{ analytics_pipeline_redhat_pkgs }}"
......@@ -10,9 +10,9 @@
#
#
# Tasks for role analytics_pipeline
#
#
# Overview:
#
#
# Prepare the machine to run the edX Analytics Data Pipeline. The pipeline currently "installs itself"
# via an ansible playbook that is not included in the edx/configuration repo. However, in order to
# run the pipeline in a devstack environment, some configuration needs to be performed. In a production
......@@ -24,12 +24,12 @@
# hadoop_master: ensures hadoop services are installed
# hive: the pipeline makes extensive use of hive, so that needs to be installed as well
# sqoop: similarly to hive, the pipeline uses this tool extensively
#
#
# Example play:
#
# - name: Deploy all dependencies of edx-analytics-pipeline to the node
# hosts: all
# become: True
# sudo: True
# gather_facts: True
# roles:
# - analytics_pipeline
......@@ -37,38 +37,91 @@
# ansible-playbook -i 'localhost,' ./analytics_pipeline.yml -e@/ansible/vars/deployment.yml -e@/ansible/vars/env-deployment.yml
#
- name: Create config directory
- name: ensure system packages are installed
command: make system-requirements
args:
chdir: "{{ analytics_pipeline_code_dir }}"
sudo: True
tags:
- install
- install:system-requirements
- name: virtualenv installed
pip:
name: virtualenv
version: 1.10.1
tags:
- install
- install:system-requirements
- name: build virtualenv
command: "virtualenv --python={{ ANALYTICS_PIPELINE_PYTHON }} {{ analytics_pipeline_venv_dir }}"
args:
creates: "{{ analytics_pipeline_venv_dir }}/bin/pip"
become_user: "{{ analytics_pipeline_user }}"
tags:
- install
- install:system-requirements
- name: pip upgraded
shell: ". {{ analytics_pipeline_venv_dir }}/bin/activate && pip install -U pip"
args:
chdir: "{{ analytics_pipeline_code_dir }}"
become_user: "{{ analytics_pipeline_user }}"
environment: analytics_pipeline_install_env
tags:
- install
- install:app-requirements
- name: installed
shell: ". {{ analytics_pipeline_venv_dir }}/bin/activate && make install"
args:
chdir: "{{ analytics_pipeline_code_dir }}"
become_user: "{{ analytics_pipeline_user }}"
environment: analytics_pipeline_install_env
tags:
- install
- install:app-requirements
- name: converted to development mode
shell: ". {{ analytics_pipeline_venv_dir }}/bin/activate && make uninstall && make develop && make test-requirements"
args:
chdir: "{{ analytics_pipeline_code_dir }}"
become_user: "{{ analytics_pipeline_user }}"
environment: analytics_pipeline_install_env
tags:
- devstack
- devstack:install
- name: create log directory
file:
path: "{{ ANALYTICS_PIPELINE_CONFIG_DIR }}"
path: "{{ analytics_pipeline_log_dir }}"
mode: "0777"
owner: "{{ analytics_pipeline_user }}"
group: "{{ analytics_pipeline_user }}"
state: directory
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
mode: "0755"
tags:
- install
- install:configuration
- name: Store output database credentials for analytics pipeline
copy:
content: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE | to_json }}"
dest: "{{ COMMON_CFG_DIR }}/edx-analytics-pipeline/output.json"
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
mode: "0644"
- name: logging configured
template:
src: logging.cfg.j2
dest: "{{ analytics_pipeline_code_dir }}/logging.cfg"
tags:
- install
- install:configuration
- name: Store input database credentials for analytics pipeline
copy:
content: "{{ ANALYTICS_PIPELINE_INPUT_DATABASE | to_json }}"
dest: "{{ COMMON_CFG_DIR }}/edx-analytics-pipeline/input.json"
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
mode: "0644"
- name: create config directory
file:
path: "{{ analytics_pipeline_config_dir }}"
mode: "0755"
owner: "{{ analytics_pipeline_user }}"
group: "{{ analytics_pipeline_user }}"
state: directory
tags:
- install
- install:configuration
- devstack
- devstack:install
- name: luigi configuration directory created
file:
......@@ -88,14 +141,14 @@
- install
- install:configuration
- name: Util library source checked out
- name: util library source checked out
git:
repo: "{{ analytics_pipeline_util_library.repo }}"
dest: "{{ analytics_pipeline_util_library.path }}"
repo: "{{ analytics_pipeline_util_library.repo }}"
version: "{{ analytics_pipeline_util_library.version }}"
tags:
- install
- install:code
- devstack
- devstack:install
- name: lib directory created
file:
......@@ -104,18 +157,18 @@
group: "{{ hadoop_common_group }}"
state: directory
tags:
- install
- install:app-requirements
- devstack
- devstack:install
- name: Check if the util library needs to be built
- name: check if the util library needs to be built
stat:
path: "{{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar"
register: util_lib_built
tags:
- install
- install:app-requirements
- devstack
- devstack:install
- name: Util library built
- name: util library built
shell: >
{{ hadoop_common_java_home }}/bin/javac -cp `{{ HADOOP_COMMON_HOME }}/bin/hadoop classpath` org/edx/hadoop/input/ManifestTextInputFormat.java &&
{{ hadoop_common_java_home }}/bin/jar cf {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar org/edx/hadoop/input/ManifestTextInputFormat.class &&
......@@ -124,63 +177,27 @@
chdir: "{{ analytics_pipeline_util_library.path }}"
when: not util_lib_built.stat.exists
tags:
- install
- install:app-requirements
- name: Ensure hdfs services are started
service:
name: hdfs
state: started
tags:
- manage
- manage:start
- name: Ensure map reduce services are started
service:
name: yarn
state: started
tags:
- manage
- manage:start
- name: Ensure package dir exists in HDFS
shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/"
become_user: "{{ hadoop_common_user }}"
tags:
- install
- install:app-requirements
- name: Ensure util library is in HDFS
shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -put -f {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/"
become_user: "{{ hadoop_common_user }}"
tags:
- install
- install:app-requirements
- name: Ensure the data directory exists
shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}"
become_user: "{{ hadoop_common_user }}"
tags:
- install
- install:base
- devstack
- devstack:install
- name: Ensure tracking log file can be read
file:
path: "{{ COMMON_LOG_DIR }}/tracking/tracking.log"
mode: "0644"
ignore_errors: yes
- name: env vars sourced in hadoop env
lineinfile:
dest: "{{ hadoop_common_env }}"
regexp: "^. {{ analytics_pipeline_venv_dir }}/bin/activate"
line: ". {{ analytics_pipeline_venv_dir }}/bin/activate"
state: present
tags:
- install
- install:configuration
- devstack
- devstack:install
- name: Cron job syncs tracking log file to hdfs
cron:
user: "{{ hadoop_common_user }}"
name: "Sync tracking log to HDFS"
job: "{{ HADOOP_COMMON_HOME }}/bin/hdfs dfs -put -f {{ COMMON_LOG_DIR }}/tracking/tracking.log {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}/tracking.log"
- name: write devstack script
template:
src: "edx/app/analytics_pipeline/docker-entrypoint.sh.j2"
dest: "/docker-entrypoint.sh"
mode: "0777"
tags:
- install
- install:configuration
- devstack
- devstack:install
- name: store configuration for acceptance tests
copy:
......@@ -188,15 +205,5 @@
dest: /var/tmp/acceptance.json
mode: "0644"
tags:
- install
- install:configuration
- name: Grant access to table storing test data in output database
mysql_user:
user: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.username }}"
password: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.password }}"
priv: 'acceptance%.*:ALL'
append_privs: yes
tags:
- install
- install:configuration
- devstack
- devstack:install
#!/usr/bin/env bash
# {{ ansible_managed }}
set -e
export HOST_IP=$(ip route show 0.0.0.0/0 | grep -Eo 'via \S+' | awk '{ print $2 }' | head -n 1)
INPUT_DATABASE_HOST=${INPUT_DATABASE_HOST:-$HOST_IP}
COMMON_MYSQL_MIGRATE_USER="${COMMON_MYSQL_MIGRATE_USER:-root}"
COMMON_MYSQL_MIGRATE_PASS="${COMMON_MYSQL_MIGRATE_PASS:-}"
HADOOP_USER_HOME={{ HADOOP_COMMON_USER_HOME }}
HADOOP_CONF_DIR={{ HADOOP_COMMON_CONF_DIR }}
export HADOOP_HOME={{ HADOOP_COMMON_HOME }}
# wait for HDFS to startup and get out of safe mode
echo -n "Waiting for HDFS... "
until sudo /bin/bash -c ". $HADOOP_USER_HOME/.bashrc && $HADOOP_HOME/bin/hdfs dfs -touchz /.devstack_init 2>/dev/null"
do
sleep 1
done
echo "done"
if ! sudo /bin/bash -c ". $HADOOP_USER_HOME/.bashrc && $HADOOP_HOME/bin/hdfs dfs -cat /.devstack_ready 2>/dev/null"
then
echo -n "Initializing HDFS files and directories... "
sudo /bin/bash -c ". $HADOOP_USER_HOME/.bashrc && $HADOOP_HOME/bin/hdfs dfs -mkdir -p /tmp/hadoop-root /data /user/hadoop /user/hive /edx-analytics-pipeline"
sudo /bin/bash -c ". $HADOOP_USER_HOME/.bashrc && $HADOOP_HOME/bin/hdfs dfs -chmod 777 /data /tmp"
sudo /bin/bash -c ". $HADOOP_USER_HOME/.bashrc && $HADOOP_HOME/bin/hdfs dfs -chown {{ hadoop_common_user }} /user/hadoop /user/hive /edx-analytics-pipeline /tmp/hadoop-root"
$HADOOP_HOME/bin/hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/
$HADOOP_HOME/bin/hdfs dfs -put -f $HADOOP_USER_HOME/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/
echo "done"
sudo /bin/bash -c ". $HADOOP_USER_HOME/.bashrc && $HADOOP_HOME/bin/hdfs dfs -touchz /.devstack_ready"
fi
cat >/tmp/ansible_overrides.yml <<EOL
---
migrate_db: yes
DEVSTACK_HOST: ${DEVSTACK_HOST:-open.edx}
HADOOP_DEFAULT_FS: ${HADOOP_DEFAULT_FS:-hdfs://namenode:8020}
HADOOP_COMMON_RESOURCE_MANAGER_HOST: ${HADOOP_COMMON_RESOURCE_MANAGER_HOST:-resourcemanager}
ANALYTICS_PIPELINE_DATABASES:
input:
username: '${COMMON_MYSQL_MIGRATE_USER}'
password: '${COMMON_MYSQL_MIGRATE_PASS}'
host: ${INPUT_DATABASE_HOST}
port: ${INPUT_DATABASE_PORT:-3307}
output:
username: '${COMMON_MYSQL_MIGRATE_USER}'
password: '${COMMON_MYSQL_MIGRATE_PASS}'
host: ${OUTPUT_DATABASE_HOST:-resultstore}
port: ${OUTPUT_DATABASE_PORT:-3306}
warehouse:
username: '${WAREHOUSE_USER:-dbadmin}'
password: '${WAREHOUSE_PASS:-}'
host: '${WAREHOUSE_HOST:-vertica}'
port: '${WAREHOUSE_PORT:-5433}'
EOL
echo "Initializing the runtime environment"
pushd /edx/app/edx_ansible/edx_ansible/docker/plays/
/edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook analytics_pipeline.yml \
-i '127.0.0.1,' -c local \
-t "devstack:start" \
--extra-vars="@/tmp/ansible_overrides.yml"
popd
{{ HIVE_HOME }}/bin/hive -e "CREATE DATABASE IF NOT EXISTS docker"
exec "$@"
#
# Define logging for use with analytics tasks.
#
[loggers]
keys=root,edx_analytics,luigi_interface
[handlers]
keys=stdoutHandler,localHandler
[formatters]
keys=standard
[logger_root]
level=INFO
handlers=localHandler
[logger_edx_analytics]
# Errors from edx.analytics get routed to stdout.
level=DEBUG
handlers=stdoutHandler,localHandler
qualname=edx.analytics
propagate=0
[logger_luigi_interface]
# Errors from luigi-interface get routed to stdout.
level=DEBUG
handlers=stdoutHandler,localHandler
qualname=luigi-interface
propagate=0
[handler_stdoutHandler]
# Define as in luigi/interface.py.
class=StreamHandler
level=INFO
formatter=standard
args=(sys.stdout,)
[handler_localHandler]
# Define as in edx-platform/common/lib/logsettings.py (for dev logging, not syslog).
class=logging.handlers.RotatingFileHandler
formatter=standard
args=('{{ analytics_pipeline_log_dir }}/{{ analytics_pipeline_service_name }}.log', 'w')
[formatter_standard]
# Define as in edx-platform/common/lib/logsettings.py (for dev logging, not syslog).
format=%(asctime)s %(levelname)s %(process)d [%(name)s] %(filename)s:%(lineno)d - %(message)s
......@@ -5,14 +5,11 @@ edxlocal_debian_pkgs:
edxlocal_databases:
- "{{ ECOMMERCE_DEFAULT_DB_NAME | default(None) }}"
- "{{ INSIGHTS_DATABASE_NAME | default(None) }}"
- "{{ XQUEUE_MYSQL_DB_NAME | default(None) }}"
- "{{ EDXAPP_MYSQL_DB_NAME | default(None) }}"
- "{{ EDXAPP_MYSQL_CSMH_DB_NAME | default(None) }}"
- "{{ EDX_NOTES_API_MYSQL_DB_NAME | default(None) }}"
- "{{ PROGRAMS_DEFAULT_DB_NAME | default(None) }}"
- "{{ ANALYTICS_API_DEFAULT_DB_NAME | default(None) }}"
- "{{ ANALYTICS_API_REPORTS_DB_NAME | default(None) }}"
- "{{ CREDENTIALS_DEFAULT_DB_NAME | default(None) }}"
- "{{ DISCOVERY_DEFAULT_DB_NAME | default(None) }}"
......@@ -23,11 +20,6 @@ edxlocal_database_users:
pass: "{{ ECOMMERCE_DATABASE_PASSWORD | default(None) }}"
}
- {
db: "{{ INSIGHTS_DATABASE_NAME | default(None) }}",
user: "{{ INSIGHTS_MYSQL_USER | default(None) }}",
pass: "{{ INSIGHTS_MYSQL_USER | default(None) }}"
}
- {
db: "{{ XQUEUE_MYSQL_DB_NAME | default(None) }}",
user: "{{ XQUEUE_MYSQL_USER | default(None) }}",
pass: "{{ XQUEUE_MYSQL_PASSWORD | default(None) }}"
......@@ -48,16 +40,6 @@ edxlocal_database_users:
pass: "{{ PROGRAMS_DATABASE_PASSWORD | default(None) }}"
}
- {
db: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE_NAME | default(None) }}",
user: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE_USER | default(None) }}",
pass: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE_PASSWORD | default(None) }}"
}
- {
db: "{{ HIVE_METASTORE_DATABASE_NAME | default(None) }}",
user: "{{ HIVE_METASTORE_DATABASE_USER | default(None) }}",
pass: "{{ HIVE_METASTORE_DATABASE_PASSWORD | default(None) }}"
}
- {
db: "{{ CREDENTIALS_DEFAULT_DB_NAME | default(None) }}",
user: "{{ CREDENTIALS_MYSQL_USER | default(None) }}",
pass: "{{ CREDENTIALS_MYSQL_PASSWORD | default(None) }}"
......
......@@ -36,27 +36,6 @@
when: item != None and item != ''
with_items: "{{ edxlocal_databases }}"
- name: create api user for the analytics api
mysql_user:
name: "api001"
password: "{{ ANALYTICS_API_DATABASES.default.PASSWORD }}"
priv: '{{ ANALYTICS_API_DATABASES.default.NAME }}.*:ALL/reports.*:SELECT'
when: ANALYTICS_API_SERVICE_CONFIG is defined
- name: create read-only reports user for the analytics-api
mysql_user:
name: reports001
password: "{{ ANALYTICS_API_DATABASES.reports.PASSWORD }}"
priv: '{{ ANALYTICS_API_DATABASES.reports.NAME }}.*:SELECT'
when: ANALYTICS_API_SERVICE_CONFIG is defined
- name: create a database for the hive metastore
mysql_db:
db: "{{ HIVE_METASTORE_DATABASE.name }}"
state: "present"
encoding: "latin1"
when: HIVE_METASTORE_DATABASE is defined
- name: setup the edx-notes-api db user
mysql_user:
name: "{{ EDX_NOTES_API_MYSQL_DB_USER }}"
......
......@@ -11,17 +11,14 @@
# Defaults for role hadoop_common
#
HADOOP_COMMON_VERSION: 2.3.0
HADOOP_COMMON_VERSION: 2.7.2
HADOOP_COMMON_USER_HOME: "{{ COMMON_APP_DIR }}/hadoop"
HADOOP_COMMON_HOME: "{{ HADOOP_COMMON_USER_HOME }}/hadoop"
HADOOP_COMMON_DATA: "{{ COMMON_DATA_DIR }}/hadoop"
# These are non-standard directories, but are where Hadoop expects to find them.
HADOOP_COMMON_LOGS: "{{ HADOOP_COMMON_HOME }}/logs"
HADOOP_COMMON_CONF_DIR: "{{ HADOOP_COMMON_HOME }}/etc/hadoop"
HADOOP_COMMON_PROTOBUF_VERSION: 2.5.0
HADOOP_COMMON_SERVICES_DIR: "{{ HADOOP_COMMON_USER_HOME }}/services.d"
HADOOP_COMMON_SERVICE_HEAP_MAX: 256
HADOOP_COMMON_TOOL_HEAP_MAX: 128
......@@ -32,15 +29,7 @@ hadoop_common_temporary_dir: /var/tmp
hadoop_common_dist:
filename: "hadoop-{{ HADOOP_COMMON_VERSION }}.tar.gz"
url: "https://archive.apache.org/dist/hadoop/core/hadoop-{{ HADOOP_COMMON_VERSION }}/hadoop-{{ HADOOP_COMMON_VERSION }}.tar.gz"
sha256sum: 3fad58b525a47cf74458d0996564a2151c5a28baa1f92383e7932774deef5023
hadoop_common_protobuf_dist:
filename: "protobuf-{{ HADOOP_COMMON_PROTOBUF_VERSION }}.tar.gz"
url: "https://github.com/google/protobuf/releases/download/v{{ HADOOP_COMMON_PROTOBUF_VERSION }}/protobuf-{{ HADOOP_COMMON_PROTOBUF_VERSION }}.tar.gz"
sha256sum: c55aa3dc538e6fd5eaf732f4eb6b98bdcb7cedb5b91d3b5bdcf29c98c293f58e
hadoop_common_native_dist:
filename: "release-{{ HADOOP_COMMON_VERSION }}.tar.gz"
url: "https://github.com/apache/hadoop-common/archive/release-{{ HADOOP_COMMON_VERSION }}.tar.gz"
sha256sum: a8e1b49d4e891255d465e9449346ac7fb259bb35dce07d9f0df3b46fac3e9bd0
sha256sum: 49ad740f85d27fa39e744eb9e3b1d9442ae63d62720f0aabdae7aa9a718b03f7
hadoop_common_java_home: "{{ oraclejdk_link }}"
hadoop_common_env: "{{ HADOOP_COMMON_HOME }}/hadoop_env"
......@@ -49,38 +38,6 @@ hadoop_common_env: "{{ HADOOP_COMMON_HOME }}/hadoop_env"
#
hadoop_common_debian_pkgs:
- llvm-gcc
- build-essential
- make
- cmake
- automake
- autoconf
- libtool
- zlib1g-dev
- maven
- openssh-server
hadoop_common_redhat_pkgs: []
#
# MapReduce/Yarn memory config (defaults for m1.medium)
# http://docs.aws.amazon.com/ElasticMapReduce/latest/DeveloperGuide/TaskConfiguration_H2.html
#
# mapred_site_config:
# mapreduce.map.memory_mb: 768
# mapreduce.map.java.opts: '-Xmx512M'
# mapreduce.reduce.memory.mb: 1024
# mapreduce.reduce.java.opts: '-Xmx768M'
# yarn_site_config:
# yarn.app.mapreduce.am.resource.mb: 1024
# yarn.scheduler.minimum-allocation-mb: 32
# yarn.scheduler.maximum-allocation-mb: 2048
# yarn.nodemanager.resource.memory-mb: 2048
# yarn.nodemanager.vmem-pmem-ratio: 2.1
mapred_site_config: {}
yarn_site_config:
yarn.log-aggregation-enable: true
# 24 hour log retention
yarn.log-aggregation.retain-seconds: 86400
......@@ -11,4 +11,5 @@
# Role includes for role hadoop_common
dependencies:
- common
- oraclejdk
\ No newline at end of file
......@@ -27,12 +27,18 @@
pkg: "{{ item }}"
state: present
with_items: "{{ hadoop_common_debian_pkgs }}"
tags:
- install
- install:system-requirements
- name: ensure group exists
group:
name: "{{ hadoop_common_group }}"
system: yes
state: present
tags:
- install
- install:system-requirements
- name: ensure user exists
user:
......@@ -44,22 +50,9 @@
system: yes
generate_ssh_key: yes
state: present
- name: own key authorized
file:
src: "{{ HADOOP_COMMON_USER_HOME }}/.ssh/id_rsa.pub"
dest: "{{ HADOOP_COMMON_USER_HOME }}/.ssh/authorized_keys"
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
state: link
- name: ssh configured
template:
src: hadoop_user_ssh_config.j2
dest: "{{ HADOOP_COMMON_USER_HOME }}/.ssh/config"
mode: 0600
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
tags:
- install
- install:system-requirements
- name: ensure user is in sudoers
lineinfile:
......@@ -68,23 +61,36 @@
regexp: '^%hadoop ALL\='
line: '%hadoop ALL=(ALL) NOPASSWD:ALL'
validate: 'visudo -cf %s'
tags:
- install
- install:system-requirements
- name: check if downloaded and extracted
stat: path={{ HADOOP_COMMON_HOME }}
stat:
path: "{{ HADOOP_COMMON_HOME }}"
register: extracted_hadoop_dir
tags:
- install
- install:system-requirements
- name: distribution downloaded
get_url:
url: "{{ hadoop_common_dist.url }}"
sha256sum: "{{ hadoop_common_dist.sha256sum }}"
dest: "{{ hadoop_common_temporary_dir }}"
sha256sum: "{{ hadoop_common_dist.sha256sum }}"
when: not extracted_hadoop_dir.stat.exists
tags:
- install
- install:system-requirements
- name: distribution extracted
shell: "tar -xzf {{ hadoop_common_temporary_dir }}/{{ hadoop_common_dist.filename }} && chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} hadoop-{{ HADOOP_COMMON_VERSION }}"
args:
chdir: "{{ HADOOP_COMMON_USER_HOME }}"
when: not extracted_hadoop_dir.stat.exists
tags:
- install
- install:system-requirements
- name: versioned directory symlink created
file:
......@@ -93,6 +99,9 @@
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
state: link
tags:
- install
- install:system-requirements
- name: configuration installed
template:
......@@ -103,21 +112,9 @@
group: "{{ hadoop_common_group }}"
with_items:
- hadoop-env.sh
- mapred-site.xml
- core-site.xml
- hdfs-site.xml
- yarn-site.xml
- name: upstart scripts installed
template:
src: "{{ item }}.j2"
dest: "/etc/init/{{ item }}"
mode: 0640
owner: root
group: root
with_items:
- hdfs.conf
- yarn.conf
tags:
- install
- install:system-requirements
- name: hadoop env file exists
file:
......@@ -125,6 +122,9 @@
state: touch
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
tags:
- install
- install:system-requirements
- name: env vars sourced in bashrc
lineinfile:
......@@ -133,6 +133,9 @@
regexp: "^. {{ hadoop_common_env }}"
line: ". {{ hadoop_common_env }}"
insertbefore: BOF
tags:
- install
- install:system-requirements
- name: env vars sourced in hadoop env
lineinfile:
......@@ -140,81 +143,6 @@
state: present
regexp: "^. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh"
line: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh"
- name: check if native libraries need to be built
stat: path={{ HADOOP_COMMON_USER_HOME }}/.native_libs_built
register: native_libs_built
- name: protobuf downloaded
get_url:
url: "{{ hadoop_common_protobuf_dist.url }}"
sha256sum: "{{ hadoop_common_protobuf_dist.sha256sum }}"
dest: "{{ hadoop_common_temporary_dir }}"
when: not native_libs_built.stat.exists
- name: protobuf extracted
shell: "tar -xzf {{ hadoop_common_protobuf_dist.filename }}"
args:
chdir: "{{ hadoop_common_temporary_dir }}"
when: not native_libs_built.stat.exists
- name: protobuf installed
shell: "./configure --prefix=/usr/local && make && make install"
args:
chdir: "{{ hadoop_common_temporary_dir }}/protobuf-{{ HADOOP_COMMON_PROTOBUF_VERSION }}"
when: not native_libs_built.stat.exists
- name: native lib source downloaded
get_url:
url: "{{ hadoop_common_native_dist.url }}"
sha256sum: "{{ hadoop_common_native_dist.sha256sum }}"
dest: "{{ hadoop_common_temporary_dir }}/{{ hadoop_common_native_dist.filename }}"
when: not native_libs_built.stat.exists
- name: native lib source extracted
shell: "tar -xzf {{ hadoop_common_native_dist.filename }}"
args:
chdir: "{{ hadoop_common_temporary_dir }}"
when: not native_libs_built.stat.exists
- name: native lib built
shell: "mvn package -X -Pnative -DskipTests"
args:
chdir: "{{ hadoop_common_temporary_dir }}/hadoop-common-release-{{ HADOOP_COMMON_VERSION }}/hadoop-common-project"
environment:
LD_LIBRARY_PATH: /usr/local/lib
when: not native_libs_built.stat.exists
- name: old native libs renamed
shell: "mv {{ HADOOP_COMMON_HOME }}/lib/native/{{ item.name }} {{ HADOOP_COMMON_HOME }}/lib/native/{{ item.new_name }}"
with_items:
- { name: libhadoop.a, new_name: libhadoop32.a }
- { name: libhadoop.so, new_name: libhadoop32.so }
- { name: libhadoop.so.1.0.0, new_name: libhadoop32.so.1.0.0 }
when: not native_libs_built.stat.exists
- name: new native libs installed
shell: "chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ item }} && cp {{ item }} {{ HADOOP_COMMON_HOME }}/lib/native/{{ item }}"
args:
chdir={{ hadoop_common_temporary_dir }}/hadoop-common-release-{{ HADOOP_COMMON_VERSION }}/hadoop-common-project/hadoop-common/target/native/target/usr/local/lib
with_items:
- libhadoop.a
- libhadoop.so
- libhadoop.so.1.0.0
when: not native_libs_built.stat.exists
- name: native lib marker touched
file:
path: "{{ HADOOP_COMMON_USER_HOME }}/.native_libs_built"
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
state: touch
when: not native_libs_built.stat.exists
- name: service directory exists
file:
path: "{{ HADOOP_COMMON_SERVICES_DIR }}"
mode: "0750"
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
state: directory
tags:
- install
- install:system-requirements
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
\ No newline at end of file
Host localhost
StrictHostKeyChecking no
Host 0.0.0.0
StrictHostKeyChecking no
\ No newline at end of file
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:{{ HADOOP_COMMON_DATA }}/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:{{ HADOOP_COMMON_DATA }}/datanode</value>
</property>
</configuration>
\ No newline at end of file
description "hdfs"
start on starting yarn
stop on stopping yarn
setuid {{ hadoop_common_user }}
pre-start script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
start-dfs.sh
end script
post-stop script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
stop-dfs.sh
end script
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
{% if mapred_site_config is defined %}
{% for key,value in mapred_site_config.iteritems() %}
<property>
<name>{{ key }}</name>
<value>{{ value }}</value>
</property>
{% endfor %}
{% endif %}
</configuration>
<?xml version="1.0"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
{% if yarn_site_config is defined %}
{% for key,value in yarn_site_config.iteritems() %}
<property>
<name>{{ key }}</name>
<value>{{ value }}</value>
</property>
{% endfor %}
{% endif %}
</configuration>
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://openedx.atlassian.net/wiki/display/OpenOPS
# code style: https://openedx.atlassian.net/wiki/display/OpenOPS/Ansible+Code+Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role hadoop_master
#
#
# vars are namespaced with the module name.
#
hadoop_master_role_name: hadoop_master
#
# OS packages
#
hadoop_master_debian_pkgs: []
hadoop_master_redhat_pkgs: []
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://openedx.atlassian.net/wiki/display/OpenOPS
# code style: https://openedx.atlassian.net/wiki/display/OpenOPS/Ansible+Code+Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role hadoop_master
#
# Overview:
#
# Configures the machine to be a Hadoop master node. This generally means that it will
# run the HDFS name node and the yarn resource manager.
#
# Dependencies:
#
# hadoop_common: this role installs hadoop generically
#
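#
# Example play (illustrative; mirrors how the other hadoop roles in this
# change are applied):
#
#   - name: Configure a hadoop master node
#     hosts: all
#     sudo: True
#     gather_facts: True
#     roles:
#       - hadoop_master
#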
- name: Data directories created
file:
path: "{{ HADOOP_COMMON_DATA }}/{{ item }}"
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
state: directory
with_items:
- namenode
- datanode
- name: Check if namenode is formatted
stat:
path: "{{ HADOOP_COMMON_DATA }}/namenode/current/VERSION"
register: namenode_version_file
- name: Namenode formatted
shell: ". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs namenode -format"
become_user: "{{ hadoop_common_user }}"
when: not namenode_version_file.stat.exists
......@@ -12,25 +12,10 @@
#
HIVE_VERSION: 0.11.0
HIVE_MYSQL_CONNECTOR_VERSION: 5.1.29
HIVE_HOME: "{{ HADOOP_COMMON_USER_HOME }}/hive"
HIVE_CONF: "{{ HIVE_HOME }}/conf"
HIVE_LIB: "{{ HIVE_HOME }}/lib"
HIVE_METASTORE_DATABASE_NAME: edx_hive_metastore
HIVE_METASTORE_DATABASE_USER: edx_hive
HIVE_METASTORE_DATABASE_PASSWORD: edx
HIVE_METASTORE_DATABASE_HOST: 127.0.0.1
HIVE_METASTORE_DATABASE_PORT: 3306
HIVE_METASTORE_DATABASE:
user: "{{ HIVE_METASTORE_DATABASE_USER }}"
password: "{{ HIVE_METASTORE_DATABASE_PASSWORD }}"
name: "{{ HIVE_METASTORE_DATABASE_NAME }}"
host: "{{ HIVE_METASTORE_DATABASE_HOST }}"
port: "{{ HIVE_METASTORE_DATABASE_PORT }}"
#
# vars are namespaced with the module name.
#
......@@ -40,10 +25,9 @@ hive_dist:
filename: "hive-{{ HIVE_VERSION }}-bin.tar.gz"
url: "https://archive.apache.org/dist/hive/hive-{{ HIVE_VERSION }}/hive-{{ HIVE_VERSION }}-bin.tar.gz"
sha256sum: c22ee328438e80a8ee4b66979dba69650511a73f8b6edf2d87d93c74283578e5
hive_mysql_connector_dist:
filename: "mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}.tar.gz"
url: "http://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}.tar.gz"
sha256sum: 04ad83b655066b626daaabb9676a00f6b4bc43f0c234cbafafac1209dcf1be73
hive_site_config:
javax.jdo.option.ConnectionURL: jdbc:derby:;databaseName=/var/tmp/metastore_db;create=true
#
# OS packages
......
......@@ -24,6 +24,9 @@
stat:
path: "{{ HIVE_HOME }}"
register: extracted_dir
tags:
- install
- install:system-requirements
- name: distribution downloaded
get_url:
......@@ -31,12 +34,18 @@
sha256sum: "{{ hive_dist.sha256sum }}"
dest: "{{ hive_temporary_dir }}"
when: not extracted_dir.stat.exists
tags:
- install
- install:system-requirements
- name: distribution extracted
shell: "tar -xzf {{ hive_temporary_dir }}/{{ hive_dist.filename }} && chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} hive-{{ HIVE_VERSION }}-bin"
args:
chdir: "{{ HADOOP_COMMON_USER_HOME }}"
when: not extracted_dir.stat.exists
tags:
- install
- install:system-requirements
- name: versioned directory symlink created
file:
......@@ -45,43 +54,31 @@
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
state: link
tags:
- install
- install:system-requirements
- name: hive mysql connector distribution downloaded
get_url:
url: "{{ hive_mysql_connector_dist.url }}"
sha256sum: "{{ hive_mysql_connector_dist.sha256sum }}"
dest: "{{ hive_temporary_dir }}"
when: not extracted_dir.stat.exists
- name: hive mysql connector distribution extracted
shell: "tar -xzf {{ hive_temporary_dir }}/{{ hive_mysql_connector_dist.filename }}"
args:
chdir: "{{ hive_temporary_dir }}"
when: not extracted_dir.stat.exists
- name: hive lib exists
file:
path: "{{ HIVE_LIB }}"
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
state: directory
- name: hive mysql connector installed
shell: "cp mysql-connector-java-{{ HIVE_MYSQL_CONNECTOR_VERSION }}-bin.jar {{ HIVE_LIB }} && chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ HIVE_LIB }}/mysql-connector-java-{{ HIVE_MYSQL_CONNECTOR_VERSION }}-bin.jar"
args:
chdir: "/{{ hive_temporary_dir }}/mysql-connector-java-{{ HIVE_MYSQL_CONNECTOR_VERSION }}"
when: not extracted_dir.stat.exists
- name: configuration installed
- name: environment configuration installed
template:
src: "{{ item }}.j2"
dest: "{{ HIVE_CONF }}/{{ item }}"
src: "hive-env.sh.j2"
dest: "{{ HIVE_CONF }}/hive-env.sh"
mode: 0640
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
with_items:
- hive-env.sh
- hive-site.xml
tags:
- install
- install:system-requirements
- name: hive configured
hadoop_configuration:
property: "{{ item.key }}"
value: "{{ item.value }}"
path: "{{ HIVE_CONF }}/hive-site.xml"
become_user: "{{ hadoop_common_user }}"
with_dict: hive_site_config
tags:
- install
- install:system-requirements
- name: env vars sourced in hadoop env
lineinfile:
......@@ -89,3 +86,6 @@
state: present
regexp: "^. {{ HIVE_CONF }}/hive-env.sh"
line: ". {{ HIVE_CONF }}/hive-env.sh"
tags:
- install
- install:system-requirements
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://{{ HIVE_METASTORE_DATABASE.host }}:{{ HIVE_METASTORE_DATABASE.port }}/{{ HIVE_METASTORE_DATABASE.name }}</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>{{ HIVE_METASTORE_DATABASE.user }}</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>{{ HIVE_METASTORE_DATABASE.password }}</value>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>true</value>
</property>
</configuration>
......@@ -54,25 +54,27 @@
- install:base
- name: install node dependencies
npm: executable={{ insights_nodeenv_bin }}/npm path={{ insights_code_dir }} production=yes
npm:
executable: "{{ insights_nodeenv_bin }}/npm"
path: "{{ insights_code_dir }}"
production: yes
become_user: "{{ insights_user }}"
environment: "{{ insights_environment }}"
tags:
- install
- install:app-requirements
environment: "{{ insights_environment }}"
- name: install bower dependencies
shell: ". {{ insights_venv_dir }}/bin/activate && . {{ insights_nodeenv_bin }}/activate && {{ insights_node_bin }}/bower install --production --config.interactive=false"
args:
chdir: "{{ insights_code_dir }}"
become_user: "{{ insights_user }}"
tags:
- install
- install:app-requirements
- name: migrate
shell: "DB_MIGRATION_USER='{{ COMMON_MYSQL_MIGRATE_USER }}' DB_MIGRATION_PASS='{{ COMMON_MYSQL_MIGRATE_PASS }}' {{ insights_venv_dir }}/bin/python {{ insights_manage }} migrate --noinput"
shell: "DB_MIGRATION_USER='{{ COMMON_MYSQL_MIGRATE_USER }}' DB_MIGRATION_PASS='{{ COMMON_MYSQL_MIGRATE_PASS }}' {{ insights_venv_dir }}/bin/python {{ insights_manage }} migrate --noinput --run-syncdb"
args:
chdir: "{{ insights_code_dir }}"
become_user: "{{ insights_user }}"
......@@ -81,6 +83,7 @@
tags:
- migrate
- migrate:db
- devstack:start
- name: run r.js optimizer
shell: ". {{ insights_nodeenv_bin }}/activate && {{ insights_node_bin }}/r.js -o build.js"
......@@ -185,3 +188,15 @@
become_user: "{{ supervisor_service_user }}"
tags:
- manage:start
- name: write devstack scripts
template:
src: "edx/app/insights/{{ item }}.j2"
dest: "/{{ item }}"
mode: 0744
with_items:
- "docker-entrypoint.sh"
- "docker-command.sh"
tags:
- devstack
- devstack:install
#!/usr/bin/env bash
# {{ ansible_managed }}
until [ -f {{ insights_log_dir }}/edx.log ]; do
sleep 1
done
exec tail -f {{ insights_log_dir }}/edx.log
\ No newline at end of file
#!/usr/bin/env bash
# {{ ansible_managed }}
set -e
export DEVSTACK_HOST="${DEVSTACK_HOST:-open.edx}"
cat >/ansible_overrides.yml <<EOL
---
migrate_db: 'yes'
DEVSTACK_HOST: ${DEVSTACK_HOST}
INSIGHTS_LMS_BASE: "http://${DEVSTACK_HOST}:8000"
INSIGHTS_CMS_BASE: "http://${DEVSTACK_HOST}:8010"
INSIGHTS_BASE_URL: "http://${DEVSTACK_HOST}:8110"
INSIGHTS_MEMCACHE:
- "${INSIGHTS_MEMCACHE:-memcache:11211}"
ANALYTICS_API_ENDPOINT: "${ANALYTICS_API_ENDPOINT:-http://analyticsapi:8100/api/v0}"
COMMON_MYSQL_MIGRATE_USER: "${COMMON_MYSQL_MIGRATE_USER:-root}"
COMMON_MYSQL_MIGRATE_PASS: "${COMMON_MYSQL_MIGRATE_PASS:-}"
INSIGHTS_DATABASES:
# rw user
default:
ENGINE: 'django.db.backends.mysql'
NAME: 'dashboard'
USER: 'insights001'
PASSWORD: 'password'
HOST: '${INSIGHTS_DATABASE_HOST:-resultstore}'
PORT: '${INSIGHTS_DATABASE_PORT:-3306}'
EOL
echo "Initializing the runtime environment"
/edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook insights.yml \
-i '127.0.0.1,' -c local \
-t "install:app-configuration,devstack:start" \
--extra-vars="@/ansible_overrides.yml"
/edx/app/supervisor/venvs/supervisor/bin/supervisord --configuration /edx/app/supervisor/supervisord.conf
exec "$@"
\ No newline at end of file
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://openedx.atlassian.net/wiki/display/OpenOPS
# code style: https://openedx.atlassian.net/wiki/display/OpenOPS/Ansible+Code+Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role luigid
#
#
# vars are namespaced with the module name.
#
LUIGID_GIT_IDENTITY: !!null
LUIGID_VERSION: "edx/release"
LUIGID_SERVICE_CONFIG: {}
LUIGID_REPOS:
- PROTOCOL: "{{ COMMON_GIT_PROTOCOL }}"
DOMAIN: "{{ COMMON_GIT_MIRROR }}"
PATH: "{{ COMMON_GIT_PATH }}"
REPO: luigi.git
VERSION: "{{ LUIGID_VERSION }}"
DESTINATION: "{{ luigid_code_dir }}"
SSH_KEY: "{{ LUIGID_GIT_IDENTITY }}"
#
# vars are namespaced with the module name.
#
luigid_service_name: "luigid"
luigid_user: "{{ luigid_service_name }}"
luigid_home: "{{ COMMON_APP_DIR }}/{{ luigid_service_name }}"
luigid_code_dir: "{{ luigid_home }}/{{ luigid_service_name }}"
luigid_log_dir: "{{ COMMON_LOG_DIR }}/{{ luigid_service_name }}"
luigid_venv_dir: "{{ luigid_home }}/venvs/{{ luigid_service_name }}"
luigid_conf_dir: "/etc/luigi"
luigid_pidfile_path: "/var/run/luigid.pid"
luigid_bind_address: "0.0.0.0"
luigid_port: 9100
luigid_config:
record_task_history: "True"
luigid_task_history_connection: "sqlite:////var/tmp/luigi-task-hist.db"
luigid_python_requirements:
- "SQLAlchemy==1.1.4"
- "tornado==4.4.2"
#
# OS packages
#
luigid_debian_pkgs: []
luigid_redhat_pkgs: []
......@@ -8,7 +8,25 @@
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Role includes for role hadoop_master
# Role includes for role luigid
#
# Example:
#
# dependencies:
# - {
# role: my_role
# my_role_var0: "foo"
# my_role_var1: "bar"
# }
dependencies:
- hadoop_common
- common
- role: edx_service
edx_service_name: "{{ luigid_service_name }}"
edx_service_config: "{{ LUIGID_SERVICE_CONFIG }}"
edx_service_repos: "{{ LUIGID_REPOS }}"
edx_service_user: "{{ luigid_user }}"
edx_service_home: "{{ luigid_home }}"
edx_service_packages:
debian: "{{ luigid_debian_pkgs }}"
redhat: "{{ luigid_redhat_pkgs }}"
\ No newline at end of file
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://openedx.atlassian.net/wiki/display/OpenOPS
# code style: https://openedx.atlassian.net/wiki/display/OpenOPS/Ansible+Code+Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role luigid
#
# Overview:
#
#
# Dependencies:
#
#
# Example play:
#
#
- name: install application requirements
pip: >
name="{{ item }}"
virtualenv="{{ luigid_venv_dir }}"
state=present
become_user: "{{ luigid_user }}"
with_items: "{{ luigid_python_requirements }}"
tags:
- install
- install:app-requirements
- name: install luigi
pip: >
name="{{ luigid_code_dir }}"
virtualenv="{{ luigid_venv_dir }}"
state=present
become_user: "{{ luigid_user }}"
tags:
- install
- install:app-requirements
- name: config directory created
file:
path: "{{ luigid_conf_dir }}"
state: directory
owner: "{{ luigid_user }}"
group: "{{ luigid_user }}"
mode: "0755"
tags:
- install
- install:base
- name: configuration installed
template: >
src={{ item }}.j2
dest={{ luigid_conf_dir }}/{{ item }}
mode=0640 owner={{ luigid_user }} group={{ luigid_user }}
with_items:
- logging.conf
- client.cfg
tags:
- install
- name: upstart scripts installed
template: >
src={{ item }}.j2
dest=/etc/init/{{ item }}
mode=0640 owner=root group=root
with_items:
- luigid.conf
tags:
- install
[core]
logging_conf_file = /etc/luigi/logging.conf
[scheduler]
{% if luigid_config is defined %}
{% for key,value in luigid_config.iteritems() %}
{{ key }} = {{ value }}
{% endfor %}
{% endif %}
[task_history]
db_connection = {{ luigid_task_history_connection }}
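# For reference, with the role defaults above this template renders roughly as
# (illustrative only; actual values depend on the variables set for the play):
#   [core]
#   logging_conf_file = /etc/luigi/logging.conf
#   [scheduler]
#   record_task_history = True
#   [task_history]
#   db_connection = sqlite:////var/tmp/luigi-task-hist.db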
\ No newline at end of file
[loggers]
keys=root,tornado,client,scheduler,server
[logger_root]
level=DEBUG
handlers=console
[logger_client]
level=DEBUG
handlers=console
qualname=luigi-interface
propagate=0
[logger_server]
level=DEBUG
handlers=console
qualname=luigi.server
propagate=0
[logger_scheduler]
level=DEBUG
handlers=console
qualname=luigi.scheduler
propagate=0
[logger_tornado]
level=DEBUG
handlers=console
qualname=tornado
propagate=0
[formatters]
keys=detail
[formatter_detail]
class=logging.Formatter
format=%(asctime)s %(name)-15s %(levelname)-8s %(message)s
[handlers]
keys=console
[handler_console]
level=WARNING
class=StreamHandler
args=(sys.stdout,)
formatter=detail
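# Upstart job for the luigi central scheduler. Note: luigid detaches itself via
# --background, so Upstart cannot supervise the daemon directly; the post-stop
# stanza terminates it through the pidfile instead.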
description "yarn"
description "luigid"
start on runlevel [2345]
stop on runlevel [!2345]
setuid {{ hadoop_common_user }}
setuid {{ luigid_user }}
pre-start script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
start-yarn.sh
luigid --background --pidfile {{ luigid_pidfile_path }} --address={{ luigid_bind_address }} --port {{ luigid_port }} --state-path=/var/tmp/luigi-state.pickle
end script
post-stop script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
stop-yarn.sh
end script
pkill -F {{ luigid_pidfile_path }}
end script
\ No newline at end of file
@@ -23,6 +23,9 @@
stat:
path: "{{ SQOOP_LIB }}/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}-bin.jar"
register: installed
tags:
- install
- install:system-requirements
- name: distribution downloaded
get_url:
@@ -30,6 +33,9 @@
sha256sum: "{{ sqoop_dist.sha256sum }}"
dest: "{{ sqoop_temporary_dir }}"
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: Distribution extracted
unarchive:
@@ -37,12 +43,18 @@
dest: "{{ HADOOP_COMMON_USER_HOME }}"
copy: no
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: Set the Permission
shell: chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ sqoop_base_filename }}
args:
chdir: "{{ HADOOP_COMMON_USER_HOME }}"
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: Versioned directory symlink created
file:
@@ -51,6 +63,9 @@
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
state: link
tags:
- install
- install:system-requirements
- name: MySQL connector distribution downloaded
get_url:
@@ -58,6 +73,9 @@
sha256sum: "{{ sqoop_mysql_connector_dist.sha256sum }}"
dest: "{{ sqoop_temporary_dir }}"
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: MySQL connector distribution extracted
unarchive:
@@ -65,6 +83,9 @@
dest: "{{ sqoop_temporary_dir }}"
copy: no
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: Sqoop lib exists
file:
@@ -72,6 +93,9 @@
state: directory
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
tags:
- install
- install:system-requirements
# TODO: use the copy module with remote_src: True once we migrate to Ansible 2.x
- name: MySQL connector installed
@@ -79,13 +103,19 @@
args:
chdir: "{{ sqoop_temporary_dir }}/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}"
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: Fix MySQL connector permission
file:
path: "{{ SQOOP_LIB }}/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}-bin.jar"
owner: "{{ hadoop_common_user }}"
group: "{{ hadoop_common_group }}"
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: Configuration installed
template:
@@ -96,10 +126,16 @@
group: "{{ hadoop_common_group }}"
with_items:
- sqoop-env.sh
tags:
- install
- install:system-requirements
- name: env vars sourced in hadoop env
lineinfile:
dest: "{{ hadoop_common_env }}"
state: present
regexp: "^. {{ SQOOP_CONF }}/sqoop-env.sh"
line: ". {{ SQOOP_CONF }}/sqoop-env.sh"
\ No newline at end of file
line: ". {{ SQOOP_CONF }}/sqoop-env.sh"
tags:
- install
- install:system-requirements
\ No newline at end of file
- name: Configure instance(s)
hosts: all
become: True
gather_facts: True
vars:
migrate_db: 'yes'
devstack: true
disable_edx_services: true
mongo_enable_journal: false
EDXAPP_NO_PREREQ_INSTALL: 0
COMMON_SSH_PASSWORD_AUTH: "yes"
EDXAPP_LMS_BASE: 127.0.0.1:8000
EDXAPP_OAUTH_ENFORCE_SECURE: false
EDXAPP_LMS_BASE_SCHEME: http
ECOMMERCE_DJANGO_SETTINGS_MODULE: "ecommerce.settings.devstack"
roles:
- common
- vhost
- edx_ansible
- mysql
- edxlocal
- memcache
- mongo
- { role: 'rabbitmq', rabbitmq_ip: '127.0.0.1' }
- edxapp
- oraclejdk
- elasticsearch
- forum
- ecommerce
- ecomworker
- programs
- role: notifier
NOTIFIER_DIGEST_TASK_INTERVAL: "5"
- analytics_api
- insights
- local_dev
- demo
- analytics_pipeline
- oauth_client_setup
@@ -27,3 +27,4 @@ weights:
- elasticsearch: 7
- docker-tools: 3
- tools_jenkins: 8
- analytics_pipeline: 7
Vagrant.require_version ">= 1.6.5"
unless Vagrant.has_plugin?("vagrant-vbguest")
raise "Please install the vagrant-vbguest plugin by running `vagrant plugin install vagrant-vbguest`"
end
VAGRANTFILE_API_VERSION = "2"
MEMORY = 4096
CPU_COUNT = 2
vm_guest_ip = "192.168.33.10"
if ENV["VAGRANT_GUEST_IP"]
vm_guest_ip = ENV["VAGRANT_GUEST_IP"]
end
# These are versioning variables in the roles. Each can be overridden, first
# with OPENEDX_RELEASE, and then with a specific environment variable of the
# same name but upper-cased.
VERSION_VARS = [
'edx_platform_version',
'configuration_version',
'certs_version',
'forum_version',
'xqueue_version',
'demo_version',
'NOTIFIER_VERSION',
'ECOMMERCE_VERSION',
'ECOMMERCE_WORKER_VERSION',
'PROGRAMS_VERSION',
'ANALYTICS_API_VERSION',
'INSIGHTS_VERSION',
]
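# For example (hypothetical invocation), provisioning with
#   OPENEDX_RELEASE="open-release/eucalyptus" INSIGHTS_VERSION="my-branch" vagrant provision
# pins every variable above to the eucalyptus release except INSIGHTS_VERSION,
# which takes the more specific environment variable.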
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
# Creates a devstack from a base Ubuntu 12.04 image for virtualbox
config.vm.box = "precise64"
config.vm.box_url = "http://files.vagrantup.com/precise64.box"
config.vm.network :private_network, ip: vm_guest_ip
# If you want to run the box but don't need network ports, set VAGRANT_NO_PORTS=1.
# This is useful if you want to run more than one box at once.
if not ENV['VAGRANT_NO_PORTS']
config.vm.network :forwarded_port, guest: 8000, host: 8000 # LMS
config.vm.network :forwarded_port, guest: 8001, host: 8001 # Studio
config.vm.network :forwarded_port, guest: 8002, host: 8002 # Ecommerce
config.vm.network :forwarded_port, guest: 8003, host: 8003 # LMS for Bok Choy
config.vm.network :forwarded_port, guest: 8004, host: 8004 # Programs
config.vm.network :forwarded_port, guest: 8031, host: 8031 # Studio for Bok Choy
config.vm.network :forwarded_port, guest: 8120, host: 8120 # edX Notes Service
config.vm.network :forwarded_port, guest: 8765, host: 8765
config.vm.network :forwarded_port, guest: 9200, host: 9200
config.vm.network :forwarded_port, guest: 18080, host: 18080
config.vm.network :forwarded_port, guest: 8100, host: 8100 # Analytics Data API
config.vm.network :forwarded_port, guest: 8110, host: 8110 # Insights
config.vm.network :forwarded_port, guest: 50070, host: 50070 # HDFS Admin UI
config.vm.network :forwarded_port, guest: 8088, host: 8088 # Hadoop Resource Manager
end
config.ssh.insert_key = true
# Enable X11 forwarding so we can interact with GUI applications
if ENV['VAGRANT_X11']
config.ssh.forward_x11 = true
end
config.vm.provider :virtualbox do |vb|
vb.customize ["modifyvm", :id, "--memory", MEMORY.to_s]
vb.customize ["modifyvm", :id, "--cpus", CPU_COUNT.to_s]
# Allow DNS to work for Ubuntu 12.10 host
# http://askubuntu.com/questions/238040/how-do-i-fix-name-service-for-vagrant-client
vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"]
end
# Make LC_ALL default to en_US.UTF-8 instead of en_US.
# See: https://github.com/mitchellh/vagrant/issues/1188
config.vm.provision "shell", inline: 'echo \'LC_ALL="en_US.UTF-8"\' > /etc/default/locale'
# Use vagrant-vbguest plugin to make sure Guest Additions are in sync
config.vbguest.auto_reboot = true
config.vbguest.auto_update = true
config.vm.provision :ansible do |ansible|
ansible.playbook = "../../../playbooks/vagrant-analytics.yml"
ansible.verbose = "vvvv"
ansible.extra_vars = {}
VERSION_VARS.each do |var|
if ENV['OPENEDX_RELEASE']
ansible.extra_vars[var] = ENV['OPENEDX_RELEASE']
end
env_var = var.upcase
if ENV[env_var]
ansible.extra_vars[var] = ENV[env_var]
end
end
end
end
../../../playbooks/ansible.cfg
\ No newline at end of file
@@ -50,10 +50,6 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.network :forwarded_port, guest: 8765, host: 8765
config.vm.network :forwarded_port, guest: 9200, host: 9200
config.vm.network :forwarded_port, guest: 18080, host: 18080
config.vm.network :forwarded_port, guest: 8100, host: 8100 # Analytics Data API
config.vm.network :forwarded_port, guest: 8110, host: 8110 # Insights
config.vm.network :forwarded_port, guest: 50070, host: 50070 # HDFS Admin UI
config.vm.network :forwarded_port, guest: 8088, host: 8088 # Hadoop Resource Manager
end
config.ssh.insert_key = true
Vagrant.require_version ">= 1.6.5"
unless Vagrant.has_plugin?("vagrant-vbguest")
raise "Please install the vagrant-vbguest plugin by running `vagrant plugin install vagrant-vbguest`"
end
VAGRANTFILE_API_VERSION = "2"
MEMORY = 4096
CPU_COUNT = 2
# These are versioning variables in the roles. Each can be overridden, first
# with OPENEDX_RELEASE, and then with a specific environment variable of the
# same name but upper-cased.
VERSION_VARS = [
'edx_platform_version',
'configuration_version',
'certs_version',
'forum_version',
'xqueue_version',
'demo_version',
'NOTIFIER_VERSION',
'ECOMMERCE_VERSION',
'ECOMMERCE_WORKER_VERSION',
'PROGRAMS_VERSION',
'ANALYTICS_API_VERSION',
'INSIGHTS_VERSION',
]
MOUNT_DIRS = {
:edx_platform => {:repo => "edx-platform", :local => "/edx/app/edxapp/edx-platform", :owner => "edxapp"},
:themes => {:repo => "themes", :local => "/edx/app/edxapp/themes", :owner => "edxapp"},
:forum => {:repo => "cs_comments_service", :local => "/edx/app/forum/cs_comments_service", :owner => "forum"},
:ecommerce => {:repo => "ecommerce", :local => "/edx/app/ecommerce/ecommerce", :owner => "ecommerce"},
:ecommerce_worker => {:repo => "ecommerce-worker", :local => "/edx/app/ecommerce_worker/ecommerce_worker", :owner => "ecommerce_worker"},
:programs => {:repo => "programs", :local => "/edx/app/programs/programs", :owner => "programs"},
:insights => {:repo => "insights", :local => "/edx/app/insights/edx_analytics_dashboard", :owner => "insights"},
:analytics_api => {:repo => "analytics_api", :local => "/edx/app/analytics_api/analytics_api", :owner => "analytics_api"},
:analytics_pipeline => {:repo => "edx-analytics-pipeline", :local => "/edx/app/analytics_pipeline/analytics_pipeline", :owner => "hadoop"},
# This src directory won't have useful permissions. You can set them from the
# vagrant user in the guest OS. "sudo chmod 0777 /edx/src" is useful.
:src => {:repo => "src", :local => "/edx/src", :owner => "root"},
}
if ENV['VAGRANT_MOUNT_BASE']
MOUNT_DIRS.each { |k, v| MOUNT_DIRS[k][:repo] = ENV['VAGRANT_MOUNT_BASE'] + "/" + MOUNT_DIRS[k][:repo] }
end
# map the name of the git branch that we use for a release
# to a name and a file path, which are used for retrieving
# a Vagrant box from the internet.
openedx_releases = {
"open-release/eucalyptus" => "eucalyptus-analyticstack-2016-09-04",
"open-release/eucalyptus.2" => "eucalyptus-analyticstack-2016-09-04",
"named-release/dogwood.3" => {
:name => "analyticstack", :file => "dogwood-analyticstack-2016-03-15.box",
},
"named-release/dogwood" => {
:name => "analyticstack", :file => "dogwood-analyticstack-2016-03-15.box",
},
}
openedx_releases.default = {
:name => "analyticstack", :file => "analyticstack-latest.box",
}
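# For example, OPENEDX_RELEASE="open-release/eucalyptus" resolves to the
# "eucalyptus-analyticstack-2016-09-04" box, fetched below from
# http://files.edx.org/vagrant-images/eucalyptus-analyticstack-2016-09-04.box;
# any unlisted (or unset) release falls back to analyticstack-latest.box.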
openedx_release = ENV['OPENEDX_RELEASE']
# Build -e override lines for each overridable variable.
extra_vars_lines = ""
VERSION_VARS.each do |var|
rel = ENV[var.upcase] || openedx_release
if rel
extra_vars_lines += "-e #{var}=#{rel} \\\n"
end
end
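# With OPENEDX_RELEASE="open-release/eucalyptus" and no per-variable overrides, this
# yields one "-e <var>=open-release/eucalyptus \" line per entry in VERSION_VARS,
# which the provisioning script below appends to both ansible-playbook invocations.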
$script = <<SCRIPT
if [ ! -d /edx/app/edx_ansible ]; then
echo "Error: Base box is missing provisioning scripts." 1>&2
exit 1
fi
export PYTHONUNBUFFERED=1
source /edx/app/edx_ansible/venvs/edx_ansible/bin/activate
cd /edx/app/edx_ansible/edx_ansible/playbooks
EXTRA_VARS="#{extra_vars_lines}"
CONFIG_VER="#{ENV['CONFIGURATION_VERSION'] || openedx_release || 'master'}"
ansible-playbook -i localhost, -c local run_role.yml -e role=edx_ansible -e configuration_version=$CONFIG_VER $EXTRA_VARS
ansible-playbook -i localhost, -c local vagrant-analytics.yml -e configuration_version=$CONFIG_VER $EXTRA_VARS -e ELASTICSEARCH_CLUSTER_MEMBERS=[]
SCRIPT
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
reldata = openedx_releases[openedx_release]
if Hash == reldata.class
boxname = openedx_releases[openedx_release][:name]
boxfile = openedx_releases[openedx_release].fetch(:file, "#{boxname}.box")
else
boxname = reldata
boxfile = "#{boxname}.box"
end
# Creates an edX devstack VM from an official release
config.vm.box = boxname
config.vm.box_url = "http://files.edx.org/vagrant-images/#{boxfile}"
config.vm.network :private_network, ip: "192.168.33.10"
# If you want to run the box but don't need network ports, set VAGRANT_NO_PORTS=1.
# This is useful if you want to run more than one box at once.
if not ENV['VAGRANT_NO_PORTS']
config.vm.network :forwarded_port, guest: 8000, host: 8000 # LMS
config.vm.network :forwarded_port, guest: 8001, host: 8001 # Studio
config.vm.network :forwarded_port, guest: 8002, host: 8002 # Ecommerce
config.vm.network :forwarded_port, guest: 8003, host: 8003 # LMS for Bok Choy
config.vm.network :forwarded_port, guest: 8031, host: 8031 # Studio for Bok Choy
config.vm.network :forwarded_port, guest: 8120, host: 8120 # edX Notes Service
config.vm.network :forwarded_port, guest: 8765, host: 8765
config.vm.network :forwarded_port, guest: 9200, host: 9200 # Elasticsearch
config.vm.network :forwarded_port, guest: 18080, host: 18080 # Forums
config.vm.network :forwarded_port, guest: 8100, host: 8100 # Analytics Data API
config.vm.network :forwarded_port, guest: 8110, host: 8110 # Insights
config.vm.network :forwarded_port, guest: 9876, host: 9876 # ORA2 Karma tests
config.vm.network :forwarded_port, guest: 50070, host: 50070 # HDFS Admin UI
config.vm.network :forwarded_port, guest: 8088, host: 8088 # Hadoop Resource Manager
end
config.ssh.insert_key = true
config.vm.synced_folder ".", "/vagrant", disabled: true
# Enable X11 forwarding so we can interact with GUI applications
if ENV['VAGRANT_X11']
config.ssh.forward_x11 = true
end
if ENV['VAGRANT_USE_VBOXFS'] == 'true'
MOUNT_DIRS.each { |k, v|
config.vm.synced_folder v[:repo], v[:local], create: true, owner: v[:owner], group: "www-data"
}
else
MOUNT_DIRS.each { |k, v|
config.vm.synced_folder v[:repo], v[:local], create: true, nfs: true
}
end
config.vm.provider :virtualbox do |vb|
vb.customize ["modifyvm", :id, "--memory", MEMORY.to_s]
vb.customize ["modifyvm", :id, "--cpus", CPU_COUNT.to_s]
# Allow DNS to work for Ubuntu 12.10 host
# http://askubuntu.com/questions/238040/how-do-i-fix-name-service-for-vagrant-client
vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"]
end
# Use vagrant-vbguest plugin to make sure Guest Additions are in sync
config.vbguest.auto_reboot = true
config.vbguest.auto_update = true
# Assume that the base box has the edx_ansible role installed
# We can then tell the Vagrant instance to update itself.
config.vm.provision "shell", inline: $script
end
@@ -123,11 +123,8 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.network :forwarded_port, guest: 8765, host: 8765
config.vm.network :forwarded_port, guest: 9200, host: 9200 # Elasticsearch
config.vm.network :forwarded_port, guest: 18080, host: 18080 # Forums
config.vm.network :forwarded_port, guest: 8100, host: 8100 # Analytics Data API
config.vm.network :forwarded_port, guest: 8110, host: 8110 # Insights
config.vm.network :forwarded_port, guest: 9876, host: 9876 # ORA2 Karma tests
config.vm.network :forwarded_port, guest: 50070, host: 50070 # HDFS Admin UI
config.vm.network :forwarded_port, guest: 8088, host: 8088 # Hadoop Resource Manager
config.vm.network :forwarded_port, guest: 3306, host: 3307 # Expose the LMS database
end
config.ssh.insert_key = true