Commit 7801638c by Gabe Mulley

support building and running edx-analytics-pipeline in a docker container

parent 9851c3bd
# Image that installs edx-analytics-pipeline via the configuration repo's
# Ansible play and starts it through the generated devstack script.
#
# Build from the root of the configuration repository:
#
#   docker build -f docker/build/analytics-pipeline/Dockerfile .
#
# Using the repository root as the build context lets this Dockerfile refresh
# /edx/app/edx_ansible/edx_ansible with the currently checked-out
# configuration repo.

# NOTE(review): ":latest" is a moving tag, so rebuilds are not reproducible.
# Pin to a specific tag or digest once one is available for this base image.
FROM edxops/precise-common:latest

# MAINTAINER is deprecated; a label carries the same information.
LABEL maintainer="edxops"

# Git ref and GitHub owner of edx-analytics-pipeline to install; both are
# forwarded to the playbook as extra-vars below.
ENV ANALYTICS_PIPELINE_VERSION=master
ENV REPO_OWNER=edx

# Plain copy of the build context (no archive extraction or URL fetch is
# needed, so COPY is used rather than ADD).
COPY . /edx/app/edx_ansible/edx_ansible
WORKDIR /edx/app/edx_ansible/edx_ansible/docker/plays

# Variable overrides consumed by the playbook at build time; the file is
# placed at / so it can be referenced as @/ansible_overrides.yml.
COPY docker/build/analytics-pipeline/ansible_overrides.yml /

# Only the install-time tags are run here.
RUN sudo /edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook analytics_pipeline.yml \
    -c local -i '127.0.0.1,' \
    -t 'install,devstack:install' \
    --extra-vars="@/ansible_overrides.yml" \
    --extra-vars="ANALYTICS_PIPELINE_VERSION=$ANALYTICS_PIPELINE_VERSION" \
    --extra-vars="COMMON_GIT_PATH=$REPO_OWNER"

# NOTE(review): the container runs as root; presumably the start script needs
# it to launch system daemons and switch users - confirm before changing.
USER root
CMD ["/edx/app/analytics_pipeline/devstack.sh"]

# Declared after the playbook run so build-time writes to these paths are kept.
VOLUME ["/edx/app/analytics_pipeline/analytics_pipeline", "/data"]

# Hdfs ports
EXPOSE 50010 50020 50070 50075 50090
# Mapred ports
EXPOSE 19888
# Yarn ports
EXPOSE 8030 8031 8032 8033 8040 8042 8088
---
# Ansible variable overrides passed to the analytics_pipeline play via
# --extra-vars="@/ansible_overrides.yml".
#
# The database settings are nested mappings: the keys under each *_DATABASE
# variable must be indented, otherwise they become duplicate top-level keys
# and the *_DATABASE variables themselves end up null.
DOCKER_TLD: "edx"

# Admin credentials used when creating database users.
ANALYTICS_PIPELINE_DB_ADMIN_PASSWORD: password
HIVE_METASTORE_DB_ADMIN_PASSWORD: password

ANALYTICS_API_REPORTS_DB_NAME: reports

# NOTE(review): 172.17.0.1 is presumably the Docker bridge gateway (i.e. the
# MySQL port published on the host) - confirm for your Docker setup.
ANALYTICS_PIPELINE_OUTPUT_DATABASE:
  username: pipeline001
  password: password
  host: 172.17.0.1
  port: 3306

ANALYTICS_PIPELINE_INPUT_DATABASE:
  username: root
  password: password
  host: 172.17.0.1
  port: 3306

# This mapping uses "user"/"name" keys, unlike the "username" key used by the
# pipeline databases above.
HIVE_METASTORE_DATABASE:
  user: edx_hive
  password: edx
  name: edx_hive_metastore
  host: 172.17.0.1
  port: 3306
\ No newline at end of file
# Builds the analytics pipeline image from a specific branch of the
# configuration repository that is already checked out inside the base image.
FROM edxoperations/precise-common:v2

# MAINTAINER is deprecated; a label carries the same information.
LABEL maintainer="edxops"

# Branch of the configuration repo to build from (key=value form; the
# space-separated ENV form is legacy).
ENV CONFIG_BRANCH=hack2015/gabe/analytics-hadoop

USER docker
WORKDIR /edx/app/edx_ansible/edx_ansible

# Sync the checkout to the tip of $CONFIG_BRANCH. This is one logical step, so
# it is a single layer; any failing command still fails the build.
RUN sudo git fetch --all \
    && sudo git checkout "$CONFIG_BRANCH" \
    && sudo git reset --hard "origin/$CONFIG_BRANCH" \
    && sudo git pull

WORKDIR /edx/app/edx_ansible/edx_ansible/docker/plays
RUN sudo ansible-playbook analytics_pipeline.yml -c local

# NOTE(review): the container runs as root; presumably the start script needs
# it - confirm before changing.
USER root
CMD ["/edx/bin/analytics-pipeline-start.sh"]
#
# Single Docker Compose cluster that will eventually start
# all edX services in a single flock of coordinated containers
#
# This work is currently experimental and a number of services
# are missing entirely. Containers that are present will not
# currently work without manual steps. We are working on
# addressing that.
#
# When running compose you must pass in two environment variables
#
# DOCKER_EDX_ROOT which points to the directory into which you checkout
# your edX source code. For example, assuming the following directory
# structure under /home/me
#
# |-- edx-src
# | |-- course-discovery
# | |-- cs_comments_service
# | |-- edx_course_discovery
# | |-- edx-platform
# | |-- xqueue
# you would define DOCKER_EDX_ROOT="/home/me/edx-src"
#
# DOCKER_DATA_ROOT is the location on your host machine where Docker
# guests can access your local filesystem for storing persistent data
# files, say MongoDB or MySQL data files.
#
# Service definitions. Each service's settings must be nested under its name,
# and port mappings are quoted so YAML always reads HOST:CONTAINER as a string.

# MySQL server; the root password matches the admin passwords used by the
# analytics pipeline Ansible overrides.
db:
  container_name: db
  image: mysql:5.6
  environment:
    - MYSQL_ROOT_PASSWORD=password
    - MYSQL_USER=migrate
    - MYSQL_PASSWORD=password
  volumes:
    # NOTE(review): the official mysql image presumably keeps its data in
    # /var/lib/mysql, in which case this mount does not persist the
    # database - confirm and adjust the container path if so.
    - ${DOCKER_DATA_ROOT}/mysql/data:/data
  ports:
    - "3306:3306"

# Elasticsearch 1.5, published on non-default host ports (9201/9301).
es1_5:
  container_name: es1_5
  image: elasticsearch:1.5
  volumes:
    - ${DOCKER_DATA_ROOT}/elasticsearch/data:/data
  ports:
    - "9201:9200"
    - "9301:9300"

# Analytics pipeline. The pipeline source is mounted from the host checkout;
# Hadoop state and pipeline data live under DOCKER_DATA_ROOT.
analytics_pipeline:
  container_name: analytics_pipeline
  image: edxops/analytics-pipeline
  volumes:
    - ${DOCKER_EDX_ROOT}/edx-analytics-pipeline:/edx/app/analytics_pipeline/analytics_pipeline
    - ${DOCKER_DATA_ROOT}/analytics_pipeline/hadoop:/edx/var/hadoop
    - ${DOCKER_DATA_ROOT}/analytics_pipeline/data:/data
  environment:
    # Passed through from the invoking shell when set.
    - GITHUB_USER
  ports:
    # Only 50070 and 8088 are published; the remaining ports are kept here
    # commented out for reference.
    # HDFS
    # - 50010:50010
    # - 50020:50020
    - "50070:50070"
    # - 50075:50075
    # - 50090:50090
    # Mapred
    # - 19888:19888
    # Yarn
    # - 8030:8030
    # - 8031:8031
    # - 8032:8032
    # - 8033:8033
    # - 8040:8040
    # - 8042:8042
    - "8088:8088"

# LMS, with edx-platform mounted from the host checkout.
lms:
  container_name: lms
  image: edxops/edxapp:v2
  ports:
    - "8000:8000"
    - "18000:18000"
  volumes:
    - ${DOCKER_EDX_ROOT}/edx-platform:/edx/app/edxapp/edx-platform
......@@ -2,5 +2,8 @@
hosts: all
sudo: True
gather_facts: True
vars:
serial_count: 1
serial: "{{ serial_count }}"
roles:
- analytics_pipeline
......@@ -10,6 +10,26 @@
##
# Defaults for role analytics_pipeline
#
ANALYTICS_PIPELINE_GIT_IDENTITY: !!null
ANALYTICS_PIPELINE_VERSION: master
ANALYTICS_PIPELINE_WHEEL_URL: http://edx-wheelhouse.s3-website-us-east-1.amazonaws.com/Ubuntu/precise
ANALYTICS_PIPELINE_PYTHON_VERSION: 2.7
ANALYTICS_PIPELINE_PYTHON: "/usr/bin/python{{ ANALYTICS_PIPELINE_PYTHON_VERSION }}"
ANALYTICS_PIPELINE_REPOS:
- PROTOCOL: "{{ COMMON_GIT_PROTOCOL }}"
DOMAIN: "{{ COMMON_GIT_MIRROR }}"
PATH: "{{ COMMON_GIT_PATH }}"
REPO: edx-analytics-pipeline.git
VERSION: "{{ ANALYTICS_PIPELINE_VERSION }}"
DESTINATION: "{{ analytics_pipeline_code_dir }}"
SSH_KEY: "{{ ANALYTICS_PIPELINE_GIT_IDENTITY }}"
ANALYTICS_PIPELINE_SERVICE_CONFIG: {}
ANALYTICS_PIPELINE_DB_ADMIN_USER: 'root'
ANALYTICS_PIPELINE_DB_ADMIN_PASSWORD: ''
ANALYTICS_PIPELINE_OUTPUT_DATABASE_NAME: "{{ ANALYTICS_API_REPORTS_DB_NAME }}"
ANALYTICS_PIPELINE_OUTPUT_DATABASE:
......@@ -24,8 +44,8 @@ ANALYTICS_PIPELINE_INPUT_DATABASE:
host: localhost
port: 3306
ANALYTICS_PIPELINE_CONFIG_DIR: "{{ COMMON_CFG_DIR }}/edx-analytics-pipeline"
ANALYTICS_PIPELINE_HDFS_DATA_DIR: "hdfs://localhost:9000/data"
ANALYTICS_PIPELINE_LOCAL_DATA_DIR: /data
ANALYTICS_PIPELINE_LUIGI_HADOOP_VERSION: cdh4
ANALYTICS_PIPELINE_LUIGI_HADOOP_COMMAND: "{{ HADOOP_COMMON_HOME }}/bin/hadoop"
......@@ -41,6 +61,19 @@ analytics_pipeline_util_library:
repo: https://github.com/edx/edx-analytics-hadoop-util
version: master
# Internal role variables, all derived from the service name.
analytics_pipeline_service_name: analytics_pipeline
analytics_pipeline_venv_dir: "{{ analytics_pipeline_home }}/venvs/{{ analytics_pipeline_service_name }}"
analytics_pipeline_user: "{{ analytics_pipeline_service_name }}"
analytics_pipeline_home: "{{ COMMON_APP_DIR }}/{{ analytics_pipeline_service_name }}"
analytics_pipeline_code_dir: "{{ analytics_pipeline_home }}/{{ analytics_pipeline_service_name }}"
analytics_pipeline_log_dir: "{{ COMMON_LOG_DIR }}/{{ analytics_pipeline_service_name }}"
# The config dir previously pointed at COMMON_LOG_DIR, which made it the same
# path as analytics_pipeline_log_dir, so the database credential files were
# written into the log directory. Use the role's public config-dir setting
# instead, which is under COMMON_CFG_DIR by default and can be overridden.
analytics_pipeline_config_dir: "{{ ANALYTICS_PIPELINE_CONFIG_DIR }}"

# Environment passed to the "make install" / "make develop" tasks.
analytics_pipeline_install_env:
  WHEEL_URL: "{{ ANALYTICS_PIPELINE_WHEEL_URL }}"
  WHEEL_PYVER: "{{ ANALYTICS_PIPELINE_PYTHON_VERSION }}"
#
# OS packages
#
......
......@@ -15,3 +15,12 @@ dependencies:
- hadoop_master
- hive
- sqoop
- role: edx_service
edx_service_name: "{{ analytics_pipeline_service_name }}"
edx_service_config: "{{ ANALYTICS_PIPELINE_SERVICE_CONFIG }}"
edx_service_repos: "{{ ANALYTICS_PIPELINE_REPOS }}"
edx_service_user: "{{ analytics_pipeline_user }}"
edx_service_home: "{{ analytics_pipeline_home }}"
edx_service_packages:
debian: "{{ analytics_pipeline_debian_pkgs }}"
redhat: "{{ analytics_pipeline_redhat_pkgs }}"
\ No newline at end of file
......@@ -37,10 +37,73 @@
# ansible-playbook -i 'localhost,' ./analytics_pipeline.yml -e@/ansible/vars/deployment.yml -e@/ansible/vars/env-deployment.yml
#
- name: ensure system packages are installed
command: >
make system-requirements
chdir={{ analytics_pipeline_code_dir }}
sudo: True
tags:
- install
- install:system-requirements
- name: build virtualenv
command: "virtualenv --python={{ ANALYTICS_PIPELINE_PYTHON }} {{ analytics_pipeline_venv_dir }}"
args:
creates: "{{ analytics_pipeline_venv_dir }}/bin/pip"
sudo_user: "{{ analytics_pipeline_user }}"
tags:
- install
- install:system-requirements
- name: installed
shell: >
. {{ analytics_pipeline_venv_dir }}/bin/activate && make install
chdir={{ analytics_pipeline_code_dir }}
environment: analytics_pipeline_install_env
tags:
- install
- install:app-requirements
- name: converted to development mode
shell: >
. {{ analytics_pipeline_venv_dir }}/bin/activate && make uninstall && make develop
chdir={{ analytics_pipeline_code_dir }}
environment: analytics_pipeline_install_env
tags:
- devstack
- devstack:install
- name: development config installed
file: >
src={{ analytics_pipeline_code_dir }}/config/devstack.cfg
dest={{ analytics_pipeline_code_dir }}/override.cfg
owner={{ analytics_pipeline_user }} group={{ analytics_pipeline_user }}
state=link
tags:
- devstack
- devstack:install
- name: create log directory
file: >
path="{{ analytics_pipeline_log_dir }}"
mode=0777 owner={{ analytics_pipeline_user }} group={{ analytics_pipeline_user }}
state=directory
tags:
- install
- install:configuration
- name: logging configured
template: >
src=logging.cfg.j2
dest={{ analytics_pipeline_code_dir }}/logging.cfg
tags:
- install
- install:configuration
- name: create config directory
file: >
path="{{ ANALYTICS_PIPELINE_CONFIG_DIR }}"
mode=0755 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
path="{{ analytics_pipeline_config_dir }}"
mode=0755 owner={{ analytics_pipeline_user }} group={{ analytics_pipeline_user }}
state=directory
tags:
- install
......@@ -49,8 +112,8 @@
- name: store output database credentials for analytics pipeline
copy: >
content="{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE | to_json }}"
dest={{ COMMON_CFG_DIR }}/edx-analytics-pipeline/output.json
mode=0644 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
dest={{ analytics_pipeline_config_dir }}/output.json
mode=0644 owner={{ analytics_pipeline_user }} group={{ analytics_pipeline_user }}
tags:
- install
- install:configuration
......@@ -58,8 +121,8 @@
- name: store input database credentials for analytics pipeline
copy: >
content="{{ ANALYTICS_PIPELINE_INPUT_DATABASE | to_json }}"
dest={{ COMMON_CFG_DIR }}/edx-analytics-pipeline/input.json
mode=0644 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
dest={{ analytics_pipeline_config_dir }}/input.json
mode=0644 owner={{ analytics_pipeline_user }} group={{ analytics_pipeline_user }}
tags:
- install
- install:configuration
......@@ -117,43 +180,73 @@
- install
- install:app-requirements
- name: pipeline start script file installed
template: >
src=analytics-pipeline-start.sh.j2
dest={{ COMMON_BIN_DIR }}/analytics-pipeline-start.sh
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} mode=755
- name: env vars sourced in hadoop env
lineinfile: >
dest={{ hadoop_common_env }}
regexp="^. {{ analytics_pipeline_venv_dir }}/bin/activate"
line=". {{ analytics_pipeline_venv_dir }}/bin/activate"
state=present
tags:
- install
- install:configuration
#- name: ensure hdfs services are started
# shell: >
# . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && {{ HADOOP_COMMON_HOME }}/sbin/start-dfs.sh
#
#- name: ensure hdfs services are started
# shell: >
# . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && {{ HADOOP_COMMON_HOME }}/sbin/start-yarn.sh
#
#- name: ensure package dir exists in HDFS
# shell: >
# . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/
# sudo_user: "{{ hadoop_common_user }}"
#
#- name: ensure util library is in HDFS
# shell: >
# . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -put -f {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/
# sudo_user: "{{ hadoop_common_user }}"
#
#- name: ensure the data directory exists
# shell: >
# . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}
# sudo_user: "{{ hadoop_common_user }}"
#
#- name: ensure tracking log file can be read
# file: >
# path={{ COMMON_LOG_DIR }}/tracking/tracking.log
# mode=0644
# ignore_errors: yes
#
#- name: cron job syncs tracking log file to hdfs
# cron: >
# user={{ hadoop_common_user }}
# name="Sync tracking log to HDFS"
# job="{{ HADOOP_COMMON_HOME }}/bin/hdfs dfs -put -f {{ COMMON_LOG_DIR }}/tracking/tracking.log {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}/tracking.log"
- name: write devstack script
template:
src: "edx/app/analytics_pipeline/devstack.sh.j2"
dest: "{{ analytics_pipeline_home }}/devstack.sh"
owner: "{{ analytics_pipeline_user }}"
group: "{{ analytics_pipeline_user }}"
mode: 0744
tags:
- devstack
- devstack:install
- name: create data directory
file: >
path="{{ ANALYTICS_PIPELINE_LOCAL_DATA_DIR }}"
mode=0777 owner={{ analytics_pipeline_user }} group={{ analytics_pipeline_user }}
state=directory
tags:
- devstack
- devstack:install
- name: ensure tracking log file can be read
file: >
path={{ ANALYTICS_PIPELINE_LOCAL_DATA_DIR }}/tracking.log
mode=0644
ignore_errors: yes
tags:
- devstack
- devstack:install
- name: cron job syncs tracking log file to hdfs
cron: >
user={{ hadoop_common_user }}
name="Sync tracking log to HDFS"
job="{{ HADOOP_COMMON_HOME }}/bin/hdfs dfs -put -f {{ ANALYTICS_PIPELINE_LOCAL_DATA_DIR }}/tracking.log {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}/tracking.log"
tags:
- devstack
- devstack:install
- name: wait for database
wait_for:
host: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.host }}"
port: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.port }}"
delay: 2
tags:
- devstack
- devstack:migrate
- name: create database users
mysql_user:
login_host: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.host }}"
login_port: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.port }}"
login_user: "{{ ANALYTICS_PIPELINE_DB_ADMIN_USER }}"
login_password: "{{ ANALYTICS_PIPELINE_DB_ADMIN_PASSWORD }}"
name: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.username }}"
host: "%"
password: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE.password }}"
priv: "{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE_NAME }}.*:ALL"
tags:
- devstack
- devstack:migrate
#!/bin/bash
#
# Container start script (Jinja2 template): optionally authorizes a GitHub
# user's SSH keys for the docker account, starts sshd, HDFS and YARN, prepares
# the HDFS directories used by the pipeline, then follows the Hadoop logs so
# the script keeps running in the foreground.

if [ -n "$GITHUB_USER" ]
then
    sudo usermod docker -s /bin/bash
    su docker -c "mkdir -p /home/docker/.ssh"
    # Blank separator line before the appended keys.
    echo >> /home/docker/.ssh/authorized_keys
    # --fail stops an HTTP error page (e.g. unknown user) from being appended
    # to authorized_keys; -sS hides the progress meter but still reports errors.
    curl -fsS "https://github.com/${GITHUB_USER}.keys" >> /home/docker/.ssh/authorized_keys
fi

# remote-task will be running ansible, and this directory will be owned by root, just clean it up.
sudo rm -rf /home/docker/.ansible

/usr/sbin/sshd

HADOOP_HOME="{{ HADOOP_COMMON_HOME }}"

su hadoop -c "$HADOOP_HOME/sbin/start-dfs.sh"
su hadoop -c "$HADOOP_HOME/sbin/start-yarn.sh"

# Upload the hadoop util jar and create the pipeline data directory in HDFS.
su hadoop -c "$HADOOP_HOME/bin/hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/"
su hadoop -c "$HADOOP_HOME/bin/hdfs dfs -put -f {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/"
su hadoop -c "$HADOOP_HOME/bin/hdfs dfs -mkdir -p {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}"

# Keep the script in the foreground; the glob stays outside the quotes so it
# still expands to every log file.
tail -f "$HADOOP_HOME"/logs/*.log
\ No newline at end of file
#!/usr/bin/env bash
# {{ ansible_managed }}
#
# Devstack start script (Jinja2 template). In order it:
#   1. starts sshd,
#   2. formats the HDFS namenode if no VERSION file exists yet,
#   3. starts HDFS and YARN as the hadoop user,
#   4. creates the HDFS directories and uploads the hadoop util jar,
#   5. optionally creates a login account for $GITHUB_USER,
#   6. re-runs the analytics_pipeline play with the devstack tags,
#   7. starts cron and follows the Hadoop logs in the foreground.
#
# Variable and template expansions used as command arguments are quoted so a
# value containing whitespace or glob characters is passed as a single word.

echo "[sshd] starting"
/usr/sbin/sshd
echo "[sshd] started"

HADOOP_HOME="{{ HADOOP_COMMON_HOME }}"
HADOOP_USER="{{ hadoop_common_user }}"

# Format only when the namenode has never been initialized, so existing data
# in the Hadoop data directory is kept.
if [ ! -f "{{ HADOOP_COMMON_DATA }}/namenode/current/VERSION" ]
then
    echo "[HDFS] formatting"
    chmod 777 "{{ HADOOP_COMMON_DATA }}"
    su "${HADOOP_USER}" -c "$HADOOP_HOME/bin/hdfs namenode -format"
    echo "[HDFS] formatted"
fi

echo "[HDFS] starting"
su "${HADOOP_USER}" -c "$HADOOP_HOME/sbin/start-dfs.sh"
echo "[HDFS] started"

echo "[yarn] starting"
su "${HADOOP_USER}" -c "$HADOOP_HOME/sbin/start-yarn.sh"
echo "[yarn] started"

echo -n "Initializing HDFS files and directories... "
su "${HADOOP_USER}" -c "$HADOOP_HOME/bin/hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/"
su "${HADOOP_USER}" -c "$HADOOP_HOME/bin/hdfs dfs -put -f {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/"
su "${HADOOP_USER}" -c "$HADOOP_HOME/bin/hdfs dfs -mkdir -p {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}"
echo "done"

if [ -n "$GITHUB_USER" ]
then
    echo "Creating user account for ${GITHUB_USER}"
    cd /edx/app/edx_ansible/edx_ansible/playbooks/edx-east
    /edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook create_user.yml -c local -i '127.0.0.1,' \
        --extra-vars="user=$GITHUB_USER" \
        --extra-vars="give_sudo=True"
    echo "Creating user account for ${GITHUB_USER}... done"
fi

echo "Configuring development environment"
cd /edx/app/edx_ansible/edx_ansible/docker/plays
/edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook analytics_pipeline.yml -c local -i '127.0.0.1,' \
    -t 'install:app-requirements,devstack' \
    --extra-vars="migrate_db=yes" \
    --extra-vars="@/ansible_overrides.yml"
echo "Configuring development environment... done"

echo "[cron] starting"
/usr/sbin/cron
echo "[cron] started"

# Keep the script in the foreground; the glob stays outside the quotes so it
# still expands to every log file.
tail -f "$HADOOP_HOME"/logs/*.log
#
# Logging configuration for analytics tasks (Jinja2 template; the layout
# follows Python's logging fileConfig format).
#
# Three loggers are configured (root, edx.analytics, luigi-interface), with
# two handlers (stdout and a local log file) and one shared formatter.
#
[loggers]
keys=root,edx_analytics,luigi_interface
[handlers]
keys=stdoutHandler,localHandler
[formatters]
keys=standard
[logger_root]
# Everything not claimed by a more specific logger: INFO and above, written
# to the local log file only.
level=INFO
handlers=localHandler
[logger_edx_analytics]
# Records from the "edx.analytics" logger go to both stdout and the local
# log file. The logger accepts DEBUG, but stdoutHandler itself is limited to
# INFO, so DEBUG records reach only the file. propagate=0 keeps records from
# also being passed up to the root logger's handlers.
level=DEBUG
handlers=stdoutHandler,localHandler
qualname=edx.analytics
propagate=0
[logger_luigi_interface]
# Records from the "luigi-interface" logger are handled the same way as
# edx.analytics: stdout (INFO and above) plus the local log file.
level=DEBUG
handlers=stdoutHandler,localHandler
qualname=luigi-interface
propagate=0
[handler_stdoutHandler]
# Define as in luigi/interface.py. Writes INFO and above to sys.stdout.
class=StreamHandler
level=INFO
formatter=standard
args=(sys.stdout,)
[handler_localHandler]
# Define as in edx-platform/common/lib/logsettings.py (for dev logging, not syslog).
# The args are (file path, file mode); mode 'w' means the file is opened for
# writing from scratch rather than appended to.
# NOTE(review): no maxBytes/backupCount is passed, so presumably the handler
# never actually rotates - confirm against the RotatingFileHandler defaults.
class=logging.handlers.RotatingFileHandler
formatter=standard
args=('{{ analytics_pipeline_log_dir }}/{{ analytics_pipeline_service_name }}.log', 'w')
[formatter_standard]
# Define as in edx-platform/common/lib/logsettings.py (for dev logging, not syslog).
# Fields: timestamp, level, process id, logger name, source file:line, message.
format=%(asctime)s %(levelname)s %(process)d [%(name)s] %(filename)s:%(lineno)d - %(message)s
......@@ -27,9 +27,15 @@
pkg={{ item }}
state=present
with_items: hadoop_common_debian_pkgs
tags:
- install
- install:system-requirements
- name: ensure group exists
group: name={{ hadoop_common_group }} system=yes state=present
tags:
- install
- install:system-requirements
- name: ensure user exists
user: >
......@@ -38,28 +44,43 @@
home={{ HADOOP_COMMON_USER_HOME }} createhome=yes
shell=/bin/bash system=yes generate_ssh_key=yes
state=present
tags:
- install
- install:system-requirements
- name: own key authorized
file: >
src={{ HADOOP_COMMON_USER_HOME }}/.ssh/id_rsa.pub
dest={{ HADOOP_COMMON_USER_HOME }}/.ssh/authorized_keys
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
tags:
- install
- install:system-requirements
- name: ssh configured
template: >
src=hadoop_user_ssh_config.j2
dest={{ HADOOP_COMMON_USER_HOME }}/.ssh/config
mode=0600 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
tags:
- install
- install:system-requirements
- name: ensure user is in sudoers
lineinfile: >
dest=/etc/sudoers state=present
regexp='^%hadoop ALL\=' line='%hadoop ALL=(ALL) NOPASSWD:ALL'
validate='visudo -cf %s'
tags:
- install
- install:system-requirements
- name: check if downloaded and extracted
stat: path={{ HADOOP_COMMON_HOME }}
register: extracted_hadoop_dir
tags:
- install
- install:system-requirements
- name: distribution downloaded
get_url: >
......@@ -67,18 +88,27 @@
sha256sum={{ hadoop_common_dist.sha256sum }}
dest={{ hadoop_common_temporary_dir }}
when: not extracted_hadoop_dir.stat.exists
tags:
- install
- install:system-requirements
- name: distribution extracted
shell: >
chdir={{ HADOOP_COMMON_USER_HOME }}
tar -xzf {{ hadoop_common_temporary_dir }}/{{ hadoop_common_dist.filename }} && chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} hadoop-{{ HADOOP_COMMON_VERSION }}
when: not extracted_hadoop_dir.stat.exists
tags:
- install
- install:system-requirements
- name: versioned directory symlink created
file: >
src={{ HADOOP_COMMON_USER_HOME }}/hadoop-{{ HADOOP_COMMON_VERSION }}
dest={{ HADOOP_COMMON_HOME }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
tags:
- install
- install:system-requirements
- name: configuration installed
template: >
......@@ -91,6 +121,9 @@
- core-site.xml
- hdfs-site.xml
- yarn-site.xml
tags:
- install
- install:system-requirements
- name: upstart scripts installed
template: >
......@@ -100,11 +133,17 @@
with_items:
- hdfs.conf
- yarn.conf
tags:
- install
- install:system-requirements
- name: hadoop env file exists
file: >
path={{ hadoop_common_env }} state=touch
owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
tags:
- install
- install:system-requirements
- name: env vars sourced in bashrc
lineinfile: >
......@@ -113,15 +152,24 @@
regexp="^. {{ hadoop_common_env }}"
line=". {{ hadoop_common_env }}"
insertbefore=BOF
tags:
- install
- install:system-requirements
- name: env vars sourced in hadoop env
lineinfile: >
dest={{ hadoop_common_env }} state=present
regexp="^. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh" line=". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh"
tags:
- install
- install:system-requirements
- name: check if native libraries need to be built
stat: path={{ HADOOP_COMMON_USER_HOME }}/.native_libs_built
register: native_libs_built
tags:
- install
- install:system-requirements
- name: protobuf downloaded
get_url: >
......@@ -129,18 +177,27 @@
sha256sum={{ hadoop_common_protobuf_dist.sha256sum }}
dest={{ hadoop_common_temporary_dir }}
when: not native_libs_built.stat.exists
tags:
- install
- install:system-requirements
- name: protobuf extracted
shell: >
chdir={{ hadoop_common_temporary_dir }}
tar -xzf {{ hadoop_common_protobuf_dist.filename }}
when: not native_libs_built.stat.exists
tags:
- install
- install:system-requirements
- name: protobuf installed
shell: >
chdir={{ hadoop_common_temporary_dir }}/protobuf-{{ HADOOP_COMMON_PROTOBUF_VERSION }}
./configure --prefix=/usr/local && make && make install
when: not native_libs_built.stat.exists
tags:
- install
- install:system-requirements
- name: native lib source downloaded
get_url: >
......@@ -148,12 +205,18 @@
sha256sum={{ hadoop_common_native_dist.sha256sum }}
dest={{ hadoop_common_temporary_dir }}/{{ hadoop_common_native_dist.filename }}
when: not native_libs_built.stat.exists
tags:
- install
- install:system-requirements
- name: native lib source extracted
shell: >
chdir={{ hadoop_common_temporary_dir }}
tar -xzf {{ hadoop_common_native_dist.filename }}
when: not native_libs_built.stat.exists
tags:
- install
- install:system-requirements
- name: native lib built
shell: >
......@@ -162,6 +225,9 @@
environment:
LD_LIBRARY_PATH: /usr/local/lib
when: not native_libs_built.stat.exists
tags:
- install
- install:system-requirements
- name: old native libs renamed
shell: >
......@@ -171,6 +237,9 @@
- { name: libhadoop.so, new_name: libhadoop32.so }
- { name: libhadoop.so.1.0.0, new_name: libhadoop32.so.1.0.0 }
when: not native_libs_built.stat.exists
tags:
- install
- install:system-requirements
- name: new native libs installed
shell: >
......@@ -181,24 +250,39 @@
- libhadoop.so
- libhadoop.so.1.0.0
when: not native_libs_built.stat.exists
tags:
- install
- install:system-requirements
- name: native lib marker touched
file: >
path={{ HADOOP_COMMON_USER_HOME }}/.native_libs_built
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=touch
when: not native_libs_built.stat.exists
tags:
- install
- install:system-requirements
- name: service directory exists
file: >
path={{ HADOOP_COMMON_SERVICES_DIR }}
mode=0750 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
state=directory
tags:
- install
- install:system-requirements
- name: ssh pid directory exists
file: >
path=/var/run/sshd
mode=0755 owner=root group=root
state=directory
tags:
- install
- install:system-requirements
- name: sshd service started
shell: /usr/sbin/sshd
tags:
- install
- install:system-requirements
......@@ -28,13 +28,22 @@
with_items:
- namenode
- datanode
tags:
- install
- install:system-requirements
- name: check if namenode is formatted
stat: path={{ HADOOP_COMMON_DATA }}/namenode/current/VERSION
register: namenode_version_file
tags:
- install
- install:system-requirements
- name: namenode formatted
shell: >
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs namenode -format
sudo_user: "{{ hadoop_common_user }}"
when: not namenode_version_file.stat.exists
tags:
- install
- install:system-requirements
......@@ -17,6 +17,9 @@ HIVE_HOME: "{{ HADOOP_COMMON_USER_HOME }}/hive"
HIVE_CONF: "{{ HIVE_HOME }}/conf"
HIVE_LIB: "{{ HIVE_HOME }}/lib"
HIVE_METASTORE_DB_ADMIN_USER: root
HIVE_METASTORE_DB_ADMIN_PASSWORD: ''
HIVE_METASTORE_DATABASE_NAME: edx_hive_metastore
HIVE_METASTORE_DATABASE:
user: edx_hive
......
......@@ -23,6 +23,9 @@
- name: check if downloaded and extracted
stat: path={{ HIVE_HOME }}
register: extracted_dir
tags:
- install
- install:system-requirements
- name: distribution downloaded
get_url: >
......@@ -30,18 +33,27 @@
sha256sum={{ hive_dist.sha256sum }}
dest={{ hive_temporary_dir }}
when: not extracted_dir.stat.exists
tags:
- install
- install:system-requirements
- name: distribution extracted
shell: >
chdir={{ HADOOP_COMMON_USER_HOME }}
tar -xzf {{ hive_temporary_dir }}/{{ hive_dist.filename }} && chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} hive-{{ HIVE_VERSION }}-bin
when: not extracted_dir.stat.exists
tags:
- install
- install:system-requirements
- name: versioned directory symlink created
file: >
src={{ HADOOP_COMMON_USER_HOME }}/hive-{{ HIVE_VERSION }}-bin
dest={{ HIVE_HOME }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
tags:
- install
- install:system-requirements
- name: hive mysql connector distribution downloaded
get_url: >
......@@ -49,17 +61,26 @@
sha256sum={{ hive_mysql_connector_dist.sha256sum }}
dest={{ hive_temporary_dir }}
when: not extracted_dir.stat.exists
tags:
- install
- install:system-requirements
- name: hive mysql connector distribution extracted
shell: >
chdir={{ hive_temporary_dir }}
tar -xzf {{ hive_temporary_dir }}/{{ hive_mysql_connector_dist.filename }}
when: not extracted_dir.stat.exists
tags:
- install
- install:system-requirements
- name: hive lib exists
file: >
path={{ HIVE_LIB }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=directory
tags:
- install
- install:system-requirements
- name: hive mysql connector installed
shell: >
......@@ -67,6 +88,9 @@
cp mysql-connector-java-{{ HIVE_MYSQL_CONNECTOR_VERSION }}-bin.jar {{ HIVE_LIB }} &&
chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ HIVE_LIB }}/mysql-connector-java-{{ HIVE_MYSQL_CONNECTOR_VERSION }}-bin.jar
when: not extracted_dir.stat.exists
tags:
- install
- install:system-requirements
- name: configuration installed
template: >
......@@ -75,8 +99,53 @@
mode=0640 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
with_items:
- hive-env.sh
tags:
- install
- install:system-requirements
- name: env vars sourced in hadoop env
lineinfile: >
dest={{ hadoop_common_env }} state=present
regexp="^. {{ HIVE_CONF }}/hive-env.sh" line=". {{ HIVE_CONF }}/hive-env.sh"
tags:
- install
- install:system-requirements
- name: wait for database
wait_for:
host: "{{ HIVE_METASTORE_DATABASE.host }}"
port: "{{ HIVE_METASTORE_DATABASE.port }}"
delay: 2
when: HIVE_METASTORE_DATABASE is defined
tags:
- devstack
- devstack:migrate
- name: create database users
mysql_user:
login_host: "{{ HIVE_METASTORE_DATABASE.host }}"
login_port: "{{ HIVE_METASTORE_DATABASE.port }}"
login_user: "{{ HIVE_METASTORE_DB_ADMIN_USER }}"
login_password: "{{ HIVE_METASTORE_DB_ADMIN_PASSWORD }}"
name: "{{ HIVE_METASTORE_DATABASE.user }}"
host: "%"
password: "{{ HIVE_METASTORE_DATABASE.password }}"
priv: "{{ HIVE_METASTORE_DATABASE.name }}.*:ALL"
when: HIVE_METASTORE_DATABASE is defined
tags:
- devstack
- devstack:migrate
- name: create a database for the hive metastore
mysql_db:
login_host: "{{ HIVE_METASTORE_DATABASE.host }}"
login_port: "{{ HIVE_METASTORE_DATABASE.port }}"
login_user: "{{ HIVE_METASTORE_DB_ADMIN_USER }}"
login_password: "{{ HIVE_METASTORE_DB_ADMIN_PASSWORD }}"
db: "{{ HIVE_METASTORE_DATABASE.name }}"
state: present
encoding: latin1
when: HIVE_METASTORE_DATABASE is defined
tags:
- devstack
- devstack:migrate
......@@ -18,6 +18,9 @@
executable=/bin/bash
chdir=/var/tmp
creates=/var/tmp/{{ oraclejdk_file }}
tags:
- install
- install:system-requirements
- name: create jvm dir
file: >
......@@ -25,15 +28,24 @@
state=directory
owner=root
group=root
tags:
- install
- install:system-requirements
- name: untar Oracle Java
shell: >
tar -C /usr/lib/jvm -zxvf /var/tmp/{{ oraclejdk_file }}
executable=/bin/bash
creates=/usr/lib/jvm/{{ oraclejdk_base }}
tags:
- install
- install:system-requirements
- name: create symlink expected by elasticsearch
file: src=/usr/lib/jvm/{{ oraclejdk_base }} dest={{ oraclejdk_link }} state=link force=yes
tags:
- install
- install:system-requirements
- name: update alternatives java
alternatives: >
......@@ -44,6 +56,12 @@
- java
- javac
- javaws
tags:
- install
- install:system-requirements
- name: add JAVA_HOME for Oracle Java
template: src=java.sh.j2 dest=/etc/profile.d/java.sh owner=root group=root mode=0755
tags:
- install
- install:system-requirements
......@@ -23,6 +23,9 @@
- name: check if downloaded and extracted
stat: path={{ SQOOP_LIB }}/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}-bin.jar
register: installed
tags:
- install
- install:system-requirements
- name: distribution downloaded
get_url: >
......@@ -30,18 +33,27 @@
sha256sum={{ sqoop_dist.sha256sum }}
dest={{ sqoop_temporary_dir }}
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: distribution extracted
shell: >
chdir={{ HADOOP_COMMON_USER_HOME }}
tar -xzf {{ sqoop_temporary_dir }}/{{ sqoop_dist.filename }} && chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ sqoop_base_filename }}
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: versioned directory symlink created
file: >
src={{ HADOOP_COMMON_USER_HOME }}/{{ sqoop_base_filename }}
dest={{ SQOOP_HOME }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
tags:
- install
- install:system-requirements
- name: mysql connector distribution downloaded
get_url: >
......@@ -49,17 +61,26 @@
sha256sum={{ sqoop_mysql_connector_dist.sha256sum }}
dest={{ sqoop_temporary_dir }}
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: mysql connector distribution extracted
shell: >
chdir={{ sqoop_temporary_dir }}
tar -xzf {{ sqoop_temporary_dir }}/{{ sqoop_mysql_connector_dist.filename }}
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: sqoop lib exists
file: >
path={{ SQOOP_LIB }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=directory
tags:
- install
- install:system-requirements
- name: mysql connector installed
shell: >
......@@ -67,6 +88,9 @@
cp mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}-bin.jar {{ SQOOP_LIB }} &&
chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ SQOOP_LIB }}/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}-bin.jar
when: not installed.stat.exists
tags:
- install
- install:system-requirements
- name: configuration installed
template: >
......@@ -75,8 +99,14 @@
mode=0640 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
with_items:
- sqoop-env.sh
tags:
- install
- install:system-requirements
- name: env vars sourced in hadoop env
lineinfile: >
dest={{ hadoop_common_env }} state=present
regexp="^. {{ SQOOP_CONF }}/sqoop-env.sh" line=". {{ SQOOP_CONF }}/sqoop-env.sh"
tags:
- install
- install:system-requirements
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment