Commit 9c8fce27 by Gabe Mulley

Merge pull request #2176 from edx/gabe/analytics-devstack

Install analytics stack dependencies
parents de9545d0 18f6e3f1
@@ -6,9 +6,11 @@
*~
.#*
vagrant/*/devstack/edx-platform
vagrant/*/devstack/cs_comments_service
vagrant/*/devstack/ora
vagrant/*/*/edx-platform
vagrant/*/*/cs_comments_service
vagrant/*/*/ora
vagrant/*/*/analytics_api
vagrant/*/*/insights
vagrant_ansible_inventory_default
### OS X artifacts
......
- name: Deploy all analytics services to a single node
hosts: all
sudo: True
gather_facts: True
vars:
migrate_db: "yes"
disable_edx_services: true
EDXAPP_LMS_BASE: 127.0.0.1:8000
EDXAPP_OAUTH_ENFORCE_SECURE: false
EDXAPP_LMS_BASE_SCHEME: http
roles:
- edxlocal
- analytics_api
- analytics_pipeline
- insights
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role analytics_pipeline
#
ANALYTICS_PIPELINE_OUTPUT_DATABASE:
username: pipeline001
password: password
host: localhost
port: 3306
ANALYTICS_PIPELINE_INPUT_DATABASE:
username: "{{ COMMON_MYSQL_READ_ONLY_USER }}"
password: "{{ COMMON_MYSQL_READ_ONLY_PASS }}"
host: localhost
port: 3306
ANALYTICS_PIPELINE_CONFIG_DIR: "{{ COMMON_CFG_DIR }}/edx-analytics-pipeline"
ANALYTICS_PIPELINE_HDFS_DATA_DIR: "hdfs://localhost:9000/data"
ANALYTICS_PIPELINE_LUIGI_HADOOP_VERSION: cdh4
ANALYTICS_PIPELINE_LUIGI_HADOOP_COMMAND: "{{ HADOOP_COMMON_HOME }}/bin/hadoop"
ANALYTICS_PIPELINE_LUIGI_HADOOP_STREAMING_JAR: "{{ HADOOP_COMMON_HOME }}/share/hadoop/tools/lib/hadoop-streaming-{{ HADOOP_COMMON_VERSION }}.jar"
#
# vars are namespaced with the module name.
#
analytics_pipeline_role_name: analytics_pipeline
analytics_pipeline_util_library:
path: /var/tmp/edx-analytics-hadoop-util
repo: https://github.com/edx/edx-analytics-hadoop-util
version: master
#
# OS packages
#
analytics_pipeline_debian_pkgs: []
analytics_pipeline_redhat_pkgs: []
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Role includes for role analytics_pipeline
dependencies:
- common
- hadoop_master
- hive
- sqoop
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role analytics_pipeline
#
# Overview:
#
# Prepare the machine to run the edX Analytics Data Pipeline. The pipeline currently "installs itself"
# via an Ansible playbook that is not included in the edx/configuration repo. However, in order to
# run the pipeline in a devstack environment, some configuration must be performed up front. In a
# production environment, many of these config files are stored on S3.
#
# Dependencies:
#
# common: some of the variables from the common role are used here
# hadoop_master: ensures hadoop services are installed
# hive: the pipeline makes extensive use of hive, so it needs to be installed as well
# sqoop: like hive, this tool is used extensively by the pipeline
#
# Example play:
#
# - name: Deploy all dependencies of edx-analytics-pipeline to the node
# hosts: all
# sudo: True
# gather_facts: True
# roles:
# - analytics_pipeline
#
# ansible-playbook -i 'localhost,' ./analytics_pipeline.yml -e@/ansible/vars/deployment.yml -e@/ansible/vars/env-deployment.yml
#
- name: create config directory
file: >
path="{{ ANALYTICS_PIPELINE_CONFIG_DIR }}"
mode=0755 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
state=directory
- name: store output database credentials for analytics pipeline
copy: >
content="{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE | to_json }}"
dest={{ COMMON_CFG_DIR }}/edx-analytics-pipeline/output.json
mode=0644 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
- name: store input database credentials for analytics pipeline
copy: >
content="{{ ANALYTICS_PIPELINE_INPUT_DATABASE | to_json }}"
dest={{ COMMON_CFG_DIR }}/edx-analytics-pipeline/input.json
mode=0644 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
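# For reference, with the role defaults above, output.json would contain a JSON
# document like the following sketch (key order from `to_json` may differ):
#
#   {"username": "pipeline001", "password": "password", "host": "localhost", "port": 3306}
#
# input.json has the same shape, with the read-only credentials substituted in.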
- name: luigi configuration directory created
file: >
path=/etc/luigi
state=directory
mode=0755
- name: luigi configuration file written
template: >
src=client.cfg.j2
dest=/etc/luigi/client.cfg
mode=0644
- name: util library source checked out
git: >
dest={{ analytics_pipeline_util_library.path }} repo={{ analytics_pipeline_util_library.repo }}
version={{ analytics_pipeline_util_library.version }}
- name: lib directory created
file: >
path={{ HADOOP_COMMON_USER_HOME }}/lib
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=directory
- name: check if the util library needs to be built
stat: >
path={{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar
register: util_lib_built
- name: util library built
shell: >
chdir={{ analytics_pipeline_util_library.path }}
{{ hadoop_common_java_home }}/bin/javac -cp `{{ HADOOP_COMMON_HOME }}/bin/hadoop classpath` org/edx/hadoop/input/ManifestTextInputFormat.java &&
{{ hadoop_common_java_home }}/bin/jar cf {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar org/edx/hadoop/input/ManifestTextInputFormat.class &&
chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar
when: not util_lib_built.stat.exists
- name: ensure hdfs services are started
service: >
name=hdfs
state=started
- name: ensure map reduce services are started
service: >
name=yarn
state=started
- name: ensure package dir exists in HDFS
shell: >
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/
sudo_user: "{{ hadoop_common_user }}"
- name: ensure util library is in HDFS
shell: >
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -put -f {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/
sudo_user: "{{ hadoop_common_user }}"
- name: ensure the data directory exists
shell: >
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}
sudo_user: "{{ hadoop_common_user }}"
- name: ensure tracking log file can be read
file: >
path={{ COMMON_LOG_DIR }}/tracking/tracking.log
mode=0644
ignore_errors: yes
- name: cron job syncs tracking log file to hdfs
cron: >
user={{ hadoop_common_user }}
name="Sync tracking log to HDFS"
job="{{ HADOOP_COMMON_HOME }}/bin/hdfs dfs -put -f {{ COMMON_LOG_DIR }}/tracking/tracking.log {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}/tracking.log"
[hadoop]
version: {{ ANALYTICS_PIPELINE_LUIGI_HADOOP_VERSION }}
command: {{ ANALYTICS_PIPELINE_LUIGI_HADOOP_COMMAND }}
streaming-jar: {{ ANALYTICS_PIPELINE_LUIGI_HADOOP_STREAMING_JAR }}
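# With the analytics_pipeline defaults, and assuming COMMON_APP_DIR is /edx/app,
# this template would render /etc/luigi/client.cfg roughly as:
#
#   [hadoop]
#   version: cdh4
#   command: /edx/app/hadoop/hadoop/bin/hadoop
#   streaming-jar: /edx/app/hadoop/hadoop/share/hadoop/tools/lib/hadoop-streaming-2.3.0.jar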
@@ -696,7 +696,7 @@ generic_env_config: &edxapp_generic_env
CROSS_DOMAIN_CSRF_COOKIE_NAME: "{{ EDXAPP_CROSS_DOMAIN_CSRF_COOKIE_NAME }}"
VIDEO_UPLOAD_PIPELINE: "{{ EDXAPP_VIDEO_UPLOAD_PIPELINE }}"
DEPRECATED_ADVANCED_COMPONENT_TYPES: "{{ EDXAPP_DEPRECATED_ADVANCED_COMPONENT_TYPES }}"
OAUTH_OIDC_ISSUER: "https://{{ EDXAPP_LMS_BASE }}/oauth2"
OAUTH_OIDC_ISSUER: "{{ EDXAPP_LMS_BASE_SCHEME | default('https') }}://{{ EDXAPP_LMS_BASE }}/oauth2"
XBLOCK_FS_STORAGE_BUCKET: "{{ EDXAPP_XBLOCK_FS_STORAGE_BUCKET }}"
XBLOCK_FS_STORAGE_PREFIX: "{{ EDXAPP_XBLOCK_FS_STORAGE_PREFIX }}"
ANALYTICS_DATA_URL: "{{ EDXAPP_ANALYTICS_DATA_URL }}"
@@ -734,7 +734,7 @@ generic_env_config: &edxapp_generic_env
WIKI_ENABLED: true
SYSLOG_SERVER: "{{ EDXAPP_SYSLOG_SERVER }}"
LOG_DIR: "{{ COMMON_DATA_DIR }}/logs/edx"
JWT_ISSUER: "https://{{ EDXAPP_LMS_BASE }}/oauth2"
JWT_ISSUER: "{{ EDXAPP_LMS_BASE_SCHEME | default('https') }}://{{ EDXAPP_LMS_BASE }}/oauth2"
JWT_EXPIRATION: '{{ EDXAPP_JWT_EXPIRATION }}'
#must end in slash (https://docs.djangoproject.com/en/1.4/ref/settings/#media-url)
......
@@ -102,6 +102,13 @@
encoding=utf8
when: INSIGHTS_DATABASES is defined
- name: create database user for insights
mysql_user: >
name="{{ INSIGHTS_DATABASES.default.USER }}"
password="{{ INSIGHTS_DATABASES.default.PASSWORD }}"
priv='{{ INSIGHTS_DATABASES.default.NAME }}.*:ALL'
when: INSIGHTS_DATABASES is defined
- name: create database for ecommerce
mysql_db: >
db="{{ ECOMMERCE_DEFAULT_DB_NAME }}"
@@ -130,6 +137,27 @@
priv='{{ ANALYTICS_API_DATABASES.reports.NAME }}.*:SELECT'
when: ANALYTICS_API_SERVICE_CONFIG is defined
- name: create pipeline user for the analytics reports database
mysql_user: >
name="pipeline001"
password="password"
priv='{{ ANALYTICS_API_DATABASES.reports.NAME }}.*:ALL'
when: ANALYTICS_API_SERVICE_CONFIG is defined
- name: create a database for the hive metastore
mysql_db: >
db={{ HIVE_METASTORE_DATABASE.name }}
state=present
encoding=latin1
when: HIVE_METASTORE_DATABASE is defined
- name: setup the hive metastore db user
mysql_user: >
name={{ HIVE_METASTORE_DATABASE.user }}
password={{ HIVE_METASTORE_DATABASE.password }}
priv='{{ HIVE_METASTORE_DATABASE.name }}.*:ALL'
when: HIVE_METASTORE_DATABASE is defined
- name: setup the edx-notes-api db user
mysql_user: >
name={{ EDX_NOTES_API_MYSQL_DB_USER }}
......
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role hadoop_common
#
HADOOP_COMMON_VERSION: 2.3.0
HADOOP_COMMON_USER_HOME: "{{ COMMON_APP_DIR }}/hadoop"
HADOOP_COMMON_HOME: "{{ HADOOP_COMMON_USER_HOME }}/hadoop"
HADOOP_COMMON_DATA: "{{ COMMON_DATA_DIR }}/hadoop"
# These are not the standard Linux locations for logs and config, but they are where Hadoop expects to find them.
HADOOP_COMMON_LOGS: "{{ HADOOP_COMMON_HOME }}/logs"
HADOOP_COMMON_CONF_DIR: "{{ HADOOP_COMMON_HOME }}/etc/hadoop"
HADOOP_COMMON_PROTOBUF_VERSION: 2.5.0
HADOOP_COMMON_SERVICES_DIR: "{{ HADOOP_COMMON_USER_HOME }}/services.d"
HADOOP_COMMON_SERVICE_HEAP_MAX: 256
HADOOP_COMMON_TOOL_HEAP_MAX: 128
hadoop_common_role_name: hadoop_common
hadoop_common_user: hadoop
hadoop_common_group: hadoop
hadoop_common_temporary_dir: /var/tmp
hadoop_common_dist:
filename: "hadoop-{{ HADOOP_COMMON_VERSION }}.tar.gz"
url: "https://archive.apache.org/dist/hadoop/core/hadoop-{{ HADOOP_COMMON_VERSION }}/hadoop-{{ HADOOP_COMMON_VERSION }}.tar.gz"
sha256sum: 3fad58b525a47cf74458d0996564a2151c5a28baa1f92383e7932774deef5023
hadoop_common_protobuf_dist:
filename: "protobuf-{{ HADOOP_COMMON_PROTOBUF_VERSION }}.tar.gz"
url: "https://protobuf.googlecode.com/files/protobuf-{{ HADOOP_COMMON_PROTOBUF_VERSION }}.tar.gz"
sha256sum: c55aa3dc538e6fd5eaf732f4eb6b98bdcb7cedb5b91d3b5bdcf29c98c293f58e
hadoop_common_native_dist:
filename: "release-{{ HADOOP_COMMON_VERSION }}.tar.gz"
url: "https://github.com/apache/hadoop-common/archive/release-{{ HADOOP_COMMON_VERSION }}.tar.gz"
sha256sum: a8e1b49d4e891255d465e9449346ac7fb259bb35dce07d9f0df3b46fac3e9bd0
hadoop_common_java_home: "{{ oraclejdk_link }}"
hadoop_common_env: "{{ HADOOP_COMMON_HOME }}/hadoop_env"
#
# OS packages
#
hadoop_common_debian_pkgs:
- llvm-gcc
- build-essential
- make
- cmake
- automake
- autoconf
- libtool
- zlib1g-dev
- maven
hadoop_common_redhat_pkgs: []
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Role includes for role hadoop_common
dependencies:
- oraclejdk
\ No newline at end of file
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role hadoop_common
#
# Overview:
#
# This role installs all Hadoop services onto the machine. Note that it should
# be used to configure every machine in a Hadoop cluster. It does not perform
# any role-specific actions, such as formatting the namenode (see hadoop_master).
#
# Dependencies:
#
# oraclejdk: Not strictly required, but we tend to trust it more than openjdk.
#
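# Example play (a minimal sketch, mirroring the analytics_pipeline example above):
#
# - name: Install Hadoop on every node in the cluster
#   hosts: all
#   sudo: True
#   gather_facts: True
#   roles:
#     - hadoop_common
#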
- name: install system packages
apt: >
pkg={{ item }}
state=present
with_items: hadoop_common_debian_pkgs
- name: ensure group exists
group: name={{ hadoop_common_group }} system=yes state=present
- name: ensure user exists
user: >
name={{ hadoop_common_user }}
group={{ hadoop_common_group }}
home={{ HADOOP_COMMON_USER_HOME }} createhome=yes
shell=/bin/bash system=yes generate_ssh_key=yes
state=present
- name: own key authorized
file: >
src={{ HADOOP_COMMON_USER_HOME }}/.ssh/id_rsa.pub
dest={{ HADOOP_COMMON_USER_HOME }}/.ssh/authorized_keys
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
- name: ssh configured
template: >
src=hadoop_user_ssh_config.j2
dest={{ HADOOP_COMMON_USER_HOME }}/.ssh/config
mode=0600 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
- name: ensure user is in sudoers
lineinfile: >
dest=/etc/sudoers state=present
regexp='^%hadoop ALL\=' line='%hadoop ALL=(ALL) NOPASSWD:ALL'
validate='visudo -cf %s'
- name: check if downloaded and extracted
stat: path={{ HADOOP_COMMON_HOME }}
register: extracted_hadoop_dir
- name: distribution downloaded
get_url: >
url={{ hadoop_common_dist.url }}
sha256sum={{ hadoop_common_dist.sha256sum }}
dest={{ hadoop_common_temporary_dir }}
when: not extracted_hadoop_dir.stat.exists
- name: distribution extracted
shell: >
chdir={{ HADOOP_COMMON_USER_HOME }}
tar -xzf {{ hadoop_common_temporary_dir }}/{{ hadoop_common_dist.filename }} && chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} hadoop-{{ HADOOP_COMMON_VERSION }}
when: not extracted_hadoop_dir.stat.exists
- name: versioned directory symlink created
file: >
src={{ HADOOP_COMMON_USER_HOME }}/hadoop-{{ HADOOP_COMMON_VERSION }}
dest={{ HADOOP_COMMON_HOME }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
- name: configuration installed
template: >
src={{ item }}.j2
dest={{ HADOOP_COMMON_CONF_DIR }}/{{ item }}
mode=0640 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
with_items:
- hadoop-env.sh
- mapred-site.xml
- core-site.xml
- hdfs-site.xml
- yarn-site.xml
- name: upstart scripts installed
template: >
src={{ item }}.j2
dest=/etc/init/{{ item }}
mode=0640 owner=root group=root
with_items:
- hdfs.conf
- yarn.conf
- name: hadoop env file exists
file: >
path={{ hadoop_common_env }} state=touch
owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
- name: env vars sourced in bashrc
lineinfile: >
dest={{ HADOOP_COMMON_USER_HOME }}/.bashrc
state=present
regexp="^. {{ hadoop_common_env }}"
line=". {{ hadoop_common_env }}"
insertbefore=BOF
- name: env vars sourced in hadoop env
lineinfile: >
dest={{ hadoop_common_env }} state=present
regexp="^. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh" line=". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh"
- name: check if native libraries need to be built
stat: path={{ HADOOP_COMMON_USER_HOME }}/.native_libs_built
register: native_libs_built
- name: protobuf downloaded
get_url: >
url={{ hadoop_common_protobuf_dist.url }}
sha256sum={{ hadoop_common_protobuf_dist.sha256sum }}
dest={{ hadoop_common_temporary_dir }}
when: not native_libs_built.stat.exists
- name: protobuf extracted
shell: >
chdir={{ hadoop_common_temporary_dir }}
tar -xzf {{ hadoop_common_protobuf_dist.filename }}
when: not native_libs_built.stat.exists
- name: protobuf installed
shell: >
chdir={{ hadoop_common_temporary_dir }}/protobuf-{{ HADOOP_COMMON_PROTOBUF_VERSION }}
./configure --prefix=/usr/local && make && make install
when: not native_libs_built.stat.exists
- name: native lib source downloaded
get_url: >
url={{ hadoop_common_native_dist.url }}
sha256sum={{ hadoop_common_native_dist.sha256sum }}
dest={{ hadoop_common_temporary_dir }}/{{ hadoop_common_native_dist.filename }}
when: not native_libs_built.stat.exists
- name: native lib source extracted
shell: >
chdir={{ hadoop_common_temporary_dir }}
tar -xzf {{ hadoop_common_native_dist.filename }}
when: not native_libs_built.stat.exists
- name: native lib built
shell: >
chdir={{ hadoop_common_temporary_dir }}/hadoop-common-release-{{ HADOOP_COMMON_VERSION }}/hadoop-common-project
mvn package -X -Pnative -DskipTests
environment:
LD_LIBRARY_PATH: /usr/local/lib
when: not native_libs_built.stat.exists
- name: old native libs renamed
shell: >
mv {{ HADOOP_COMMON_HOME }}/lib/native/{{ item.name }} {{ HADOOP_COMMON_HOME }}/lib/native/{{ item.new_name }}
with_items:
- { name: libhadoop.a, new_name: libhadoop32.a }
- { name: libhadoop.so, new_name: libhadoop32.so }
- { name: libhadoop.so.1.0.0, new_name: libhadoop32.so.1.0.0 }
when: not native_libs_built.stat.exists
- name: new native libs installed
shell: >
chdir={{ hadoop_common_temporary_dir }}/hadoop-common-release-{{ HADOOP_COMMON_VERSION }}/hadoop-common-project/hadoop-common/target/native/target/usr/local/lib
chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ item }} && cp {{ item }} {{ HADOOP_COMMON_HOME }}/lib/native/{{ item }}
with_items:
- libhadoop.a
- libhadoop.so
- libhadoop.so.1.0.0
when: not native_libs_built.stat.exists
- name: native lib marker touched
file: >
path={{ HADOOP_COMMON_USER_HOME }}/.native_libs_built
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=touch
when: not native_libs_built.stat.exists
- name: service directory exists
file: >
path={{ HADOOP_COMMON_SERVICES_DIR }}
mode=0750 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
state=directory
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
\ No newline at end of file
export JAVA_HOME={{ hadoop_common_java_home }}
export COMMON_BIN_DIR="{{ COMMON_BIN_DIR }}"
export HADOOP_COMMON_HOME={{ HADOOP_COMMON_HOME }}
export HADOOP_MAPRED_HOME={{ HADOOP_COMMON_HOME }}
export HADOOP_HDFS_HOME={{ HADOOP_COMMON_HOME }}
export YARN_HOME={{ HADOOP_COMMON_HOME }}
export HADOOP_CONF_DIR={{ HADOOP_COMMON_CONF_DIR }}
export HADOOP_COMMON_TOOL_HEAP_MAX="{{ HADOOP_COMMON_TOOL_HEAP_MAX }}"
export HADOOP_COMMON_SERVICE_HEAP_MAX="{{ HADOOP_COMMON_SERVICE_HEAP_MAX }}"
export YARN_HEAPSIZE="$HADOOP_COMMON_SERVICE_HEAP_MAX"
{% raw %}
export PATH=$PATH:$HADOOP_COMMON_HOME/bin:$HADOOP_COMMON_HOME/sbin:$COMMON_BIN_DIR
# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Xmx${HADOOP_COMMON_SERVICE_HEAP_MAX}m -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Xmx${HADOOP_COMMON_SERVICE_HEAP_MAX}m -Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Xmx${HADOOP_COMMON_SERVICE_HEAP_MAX}m -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_COMMON_TOOL_HEAP_MAX}m $HADOOP_CLIENT_OPTS"
# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
{% endraw %}
\ No newline at end of file
Host localhost
StrictHostKeyChecking no
Host 0.0.0.0
StrictHostKeyChecking no
\ No newline at end of file
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:{{ HADOOP_COMMON_DATA }}/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:{{ HADOOP_COMMON_DATA }}/datanode</value>
</property>
</configuration>
\ No newline at end of file
description "hdfs"
start on starting yarn
stop on stopping yarn
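# hdfs has no runlevel stanza of its own: it is chained to the yarn job, so
# starting or stopping yarn (which runs on runlevels [2345]) brings HDFS up
# and down with it.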
setuid {{ hadoop_common_user }}
pre-start script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
start-dfs.sh
end script
post-stop script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
stop-dfs.sh
end script
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
\ No newline at end of file
<?xml version="1.0"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
\ No newline at end of file
description "yarn"
start on runlevel [2345]
stop on runlevel [!2345]
setuid {{ hadoop_common_user }}
pre-start script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
start-yarn.sh
end script
post-stop script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
stop-yarn.sh
end script
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role hadoop_master
#
#
# vars are namespaced with the module name.
#
hadoop_master_role_name: hadoop_master
#
# OS packages
#
hadoop_master_debian_pkgs: []
hadoop_master_redhat_pkgs: []
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Role includes for role hadoop_master
dependencies:
- hadoop_common
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role hadoop_master
#
# Overview:
#
# Configures the machine to be a Hadoop master node. This generally means that it will
# run the HDFS namenode and the YARN resource manager.
#
# Dependencies:
#
# hadoop_common: this role installs hadoop generically
#
- name: data directories created
file: >
path={{ HADOOP_COMMON_DATA }}/{{ item }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=directory
with_items:
- namenode
- datanode
- name: check if namenode is formatted
stat: path={{ HADOOP_COMMON_DATA }}/namenode/current/VERSION
register: namenode_version_file
- name: namenode formatted
shell: >
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs namenode -format
sudo_user: "{{ hadoop_common_user }}"
when: not namenode_version_file.stat.exists
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role hive
#
HIVE_VERSION: 0.11.0
HIVE_MYSQL_CONNECTOR_VERSION: 5.1.29
HIVE_HOME: "{{ HADOOP_COMMON_USER_HOME }}/hive"
HIVE_CONF: "{{ HIVE_HOME }}/conf"
HIVE_LIB: "{{ HIVE_HOME }}/lib"
HIVE_METASTORE_DATABASE:
user: edx_hive
password: edx
name: edx_hive_metastore
host: 127.0.0.1
port: 3306
#
# vars are namespaced with the module name.
#
hive_role_name: hive
hive_temporary_dir: /var/tmp
hive_dist:
filename: "hive-{{ HIVE_VERSION }}-bin.tar.gz"
url: "https://archive.apache.org/dist/hive/hive-{{ HIVE_VERSION }}/hive-{{ HIVE_VERSION }}-bin.tar.gz"
sha256sum: c22ee328438e80a8ee4b66979dba69650511a73f8b6edf2d87d93c74283578e5
hive_mysql_connector_dist:
filename: "mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}.tar.gz"
url: "http://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}.tar.gz"
sha256sum: 04ad83b655066b626daaabb9676a00f6b4bc43f0c234cbafafac1209dcf1be73
#
# OS packages
#
hive_debian_pkgs: []
hive_redhat_pkgs: []
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Role includes for role hive
dependencies:
- hadoop_common
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role hive
#
# Overview:
#
# Install and configure Hive.
#
# Dependencies:
#
# hadoop_common: Hive requires Hadoop to be installed to function properly.
- name: check if downloaded and extracted
stat: path={{ HIVE_HOME }}
register: extracted_dir
- name: distribution downloaded
get_url: >
url={{ hive_dist.url }}
sha256sum={{ hive_dist.sha256sum }}
dest={{ hive_temporary_dir }}
when: not extracted_dir.stat.exists
- name: distribution extracted
shell: >
chdir={{ HADOOP_COMMON_USER_HOME }}
tar -xzf {{ hive_temporary_dir }}/{{ hive_dist.filename }} && chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} hive-{{ HIVE_VERSION }}-bin
when: not extracted_dir.stat.exists
- name: versioned directory symlink created
file: >
src={{ HADOOP_COMMON_USER_HOME }}/hive-{{ HIVE_VERSION }}-bin
dest={{ HIVE_HOME }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
- name: hive mysql connector distribution downloaded
get_url: >
url={{ hive_mysql_connector_dist.url }}
sha256sum={{ hive_mysql_connector_dist.sha256sum }}
dest={{ hive_temporary_dir }}
when: not extracted_dir.stat.exists
- name: hive mysql connector distribution extracted
shell: >
chdir={{ hive_temporary_dir }}
tar -xzf {{ hive_temporary_dir }}/{{ hive_mysql_connector_dist.filename }}
when: not extracted_dir.stat.exists
- name: hive lib exists
file: >
path={{ HIVE_LIB }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=directory
- name: hive mysql connector installed
shell: >
chdir={{ hive_temporary_dir }}/mysql-connector-java-{{ HIVE_MYSQL_CONNECTOR_VERSION }}
cp mysql-connector-java-{{ HIVE_MYSQL_CONNECTOR_VERSION }}-bin.jar {{ HIVE_LIB }} &&
chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ HIVE_LIB }}/mysql-connector-java-{{ HIVE_MYSQL_CONNECTOR_VERSION }}-bin.jar
when: not extracted_dir.stat.exists
- name: configuration installed
template: >
src={{ item }}.j2
dest={{ HIVE_CONF }}/{{ item }}
mode=0640 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
with_items:
- hive-env.sh
- hive-site.xml
- name: env vars sourced in hadoop env
lineinfile: >
dest={{ hadoop_common_env }} state=present
regexp="^. {{ HIVE_CONF }}/hive-env.sh" line=". {{ HIVE_CONF }}/hive-env.sh"
#!/bin/bash
export HIVE_HOME={{ HIVE_HOME }}
export PATH=$PATH:$HIVE_HOME/bin
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://{{ HIVE_METASTORE_DATABASE.host }}:{{ HIVE_METASTORE_DATABASE.port }}/{{ HIVE_METASTORE_DATABASE.name }}</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>{{ HIVE_METASTORE_DATABASE.user }}</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>{{ HIVE_METASTORE_DATABASE.password }}</value>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>true</value>
</property>
</configuration>
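<!-- With the HIVE_METASTORE_DATABASE defaults from this role, the connection
     URL above renders as jdbc:mysql://127.0.0.1:3306/edx_hive_metastore. -->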
......@@ -11,21 +11,26 @@
# Defaults for role insights
#
INSIGHTS_MEMCACHE: [ 'localhost:11211' ]
INSIGHTS_LMS_BASE: 'http://127.0.0.1:8000'
INSIGHTS_BASE_URL: 'http://127.0.0.1:8110'
INSIGHTS_MEMCACHE: [ '127.0.0.1:11211' ]
INSIGHTS_FEEDBACK_EMAIL: 'dashboard@example.com'
INSIGHTS_MKTG_BASE: 'http://example.com'
INSIGHTS_PRIVACY_POLICY_URL: '{{ INSIGHTS_MKTG_BASE }}/privacy-policy'
INSIGHTS_TERMS_OF_SERVICE_URL: '{{ INSIGHTS_MKTG_BASE }}/terms-service'
INSIGHTS_SUPPORT_URL: ''
INSIGHTS_OAUTH2_SECRET: 'secret'
INSIGHTS_OAUTH2_URL_ROOT: 'url_root'
INSIGHTS_OAUTH2_URL_ROOT: '{{ INSIGHTS_LMS_BASE }}/oauth2'
INSIGHTS_OAUTH2_APP_CLIENT_NAME: insights
INSIGHTS_OAUTH2_APP_USERNAME: staff
INSIGHTS_OAUTH2_APP_COMPLETE_URL: '{{ INSIGHTS_BASE_URL }}/complete/edx-oidc/'
INSIGHTS_SOCIAL_AUTH_REDIRECT_IS_HTTPS: false
INSIGHTS_SECRET_KEY: 'YOUR_SECRET_KEY_HERE'
INSIGHTS_OAUTH2_KEY: 'YOUR_OAUTH2_KEY'
# This will not work on single-instance sandboxes
INSIGHTS_DOC_BASE: 'http://localhost/en/latest'
INSIGHTS_LMS_BASE: 'http://localhost:18000'
ANALYTICS_API_ENDPOINT: 'http://localhost:18010'
INSIGHTS_DATA_API_AUTH_TOKEN: 'YOUR_DATA_API_AUTH_TOKEN'
INSIGHTS_DOC_BASE: 'http://127.0.0.1/en/latest'
ANALYTICS_API_ENDPOINT: 'http://127.0.0.1:8100/api/v0'
INSIGHTS_DATA_API_AUTH_TOKEN: 'changeme'
INSIGHTS_PLATFORM_NAME: 'edX'
INSIGHTS_APPLICATION_NAME: 'Insights'
INSIGHTS_SEGMENT_IO_KEY: 'YOUR_KEY'
@@ -52,7 +57,7 @@ INSIGHTS_DATABASES:
NAME: '{{ INSIGHTS_DATABASE_NAME }}'
USER: 'rosencrantz'
PASSWORD: 'secret'
HOST: 'localhost'
HOST: '127.0.0.1'
PORT: '3306'
INSIGHTS_LMS_COURSE_SHORTCUT_BASE_URL: "URL_FOR_LMS_COURSE_LIST_PAGE"
@@ -72,7 +77,7 @@ INSIGHTS_CONFIG:
SECRET_KEY: '{{ INSIGHTS_SECRET_KEY }}'
DATA_API_URL: '{{ ANALYTICS_API_ENDPOINT }}'
DATA_API_AUTH_TOKEN: '{{ INSIGHTS_DATA_API_AUTH_TOKEN }}'
SOCIAL_AUTH_REDIRECT_IS_HTTPS: true
SOCIAL_AUTH_REDIRECT_IS_HTTPS: '{{ INSIGHTS_SOCIAL_AUTH_REDIRECT_IS_HTTPS }}'
SOCIAL_AUTH_EDX_OIDC_KEY: '{{ INSIGHTS_OAUTH2_KEY }}'
SOCIAL_AUTH_EDX_OIDC_SECRET: '{{ INSIGHTS_OAUTH2_SECRET }}'
SOCIAL_AUTH_EDX_OIDC_URL_ROOT: '{{ INSIGHTS_OAUTH2_URL_ROOT }}'
@@ -105,13 +110,18 @@ INSIGHTS_CONFIG:
DATABASES: "{{ INSIGHTS_DATABASES }}"
LMS_COURSE_SHORTCUT_BASE_URL: "{{ INSIGHTS_LMS_COURSE_SHORTCUT_BASE_URL }}"
COURSE_API_URL: "{{ INSIGHTS_COURSE_API_URL }}"
# When Insights is co-located with other Django services, we need to ensure they don't all
# use the same cookie names.
SESSION_COOKIE_NAME: "{{ INSIGHTS_SESSION_COOKIE_NAME | default('insights_sessionid') }}"
CSRF_COOKIE_NAME: "{{ INSIGHTS_CSRF_COOKIE_NAME | default('insights_csrftoken') }}"
LANGUAGE_COOKIE_NAME: "{{ INSIGHTS_LANGUAGE_COOKIE_NAME | default('insights_language') }}"
INSIGHTS_NEWRELIC_APPNAME: "{{ COMMON_ENVIRONMENT }}-{{ COMMON_DEPLOYMENT }}-insights"
INSIGHTS_PIP_EXTRA_ARGS: "-i {{ COMMON_PYPI_MIRROR_URL }}"
INSIGHTS_NGINX_PORT: "18110"
INSIGHTS_GUNICORN_WORKERS: "2"
INSIGHTS_GUNICORN_EXTRA: ""
INSIGHTS_COURSE_API_URL: "URL FOR COURSE API"
INSIGHTS_COURSE_API_URL: "{{ INSIGHTS_LMS_BASE }}/api/course_structure/v0/"
INSIGHTS_VERSION: "master"
INSIGHTS_GIT_IDENTITY: !!null
......
@@ -132,4 +132,4 @@
config={{ supervisor_cfg }}
name={{ insights_service_name }}
when: not disable_edx_services
sudo_user: "{{ supervisor_service_user }}"
\ No newline at end of file
sudo_user: "{{ supervisor_service_user }}"
@@ -21,6 +21,12 @@ localdev_accounts:
- { user: "{{ ecommerce_user|default('None') }}", home: "{{ ecommerce_home }}",
env: "ecommerce_env", repo: "ecommerce" }
- { user: "{{ analytics_api_user|default('None') }}", home: "{{ analytics_api_home }}",
env: "analytics_api_env", repo: "analytics_api" }
- { user: "{{ insights_user|default('None') }}", home: "{{ insights_home }}",
env: "insights_env", repo: "edx_analytics_dashboard" }
# Helpful system packages for local dev
local_dev_pkgs:
- vim
@@ -31,5 +37,7 @@ local_dev_pkgs:
localdev_jscover_version: "1.0.2"
localdev_oauth2_clients:
- { name: "{{ ecommerce_service_name }}", url_root: "{{ ECOMMERCE_ECOMMERCE_URL_ROOT }}",
- { name: "{{ ecommerce_service_name | default('None') }}", url_root: "{{ ECOMMERCE_ECOMMERCE_URL_ROOT }}",
id: "{{ ECOMMERCE_SOCIAL_AUTH_EDX_OIDC_KEY }}", secret: "{{ ECOMMERCE_SOCIAL_AUTH_EDX_OIDC_SECRET }}" }
- { name: "{{ INSIGHTS_OAUTH2_APP_CLIENT_NAME | default('None') }}", url_root: "{{ INSIGHTS_BASE_URL }}",
id: "{{ INSIGHTS_OAUTH2_KEY }}", secret: "{{ INSIGHTS_OAUTH2_SECRET }}" }
@@ -91,3 +91,4 @@
sudo_user: "{{ edxapp_user }}"
environment: "{{ edxapp_environment }}"
with_items: localdev_oauth2_clients
when: item.name != 'None'
@@ -36,22 +36,14 @@
file: src=/usr/lib/jvm/{{ oraclejdk_base }} dest={{ oraclejdk_link }} state=link
- name: update alternatives java
shell: >
update-alternatives --install "/usr/bin/java" "java" "/usr/lib/jvm/{{ oraclejdk_base }}/bin/java" 1
register: update_alt
changed_when: update_alt.stdout != ""
- name: update alternatives javac
shell: >
update-alternatives --install "/usr/bin/javac" "javac" "/usr/lib/jvm/{{ oraclejdk_base }}/bin/javac" 1
register: update_alt
changed_when: update_alt.stdout != ""
- name: update alternatives javaws
shell: >
update-alternatives --install "/usr/bin/javaws" "javaws" "/usr/lib/jvm/{{ oraclejdk_base }}/bin/javaws" 1
register: update_alt
changed_when: update_alt.stdout != ""
alternatives: >
name={{ item }}
link="/usr/bin/{{ item }}"
path="/usr/lib/jvm/{{ oraclejdk_base }}/bin/{{ item }}"
with_items:
- java
- javac
- javaws
- name: add JAVA_HOME for Oracle Java
template: src=java.sh.j2 dest=/etc/profile.d/java.sh owner=root group=root mode=0755
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role sqoop
#
SQOOP_VERSION: 1.4.6
# There is no non-alpha version of this artifact; this is simply the Sqoop build that is compatible with Hadoop 2
SQOOP_HADOOP_VERSION: 2.0.4-alpha
SQOOP_MYSQL_CONNECTOR_VERSION: 5.1.29
SQOOP_HOME: "{{ HADOOP_COMMON_USER_HOME }}/sqoop"
SQOOP_CONF: "{{ SQOOP_HOME }}/conf"
SQOOP_LIB: "{{ SQOOP_HOME }}/lib"
#
# vars are namespaced with the module name.
#
sqoop_role_name: sqoop
sqoop_temporary_dir: /var/tmp
sqoop_base_filename: "sqoop-{{ SQOOP_VERSION }}.bin__hadoop-{{ SQOOP_HADOOP_VERSION }}"
sqoop_dist:
filename: "{{ sqoop_base_filename }}.tar.gz"
url: "http://www.carfab.com/apachesoftware/sqoop/{{ SQOOP_VERSION }}/{{ sqoop_base_filename }}.tar.gz"
sha256sum: d582e7968c24ff040365ec49764531cb76dfa22c38add5f57a16a57e70d5d496
sqoop_mysql_connector_dist:
filename: "mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}.tar.gz"
url: "http://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}.tar.gz"
sha256sum: 04ad83b655066b626daaabb9676a00f6b4bc43f0c234cbafafac1209dcf1be73
#
# OS packages
#
sqoop_debian_pkgs: []
sqoop_redhat_pkgs: []
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Role includes for role sqoop
#
# Example:
#
# dependencies:
# - {
# role: my_role
# my_role_var0: "foo"
# my_role_var1: "bar"
# }
dependencies:
- hadoop_common
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role sqoop
#
# Overview:
#
# Install and configure Sqoop. This tool is used by the analytics stack to take database snapshots.
#
# Dependencies:
#
# hadoop_common: Sqoop can distribute snapshotting work out to a cluster of workers; it uses Hadoop to do so.
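#
# Example invocation (hypothetical; the database, table, and credential values
# below are illustrative, not values defined by this role):
#
#   sqoop import --connect jdbc:mysql://localhost:3306/edxapp \
#       --username read_replica --password-file /edx/etc/sqoop-password \
#       --table courseware_studentmodule \
#       --target-dir hdfs://localhost:9000/data/courseware_studentmodule
#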
- name: check if downloaded and extracted
stat: path={{ SQOOP_LIB }}/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}-bin.jar
register: installed
- name: distribution downloaded
get_url: >
url={{ sqoop_dist.url }}
sha256sum={{ sqoop_dist.sha256sum }}
dest={{ sqoop_temporary_dir }}
when: not installed.stat.exists
- name: distribution extracted
shell: >
chdir={{ HADOOP_COMMON_USER_HOME }}
tar -xzf {{ sqoop_temporary_dir }}/{{ sqoop_dist.filename }} && chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ sqoop_base_filename }}
when: not installed.stat.exists
- name: versioned directory symlink created
file: >
src={{ HADOOP_COMMON_USER_HOME }}/{{ sqoop_base_filename }}
dest={{ SQOOP_HOME }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
- name: mysql connector distribution downloaded
get_url: >
url={{ sqoop_mysql_connector_dist.url }}
sha256sum={{ sqoop_mysql_connector_dist.sha256sum }}
dest={{ sqoop_temporary_dir }}
when: not installed.stat.exists
- name: mysql connector distribution extracted
shell: >
chdir={{ sqoop_temporary_dir }}
tar -xzf {{ sqoop_temporary_dir }}/{{ sqoop_mysql_connector_dist.filename }}
when: not installed.stat.exists
- name: sqoop lib exists
file: >
path={{ SQOOP_LIB }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=directory
- name: mysql connector installed
shell: >
chdir={{ sqoop_temporary_dir }}/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}
cp mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}-bin.jar {{ SQOOP_LIB }} &&
chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ SQOOP_LIB }}/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}-bin.jar
when: not installed.stat.exists
- name: configuration installed
template: >
src={{ item }}.j2
dest={{ SQOOP_CONF }}/{{ item }}
mode=0640 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
with_items:
- sqoop-env.sh
- name: env vars sourced in hadoop env
lineinfile: >
dest={{ hadoop_common_env }} state=present
regexp="^. {{ SQOOP_CONF }}/sqoop-env.sh" line=". {{ SQOOP_CONF }}/sqoop-env.sh"
#!/bin/bash
export SQOOP_HOME={{ SQOOP_HOME }}
export SQOOP_LIB=$SQOOP_HOME/lib
export PATH=$PATH:$SQOOP_HOME/bin
- name: Configure instance(s)
hosts: all
sudo: True
gather_facts: True
vars:
migrate_db: 'yes'
openid_workaround: true
devstack: true
disable_edx_services: true
mongo_enable_journal: false
EDXAPP_NO_PREREQ_INSTALL: 0
COMMON_MOTD_TEMPLATE: 'devstack_motd.tail.j2'
COMMON_SSH_PASSWORD_AUTH: "yes"
ENABLE_LEGACY_ORA: !!null
EDXAPP_LMS_BASE: 127.0.0.1:8000
EDXAPP_OAUTH_ENFORCE_SECURE: false
EDXAPP_LMS_BASE_SCHEME: http
roles:
- edx_ansible
- edxlocal
- mongo
- edxapp
- oraclejdk
- elasticsearch
- forum
- demo
- analytics_api
- analytics_pipeline
- insights
- local_dev
\ No newline at end of file
@@ -12,6 +12,9 @@
COMMON_MOTD_TEMPLATE: 'devstack_motd.tail.j2'
COMMON_SSH_PASSWORD_AUTH: "yes"
ENABLE_LEGACY_ORA: !!null
EDXAPP_LMS_BASE: 127.0.0.1:8000
EDXAPP_OAUTH_ENFORCE_SECURE: false
EDXAPP_LMS_BASE_SCHEME: http
roles:
- edx_ansible
- edxlocal
@@ -29,5 +32,3 @@
- browsermob-proxy
- local_dev
- demo
- role: analytics_api
when: ANALYTICS_API_GIT_IDENTITY
Vagrant.require_version ">= 1.5.3"
unless Vagrant.has_plugin?("vagrant-vbguest")
raise "Please install the vagrant-vbguest plugin by running `vagrant plugin install vagrant-vbguest`"
end
VAGRANTFILE_API_VERSION = "2"
MEMORY = 4096
CPU_COUNT = 2
edx_platform_mount_dir = "edx-platform"
themes_mount_dir = "themes"
forum_mount_dir = "cs_comments_service"
insights_mount_dir = "insights"
analytics_api_mount_dir = "analytics_api"
if ENV['VAGRANT_MOUNT_BASE']
edx_platform_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + edx_platform_mount_dir
themes_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + themes_mount_dir
forum_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + forum_mount_dir
insights_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + insights_mount_dir
analytics_api_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + analytics_api_mount_dir
end
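# For example, to keep all source checkouts under one host directory
# (directory name illustrative):
#
#   VAGRANT_MOUNT_BASE=~/edx-workspace vagrant up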
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
# Creates a devstack from a base Ubuntu 12.04 image for virtualbox
config.vm.box = "precise64"
config.vm.box_url = "http://files.vagrantup.com/precise64.box"
config.vm.network :private_network, ip: "192.168.33.10"
config.vm.network :forwarded_port, guest: 8000, host: 8000 # LMS
config.vm.network :forwarded_port, guest: 8001, host: 8001 # Studio
config.vm.network :forwarded_port, guest: 8003, host: 8003 # LMS for Bok Choy
config.vm.network :forwarded_port, guest: 8031, host: 8031 # Studio for Bok Choy
config.vm.network :forwarded_port, guest: 8120, host: 8120 # edX Notes Service
config.vm.network :forwarded_port, guest: 8765, host: 8765
config.vm.network :forwarded_port, guest: 9200, host: 9200
config.vm.network :forwarded_port, guest: 18080, host: 18080
config.vm.network :forwarded_port, guest: 8100, host: 8100 # Analytics Data API
config.vm.network :forwarded_port, guest: 8110, host: 8110 # Insights
config.vm.network :forwarded_port, guest: 50070, host: 50070 # HDFS Admin UI
config.vm.network :forwarded_port, guest: 8088, host: 8088 # Hadoop Resource Manager
config.ssh.insert_key = true
config.vm.synced_folder ".", "/vagrant", disabled: true
# Enable X11 forwarding so we can interact with GUI applications
if ENV['VAGRANT_X11']
config.ssh.forward_x11 = true
end
if ENV['VAGRANT_USE_VBOXFS'] == 'true'
config.vm.synced_folder "#{edx_platform_mount_dir}", "/edx/app/edxapp/edx-platform",
create: true, owner: "edxapp", group: "www-data"
config.vm.synced_folder "#{themes_mount_dir}", "/edx/app/edxapp/themes",
create: true, owner: "edxapp", group: "www-data"
config.vm.synced_folder "#{forum_mount_dir}", "/edx/app/forum/cs_comments_service",
create: true, owner: "forum", group: "www-data"
config.vm.synced_folder "#{insights_mount_dir}", "/edx/app/insights/edx_analytics_dashboard",
create: true, owner: "insights", group: "www-data"
config.vm.synced_folder "#{analytics_api_mount_dir}", "/edx/app/analytics_api/analytics_api",
create: true, owner: "analytics_api", group: "www-data"
config.vm.synced_folder "#{insights_mount_dir}", "/edx/app/insights/edx_analytics_dashboard",
create: true, owner: "insights", group: "www-data"
config.vm.synced_folder "#{analytics_api_mount_dir}", "/edx/app/analytics_api/analytics_api",
create: true, owner: "analytics_api", group: "www-data"
else
config.vm.synced_folder "#{edx_platform_mount_dir}", "/edx/app/edxapp/edx-platform",
create: true, nfs: true
config.vm.synced_folder "#{themes_mount_dir}", "/edx/app/edxapp/themes",
create: true, nfs: true
config.vm.synced_folder "#{forum_mount_dir}", "/edx/app/forum/cs_comments_service",
create: true, nfs: true
config.vm.synced_folder "#{insights_mount_dir}", "/edx/app/insights/edx_analytics_dashboard",
create: true, nfs: true
config.vm.synced_folder "#{analytics_api_mount_dir}", "/edx/app/analytics_api/analytics_api",
create: true, nfs: true
end
config.vm.provider :virtualbox do |vb|
vb.customize ["modifyvm", :id, "--memory", MEMORY.to_s]
vb.customize ["modifyvm", :id, "--cpus", CPU_COUNT.to_s]
# Allow DNS to work for Ubuntu 12.10 host
# http://askubuntu.com/questions/238040/how-do-i-fix-name-service-for-vagrant-client
vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"]
end
["vmware_fusion", "vmware_workstation"].each do |vmware_provider|
config.vm.provider vmware_provider do |v, override|
# Override box url to get vmware one
override.vm.box = "precise64_vmware"
override.vm.box_url = "http://files.vagrantup.com/precise64_vmware.box"
v.vmx["memsize"] = MEMORY.to_s
v.vmx["numvcpus"] = CPU_COUNT.to_s
end
end
# Make LC_ALL default to en_US.UTF-8 instead of en_US.
# See: https://github.com/mitchellh/vagrant/issues/1188
config.vm.provision "shell", inline: 'echo \'LC_ALL="en_US.UTF-8"\' > /etc/default/locale'
# Use vagrant-vbguest plugin to make sure Guest Additions are in sync
config.vbguest.auto_reboot = true
config.vbguest.auto_update = true
config.vm.provision :ansible do |ansible|
ansible.playbook = "../../../playbooks/vagrant-analytics.yml"
ansible.verbose = "vvvv"
ansible.extra_vars = {}
if ENV['ENABLE_LEGACY_ORA']
ansible.extra_vars['ENABLE_LEGACY_ORA'] = true
end
if ENV['OPENEDX_RELEASE']
ansible.extra_vars = {
edx_platform_version: ENV['OPENEDX_RELEASE'],
certs_version: ENV['OPENEDX_RELEASE'],
forum_version: ENV['OPENEDX_RELEASE'],
xqueue_version: ENV['OPENEDX_RELEASE'],
}
end
if ENV['CONFIGURATION_VERSION']
ansible.extra_vars['configuration_version'] = ENV['CONFIGURATION_VERSION']
end
if ENV['EDX_PLATFORM_VERSION']
ansible.extra_vars['edx_platform_version'] = ENV['EDX_PLATFORM_VERSION']
end
end
end
../../../playbooks/ansible.cfg
\ No newline at end of file
@@ -13,6 +13,8 @@ themes_mount_dir = "themes"
forum_mount_dir = "cs_comments_service"
ora_mount_dir = "ora"
ecommerce_mount_dir = "ecommerce"
insights_mount_dir = "insights"
analytics_api_mount_dir = "analytics_api"
if ENV['VAGRANT_MOUNT_BASE']
@@ -21,6 +23,8 @@ if ENV['VAGRANT_MOUNT_BASE']
forum_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + forum_mount_dir
ora_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + ora_mount_dir
ecommerce_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + ecommerce_mount_dir
insights_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + insights_mount_dir
analytics_api_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + analytics_api_mount_dir
end
@@ -40,6 +44,10 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.network :forwarded_port, guest: 8765, host: 8765
config.vm.network :forwarded_port, guest: 9200, host: 9200
config.vm.network :forwarded_port, guest: 18080, host: 18080
config.vm.network :forwarded_port, guest: 8100, host: 8100 # Analytics Data API
config.vm.network :forwarded_port, guest: 8110, host: 8110 # Insights
config.vm.network :forwarded_port, guest: 50070, host: 50070 # HDFS Admin UI
config.vm.network :forwarded_port, guest: 8088, host: 8088 # Hadoop Resource Manager
config.ssh.insert_key = true
config.vm.synced_folder ".", "/vagrant", disabled: true
@@ -63,6 +71,10 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.synced_folder "#{ora_mount_dir}", "/edx/app/ora/ora",
create: true, owner: "ora", group: "www-data"
end
config.vm.synced_folder "#{insights_mount_dir}", "/edx/app/insights/edx_analytics_dashboard",
create: true, owner: "insights", group: "www-data"
config.vm.synced_folder "#{analytics_api_mount_dir}", "/edx/app/analytics_api/analytics_api",
create: true, owner: "analytics_api", group: "www-data"
else
config.vm.synced_folder "#{edx_platform_mount_dir}", "/edx/app/edxapp/edx-platform",
create: true, nfs: true
@@ -77,6 +89,11 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.synced_folder "#{ora_mount_dir}", "/edx/app/ora/ora",
create: true, nfs: true
end
config.vm.synced_folder "#{insights_mount_dir}", "/edx/app/insights/edx_analytics_dashboard",
create: true, nfs: true
config.vm.synced_folder "#{analytics_api_mount_dir}", "/edx/app/analytics_api/analytics_api",
create: true, nfs: true
end
@@ -125,5 +142,11 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
xqueue_version: ENV['OPENEDX_RELEASE'],
}
end
if ENV['CONFIGURATION_VERSION']
ansible.extra_vars['configuration_version'] = ENV['CONFIGURATION_VERSION']
end
if ENV['EDX_PLATFORM_VERSION']
ansible.extra_vars['edx_platform_version'] = ENV['EDX_PLATFORM_VERSION']
end
end
end
Vagrant.require_version ">= 1.5.3"
unless Vagrant.has_plugin?("vagrant-vbguest")
raise "Please install the vagrant-vbguest plugin by running `vagrant plugin install vagrant-vbguest`"
end
VAGRANTFILE_API_VERSION = "2"
MEMORY = 4096
CPU_COUNT = 2
edx_platform_mount_dir = "edx-platform"
themes_mount_dir = "themes"
forum_mount_dir = "cs_comments_service"
ora_mount_dir = "ora"
insights_mount_dir = "insights"
analytics_api_mount_dir = "analytics_api"
if ENV['VAGRANT_MOUNT_BASE']
edx_platform_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + edx_platform_mount_dir
themes_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + themes_mount_dir
forum_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + forum_mount_dir
ora_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + ora_mount_dir
insights_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + insights_mount_dir
analytics_api_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + analytics_api_mount_dir
end
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
# Creates an edX devstack VM from an official release
config.vm.box = "analyticstack-beta"
config.vm.network :private_network, ip: "192.168.33.10"
config.vm.network :forwarded_port, guest: 8000, host: 8000
config.vm.network :forwarded_port, guest: 8001, host: 8001
config.vm.network :forwarded_port, guest: 18080, host: 18080
config.vm.network :forwarded_port, guest: 8765, host: 8765
config.vm.network :forwarded_port, guest: 9200, host: 9200
config.vm.network :forwarded_port, guest: 8100, host: 8100 # Analytics Data API
config.vm.network :forwarded_port, guest: 8110, host: 8110 # Insights
config.vm.network :forwarded_port, guest: 50070, host: 50070 # HDFS Admin UI
config.vm.network :forwarded_port, guest: 8088, host: 8088 # Hadoop Resource Manager
config.ssh.insert_key = true
config.vm.synced_folder ".", "/vagrant", disabled: true
# Enable X11 forwarding so we can interact with GUI applications
if ENV['VAGRANT_X11']
config.ssh.forward_x11 = true
end
if ENV['VAGRANT_USE_VBOXFS'] == 'true'
config.vm.synced_folder "#{edx_platform_mount_dir}", "/edx/app/edxapp/edx-platform",
create: true, owner: "edxapp", group: "www-data"
config.vm.synced_folder "#{themes_mount_dir}", "/edx/app/edxapp/themes",
create: true, owner: "edxapp", group: "www-data"
config.vm.synced_folder "#{forum_mount_dir}", "/edx/app/forum/cs_comments_service",
create: true, owner: "forum", group: "www-data"
config.vm.synced_folder "#{ora_mount_dir}", "/edx/app/ora/ora",
create: true, owner: "ora", group: "www-data"
config.vm.synced_folder "#{insights_mount_dir}", "/edx/app/insights/edx_analytics_dashboard",
create: true, owner: "insights", group: "www-data"
config.vm.synced_folder "#{analytics_api_mount_dir}", "/edx/app/analytics_api/analytics_api",
create: true, owner: "analytics_api", group: "www-data"
else
config.vm.synced_folder "#{edx_platform_mount_dir}", "/edx/app/edxapp/edx-platform",
create: true, nfs: true
config.vm.synced_folder "#{themes_mount_dir}", "/edx/app/edxapp/themes",
create: true, nfs: true
config.vm.synced_folder "#{forum_mount_dir}", "/edx/app/forum/cs_comments_service",
create: true, nfs: true
config.vm.synced_folder "#{ora_mount_dir}", "/edx/app/ora/ora",
create: true, nfs: true
config.vm.synced_folder "#{insights_mount_dir}", "/edx/app/insights/edx_analytics_dashboard",
create: true, nfs: true
config.vm.synced_folder "#{analytics_api_mount_dir}", "/edx/app/analytics_api/analytics_api",
create: true, nfs: true
end
config.vm.provider :virtualbox do |vb|
vb.customize ["modifyvm", :id, "--memory", MEMORY.to_s]
vb.customize ["modifyvm", :id, "--cpus", CPU_COUNT.to_s]
# Allow DNS to work for Ubuntu 12.10 host
# http://askubuntu.com/questions/238040/how-do-i-fix-name-service-for-vagrant-client
vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"]
end
# Use vagrant-vbguest plugin to make sure Guest Additions are in sync
config.vbguest.auto_reboot = true
config.vbguest.auto_update = true
end
@@ -43,6 +43,8 @@ edx_platform_mount_dir = "edx-platform"
themes_mount_dir = "themes"
forum_mount_dir = "cs_comments_service"
ora_mount_dir = "ora"
insights_mount_dir = "insights"
analytics_api_mount_dir = "analytics_api"
if ENV['VAGRANT_MOUNT_BASE']
@@ -50,6 +52,8 @@ if ENV['VAGRANT_MOUNT_BASE']
themes_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + themes_mount_dir
forum_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + forum_mount_dir
ora_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + ora_mount_dir
insights_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + insights_mount_dir
analytics_api_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + analytics_api_mount_dir
end
@@ -125,6 +129,10 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.network :forwarded_port, guest: 18080, host: 18080
config.vm.network :forwarded_port, guest: 8765, host: 8765
config.vm.network :forwarded_port, guest: 9200, host: 9200
config.vm.network :forwarded_port, guest: 8100, host: 8100 # Analytics Data API
config.vm.network :forwarded_port, guest: 8110, host: 8110 # Insights
config.vm.network :forwarded_port, guest: 50070, host: 50070 # HDFS Admin UI
config.vm.network :forwarded_port, guest: 8088, host: 8088 # Hadoop Resource Manager
config.ssh.insert_key = true
config.vm.synced_folder ".", "/vagrant", disabled: true
@@ -143,6 +151,10 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
create: true, owner: "forum", group: "www-data"
config.vm.synced_folder "#{ora_mount_dir}", "/edx/app/ora/ora",
create: true, owner: "ora", group: "www-data"
config.vm.synced_folder "#{insights_mount_dir}", "/edx/app/insights/edx_analytics_dashboard",
create: true, owner: "insights", group: "www-data"
config.vm.synced_folder "#{analytics_api_mount_dir}", "/edx/app/analytics_api/analytics_api",
create: true, owner: "analytics_api", group: "www-data"
else
config.vm.synced_folder "#{edx_platform_mount_dir}", "/edx/app/edxapp/edx-platform",
create: true, nfs: true
@@ -152,6 +164,10 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
create: true, nfs: true
config.vm.synced_folder "#{ora_mount_dir}", "/edx/app/ora/ora",
create: true, nfs: true
config.vm.synced_folder "#{insights_mount_dir}", "/edx/app/insights/edx_analytics_dashboard",
create: true, nfs: true
config.vm.synced_folder "#{analytics_api_mount_dir}", "/edx/app/analytics_api/analytics_api",
create: true, nfs: true
end
config.vm.provider :virtualbox do |vb|
......