Commit 12d3724f by Gabe Mulley

Install analytics stack dependencies

parent 6577d91b
* First run of ansible always fails - I suspect a version bootstrapping issue
* .rbenv is modified in forums (this may be related to the issue above)
* fix auto-registration of new oauth2 client in LMS (insights role)
* the new version of edx-oauth2-provider is not being installed for some reason?
* manual addition of "DEBUG: true" to /edx/etc/insights.yml in order to get django to serve static assets from "runserver"
* fix 'False', should be just false in /edx/etc/insights.yml
* had to manually run "python manage.py migrate" on the insights app
* EXTRA_SCOPES = ['permissions'] in insights base.py settings
* LMS master rejects this scope?
* ensure hdfs://localhost:9000/data/ directory exists
* change permissions of /edx/var/log/tracking/tracking.log to 644
* document instructions
* vagrant up
* as vagrant user run "/edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook -i localhost, -c local analytics_single.yml"
* sudo su hadoop
* edx-analytics-hadoop.sh start
/edx/bin/manage.edxapp lms --settings=aws create_oauth2_client http://localhost:8110 http://localhost:8110/complete/edx-oidc/ confidential --username staff --client_name insights --client_id YOUR_OAUTH2_KEY --client_secret secret --trusted
\ No newline at end of file
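A consolidated sketch of the manual workarounds noted above, for anyone reproducing this on the sandbox (assumes the default /edx layout; run inside the vagrant box):

    $ sudo su - hadoop -c "hdfs dfs -mkdir -p hdfs://localhost:9000/data/"   # ensure the HDFS data directory exists
    $ sudo chmod 644 /edx/var/log/tracking/tracking.log                      # let the sync job read the tracking log
    $ python manage.py migrate   # run from the insights app directory, inside its virtualenv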
- name: Deploy all dependencies of edx-analytics-pipeline to the node
hosts: all
sudo: True
gather_facts: True
roles:
- analytics_pipeline
- name: Deploy all analytics services to a single node
hosts: all
sudo: True
gather_facts: True
vars:
migrate_db: yes
disable_edx_services: true
insights_register_oauth_app: yes
INSIGHTS_OAUTH2_APP_CLIENT_NAME: insights
INSIGHTS_OAUTH2_APP_USERNAME: staff
INSIGHTS_OAUTH2_APP_URL_ROOT: http://localhost:8110
INSIGHTS_OAUTH2_URL_ROOT: http://127.0.0.1:8000/oauth2
INSIGHTS_COURSE_API_URL: http://localhost:8000/api/course_structure/v0/
INSIGHTS_SOCIAL_AUTH_REDIRECT_IS_HTTPS: 'false'
INSIGHTS_SESSION_COOKIE_NAME: insights_sessionid
INSIGHTS_CSRF_COOKIE_NAME: insights_csrftoken
INSIGHTS_LANGUAGE_COOKIE_NAME: insights_language
roles:
- edxlocal
- analytics_api
- analytics_pipeline
- insights
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role analytics_pipeline
#
ANALYTICS_PIPELINE_OUTPUT_DATABASE:
username: pipeline001
password: password
host: localhost
port: 3306
ANALYTICS_PIPELINE_INPUT_DATABASE:
username: "{{ COMMON_MYSQL_READ_ONLY_USER }}"
password: "{{ COMMON_MYSQL_READ_ONLY_PASS }}"
host: localhost
port: 3306
ANALYTICS_PIPELINE_CONFIG_DIR: "{{ COMMON_CFG_DIR }}/edx-analytics-pipeline"
#
# vars are namespaced with the module name.
#
analytics_pipeline_role_name: analytics_pipeline
#
# OS packages
#
analytics_pipeline_debian_pkgs: []
analytics_pipeline_redhat_pkgs: []
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Role includes for role analytics_pipeline
#
# Example:
#
# dependencies:
# - {
# role: my_role
# my_role_var0: "foo"
# my_role_var1: "bar"
# }
dependencies:
- hadoop_master
- hive
- sqoop
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role analytics_pipeline
#
# Overview:
#
#
# Dependencies:
#
#
# Example play:
#
#
- name: create config directory
file: >
path="{{ ANALYTICS_PIPELINE_CONFIG_DIR }}"
mode=0755 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
state=directory
- name: store output database credentials for analytics pipeline
copy: >
content="{{ ANALYTICS_PIPELINE_OUTPUT_DATABASE | to_json }}"
dest={{ COMMON_CFG_DIR }}/edx-analytics-pipeline/output.json
mode=0644 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
- name: store input database credentials for analytics pipeline
copy: >
content="{{ ANALYTICS_PIPELINE_INPUT_DATABASE | to_json }}"
dest={{ COMMON_CFG_DIR }}/edx-analytics-pipeline/input.json
mode=0644 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
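For reference, with the role defaults above and the usual COMMON_CFG_DIR of /edx/etc, the rendered credential files come out as one-line JSON (key order from to_json is not guaranteed):

    $ cat /edx/etc/edx-analytics-pipeline/output.json
    {"username": "pipeline001", "password": "password", "host": "localhost", "port": 3306}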
- name: cron job syncs tracking log file to hdfs
cron: >
user={{ hadoop_common_user }}
name="Sync tracking log to HDFS"
job="{{ HADOOP_COMMON_HOME }}/bin/hdfs dfs -put -f /edx/var/log/tracking/tracking.log hdfs://localhost:9000/data/tracking.log"
@@ -12,7 +12,7 @@ IFS=","
-v add verbosity to edx_ansible run
-h this
<repo> - must be one of edx-platform, xqueue, cs_comments_service, xserver, ease, edx-ora, configuration, read-only-certificate-code, edx-analytics-data-api
<repo> - must be one of edx-platform, xqueue, cs_comments_service, xserver, ease, edx-ora, configuration, read-only-certificate-code, edx-analytics-data-api, insights, edx-analytics-pipeline
<version> - can be a commit or tag
EO
@@ -54,6 +54,7 @@ repos_to_cmd["read-only-certificate-code"]="$edx_ansible_cmd certs.yml -e 'certs
repos_to_cmd["edx-analytics-data-api"]="$edx_ansible_cmd analyticsapi.yml -e 'ANALYTICS_API_VERSION=$2'"
repos_to_cmd["edx-ora2"]="$edx_ansible_cmd ora2.yml -e 'ora2_version=$2'"
repos_to_cmd["insights"]="$edx_ansible_cmd insights.yml -e 'INSIGHTS_VERSION=$2'"
repos_to_cmd["edx-analytics-pipeline"]="$edx_ansible_cmd analytics_pipeline.yml"
if [[ -z $1 || -z $2 ]]; then
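With these entries in place, the analytics repos can be refreshed through the same wrapper as the other repos (a sketch; assumes the script is installed as /edx/bin/update, and note that analytics_pipeline.yml simply ignores the version argument):

    $ sudo /edx/bin/update edx-analytics-pipeline master
    $ sudo /edx/bin/update insights master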
@@ -687,7 +687,7 @@ generic_env_config: &edxapp_generic_env
CROSS_DOMAIN_CSRF_COOKIE_NAME: "{{ EDXAPP_CROSS_DOMAIN_CSRF_COOKIE_NAME }}"
VIDEO_UPLOAD_PIPELINE: "{{ EDXAPP_VIDEO_UPLOAD_PIPELINE }}"
DEPRECATED_ADVANCED_COMPONENT_TYPES: "{{ EDXAPP_DEPRECATED_ADVANCED_COMPONENT_TYPES }}"
OAUTH_OIDC_ISSUER: "https://{{ EDXAPP_LMS_BASE }}/oauth2"
OAUTH_OIDC_ISSUER: "{{ EDXAPP_LMS_BASE_SCHEME | default('https') }}://{{ EDXAPP_LMS_BASE }}/oauth2"
XBLOCK_FS_STORAGE_BUCKET: "{{ EDXAPP_XBLOCK_FS_STORAGE_BUCKET }}"
XBLOCK_FS_STORAGE_PREFIX: "{{ EDXAPP_XBLOCK_FS_STORAGE_PREFIX }}"
ANALYTICS_DATA_URL: "{{ EDXAPP_ANALYTICS_DATA_URL }}"
@@ -725,7 +725,7 @@ generic_env_config: &edxapp_generic_env
WIKI_ENABLED: true
SYSLOG_SERVER: "{{ EDXAPP_SYSLOG_SERVER }}"
LOG_DIR: "{{ COMMON_DATA_DIR }}/logs/edx"
JWT_ISSUER: "https://{{ EDXAPP_LMS_BASE }}/oauth2"
JWT_ISSUER: "{{ EDXAPP_LMS_BASE_SCHEME | default('https') }}://{{ EDXAPP_LMS_BASE }}/oauth2"
JWT_EXPIRATION: '{{ EDXAPP_JWT_EXPIRATION }}'
#must end in slash (https://docs.djangoproject.com/en/1.4/ref/settings/#media-url)
@@ -102,6 +102,13 @@
encoding=utf8
when: INSIGHTS_DATABASES is defined
- name: create database user for insights
mysql_user: >
name="{{ INSIGHTS_DATABASES.default.USER }}"
password="{{ INSIGHTS_DATABASES.default.PASSWORD }}"
priv='{{ INSIGHTS_DATABASES.default.NAME }}.*:ALL'
when: INSIGHTS_DATABASES is defined
- name: create database for ecommerce
mysql_db: >
db="{{ ECOMMERCE_DEFAULT_DB_NAME }}"
@@ -130,6 +137,13 @@
priv='{{ ANALYTICS_API_DATABASES.reports.NAME }}.*:SELECT'
when: ANALYTICS_API_SERVICE_CONFIG is defined
- name: create pipeline user for the analytics reports database
mysql_user: >
name="pipeline001"
password="password"
priv='{{ ANALYTICS_API_DATABASES.reports.NAME }}.*:ALL'
when: ANALYTICS_API_SERVICE_CONFIG is defined
- name: setup the edx-notes-api db user
mysql_user: >
name={{ EDX_NOTES_API_MYSQL_DB_USER }}
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role hadoop_common
#
#
# vars are namespaced with the module name.
#
HADOOP_COMMON_VERSION: 2.3.0
HADOOP_COMMON_USER_HOME: /home/hadoop
HADOOP_COMMON_HOME: "{{ HADOOP_COMMON_USER_HOME }}/hadoop"
HADOOP_COMMON_LOGS: "{{ HADOOP_COMMON_HOME }}/logs"
HADOOP_COMMON_DATA: /var/lib/hadoop/hdfs
HADOOP_COMMON_CONF_DIR: "{{ HADOOP_COMMON_HOME }}/etc/hadoop"
HADOOP_COMMON_PROTOBUF_VERSION: 2.5.0
HADOOP_COMMON_SERVICES_DIR: "{{ HADOOP_COMMON_USER_HOME }}/services.d"
HADOOP_COMMON_SERVICE_HEAP_MAX: 256
HADOOP_COMMON_TOOL_HEAP_MAX: 128
hadoop_common_role_name: hadoop_common
hadoop_common_user: hadoop
hadoop_common_group: hadoop
hadoop_common_dist:
filename: "hadoop-{{ HADOOP_COMMON_VERSION }}.tar.gz"
url: "https://archive.apache.org/dist/hadoop/core/hadoop-{{ HADOOP_COMMON_VERSION }}/hadoop-{{ HADOOP_COMMON_VERSION }}.tar.gz"
sha256sum: 3fad58b525a47cf74458d0996564a2151c5a28baa1f92383e7932774deef5023
hadoop_common_protobuf_dist:
filename: "protobuf-{{ HADOOP_COMMON_PROTOBUF_VERSION }}.tar.gz"
url: "https://protobuf.googlecode.com/files/protobuf-{{ HADOOP_COMMON_PROTOBUF_VERSION }}.tar.gz"
sha256sum: c55aa3dc538e6fd5eaf732f4eb6b98bdcb7cedb5b91d3b5bdcf29c98c293f58e
hadoop_common_native_dist:
filename: "release-{{ HADOOP_COMMON_VERSION }}.tar.gz"
url: "https://github.com/apache/hadoop-common/archive/release-{{ HADOOP_COMMON_VERSION }}.tar.gz"
sha256sum: a8e1b49d4e891255d465e9449346ac7fb259bb35dce07d9f0df3b46fac3e9bd0
hadoop_common_java_home: /usr/lib/jvm/java-7-oracle
hadoop_common_util_library:
path: /tmp/edx-analytics-hadoop-util
repo: https://github.com/mulby/edx-analytics-hadoop-util
version: master
hadoop_common_env: "{{ HADOOP_COMMON_USER_HOME }}/.hadoop_env"
#
# OS packages
#
hadoop_common_debian_pkgs:
- llvm-gcc
- build-essential
- make
- cmake
- automake
- autoconf
- libtool
- zlib1g-dev
- maven
hadoop_common_redhat_pkgs: []
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Role includes for role hadoop_common
#
# Example:
#
# dependencies:
# - {
# role: my_role
# my_role_var0: "foo"
# my_role_var1: "bar"
# }
dependencies:
- oraclejdk
\ No newline at end of file
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role hadoop_common
#
# Overview:
#
#
# Dependencies:
#
#
# Example play:
#
#
- name: install system packages
apt: >
pkg={{ ','.join(hadoop_common_debian_pkgs) }}
state=present
- name: ensure group exists
group: name={{ hadoop_common_group }} system=yes state=present
- name: ensure user exists
user: >
name={{ hadoop_common_user }}
group={{ hadoop_common_group }}
home={{ HADOOP_COMMON_USER_HOME }} createhome=yes
shell=/bin/bash system=yes generate_ssh_key=yes
state=present
- name: own key authorized
file: >
src={{ HADOOP_COMMON_USER_HOME }}/.ssh/id_rsa.pub
dest={{ HADOOP_COMMON_USER_HOME }}/.ssh/authorized_keys
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
- name: ssh configured
template: >
src=hadoop_user_ssh_config.j2
dest={{ HADOOP_COMMON_USER_HOME }}/.ssh/config
mode=0600 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
- name: ensure user is in sudoers
lineinfile: >
dest=/etc/sudoers state=present
regexp='^%hadoop ALL\=' line='%hadoop ALL=(ALL) NOPASSWD:ALL'
validate='visudo -cf %s'
- name: check if downloaded and extracted
stat: path={{ HADOOP_COMMON_HOME }}
register: extracted_hadoop_dir
- name: distribution downloaded
get_url: >
url={{ hadoop_common_dist.url }}
sha256sum={{ hadoop_common_dist.sha256sum }}
validate_certs=no
dest=/tmp
when: not extracted_hadoop_dir.stat.exists
- name: distribution extracted
shell: >
chdir={{ HADOOP_COMMON_USER_HOME }}
tar -xzf /tmp/{{ hadoop_common_dist.filename }} && chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} hadoop-{{ HADOOP_COMMON_VERSION }}
when: not extracted_hadoop_dir.stat.exists
- name: versioned directory symlink created
file: >
src={{ HADOOP_COMMON_USER_HOME }}/hadoop-{{ HADOOP_COMMON_VERSION }}
dest={{ HADOOP_COMMON_HOME }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
- name: configuration installed
template: >
src={{ item }}.j2
dest={{ HADOOP_COMMON_CONF_DIR }}/{{ item }}
mode=0640 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
with_items:
- hadoop-env.sh
- mapred-site.xml
- core-site.xml
- hdfs-site.xml
- yarn-site.xml
- name: hadoop env file exists
file: >
path={{ hadoop_common_env }} state=touch
owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
- name: env vars sourced in bashrc
lineinfile: >
dest={{ HADOOP_COMMON_USER_HOME }}/.bashrc
state=present
regexp="^. {{ hadoop_common_env }}"
line=". {{ hadoop_common_env }}"
insertbefore=BOF
- name: env vars sourced in hadoop env
lineinfile: >
dest={{ hadoop_common_env }} state=present
regexp="^. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh" line=". {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh"
- name: check if native libraries need to be built
stat: path={{ HADOOP_COMMON_USER_HOME }}/.native_libs_built
register: native_libs_built
- name: protobuf downloaded
get_url: >
url={{ hadoop_common_protobuf_dist.url }}
sha256sum={{ hadoop_common_protobuf_dist.sha256sum }}
validate_certs=no
dest=/tmp
when: not native_libs_built.stat.exists
- name: protobuf extracted
shell: >
chdir=/tmp
tar -xzf {{ hadoop_common_protobuf_dist.filename }}
when: not native_libs_built.stat.exists
- name: protobuf installed
shell: >
chdir=/tmp/protobuf-{{ HADOOP_COMMON_PROTOBUF_VERSION }}
./configure --prefix=/usr/local && make && make install
when: not native_libs_built.stat.exists
- name: native lib source downloaded
get_url: >
url={{ hadoop_common_native_dist.url }}
sha256sum={{ hadoop_common_native_dist.sha256sum }}
validate_certs=no
dest=/tmp/{{ hadoop_common_native_dist.filename }}
when: not native_libs_built.stat.exists
- name: native lib source extracted
shell: >
chdir=/tmp
tar -xzf {{ hadoop_common_native_dist.filename }}
when: not native_libs_built.stat.exists
- name: native lib built
shell: >
chdir=/tmp/hadoop-common-release-{{ HADOOP_COMMON_VERSION }}/hadoop-common-project
mvn package -X -Pnative -DskipTests
environment:
LD_LIBRARY_PATH: /usr/local/lib
when: not native_libs_built.stat.exists
- name: old native libs renamed
shell: >
mv {{ HADOOP_COMMON_HOME }}/lib/native/{{ item.name }} {{ HADOOP_COMMON_HOME }}/lib/native/{{ item.new_name }}
with_items:
- { name: libhadoop.a, new_name: libhadoop32.a }
- { name: libhadoop.so, new_name: libhadoop32.so }
- { name: libhadoop.so.1.0.0, new_name: libhadoop32.so.1.0.0 }
when: not native_libs_built.stat.exists
- name: new native libs installed
shell: >
chdir=/tmp/hadoop-common-release-{{ HADOOP_COMMON_VERSION }}/hadoop-common-project/hadoop-common/target/native/target/usr/local/lib
chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ item }} && cp {{ item }} {{ HADOOP_COMMON_HOME }}/lib/native/{{ item }}
with_items:
- libhadoop.a
- libhadoop.so
- libhadoop.so.1.0.0
when: not native_libs_built.stat.exists
- name: native lib marker touched
file: >
path={{ HADOOP_COMMON_USER_HOME }}/.native_libs_built
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=touch
when: not native_libs_built.stat.exists
- name: util library source checked out
git: >
dest={{ hadoop_common_util_library.path }} repo={{ hadoop_common_util_library.repo }}
version={{ hadoop_common_util_library.version }}
- name: lib directory created
file: >
path={{ HADOOP_COMMON_USER_HOME }}/lib
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=directory
- name: check if the util library needs to be built
stat: path={{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar
register: util_lib_built
- name: util library built
shell: >
chdir={{ hadoop_common_util_library.path }}
{{ hadoop_common_java_home }}/bin/javac -cp `{{ HADOOP_COMMON_HOME }}/bin/hadoop classpath` org/edx/hadoop/input/ManifestTextInputFormat.java &&
{{ hadoop_common_java_home }}/bin/jar cf {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar org/edx/hadoop/input/ManifestTextInputFormat.class &&
chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar
when: not util_lib_built.stat.exists
- name: service directory exists
file: >
path={{ HADOOP_COMMON_SERVICES_DIR }}
mode=0750 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
state=directory
- name: management script installed
template: >
src={{ item }}.j2
dest={{ COMMON_BIN_DIR }}/{{ item }}
mode=0755 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
with_items:
- edx-analytics-hadoop.sh
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
\ No newline at end of file
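A quick check that the namenode is answering at fs.default.name once the services are up (a sketch; assumes the hadoop user's environment is loaded):

    $ sudo su - hadoop -c "hdfs dfs -ls hdfs://localhost:9000/"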
#!/usr/bin/env bash
set -e
usage="Usage: edx-analytics-hadoop.sh [start|stop|restart]"
function start() {
start-dfs.sh
start-yarn.sh
}
function stop() {
stop-dfs.sh
stop-yarn.sh
}
function restart() {
stop
start
}
case ${1-} in
start)
start
;;
stop)
stop
;;
restart)
restart
;;
*)
echo "$usage"
exit 1
;;
esac
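As the commit notes describe, this script is run as the hadoop user once provisioning finishes; it is installed into COMMON_BIN_DIR, which hadoop-env.sh puts on the hadoop user's PATH:

    $ sudo su hadoop
    $ edx-analytics-hadoop.sh start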
export JAVA_HOME={{ hadoop_common_java_home }}
export COMMON_BIN_DIR="{{ COMMON_BIN_DIR }}"
export HADOOP_COMMON_HOME={{ HADOOP_COMMON_HOME }}
export HADOOP_MAPRED_HOME={{ HADOOP_COMMON_HOME }}
export HADOOP_HDFS_HOME={{ HADOOP_COMMON_HOME }}
export YARN_HOME={{ HADOOP_COMMON_HOME }}
export HADOOP_CONF_DIR={{ HADOOP_COMMON_CONF_DIR }}
export HADOOP_COMMON_TOOL_HEAP_MAX="{{ HADOOP_COMMON_TOOL_HEAP_MAX }}"
export HADOOP_COMMON_SERVICE_HEAP_MAX="{{ HADOOP_COMMON_SERVICE_HEAP_MAX }}"
export YARN_HEAPSIZE="$HADOOP_COMMON_SERVICE_HEAP_MAX"
{% raw %}
export PATH=$PATH:$HADOOP_COMMON_HOME/bin:$HADOOP_COMMON_HOME/sbin:$COMMON_BIN_DIR
# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Xmx${HADOOP_COMMON_SERVICE_HEAP_MAX}m -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Xmx${HADOOP_COMMON_SERVICE_HEAP_MAX}m -Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Xmx${HADOOP_COMMON_SERVICE_HEAP_MAX}m -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_COMMON_TOOL_HEAP_MAX}m $HADOOP_CLIENT_OPTS"
# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
{% endraw %}
\ No newline at end of file
Host localhost
StrictHostKeyChecking no
Host 0.0.0.0
StrictHostKeyChecking no
\ No newline at end of file
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:{{ HADOOP_COMMON_DATA }}/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:{{ HADOOP_COMMON_DATA }}/datanode</value>
</property>
</configuration>
\ No newline at end of file
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
\ No newline at end of file
<?xml version="1.0"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
\ No newline at end of file
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role hadoop_master
#
#
# vars are namespaced with the module name.
#
hadoop_master_role_name: hadoop_master
#
# OS packages
#
hadoop_master_debian_pkgs: []
hadoop_master_redhat_pkgs: []
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Role includes for role hadoop_master
#
# Example:
#
# dependencies:
# - {
# role: my_role
# my_role_var0: "foo"
# my_role_var1: "bar"
# }
dependencies:
- hadoop_common
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role hadoop_master
#
# Overview:
#
#
# Dependencies:
#
#
# Example play:
#
#
- name: data directories created
file: >
path={{ HADOOP_COMMON_DATA }}/{{ item }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=directory
with_items:
- namenode
- datanode
- name: check if namenode is formatted
stat: path={{ HADOOP_COMMON_DATA }}/namenode/current/VERSION
register: namenode_version_file
- name: namenode formatted
shell: >
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs namenode -format
sudo_user: "{{ hadoop_common_user }}"
when: not namenode_version_file.stat.exists
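The guarded task above amounts to the following one-off command (destructive: formatting wipes HDFS metadata, which is why the VERSION file is checked first; paths assume the role defaults):

    $ sudo su - hadoop -c ". /home/hadoop/hadoop/etc/hadoop/hadoop-env.sh && hdfs namenode -format"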
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role hive
#
HIVE_VERSION: 0.11.0
HIVE_HOME: "{{ HADOOP_COMMON_USER_HOME }}/hive"
HIVE_CONF: "{{ HIVE_HOME }}/conf"
#
# vars are namespaced with the module name.
#
hive_role_name: hive
hive_dist:
filename: "hive-{{ HIVE_VERSION }}-bin.tar.gz"
url: "https://archive.apache.org/dist/hive/hive-{{ HIVE_VERSION }}/hive-{{ HIVE_VERSION }}-bin.tar.gz"
sha256sum: c22ee328438e80a8ee4b66979dba69650511a73f8b6edf2d87d93c74283578e5
#
# OS packages
#
hive_debian_pkgs: []
hive_redhat_pkgs: []
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Handlers for role hive
#
# Overview:
#
#
- name: notify me
debug: msg="stub handler"
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Role includes for role hive
#
# Example:
#
# dependencies:
# - {
# role: my_role
# my_role_var0: "foo"
# my_role_var1: "bar"
# }
dependencies:
- hadoop_common
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role hive
#
# Overview:
#
#
# Dependencies:
#
#
# Example play:
#
#
- name: check if downloaded and extracted
stat: path={{ HIVE_HOME }}
register: extracted_dir
- name: distribution downloaded
get_url: >
url={{ hive_dist.url }}
sha256sum={{ hive_dist.sha256sum }}
validate_certs=no
dest=/tmp
when: not extracted_dir.stat.exists
- name: distribution extracted
shell: >
chdir={{ HADOOP_COMMON_USER_HOME }}
tar -xzf /tmp/{{ hive_dist.filename }} && chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} hive-{{ HIVE_VERSION }}-bin
when: not extracted_dir.stat.exists
- name: versioned directory symlink created
file: >
src={{ HADOOP_COMMON_USER_HOME }}/hive-{{ HIVE_VERSION }}-bin
dest={{ HIVE_HOME }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
- name: configuration installed
template: >
src={{ item }}.j2
dest={{ HIVE_CONF }}/{{ item }}
mode=0640 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
with_items:
- hive-env.sh
- name: env vars sourced in hadoop env
lineinfile: >
dest={{ hadoop_common_env }} state=present
regexp="^. {{ HIVE_CONF }}/hive-env.sh" line=". {{ HIVE_CONF }}/hive-env.sh"
#!/bin/bash
export HIVE_HOME={{ HIVE_HOME }}
export PATH=$PATH:$HIVE_HOME/bin
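With hive-env.sh sourced into the hadoop user's environment, a minimal smoke test (assumes HDFS and YARN are already running):

    $ sudo su - hadoop -c "hive -e 'SHOW DATABASES;'"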
@@ -23,9 +23,9 @@ INSIGHTS_SECRET_KEY: 'YOUR_SECRET_KEY_HERE'
INSIGHTS_OAUTH2_KEY: 'YOUR_OAUTH2_KEY'
# This will not work on single instance sandboxes
INSIGHTS_DOC_BASE: 'http://localhost/en/latest'
INSIGHTS_LMS_BASE: 'http://localhost:18000'
ANALYTICS_API_ENDPOINT: 'http://localhost:18010'
INSIGHTS_DATA_API_AUTH_TOKEN: 'YOUR_DATA_API_AUTH_TOKEN'
INSIGHTS_LMS_BASE: 'http://localhost:8000'
ANALYTICS_API_ENDPOINT: 'http://localhost:8100/api/v0'
INSIGHTS_DATA_API_AUTH_TOKEN: 'changeme'
INSIGHTS_PLATFORM_NAME: 'edX'
INSIGHTS_APPLICATION_NAME: 'Insights'
INSIGHTS_SEGMENT_IO_KEY: 'YOUR_KEY'
@@ -72,7 +72,7 @@ INSIGHTS_CONFIG:
SECRET_KEY: '{{ INSIGHTS_SECRET_KEY }}'
DATA_API_URL: '{{ ANALYTICS_API_ENDPOINT }}'
DATA_API_AUTH_TOKEN: '{{ INSIGHTS_DATA_API_AUTH_TOKEN }}'
SOCIAL_AUTH_REDIRECT_IS_HTTPS: true
SOCIAL_AUTH_REDIRECT_IS_HTTPS: '{{ INSIGHTS_SOCIAL_AUTH_REDIRECT_IS_HTTPS | default(true) }}'
SOCIAL_AUTH_EDX_OIDC_KEY: '{{ INSIGHTS_OAUTH2_KEY }}'
SOCIAL_AUTH_EDX_OIDC_SECRET: '{{ INSIGHTS_OAUTH2_SECRET }}'
SOCIAL_AUTH_EDX_OIDC_URL_ROOT: '{{ INSIGHTS_OAUTH2_URL_ROOT }}'
@@ -105,6 +105,11 @@ INSIGHTS_CONFIG:
DATABASES: "{{ INSIGHTS_DATABASES }}"
LMS_COURSE_SHORTCUT_BASE_URL: "{{ INSIGHTS_LMS_COURSE_SHORTCUT_BASE_URL }}"
COURSE_API_URL: "{{ INSIGHTS_COURSE_API_URL }}"
# When insights is co-located with other django services, we need to ensure they don't all
# use the same cookie names.
SESSION_COOKIE_NAME: "{{ INSIGHTS_SESSION_COOKIE_NAME | default('sessionid') }}"
CSRF_COOKIE_NAME: "{{ INSIGHTS_CSRF_COOKIE_NAME | default('csrftoken') }}"
LANGUAGE_COOKIE_NAME: "{{ INSIGHTS_LANGUAGE_COOKIE_NAME | default('language') }}"
INSIGHTS_NEWRELIC_APPNAME: "{{ COMMON_ENVIRONMENT }}-{{ COMMON_DEPLOYMENT }}-analytics-api"
INSIGHTS_PIP_EXTRA_ARGS: "-i {{ COMMON_PYPI_MIRROR_URL }}"
@@ -133,3 +133,9 @@
name={{ insights_service_name }}
when: not disable_edx_services
sudo_user: "{{ supervisor_service_user }}"
- name: register oauth2 application
shell: >
. /edx/app/edxapp/edxapp_env && {{ COMMON_BIN_DIR }}/manage.edxapp lms --settings=aws create_oauth2_client "{{ INSIGHTS_OAUTH2_APP_URL_ROOT }}" "{{ INSIGHTS_OAUTH2_APP_URL_ROOT }}/complete/edx-oidc/" confidential --username "{{ INSIGHTS_OAUTH2_APP_USERNAME }}" --client_name "{{ INSIGHTS_OAUTH2_APP_CLIENT_NAME }}" --client_id "{{ INSIGHTS_OAUTH2_KEY }}" --client_secret "{{ INSIGHTS_OAUTH2_SECRET }}" --trusted
sudo_user: edxapp
when: insights_register_oauth_app|bool
\ No newline at end of file
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Defaults for role sqoop
#
SQOOP_VERSION: 1.4.6
SQOOP_MYSQL_CONNECTOR_VERSION: 5.1.29
SQOOP_HOME: "{{ HADOOP_COMMON_USER_HOME }}/sqoop"
SQOOP_CONF: "{{ SQOOP_HOME }}/conf"
SQOOP_LIB: "{{ SQOOP_HOME }}/lib"
#
# vars are namespaced with the module name.
#
sqoop_role_name: sqoop
sqoop_dist:
filename: "sqoop-{{ SQOOP_VERSION }}.bin__hadoop-2.0.4-alpha.tar.gz"
url: "http://www.carfab.com/apachesoftware/sqoop/{{ SQOOP_VERSION }}/sqoop-{{ SQOOP_VERSION }}.bin__hadoop-2.0.4-alpha.tar.gz"
sha256sum: d582e7968c24ff040365ec49764531cb76dfa22c38add5f57a16a57e70d5d496
sqoop_mysql_connector_dist:
filename: "mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}.tar.gz"
url: "http://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}.tar.gz"
sha256sum: 04ad83b655066b626daaabb9676a00f6b4bc43f0c234cbafafac1209dcf1be73
#
# OS packages
#
sqoop_debian_pkgs: []
sqoop_redhat_pkgs: []
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Handlers for role sqoop
#
# Overview:
#
#
- name: notify me
debug: msg="stub handler"
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
##
# Role includes for role sqoop
#
# Example:
#
# dependencies:
# - {
# role: my_role
# my_role_var0: "foo"
# my_role_var1: "bar"
# }
dependencies:
- hadoop_common
---
#
# edX Configuration
#
# github: https://github.com/edx/configuration
# wiki: https://github.com/edx/configuration/wiki
# code style: https://github.com/edx/configuration/wiki/Ansible-Coding-Conventions
# license: https://github.com/edx/configuration/blob/master/LICENSE.TXT
#
#
#
# Tasks for role sqoop
#
# Overview:
#
#
# Dependencies:
#
#
# Example play:
#
#
- name: check if downloaded and extracted
stat: path={{ SQOOP_LIB }}/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}-bin.jar
register: installed
- name: distribution downloaded
get_url: >
url={{ sqoop_dist.url }}
sha256sum={{ sqoop_dist.sha256sum }}
validate_certs=no
dest=/tmp
when: not installed.stat.exists
- name: distribution extracted
shell: >
chdir={{ HADOOP_COMMON_USER_HOME }}
tar -xzf /tmp/{{ sqoop_dist.filename }} && chown -R {{ hadoop_common_user }}:{{ hadoop_common_group }} sqoop-{{ SQOOP_VERSION }}.bin__hadoop-2.0.4-alpha
when: not installed.stat.exists
- name: versioned directory symlink created
file: >
src={{ HADOOP_COMMON_USER_HOME }}/sqoop-{{ SQOOP_VERSION }}.bin__hadoop-2.0.4-alpha
dest={{ SQOOP_HOME }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=link
- name: mysql connector distribution downloaded
get_url: >
url={{ sqoop_mysql_connector_dist.url }}
sha256sum={{ sqoop_mysql_connector_dist.sha256sum }}
validate_certs=no
dest=/tmp
when: not installed.stat.exists
- name: mysql connector distribution extracted
shell: >
chdir=/tmp
tar -xzf /tmp/{{ sqoop_mysql_connector_dist.filename }}
when: not installed.stat.exists
- name: sqoop lib exists
file: >
path={{ SQOOP_LIB }}
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=directory
- name: mysql connector installed
shell: >
chdir=/tmp/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}
cp mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}-bin.jar {{ SQOOP_LIB }} &&
chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ SQOOP_LIB }}/mysql-connector-java-{{ SQOOP_MYSQL_CONNECTOR_VERSION }}-bin.jar
when: not installed.stat.exists
- name: configuration installed
template: >
src={{ item }}.j2
dest={{ SQOOP_CONF }}/{{ item }}
mode=0640 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
with_items:
- sqoop-env.sh
- name: env vars sourced in hadoop env
lineinfile: >
dest={{ hadoop_common_env }} state=present
regexp="^. {{ SQOOP_CONF }}/sqoop-env.sh" line=". {{ SQOOP_CONF }}/sqoop-env.sh"
#!/bin/bash
export SQOOP_HOME={{ SQOOP_HOME }}
export SQOOP_LIB=$SQOOP_HOME/lib
export PATH=$PATH:$SQOOP_HOME/bin
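A quick check that sqoop and the MySQL connector are wired together (a sketch; list-databases needs working MySQL credentials, e.g. the pipeline user created in edxlocal):

    $ sudo su - hadoop -c "sqoop version"
    $ sudo su - hadoop -c "sqoop list-databases --connect jdbc:mysql://localhost/ --username pipeline001 --password password"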
@@ -12,6 +12,9 @@
COMMON_MOTD_TEMPLATE: 'devstack_motd.tail.j2'
COMMON_SSH_PASSWORD_AUTH: "yes"
ENABLE_LEGACY_ORA: !!null
EDXAPP_LMS_BASE: 127.0.0.1:8000
EDXAPP_OAUTH_ENFORCE_SECURE: false
EDXAPP_LMS_BASE_SCHEME: http
roles:
- edx_ansible
- edxlocal
@@ -29,5 +32,4 @@
- browsermob-proxy
- local_dev
- demo
- role: analytics_api
when: ANALYTICS_API_GIT_IDENTITY
- analytics_api
analytics_api @ d3a72344
Subproject commit d3a72344c0f1cc3dab05ca871bafa9a7a9effe1e
cs_comments_service @ 8d83377b
Subproject commit 8d83377b7d93b69eebe2baebad57e9119f45f141
edx-platform @ 0d730f37
Subproject commit 0d730f373f693b031498b5cbfaddc31d52ae4224
insights @ c4b3a529
Subproject commit c4b3a529a960a9bbf59ce76c5cc3aefa89acfefd
@@ -43,6 +43,8 @@ edx_platform_mount_dir = "edx-platform"
themes_mount_dir = "themes"
forum_mount_dir = "cs_comments_service"
ora_mount_dir = "ora"
insights_mount_dir = "insights"
analytics_api_mount_dir = "analytics_api"
if ENV['VAGRANT_MOUNT_BASE']
@@ -50,6 +52,8 @@ if ENV['VAGRANT_MOUNT_BASE']
themes_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + themes_mount_dir
forum_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + forum_mount_dir
ora_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + ora_mount_dir
insights_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + insights_mount_dir
analytics_api_mount_dir = ENV['VAGRANT_MOUNT_BASE'] + "/" + analytics_api_mount_dir
end
@@ -122,6 +126,8 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.network :forwarded_port, guest: 18080, host: 18080
config.vm.network :forwarded_port, guest: 8765, host: 8765
config.vm.network :forwarded_port, guest: 9200, host: 9200
config.vm.network :forwarded_port, guest: 8100, host: 8100
config.vm.network :forwarded_port, guest: 8110, host: 8110
config.ssh.insert_key = true
config.vm.synced_folder ".", "/vagrant", disabled: true
@@ -140,6 +146,10 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
create: true, owner: "forum", group: "www-data"
config.vm.synced_folder "#{ora_mount_dir}", "/edx/app/ora/ora",
create: true, owner: "ora", group: "www-data"
config.vm.synced_folder "#{insights_mount_dir}", "/edx/app/insights/edx_analytics_dashboard",
create: true, owner: "insights", group: "www-data"
config.vm.synced_folder "#{analytics_api_mount_dir}", "/edx/app/analytics_api/analytics_api",
create: true, owner: "analytics_api", group: "www-data"
else
config.vm.synced_folder "#{edx_platform_mount_dir}", "/edx/app/edxapp/edx-platform",
create: true, nfs: true
@@ -149,6 +159,10 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
create: true, nfs: true
config.vm.synced_folder "#{ora_mount_dir}", "/edx/app/ora/ora",
create: true, nfs: true
config.vm.synced_folder "#{insights_mount_dir}", "/edx/app/insights/edx_analytics_dashboard",
create: true, nfs: true
config.vm.synced_folder "#{analytics_api_mount_dir}", "/edx/app/analytics_api/analytics_api",
create: true, nfs: true
end
config.vm.provider :virtualbox do |vb|