Commit 72a82807 by Gabe Mulley

Add upstart jobs, upload the util library to HDFS, and fix tracking log permissions

parent 12d3724f
......@@ -13,8 +13,7 @@
* document instructions
* vagrant up
* as vagrant user run "/edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook -i localhost, -c local analytics_single.yml"
* as vagrant user run "/edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook -i localhost, -c local analytics_single.yml"
* sudo su hadoop
* edx-analytics-hadoop.sh start
......
......@@ -24,12 +24,18 @@ ANALYTICS_PIPELINE_INPUT_DATABASE:
port: 3306
ANALYTICS_PIPELINE_CONFIG_DIR: "{{ COMMON_CFG_DIR }}/edx-analytics-pipeline"
ANALYTICS_PIPELINE_HDFS_DATA_DIR: "hdfs://localhost:9000/data/"
#
# vars are namespaced with the module name.
#
analytics_pipeline_role_name: analytics_pipeline
analytics_pipeline_util_library:
path: /tmp/edx-analytics-hadoop-util
repo: https://github.com/mulby/edx-analytics-hadoop-util
version: master
#
# OS packages
#
......
......@@ -20,6 +20,7 @@
# }
dependencies:
- common
- hadoop_master
- hive
- sqoop
......@@ -39,8 +39,61 @@
dest={{ COMMON_CFG_DIR }}/edx-analytics-pipeline/input.json
mode=0644 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
# Clone the edx-analytics-hadoop-util source (repo/version come from
# analytics_pipeline_util_library defaults) so it can be compiled locally.
- name: util library source checked out
git: >
dest={{ analytics_pipeline_util_library.path }} repo={{ analytics_pipeline_util_library.repo }}
version={{ analytics_pipeline_util_library.version }}
# Destination directory for the compiled jar, owned by the hadoop user.
- name: lib directory created
file: >
path={{ HADOOP_COMMON_USER_HOME }}/lib
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=directory
# Skip the (slow) javac/jar step on reruns if the jar already exists;
# NOTE(review): this means a new checkout is NOT rebuilt until the jar is
# deleted manually — confirm that is the intended idempotence trade-off.
- name: check if the util library needs to be built
stat: >
path={{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar
register: util_lib_built
# Compile ManifestTextInputFormat against the Hadoop classpath, package it
# into a jar, and hand ownership to the hadoop user.
- name: util library built
shell: >
chdir={{ analytics_pipeline_util_library.path }}
{{ hadoop_common_java_home }}/bin/javac -cp `{{ HADOOP_COMMON_HOME }}/bin/hadoop classpath` org/edx/hadoop/input/ManifestTextInputFormat.java &&
{{ hadoop_common_java_home }}/bin/jar cf {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar org/edx/hadoop/input/ManifestTextInputFormat.class &&
chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar
when: not util_lib_built.stat.exists
# The hdfs/yarn services here are the upstart jobs installed by the
# hadoop_common role; they must be running before any `hdfs dfs` command below.
- name: ensure hdfs services are started
service: >
name=hdfs
state=started
- name: ensure map reduce services are started
service: >
name=yarn
state=started
# Each HDFS command sources hadoop-env.sh first so `hdfs` is on PATH and
# configured; commands run as the hadoop user, which owns the HDFS tree.
# `-mkdir -p` and `-put -f` keep these tasks safe to re-run.
- name: ensure package dir exists in HDFS
shell: >
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/
sudo_user: "{{ hadoop_common_user }}"
# Upload the jar built earlier in this role so pipeline jobs can load it
# from HDFS.
- name: ensure util library is in HDFS
shell: >
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -put -f {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/
sudo_user: "{{ hadoop_common_user }}"
# ANALYTICS_PIPELINE_HDFS_DATA_DIR defaults to hdfs://localhost:9000/data/.
- name: ensure the data directory exists
shell: >
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}
sudo_user: "{{ hadoop_common_user }}"
# World-readable (0644) so the hadoop user's cron sync job can read the
# tracking log. NOTE(review): log rotation may recreate the file with the
# old mode — confirm rotation config preserves 0644.
- name: ensure tracking log file can be read
file: >
path={{ COMMON_LOG_DIR }}/tracking/tracking.log
mode=0644
- name: cron job syncs tracking log file to hdfs
cron: >
user={{ hadoop_common_user }}
name="Sync tracking log to HDFS"
job="{{ HADOOP_COMMON_HOME }}/bin/hdfs dfs -put -f /edx/var/log/tracking/tracking.log hdfs://localhost:9000/data/tracking.log"
job="{{ HADOOP_COMMON_HOME }}/bin/hdfs dfs -put -f {{ COMMON_LOG_DIR }}/tracking.log {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}/tracking.log"
......@@ -42,10 +42,6 @@ hadoop_common_native_dist:
url: "https://github.com/apache/hadoop-common/archive/release-{{ HADOOP_COMMON_VERSION }}.tar.gz"
sha256sum: a8e1b49d4e891255d465e9449346ac7fb259bb35dce07d9f0df3b46fac3e9bd0
hadoop_common_java_home: /usr/lib/jvm/java-7-oracle
hadoop_common_util_library:
path: /tmp/edx-analytics-hadoop-util
repo: https://github.com/mulby/edx-analytics-hadoop-util
version: master
hadoop_common_env: "{{ HADOOP_COMMON_USER_HOME }}/.hadoop_env"
#
......
......@@ -91,6 +91,15 @@
- hdfs-site.xml
- yarn-site.xml
# Render the hdfs.conf/yarn.conf upstart jobs into /etc/init. Upstart jobs
# are read by root's init daemon, so 0640 root:root is sufficient.
- name: upstart scripts installed
template: >
src={{ item }}.j2
dest=/etc/init/{{ item }}
mode=0640 owner=root group=root
with_items:
- hdfs.conf
- yarn.conf
# Touch the env file sourced by the hadoop tooling so later tasks can
# append to it unconditionally.
- name: hadoop env file exists
file: >
path={{ hadoop_common_env }} state=touch
......@@ -180,38 +189,8 @@
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=touch
when: not native_libs_built.stat.exists
- name: util library source checked out
git: >
dest={{ hadoop_common_util_library.path }} repo={{ hadoop_common_util_library.repo }}
version={{ hadoop_common_util_library.version }}
- name: lib directory created
file: >
path={{ HADOOP_COMMON_USER_HOME }}/lib
owner={{ hadoop_common_user }} group={{ hadoop_common_group }} state=directory
- name: check if the util library needs to be built
stat: path={{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar
register: util_lib_built
- name: util library built
shell: >
chdir={{ hadoop_common_util_library.path }}
{{ hadoop_common_java_home }}/bin/javac -cp `{{ HADOOP_COMMON_HOME }}/bin/hadoop classpath` org/edx/hadoop/input/ManifestTextInputFormat.java &&
{{ hadoop_common_java_home }}/bin/jar cf {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar org/edx/hadoop/input/ManifestTextInputFormat.class &&
chown {{ hadoop_common_user }}:{{ hadoop_common_group }} {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar
when: not util_lib_built.stat.exists
# Directory for hadoop service state, private to the hadoop user/group.
- name: service directory exists
file: >
path={{ HADOOP_COMMON_SERVICES_DIR }}
mode=0750 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
state=directory
# Install the edx-analytics-hadoop.sh start/stop/restart wrapper on the
# common bin path; 0755 so any user can invoke it (it runs as its caller).
- name: management script installed
template: >
src={{ item }}.j2
dest={{ COMMON_BIN_DIR }}/{{ item }}
mode=0755 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
with_items:
- edx-analytics-hadoop.sh
#!/usr/bin/env bash
# Manage the single-node Hadoop stack (HDFS + YARN) as one unit by
# delegating to the standard Hadoop control scripts, which must be on PATH.
#
# Usage: edx-analytics-hadoop.sh [start|stop|restart]
# Exits non-zero (and prints usage) on a missing or unknown argument.
set -e

usage="Usage: edx-analytics-hadoop.sh [start|stop|restart]"

# Bring up HDFS before YARN, mirroring the order used at stop time.
function start() {
    start-dfs.sh
    start-yarn.sh
}

# NOTE(review): this stops HDFS before YARN; stopping YARN first (the
# reverse of start order) may be safer for running jobs — confirm intent.
function stop() {
    stop-dfs.sh
    stop-yarn.sh
}

function restart() {
    stop
    start
}

# ${1-} expands to the empty string when no argument was given, so the
# unknown-argument branch handles that case instead of erroring.
case ${1-} in
    start)
        start
        ;;
    stop)
        stop
        ;;
    restart)
        restart
        ;;
    *)
        # Quoted to prevent word splitting of the usage message (SC2086).
        echo "$usage"
        exit 1
        ;;
esac
......@@ -6,14 +6,15 @@
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:{{ HADOOP_COMMON_DATA }}/namenode</value>
</property>
<property>
<name>dfs.datanode.name.dir</name>
<name>dfs.datanode.data.dir</name>
<value>file:{{ HADOOP_COMMON_DATA }}/datanode</value>
</property>
</configuration>
\ No newline at end of file
# Upstart job for the HDFS daemons, rendered from hdfs.conf.j2.
description "hdfs"
# Couple the HDFS lifecycle to yarn's: when yarn begins starting, hdfs is
# started first, and stopping yarn takes hdfs down with it.
start on starting yarn
stop on stopping yarn
# Run the Hadoop control scripts as the hadoop user rather than root.
setuid {{ hadoop_common_user }}
# Source hadoop-env.sh so the control scripts find JAVA_HOME/HADOOP paths.
pre-start script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
start-dfs.sh
end script
post-stop script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
stop-dfs.sh
end script
# Upstart job for the YARN daemons, rendered from yarn.conf.j2. This is
# the root of the start chain: hdfs.conf hooks onto these yarn events.
description "yarn"
# Start on normal multi-user runlevels; stop when leaving them.
start on runlevel [2345]
stop on runlevel [!2345]
# Run the Hadoop control scripts as the hadoop user rather than root.
setuid {{ hadoop_common_user }}
# Source hadoop-env.sh so the control scripts find JAVA_HOME/HADOOP paths.
pre-start script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
start-yarn.sh
end script
post-stop script
. {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh
stop-yarn.sh
end script
......@@ -72,7 +72,7 @@ INSIGHTS_CONFIG:
SECRET_KEY: '{{ INSIGHTS_SECRET_KEY }}'
DATA_API_URL: '{{ ANALYTICS_API_ENDPOINT }}'
DATA_API_AUTH_TOKEN: '{{ INSIGHTS_DATA_API_AUTH_TOKEN }}'
SOCIAL_AUTH_REDIRECT_IS_HTTPS: '{{ INSIGHTS_SOCIAL_AUTH_REDIRECT_IS_HTTPS | default(true) }}'
SOCIAL_AUTH_REDIRECT_IS_HTTPS: '{{ INSIGHTS_SOCIAL_AUTH_REDIRECT_IS_HTTPS | default(true) | bool }}'
SOCIAL_AUTH_EDX_OIDC_KEY: '{{ INSIGHTS_OAUTH2_KEY }}'
SOCIAL_AUTH_EDX_OIDC_SECRET: '{{ INSIGHTS_OAUTH2_SECRET }}'
SOCIAL_AUTH_EDX_OIDC_URL_ROOT: '{{ INSIGHTS_OAUTH2_URL_ROOT }}'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment