edx / configuration

Commit 9851c3bd, authored Oct 14, 2015 by Gabe Mulley
create an analytics pipeline container
Parent: 3ade5c9f
Showing 9 changed files with 99 additions and 85 deletions:
docker/build/hadoop-master/Dockerfile (+15, -0)
docker/plays/analytics_pipeline.yml (+6, -0)
playbooks/roles/analytics_pipeline/tasks/main.yml (+39, -56)
playbooks/roles/analytics_pipeline/templates/analytics-pipeline-start.sh.j2 (+24, -0)
playbooks/roles/edxlocal/defaults/main.yml (+5, -0)
playbooks/roles/hadoop_common/defaults/main.yml (+1, -0)
playbooks/roles/hadoop_common/tasks/main.yml (+9, -0)
playbooks/roles/hive/tasks/main.yml (+0, -1)
playbooks/roles/hive/templates/hive-site.xml.j2 (+0, -28)
docker/build/hadoop-master/Dockerfile (new file, 0 → 100644)
FROM edxoperations/precise-common:v2
MAINTAINER edxops

ENV CONFIG_BRANCH hack2015/gabe/analytics-hadoop

USER docker
WORKDIR /edx/app/edx_ansible/edx_ansible
RUN sudo git fetch --all
RUN sudo git checkout $CONFIG_BRANCH
RUN sudo git reset --hard origin/$CONFIG_BRANCH
RUN sudo git pull

WORKDIR /edx/app/edx_ansible/edx_ansible/docker/plays
RUN sudo ansible-playbook analytics_pipeline.yml -c local

USER root
CMD ["/edx/bin/analytics-pipeline-start.sh"]
docker/plays/analytics_pipeline.yml (new file, 0 → 100644)
- name: Deploy all dependencies needed to run edx-analytics-pipeline
  hosts: all
  sudo: True
  gather_facts: True
  roles:
    - analytics_pipeline
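The Dockerfile runs this play during the image build with sudo ansible-playbook analytics_pipeline.yml -c local. As a sketch, the same play could also be run by hand inside a checkout; the inline inventory flag here is an assumption, not taken from this commit:

$ cd docker/plays
$ sudo ansible-playbook -i localhost, -c local analytics_pipeline.yml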
playbooks/roles/analytics_pipeline/tasks/main.yml
@@ -117,60 +117,43 @@
    - install
    - install:app-requirements

- name: ensure hdfs services are started
  service: >
    name=hdfs
    state=started
  tags:
    - manage
    - manage:start

- name: ensure map reduce services are started
  service: >
    name=yarn
    state=started
  tags:
    - manage
    - manage:start

- name: ensure package dir exists in HDFS
  shell: >
    . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/
  sudo_user: "{{ hadoop_common_user }}"
  tags:
    - install
    - install:app-requirements

- name: ensure util library is in HDFS
  shell: >
    . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -put -f {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/
  sudo_user: "{{ hadoop_common_user }}"
  tags:
    - install
    - install:app-requirements

- name: ensure the data directory exists
  shell: >
    . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}
  sudo_user: "{{ hadoop_common_user }}"
  tags:
    - install
    - install:base

- name: ensure tracking log file can be read
  file: >
    path={{ COMMON_LOG_DIR }}/tracking/tracking.log
    mode=0644
  ignore_errors: yes
  tags:
    - install
    - install:configuration

- name: pipeline start script file installed
  template: >
    src=analytics-pipeline-start.sh.j2
    dest={{ COMMON_BIN_DIR }}/analytics-pipeline-start.sh
    owner={{ hadoop_common_user }} group={{ hadoop_common_group }} mode=755

- name: cron job syncs tracking log file to hdfs
  cron: >
    user={{ hadoop_common_user }}
    name="Sync tracking log to HDFS"
    job="{{ HADOOP_COMMON_HOME }}/bin/hdfs dfs -put -f {{ COMMON_LOG_DIR }}/tracking/tracking.log {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}/tracking.log"
  tags:
    - install
    - install:configuration
#- name: ensure hdfs services are started
# shell: >
# . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && {{ HADOOP_COMMON_HOME }}/sbin/start-dfs.sh
#
#- name: ensure hdfs services are started
# shell: >
# . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && {{ HADOOP_COMMON_HOME }}/sbin/start-yarn.sh
#
#- name: ensure package dir exists in HDFS
# shell: >
# . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/
# sudo_user: "{{ hadoop_common_user }}"
#
#- name: ensure util library is in HDFS
# shell: >
# . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -put -f {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/
# sudo_user: "{{ hadoop_common_user }}"
#
#- name: ensure the data directory exists
# shell: >
# . {{ HADOOP_COMMON_CONF_DIR }}/hadoop-env.sh && hdfs dfs -mkdir -p {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}
# sudo_user: "{{ hadoop_common_user }}"
#
#- name: ensure tracking log file can be read
# file: >
# path={{ COMMON_LOG_DIR }}/tracking/tracking.log
# mode=0644
# ignore_errors: yes
#
#- name: cron job syncs tracking log file to hdfs
# cron: >
# user={{ hadoop_common_user }}
# name="Sync tracking log to HDFS"
# job="{{ HADOOP_COMMON_HOME }}/bin/hdfs dfs -put -f {{ COMMON_LOG_DIR }}/tracking/tracking.log {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}/tracking.log"
playbooks/roles/analytics_pipeline/templates/analytics-pipeline-start.sh.j2 (new file, 0 → 100644)
#!/bin/bash

if [ -n "$GITHUB_USER" ]
then
    sudo usermod docker -s /bin/bash
    su docker -c "mkdir -p /home/docker/.ssh"
    echo >> /home/docker/.ssh/authorized_keys
    curl https://github.com/$GITHUB_USER.keys >> /home/docker/.ssh/authorized_keys
fi

# remote-task will be running ansible, and this directory will be owned by root, just clean it up.
sudo rm -rf /home/docker/.ansible

/usr/sbin/sshd

HADOOP_HOME={{ HADOOP_COMMON_HOME }}

su hadoop -c "$HADOOP_HOME/sbin/start-dfs.sh"
su hadoop -c "$HADOOP_HOME/sbin/start-yarn.sh"
su hadoop -c "$HADOOP_HOME/bin/hdfs dfs -mkdir -p /edx-analytics-pipeline/packages/"
su hadoop -c "$HADOOP_HOME/bin/hdfs dfs -put -f {{ HADOOP_COMMON_USER_HOME }}/lib/edx-analytics-hadoop-util.jar /edx-analytics-pipeline/packages/"
su hadoop -c "$HADOOP_HOME/bin/hdfs dfs -mkdir -p {{ ANALYTICS_PIPELINE_HDFS_DATA_DIR }}"

tail -f $HADOOP_HOME/logs/*.log
\ No newline at end of file
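A sketch of how the GITHUB_USER hook above is meant to be used: pass the variable when starting the container, then ssh in as the docker user with the matching GitHub key. The image name and host port mapping are assumptions, not part of this commit:

$ docker run -d -e GITHUB_USER=<your-github-login> -p 2222:22 edx/analytics-pipeline
$ ssh -p 2222 docker@localhost

The final tail -f on the Hadoop logs keeps the script in the foreground, which is what keeps the container running after startup.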
playbooks/roles/edxlocal/defaults/main.yml
@@ -55,3 +55,8 @@ edxlocal_database_users:
      user: "{{ HIVE_METASTORE_DATABASE.user | default(None) }}",
      pass: "{{ HIVE_METASTORE_DATABASE.password | default(None) }}"
    }
  - {
      db: "{{ HIVE_METASTORE_DATABASE.name | default(None) }}",
      user: "{{ HIVE_METASTORE_DATABASE.user | default(None) }}",
      pass: "{{ HIVE_METASTORE_DATABASE.password | default(None) }}"
    }
playbooks/roles/hadoop_common/defaults/main.yml
@@ -58,5 +58,6 @@ hadoop_common_debian_pkgs:
  - libtool
  - zlib1g-dev
  - maven
  - openssh-server

hadoop_common_redhat_pkgs: []
playbooks/roles/hadoop_common/tasks/main.yml
@@ -193,3 +193,12 @@
    path={{ HADOOP_COMMON_SERVICES_DIR }}
    mode=0750 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
    state=directory

- name: ssh pid directory exists
  file: >
    path=/var/run/sshd
    mode=0755 owner=root group=root
    state=directory

- name: sshd service started
  shell: /usr/sbin/sshd
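A quick manual check that these tasks (and the start script above) leave sshd in working order; the commands are assumed to be present in the image:

# pid directory created by the task above
$ ls -ld /var/run/sshd
# non-empty once sshd has been launched
$ pgrep -x sshd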
playbooks/roles/hive/tasks/main.yml
@@ -75,7 +75,6 @@
    mode=0640 owner={{ hadoop_common_user }} group={{ hadoop_common_group }}
  with_items:
    - hive-env.sh
    - hive-site.xml

- name: env vars sourced in hadoop env
  lineinfile: >
...
playbooks/roles/hive/templates/hive-site.xml.j2 (deleted, 100644 → 0)
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://{{ HIVE_METASTORE_DATABASE.host }}:{{ HIVE_METASTORE_DATABASE.port }}/{{ HIVE_METASTORE_DATABASE.name }}</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>{{ HIVE_METASTORE_DATABASE.user }}</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>{{ HIVE_METASTORE_DATABASE.password }}</value>
    </property>
    <property>
        <name>datanucleus.autoCreateSchema</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.metastore.schema.verification</name>
        <value>true</value>
    </property>
</configuration>