edx / configuration

Commit 6f6900af, authored Apr 30, 2018 by Abdul Mannan
add missing packages for analytics pipeline dockers

Parent: a863cd17
Showing 8 changed files with 74 additions and 8 deletions (+74 -8):

docker/build/analytics_pipeline/Dockerfile                            +4   -2
docker/build/analytics_pipeline/devstack.sh                           +12  -0
docker/build/analytics_pipeline_hadoop_datanode/Dockerfile            +4   -1
docker/build/analytics_pipeline_hadoop_namenode/Dockerfile            +4   -1
docker/build/analytics_pipeline_hadoop_nodemanager/Dockerfile         +4   -1
docker/build/analytics_pipeline_hadoop_resourcemanager/Dockerfile     +4   -1
docker/build/analytics_pipeline_spark_master/Dockerfile               +21  -1
docker/build/analytics_pipeline_spark_worker/Dockerfile               +21  -1
docker/build/analytics_pipeline/Dockerfile
@@ -38,7 +38,7 @@ ENV BOTO_CONFIG=/dev/null \
     EDX_PPA_KEY_ID='69464050'
-ENV PATH="$PATH:/edx/app/analytics_pipeline/venvs/analytics_pipeline/bin:${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${HIVE_HOME}/bin:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${SQOOP_HOME}/bin" \
+ENV PATH="/edx/app/analytics_pipeline/venvs/analytics_pipeline/bin:${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${HIVE_HOME}/bin:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${SQOOP_HOME}/bin:$PATH" \
     COMMON_DATA_DIR=$COMMON_BASE_DIR/var \
     COMMON_APP_DIR=$COMMON_BASE_DIR/app \
     COMMON_LOG_DIR=$COMMON_BASE_DIR/var/log \

@@ -128,9 +128,11 @@ RUN pip install $COMMON_PIP_PACKAGES_PIP $COMMON_PIP_PACKAGES_SETUPTOOLS $COMMON
     && echo '[hadoop]\nversion: cdh4\ncommand: /edx/app/hadoop/hadoop/bin/hadoop\nstreaming-jar: /edx/app/hadoop/hadoop/share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar' > /etc/luigi/client.cfg
 RUN apt-get update && make system-requirements
+ADD docker/build/analytics_pipeline/devstack.sh /edx/app/analytics_pipeline/devstack.sh
+RUN chown hadoop:hadoop /edx/app/analytics_pipeline/devstack.sh && chmod a+x /edx/app/analytics_pipeline/devstack.sh
 USER hadoop
 RUN touch /edx/app/hadoop/.bashrc \
-    && echo 'export JAVA_HOME=/usr/lib/jvm/java-8-oracle\nexport HADOOP_HOME=/edx/app/hadoop/hadoop\nexport HIVE_HOME=/edx/app/hadoop/hive\nexport SQOOP_HOME=/edx/app/hadoop/sqoop\nexport SPARK_HOME=/edx/app/hadoop/spark\nexport PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/edx/app/analytics_pipeline/venvs/analytics_pipeline/bin:/usr/lib/jvm/java-8-oracle/bin:/edx/app/hadoop/hadoop/bin:/edx/app/hadoop/hadoop/sbin:/edx/app/hadoop/hive/bin:/edx/app/hadoop/spark/bin:/edx/app/hadoop/spark/sbin:/edx/app/hadoop/sqoop/bin"' > /edx/app/hadoop/.bashrc \
+    && echo 'export JAVA_HOME=/usr/lib/jvm/java-8-oracle\nexport HADOOP_HOME=/edx/app/hadoop/hadoop\nexport HIVE_HOME=/edx/app/hadoop/hive\nexport SQOOP_HOME=/edx/app/hadoop/sqoop\nexport SPARK_HOME=/edx/app/hadoop/spark\nexport PATH="/edx/app/analytics_pipeline/venvs/analytics_pipeline/bin:${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${HIVE_HOME}/bin:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${SQOOP_HOME}/bin:$PATH"' > /edx/app/hadoop/.bashrc \
     && . $ANALYTICS_PIPELINE_VENV/analytics_pipeline/bin/activate \
     && make test-requirements requirements
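The PATH changes above (both the image ENV and the hadoop user's .bashrc) move the analytics_pipeline virtualenv's bin directory in front of the system directories instead of after them, so the virtualenv's python and pip shadow the system interpreters. A minimal sanity check, sketched under the assumption that the built image is tagged edxops/analytics_pipeline:latest (substitute whatever tag your build actually produces):

    # Verify the venv interpreter now wins PATH resolution; the image tag is an assumption.
    docker run --rm edxops/analytics_pipeline:latest bash -c 'which python'
    # expected: /edx/app/analytics_pipeline/venvs/analytics_pipeline/bin/python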
docker/build/analytics_pipeline/devstack.sh
new file (mode 100644)
#!/usr/bin/env bash

COMMAND=$1

case $COMMAND in
    open)
        . /edx/app/analytics_pipeline/venvs/analytics_pipeline/bin/activate
        cd /edx/app/analytics_pipeline/analytics_pipeline
        /bin/bash
        ;;
esac
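The new devstack.sh is a small dispatcher: its open subcommand sources the analytics_pipeline virtualenv, changes into the pipeline checkout, and drops into an interactive bash shell. A hedged usage sketch; the container name analytics_pipeline is an assumption and should be replaced with whatever name your compose file gives the container:

    # Hypothetical invocation from the host:
    docker exec -it analytics_pipeline /edx/app/analytics_pipeline/devstack.sh open
    # lands in /edx/app/analytics_pipeline/analytics_pipeline with the virtualenv active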
docker/build/analytics_pipeline_hadoop_datanode/Dockerfile
@@ -17,7 +17,10 @@ RUN ( apt-key adv --keyserver ha.pool.sks-keyservers.net --recv-keys A4A9406876F
     || apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 \
     || apt-key adv --keyserver hkps://hkps.pool.sks-keyservers.net --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 )
 RUN echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-${MYSQL_VERSION}" > /etc/apt/sources.list.d/mysql.list
-RUN apt-get update && apt-get install -y mysql-community-client && rm -rf /var/lib/apt/lists/*
+RUN apt-get -y update \
+    && apt-get install -y mysql-community-client \
+    && apt-get install -y --no-install-recommends python python-setuptools \
+    && rm -rf /var/lib/apt/lists/*
 WORKDIR /
 RUN mkdir -p /hadoop/dfs/data
 VOLUME /hadoop/dfs/data
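Besides switching the single-line apt-get call to the multi-line form, the rewritten RUN adds python and python-setuptools (with --no-install-recommends to keep the layer small); these appear to be the "missing packages" from the commit title. A quick check, sketched under the assumption that the image is tagged edxops/analytics_pipeline_hadoop_datanode:latest:

    # Confirm the interpreter and setuptools landed in the image; the tag is an assumption.
    docker run --rm edxops/analytics_pipeline_hadoop_datanode:latest \
        sh -c 'python -V && python -c "import setuptools; print(setuptools.__version__)"'

The namenode, nodemanager, and resourcemanager Dockerfiles below receive the same RUN rewrite.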
docker/build/analytics_pipeline_hadoop_namenode/Dockerfile
@@ -17,7 +17,10 @@ RUN ( apt-key adv --keyserver ha.pool.sks-keyservers.net --recv-keys A4A9406876F
     || apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 \
     || apt-key adv --keyserver hkps://hkps.pool.sks-keyservers.net --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 )
 RUN echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-${MYSQL_VERSION}" > /etc/apt/sources.list.d/mysql.list
-RUN apt-get update && apt-get install -y mysql-community-client && rm -rf /var/lib/apt/lists/*
+RUN apt-get -y update \
+    && apt-get install -y mysql-community-client \
+    && apt-get install -y --no-install-recommends python python-setuptools \
+    && rm -rf /var/lib/apt/lists/*
 WORKDIR /
 RUN mkdir -p /hadoop/dfs/name
 VOLUME /hadoop/dfs/name
docker/build/analytics_pipeline_hadoop_nodemanager/Dockerfile
@@ -15,7 +15,10 @@ RUN ( apt-key adv --keyserver ha.pool.sks-keyservers.net --recv-keys A4A9406876F
     || apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 \
     || apt-key adv --keyserver hkps://hkps.pool.sks-keyservers.net --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 )
 RUN echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-${MYSQL_VERSION}" > /etc/apt/sources.list.d/mysql.list
-RUN apt-get update && apt-get install -y mysql-community-client && rm -rf /var/lib/apt/lists/*
+RUN apt-get -y update \
+    && apt-get install -y mysql-community-client \
+    && apt-get install -y --no-install-recommends python python-setuptools \
+    && rm -rf /var/lib/apt/lists/*
 WORKDIR /
 ADD docker/build/analytics_pipeline_hadoop_nodemanager/nodemanager.sh /run.sh
 RUN chmod a+x /run.sh
docker/build/analytics_pipeline_hadoop_resourcemanager/Dockerfile
@@ -15,7 +15,10 @@ RUN ( apt-key adv --keyserver ha.pool.sks-keyservers.net --recv-keys A4A9406876F
     || apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 \
     || apt-key adv --keyserver hkps://hkps.pool.sks-keyservers.net --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 )
 RUN echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-${MYSQL_VERSION}" > /etc/apt/sources.list.d/mysql.list
-RUN apt-get update && apt-get install -y mysql-community-client && rm -rf /var/lib/apt/lists/*
+RUN apt-get -y update \
+    && apt-get install -y mysql-community-client \
+    && apt-get install -y --no-install-recommends python python-setuptools \
+    && rm -rf /var/lib/apt/lists/*
 WORKDIR /
 ADD docker/build/analytics_pipeline_hadoop_resourcemanager/resourcemanager.sh /run.sh
 RUN chmod a+x /run.sh
docker/build/analytics_pipeline_spark_master/Dockerfile
@@ -7,7 +7,27 @@ ENV SPARK_MASTER_PORT=7077 \
     SPARK_MASTER_LOG=/spark/logs \
     HADOOP_USER_NAME=hadoop \
     SPARK_HOME=/spark \
-    PATH=$PATH:/spark/bin
+    PATH=$PATH:/spark/bin \
+    CORE_CONF_fs_defaultFS=hdfs://namenode:8020 \
+    CORE_CONF_hadoop_http_staticuser_user=root \
+    CORE_CONF_hadoop_proxyuser_hue_hosts=* \
+    CORE_CONF_hadoop_proxyuser_hue_groups=* \
+    HDFS_CONF_dfs_webhdfs_enabled=true \
+    HDFS_CONF_dfs_permissions_enabled=false \
+    YARN_CONF_yarn_log___aggregation___enable=true \
+    YARN_CONF_yarn_resourcemanager_recovery_enabled=true \
+    YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore \
+    YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate \
+    YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs \
+    YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ \
+    YARN_CONF_yarn_timeline___service_enabled=true \
+    YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true \
+    YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true \
+    YARN_CONF_yarn_resourcemanager_hostname=resourcemanager \
+    YARN_CONF_yarn_timeline___service_hostname=historyserver \
+    YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 \
+    YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 \
+    YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
 RUN apt-get -y update && apt-get -y install --reinstall python-pkg-resources \
     && echo 'spark.master spark://sparkmaster:7077\nspark.eventLog.enabled true\nspark.eventLog.dir hdfs://namenode:8020/tmp/spark-events\nspark.history.fs.logDirectory hdfs://namenode:8020/tmp/spark-events' > /spark/conf/spark-defaults.conf
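The added CORE_CONF_*, HDFS_CONF_*, and YARN_CONF_* variables point the Spark containers at the namenode, resourcemanager, and historyserver services. In docker-hadoop style images the prefix selects the target file (core-site.xml, hdfs-site.xml, yarn-site.xml) and the rest of the variable name is rewritten into a Hadoop property name by the base image's entrypoint script, which is not part of this commit. The sketch below only illustrates the commonly used naming convention, assuming '___' maps to '-', '__' maps to '_', and '_' maps to '.':

    # Sketch of the assumed env-name -> property-name translation (not the actual entrypoint code).
    to_property_name() {
        echo "$1" | sed -e 's/___/-/g' -e 's/__/@/g' -e 's/_/./g' -e 's/@/_/g'
    }
    to_property_name "fs_defaultFS"                     # fs.defaultFS                (core-site.xml)
    to_property_name "yarn_log___aggregation___enable"  # yarn.log-aggregation-enable (yarn-site.xml)

The spark_worker Dockerfile below receives the identical set of variables.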
docker/build/analytics_pipeline_spark_worker/Dockerfile
@@ -5,7 +5,27 @@ ADD docker/build/analytics_pipeline_spark_worker/worker.sh /
 ENV SPARK_WORKER_WEBUI_PORT=8081 \
     SPARK_WORKER_LOG=/spark/logs \
     SPARK_MASTER="spark://sparkmaster:7077" \
-    SPARK_HOME=/spark
+    SPARK_HOME=/spark \
+    CORE_CONF_fs_defaultFS=hdfs://namenode:8020 \
+    CORE_CONF_hadoop_http_staticuser_user=root \
+    CORE_CONF_hadoop_proxyuser_hue_hosts=* \
+    CORE_CONF_hadoop_proxyuser_hue_groups=* \
+    HDFS_CONF_dfs_webhdfs_enabled=true \
+    HDFS_CONF_dfs_permissions_enabled=false \
+    YARN_CONF_yarn_log___aggregation___enable=true \
+    YARN_CONF_yarn_resourcemanager_recovery_enabled=true \
+    YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore \
+    YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate \
+    YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs \
+    YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ \
+    YARN_CONF_yarn_timeline___service_enabled=true \
+    YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true \
+    YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true \
+    YARN_CONF_yarn_resourcemanager_hostname=resourcemanager \
+    YARN_CONF_yarn_timeline___service_hostname=historyserver \
+    YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 \
+    YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 \
+    YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
 RUN apt-get -y update && apt-get -y install --reinstall python-pkg-resources
 CMD ["/bin/bash", "/worker.sh"]