Commit 6f6900af by Abdul Mannan

add missing packages for the analytics pipeline Docker images

parent a863cd17
@@ -38,7 +38,7 @@ ENV BOTO_CONFIG=/dev/null \
EDX_PPA_KEY_ID='69464050'
ENV PATH="$PATH:/edx/app/analytics_pipeline/venvs/analytics_pipeline/bin:${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${HIVE_HOME}/bin:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${SQOOP_HOME}/bin" \
ENV PATH="/edx/app/analytics_pipeline/venvs/analytics_pipeline/bin:${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${HIVE_HOME}/bin:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${SQOOP_HOME}/bin:$PATH" \
COMMON_DATA_DIR=$COMMON_BASE_DIR/var \
COMMON_APP_DIR=$COMMON_BASE_DIR/app \
COMMON_LOG_DIR=$COMMON_BASE_DIR/var/log \
@@ -128,9 +128,11 @@ RUN pip install $COMMON_PIP_PACKAGES_PIP $COMMON_PIP_PACKAGES_SETUPTOOLS $COMMON
&& echo '[hadoop]\nversion: cdh4\ncommand: /edx/app/hadoop/hadoop/bin/hadoop\nstreaming-jar: /edx/app/hadoop/hadoop/share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar' > /etc/luigi/client.cfg
RUN apt-get update && make system-requirements
ADD docker/build/analytics_pipeline/devstack.sh /edx/app/analytics_pipeline/devstack.sh
RUN chown hadoop:hadoop /edx/app/analytics_pipeline/devstack.sh && chmod a+x /edx/app/analytics_pipeline/devstack.sh
USER hadoop
RUN touch /edx/app/hadoop/.bashrc \
&& echo 'export JAVA_HOME=/usr/lib/jvm/java-8-oracle\nexport HADOOP_HOME=/edx/app/hadoop/hadoop\nexport HIVE_HOME=/edx/app/hadoop/hive\nexport SQOOP_HOME=/edx/app/hadoop/sqoop\nexport SPARK_HOME=/edx/app/hadoop/spark\nexport PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/edx/app/analytics_pipeline/venvs/analytics_pipeline/bin:/usr/lib/jvm/java-8-oracle/bin:/edx/app/hadoop/hadoop/bin:/edx/app/hadoop/hadoop/sbin:/edx/app/hadoop/hive/bin:/edx/app/hadoop/spark/bin:/edx/app/hadoop/spark/sbin:/edx/app/hadoop/sqoop/bin"' > /edx/app/hadoop/.bashrc \
&& echo 'export JAVA_HOME=/usr/lib/jvm/java-8-oracle\nexport HADOOP_HOME=/edx/app/hadoop/hadoop\nexport HIVE_HOME=/edx/app/hadoop/hive\nexport SQOOP_HOME=/edx/app/hadoop/sqoop\nexport SPARK_HOME=/edx/app/hadoop/spark\nexport PATH="/edx/app/analytics_pipeline/venvs/analytics_pipeline/bin:${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${HIVE_HOME}/bin:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${SQOOP_HOME}/bin:$PATH"' > /edx/app/hadoop/.bashrc \
&& . $ANALYTICS_PIPELINE_VENV/analytics_pipeline/bin/activate \
&& make test-requirements requirements
......
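Both PATH changes in this Dockerfile (the ENV line above and the .bashrc written for the hadoop user) move the analytics pipeline virtualenv's bin directory from the end of the search path to the front, so the venv's interpreter and entry points shadow the system ones. A minimal sketch of the effect, using the paths from the image (the resolution results assume the venv exists at that location, as it does in the built image):

    # Old ordering: the system interpreter is found first.
    export PATH="/usr/bin:/edx/app/analytics_pipeline/venvs/analytics_pipeline/bin"
    command -v python   # -> /usr/bin/python

    # New ordering: the virtualenv's python and pip-installed scripts win.
    export PATH="/edx/app/analytics_pipeline/venvs/analytics_pipeline/bin:/usr/bin"
    command -v python   # -> /edx/app/analytics_pipeline/venvs/analytics_pipeline/bin/python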
#!/usr/bin/env bash
COMMAND=$1
case $COMMAND in
open)
. /edx/app/analytics_pipeline/venvs/analytics_pipeline/bin/activate
cd /edx/app/analytics_pipeline/analytics_pipeline
/bin/bash
;;
esac
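The devstack.sh helper added above activates the pipeline virtualenv, switches into the source checkout, and drops into a bash shell. Invocation would look roughly like the following; the container name is an assumption and depends on how docker-compose names the service:

    # "analytics_pipeline" is a hypothetical container name.
    docker exec -it analytics_pipeline /edx/app/analytics_pipeline/devstack.sh open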
@@ -17,7 +17,10 @@ RUN ( apt-key adv --keyserver ha.pool.sks-keyservers.net --recv-keys A4A9406876F
|| apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 \
|| apt-key adv --keyserver hkps://hkps.pool.sks-keyservers.net --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 )
RUN echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-${MYSQL_VERSION}" > /etc/apt/sources.list.d/mysql.list
RUN apt-get update && apt-get install -y mysql-community-client && rm -rf /var/lib/apt/lists/*
RUN apt-get -y update \
&& apt-get install -y mysql-community-client \
&& apt-get install -y --no-install-recommends python python-setuptools \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /
RUN mkdir -p /hadoop/dfs/data
VOLUME /hadoop/dfs/data
......
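The extra python and python-setuptools packages give these Hadoop images a Python 2 interpreter with setuptools (and, through its dependencies, pkg_resources), which is the same gap the Spark images below patch by reinstalling python-pkg-resources. A quick smoke test inside a built image:

    # Should exit 0 once python and python-setuptools are installed.
    python -c "import pkg_resources"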
@@ -17,7 +17,10 @@ RUN ( apt-key adv --keyserver ha.pool.sks-keyservers.net --recv-keys A4A9406876F
|| apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 \
|| apt-key adv --keyserver hkps://hkps.pool.sks-keyservers.net --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 )
RUN echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-${MYSQL_VERSION}" > /etc/apt/sources.list.d/mysql.list
RUN apt-get update && apt-get install -y mysql-community-client && rm -rf /var/lib/apt/lists/*
RUN apt-get -y update \
&& apt-get install -y mysql-community-client \
&& apt-get install -y --no-install-recommends python python-setuptools \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /
RUN mkdir -p /hadoop/dfs/name
VOLUME /hadoop/dfs/name
......
@@ -15,7 +15,10 @@ RUN ( apt-key adv --keyserver ha.pool.sks-keyservers.net --recv-keys A4A9406876F
|| apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 \
|| apt-key adv --keyserver hkps://hkps.pool.sks-keyservers.net --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 )
RUN echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-${MYSQL_VERSION}" > /etc/apt/sources.list.d/mysql.list
RUN apt-get update && apt-get install -y mysql-community-client && rm -rf /var/lib/apt/lists/*
RUN apt-get -y update \
&& apt-get install -y mysql-community-client \
&& apt-get install -y --no-install-recommends python python-setuptools \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /
ADD docker/build/analytics_pipeline_hadoop_nodemanager/nodemanager.sh /run.sh
RUN chmod a+x /run.sh
......
@@ -15,7 +15,10 @@ RUN ( apt-key adv --keyserver ha.pool.sks-keyservers.net --recv-keys A4A9406876F
|| apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 \
|| apt-key adv --keyserver hkps://hkps.pool.sks-keyservers.net --recv-keys A4A9406876FCBD3C456770C88C718D3B5072E1F5 )
RUN echo "deb http://repo.mysql.com/apt/debian/ stretch mysql-${MYSQL_VERSION}" > /etc/apt/sources.list.d/mysql.list
RUN apt-get update && apt-get install -y mysql-community-client && rm -rf /var/lib/apt/lists/*
RUN apt-get -y update \
&& apt-get install -y mysql-community-client \
&& apt-get install -y --no-install-recommends python python-setuptools \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /
ADD docker/build/analytics_pipeline_hadoop_resourcemanager/resourcemanager.sh /run.sh
RUN chmod a+x /run.sh
......
@@ -7,7 +7,27 @@ ENV SPARK_MASTER_PORT=7077 \
SPARK_MASTER_LOG=/spark/logs \
HADOOP_USER_NAME=hadoop \
SPARK_HOME=/spark \
PATH=$PATH:/spark/bin
PATH=$PATH:/spark/bin \
CORE_CONF_fs_defaultFS=hdfs://namenode:8020 \
CORE_CONF_hadoop_http_staticuser_user=root \
CORE_CONF_hadoop_proxyuser_hue_hosts=* \
CORE_CONF_hadoop_proxyuser_hue_groups=* \
HDFS_CONF_dfs_webhdfs_enabled=true \
HDFS_CONF_dfs_permissions_enabled=false \
YARN_CONF_yarn_log___aggregation___enable=true \
YARN_CONF_yarn_resourcemanager_recovery_enabled=true \
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore \
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate \
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs \
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ \
YARN_CONF_yarn_timeline___service_enabled=true \
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true \
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true \
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager \
YARN_CONF_yarn_timeline___service_hostname=historyserver \
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 \
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 \
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
RUN apt-get -y update && apt-get -y install --reinstall python-pkg-resources \
&& echo 'spark.master spark://sparkmaster:7077\nspark.eventLog.enabled true\nspark.eventLog.dir hdfs://namenode:8020/tmp/spark-events\nspark.history.fs.logDirectory hdfs://namenode:8020/tmp/spark-events' > /spark/conf/spark-defaults.conf
......
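The CORE_CONF_*, HDFS_CONF_* and YARN_CONF_* variables follow the naming convention of the common Hadoop base images these containers appear to derive from: the prefix selects the target file (core-site.xml, hdfs-site.xml or yarn-site.xml) and the rest of the name is decoded into a property key by the entrypoint at container start. A rough sketch of that decoding, assuming the usual rules (triple underscore becomes '-', remaining single underscores become '.'):

    # e.g. YARN_CONF_yarn_log___aggregation___enable=true
    echo "yarn_log___aggregation___enable" | sed -e 's/___/-/g' -e 's/_/./g'
    # -> yarn.log-aggregation-enable, written into yarn-site.xml with value true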
@@ -5,7 +5,27 @@ ADD docker/build/analytics_pipeline_spark_worker/worker.sh /
ENV SPARK_WORKER_WEBUI_PORT=8081 \
SPARK_WORKER_LOG=/spark/logs \
SPARK_MASTER="spark://sparkmaster:7077" \
SPARK_HOME=/spark
SPARK_HOME=/spark \
CORE_CONF_fs_defaultFS=hdfs://namenode:8020 \
CORE_CONF_hadoop_http_staticuser_user=root \
CORE_CONF_hadoop_proxyuser_hue_hosts=* \
CORE_CONF_hadoop_proxyuser_hue_groups=* \
HDFS_CONF_dfs_webhdfs_enabled=true \
HDFS_CONF_dfs_permissions_enabled=false \
YARN_CONF_yarn_log___aggregation___enable=true \
YARN_CONF_yarn_resourcemanager_recovery_enabled=true \
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore \
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate \
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs \
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ \
YARN_CONF_yarn_timeline___service_enabled=true \
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true \
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true \
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager \
YARN_CONF_yarn_timeline___service_hostname=historyserver \
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 \
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 \
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
RUN apt-get -y update && apt-get -y install --reinstall python-pkg-resources
CMD ["/bin/bash", "/worker.sh"]
......
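The spark-defaults.conf written into the master image above points event logging and the history view at hdfs://namenode:8020/tmp/spark-events. Spark generally expects that directory to exist before applications start logging to it, so it typically has to be created once on HDFS, for example:

    # Run once against the cluster, from any container with the Hadoop client on PATH.
    hdfs dfs -mkdir -p hdfs://namenode:8020/tmp/spark-events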