Commit a3ee4914 by Jesse Shapiro Committed by GitHub

Merge pull request #3558 from open-craft/haikuginger/configure-hadoop-settings

Make Hadoop more configurable
parents 6ca051fa 6fdd558a
......@@ -62,6 +62,31 @@ hadoop_common_debian_pkgs:
# Empty list: no entries are defined here (presumably the RedHat-family
# counterpart of hadoop_common_debian_pkgs — confirm against the role's tasks).
hadoop_common_redhat_pkgs: []
#
# Vars are used to fill in the following files:
# core-site.xml
# hdfs-site.xml
# mapred-site.xml
# yarn-site.xml
#
# Stock mapred-site.xml properties. The mapred-site.xml template applies the
# entries of mapred_site_config on top of this map before rendering.
MAPRED_SITE_DEFAULT_CONFIG:
  mapreduce.framework.name: 'yarn'
# Stock yarn-site.xml properties. The yarn-site.xml template merges
# yarn_site_config over this map with update() before rendering.
YARN_SITE_DEFAULT_CONFIG:
  yarn.nodemanager.aux-services: "mapreduce_shuffle"
  yarn.nodemanager.aux-services.mapreduce.shuffle.class: "org.apache.hadoop.mapred.ShuffleHandler"
  yarn.log-aggregation-enable: "true"
  # 24 hour log retention (24 * 60 * 60 seconds); quoted so that every value
  # in this map is an unambiguous string literal.
  yarn.log-aggregation.retain-seconds: "86400"
# Stock core-site.xml properties. The core-site.xml template merges
# HADOOP_CORE_SITE_EXTRA_CONFIG over this map before rendering.
# NOTE(review): Hadoop 2 lists fs.default.name as a deprecated alias of
# fs.defaultFS — consider migrating once all consumers are confirmed.
HADOOP_CORE_SITE_DEFAULT_CONFIG:
  fs.default.name: 'hdfs://localhost:9000'
# Stock hdfs-site.xml properties. The hdfs-site.xml template merges
# HDFS_SITE_EXTRA_CONFIG over this map before rendering. Both directory
# properties are file: URIs located under HADOOP_COMMON_DATA.
HDFS_SITE_DEFAULT_CONFIG:
  dfs.replication: '1'
  dfs.namenode.name.dir: 'file:{{ HADOOP_COMMON_DATA }}/namenode'
  dfs.datanode.data.dir: 'file:{{ HADOOP_COMMON_DATA }}/datanode'
#
# MapReduce/Yarn memory config (defaults for m1.medium)
# http://docs.aws.amazon.com/ElasticMapReduce/latest/DeveloperGuide/TaskConfiguration_H2.html
#
......@@ -78,9 +103,16 @@ hadoop_common_redhat_pkgs: []
# yarn.nodemanager.resource.memory-mb: 2048
# yarn.nodemanager.vmem-pmem-ratio: 2.1
#
# The variables below override the stock configuration written to
# the following files. Use unambiguous (quoted) string literals for
# the values to avoid any confusion.
# Files affected:
# core-site.xml
# hdfs-site.xml
# mapred-site.xml
# yarn-site.xml
#
# Override maps: each one is merged over its *_DEFAULT_CONFIG counterpart by
# the matching template, so an empty map keeps the stock values.
mapred_site_config: {}
# Declared exactly once. The log aggregation settings (enable flag and 24 hour
# retention) are defined in YARN_SITE_DEFAULT_CONFIG, not here.
yarn_site_config: {}
HADOOP_CORE_SITE_EXTRA_CONFIG: {}
HDFS_SITE_EXTRA_CONFIG: {}
{% do HADOOP_CORE_SITE_DEFAULT_CONFIG.update(HADOOP_CORE_SITE_EXTRA_CONFIG) %}
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
{# The `do` tag on the first line requires the jinja2.ext.do extension; it merges HADOOP_CORE_SITE_EXTRA_CONFIG into the defaults. #}
{# Emit one <property> per merged entry. items() is used because iteritems() exists only on Python 2 dicts. #}
{% for key, value in HADOOP_CORE_SITE_DEFAULT_CONFIG.items() %}
  <property>
    <name>{{ key }}</name>
    <value>{{ value }}</value>
  </property>
{% endfor %}
</configuration>
{% do HDFS_SITE_DEFAULT_CONFIG.update(HDFS_SITE_EXTRA_CONFIG) %}
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
{# The `do` tag on the first line requires the jinja2.ext.do extension; it merges HDFS_SITE_EXTRA_CONFIG into the defaults. #}
{# Emit one <property> per merged entry. items() is used because iteritems() exists only on Python 2 dicts. #}
{% for key, value in HDFS_SITE_DEFAULT_CONFIG.items() %}
  <property>
    <name>{{ key }}</name>
    <value>{{ value }}</value>
  </property>
{% endfor %}
</configuration>
{% do MAPRED_SITE_DEFAULT_CONFIG.update(mapred_site_config) %}
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
{# The `do` tag on the first line requires the jinja2.ext.do extension; it merges mapred_site_config into the defaults. #}
{# A single loop over the merged map replaces the hard-coded property and the old "is defined" guard; every for/if tag is now balanced. #}
{% for key, value in MAPRED_SITE_DEFAULT_CONFIG.items() %}
  <property>
    <name>{{ key }}</name>
    <value>{{ value }}</value>
  </property>
{% endfor %}
</configuration>
{% do YARN_SITE_DEFAULT_CONFIG.update(yarn_site_config) %}
<?xml version="1.0"?>
<configuration>
{# The `do` tag on the first line requires the jinja2.ext.do extension; it merges yarn_site_config into the defaults. #}
{# A single loop over the merged map replaces the hard-coded properties and the old "is defined" guard; every for/if tag is now balanced. #}
{% for key, value in YARN_SITE_DEFAULT_CONFIG.items() %}
  <property>
    <name>{{ key }}</name>
    <value>{{ value }}</value>
  </property>
{% endfor %}
</configuration>
......@@ -30,6 +30,23 @@ HIVE_METASTORE_DATABASE:
host: "{{ HIVE_METASTORE_DATABASE_HOST }}"
port: "{{ HIVE_METASTORE_DATABASE_PORT }}"
#
# Vars are used to fill in the hive-site.xml file
#
# Stock hive-site.xml properties. The connection values are built from the
# fields of HIVE_METASTORE_DATABASE; the hive-site.xml template merges
# HIVE_SITE_EXTRA_CONFIG over this map before rendering.
HIVE_SITE_DEFAULT_CONFIG:
  javax.jdo.option.ConnectionURL: 'jdbc:mysql://{{ HIVE_METASTORE_DATABASE.host }}:{{ HIVE_METASTORE_DATABASE.port }}/{{ HIVE_METASTORE_DATABASE.name }}'
  javax.jdo.option.ConnectionDriverName: 'com.mysql.jdbc.Driver'
  javax.jdo.option.ConnectionUserName: '{{ HIVE_METASTORE_DATABASE.user }}'
  javax.jdo.option.ConnectionPassword: '{{ HIVE_METASTORE_DATABASE.password }}'
  datanucleus.autoCreateSchema: 'true'
  hive.metastore.schema.verification: 'true'
#
# Variables override the stock configuration for entry into
# the hive-site.xml file. Ensure that you use unambiguous
# string literals to avoid any confusion.
#
# Empty by default; the hive-site.xml template applies these entries on top
# of HIVE_SITE_DEFAULT_CONFIG with update() before rendering.
HIVE_SITE_EXTRA_CONFIG: {}
#
# Vars are namespaced with the module name.
......
{% do HIVE_SITE_DEFAULT_CONFIG.update(HIVE_SITE_EXTRA_CONFIG) %}
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
{# The `do` tag on the first line requires the jinja2.ext.do extension; it merges HIVE_SITE_EXTRA_CONFIG into the defaults. #}
{# Emit one <property> per merged entry; the former hard-coded properties now come from HIVE_SITE_DEFAULT_CONFIG. #}
{# NOTE(review): values are written unescaped, so a value containing & or < would yield invalid XML; confirm inputs. #}
{% for key, value in HIVE_SITE_DEFAULT_CONFIG.items() %}
  <property>
    <name>{{ key }}</name>
    <value>{{ value }}</value>
  </property>
{% endfor %}
</configuration>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment