add hadoop

This commit is contained in:
James Oliver 2016-03-21 05:14:29 -07:00
parent 757f993e08
commit 39e745d6bd
5 changed files with 333 additions and 0 deletions

View File

@ -0,0 +1,24 @@
# Hadoop + Yarn (Experimental)
### Info:
This template installs Apache Hadoop 2.7.1 and Yarn on the Rancher network. It is recommended that Hadoop be installed on instances with 8+ GB of RAM. This image also makes use of 'named' volumes and requires Docker 1.9.x (ideally 1.9.1). One Hadoop cluster can be deployed per environment. Additional nodes can be added to the cluster; removing nodes is not currently set up.
### Using
Select Hadoop from the Rancher Catalog. Common HDFS options and Yarn/MapReduce memory options are available to set.
Once the values are set and the cluster is deployed, you can access the following on the hosts running these services:
* HDFS manager on: `namenode-primary:50070`.
* Yarn Resource manager is accessible via `yarn-resourcemanager:8088`
Your default HDFS filesystem URL is `hdfs://<namenode>:8020` (only available on the Rancher network).

View File

@ -0,0 +1,166 @@
# Sidekick of namenode-primary (named in its io.rancher.sidekicks label).
# Waits 20 seconds, then runs /bootstrap-hdfs.sh as the hdfs user inside the
# namenode's network namespace, with the volumes of namenode-primary-data.
bootstrap-hdfs:
  image: rancher/hadoop-base:v0.3.5
  labels:
    # Quoted: label values are strings, and a bare `true` is a YAML boolean.
    io.rancher.container.start_once: 'true'
  command: 'su -c "sleep 20 && exec /bootstrap-hdfs.sh" hdfs'
  net: "container:namenode-primary"
  volumes_from:
    - namenode-primary-data
# Sidekick of namenode-primary (named in its io.rancher.sidekicks label).
# Runs the followers-config image with NODETYPE=hdfs, joined to the namenode's
# network namespace and mounting the volumes of namenode-primary-data.
sl-namenode-config:
  image: rancher/hadoop-followers-config:v0.3.5
  environment:
    NODETYPE: 'hdfs'
  volumes_from:
    - namenode-primary-data
  net: 'container:namenode-primary'
# Sidekick of namenode-primary (named in its io.rancher.sidekicks label).
# Runs the hadoop-config image in the namenode's network namespace and mounts
# the volumes of namenode-primary-data.
namenode-config:
  image: rancher/hadoop-config:v0.3.5
  volumes_from:
    - namenode-primary-data
  net: 'container:namenode-primary'
# HDFS namenode. Publishes port 50070 and mounts the volumes of its
# namenode-primary-data sidekick.
namenode-primary:
  image: rancher/hadoop-base:v0.3.5
  # `exec` replaces the wrapper shell with the namenode process once the
  # 15-second delay has elapsed, matching how datanode and yarn-nodemanager
  # launch their daemons.
  command: 'su -c "sleep 15 && exec /usr/local/hadoop-2.7.1/bin/hdfs namenode" hdfs'
  volumes_from:
    - namenode-primary-data
  ports:
    # Quoted like every other port mapping in this file so the value is
    # always read as a string.
    - '50070:50070'
  labels:
    io.rancher.sidekicks: namenode-config,sl-namenode-config,bootstrap-hdfs,namenode-primary-data
    io.rancher.container.hostname_override: container_name
    # Soft affinity towards the resourcemanager and jobhistory-server
    # containers; hard anti-affinity against datanode containers.
    io.rancher.scheduler.affinity:container_label_soft: io.rancher.stack_service.name=$${stack_name}/yarn-resourcemanager,io.rancher.stack_service.name=$${stack_name}/jobhistory-server
    io.rancher.scheduler.affinity:container_label_ne: io.rancher.stack_service.name=$${stack_name}/datanode
# Data sidekick of namenode-primary. Runs /bootstrap-local.sh with networking
# disabled and provides the /etc/hadoop and /tmp volumes that the other
# namenode containers mount via volumes_from.
namenode-primary-data:
  image: rancher/hadoop-base:v0.3.5
  volumes:
    # Named volume; the ${cluster} prefix comes from the "cluster" question.
    - '${cluster}-namenode-primary-config:/etc/hadoop'
    - '/tmp'
  net: none
  labels:
    # Quoted: label values are strings, and a bare `true` is a YAML boolean.
    io.rancher.container.start_once: 'true'
  command: '/bootstrap-local.sh'
# Sidekick of datanode (named in its io.rancher.sidekicks label).
# Runs the hadoop-config image in the datanode's network namespace and mounts
# the volumes of datanode-data.
datanode-config:
  image: rancher/hadoop-config:v0.3.5
  volumes_from:
    - datanode-data
  net: 'container:datanode'
# Data sidekick of datanode. Runs /bootstrap-local.sh with networking disabled
# and provides the /etc/hadoop and /tmp volumes mounted by the datanode
# containers.
datanode-data:
  image: rancher/hadoop-base:v0.3.5
  net: none
  volumes:
    # Named volume; the ${cluster} prefix comes from the "cluster" question.
    - '${cluster}-datanode-config:/etc/hadoop'
    - '/tmp'
  labels:
    # Quoted: label values are strings, and a bare `true` is a YAML boolean.
    io.rancher.container.start_once: 'true'
  command: '/bootstrap-local.sh'
# HDFS datanode. Waits 45 seconds, then execs the datanode daemon as the hdfs
# user. Reaches the namenode through the "namenode" link alias.
datanode:
  image: rancher/hadoop-base:v0.3.5
  command: 'su -c "sleep 45 && exec /usr/local/hadoop-2.7.1/bin/hdfs datanode" hdfs'
  links:
    - "namenode-primary:namenode"
  volumes_from:
    - datanode-data
  labels:
    io.rancher.container.hostname_override: container_name
    io.rancher.sidekicks: datanode-config,datanode-data
    # Hard anti-affinity against other containers of this same service; soft
    # anti-affinity against the namenode and resourcemanager containers.
    io.rancher.scheduler.affinity:container_label_ne: io.rancher.stack_service.name=$${stack_name}/$${service_name}
    io.rancher.scheduler.affinity:container_label_soft_ne: io.rancher.stack_service.name=$${stack_name}/namenode-primary,io.rancher.stack_service.name=$${stack_name}/yarn-resourcemanager
# Sidekick of yarn-nodemanager (named in its io.rancher.sidekicks label).
# Runs the hadoop-config image in the nodemanager's network namespace and
# mounts the volumes of yarn-nodemanager-data.
yarn-nodemanager-config:
  image: rancher/hadoop-config:v0.3.5
  volumes_from:
    - yarn-nodemanager-data
  net: 'container:yarn-nodemanager'
# Data sidekick of yarn-nodemanager. Runs /bootstrap-local.sh with networking
# disabled and provides the /etc/hadoop and /tmp volumes mounted by the
# nodemanager containers.
yarn-nodemanager-data:
  image: rancher/hadoop-base:v0.3.5
  net: none
  volumes:
    # Named volume; the ${cluster} prefix comes from the "cluster" question.
    - '${cluster}-yarn-nodemanager-config:/etc/hadoop'
    - '/tmp'
  labels:
    # Quoted: label values are strings, and a bare `true` is a YAML boolean.
    io.rancher.container.start_once: 'true'
  command: '/bootstrap-local.sh'
# Yarn nodemanager. Waits 45 seconds, then execs the nodemanager daemon as the
# yarn user. Publishes port 8042 and reaches the namenode and resourcemanager
# through the "namenode" and "yarn-rm" link aliases.
yarn-nodemanager:
  image: rancher/hadoop-base:v0.3.5
  volumes_from:
    - yarn-nodemanager-data
  ports:
    - '8042:8042'
  labels:
    io.rancher.container.hostname_override: container_name
    io.rancher.sidekicks: yarn-nodemanager-config,yarn-nodemanager-data
    # Hard anti-affinity against other containers of this same service.
    io.rancher.scheduler.affinity:container_label_ne: io.rancher.stack_service.name=$${stack_name}/$${service_name}
    # Soft anti-affinity against the master services. The list must not end
    # with a comma: a trailing comma adds an empty rule to the list.
    io.rancher.scheduler.affinity:container_label_soft_ne: io.rancher.stack_service.name=$${stack_name}/namenode-primary,io.rancher.stack_service.name=$${stack_name}/yarn-resourcemanager,io.rancher.stack_service.name=$${stack_name}/jobhistory-server
    # Hard affinity: schedule alongside a datanode container.
    io.rancher.scheduler.affinity:container_label: io.rancher.stack_service.name=$${stack_name}/datanode
  links:
    - 'namenode-primary:namenode'
    - 'yarn-resourcemanager:yarn-rm'
  command: 'su -c "sleep 45 && exec /usr/local/hadoop-2.7.1/bin/yarn nodemanager" yarn'
# Sidekick of jobhistory-server (named in its io.rancher.sidekicks label).
# Runs the hadoop-config image in the jobhistory server's network namespace
# and mounts the volumes of jobhistory-server-data.
jobhistory-server-config:
  image: rancher/hadoop-config:v0.3.5
  volumes_from:
    - jobhistory-server-data
  net: 'container:jobhistory-server'
# Data sidekick of jobhistory-server. Runs /bootstrap-local.sh with networking
# disabled and provides the /etc/hadoop and /tmp volumes mounted by the
# jobhistory-server containers.
jobhistory-server-data:
  image: rancher/hadoop-base:v0.3.5
  net: none
  volumes:
    # Named volume; the ${cluster} prefix comes from the "cluster" question.
    - '${cluster}-jobhistory-config:/etc/hadoop'
    - '/tmp'
  labels:
    # Quoted: label values are strings, and a bare `true` is a YAML boolean.
    io.rancher.container.start_once: 'true'
  command: '/bootstrap-local.sh'
# MapReduce job history server. Publishes ports 10020 and 19888 and reaches
# the namenode and resourcemanager through the "namenode" and "yarn-rm" link
# aliases.
jobhistory-server:
  image: rancher/hadoop-base:v0.3.5
  volumes_from:
    - jobhistory-server-data
  links:
    - 'namenode-primary:namenode'
    - 'yarn-resourcemanager:yarn-rm'
  ports:
    - '10020:10020'
    - '19888:19888'
  labels:
    io.rancher.sidekicks: jobhistory-server-config,jobhistory-server-data
    io.rancher.container.hostname_override: container_name
    # Hard affinity: schedule alongside the resourcemanager and the namenode.
    io.rancher.scheduler.affinity:container_label: io.rancher.stack_service.name=$${stack_name}/yarn-resourcemanager,io.rancher.stack_service.name=$${stack_name}/namenode-primary
  # `exec` replaces the wrapper shell with the history server process once the
  # 45-second delay has elapsed, matching how datanode and yarn-nodemanager
  # launch their daemons.
  command: 'su -c "sleep 45 && exec /usr/local/hadoop-2.7.1/bin/mapred historyserver" mapred'
# Sidekick of yarn-resourcemanager (named in its io.rancher.sidekicks label).
# Runs the hadoop-config image in the resourcemanager's network namespace and
# mounts the volumes of yarn-resourcemanager-data.
yarn-resourcemanager-config:
  image: rancher/hadoop-config:v0.3.5
  volumes_from:
    - yarn-resourcemanager-data
  net: 'container:yarn-resourcemanager'
# Sidekick of yarn-resourcemanager (named in its io.rancher.sidekicks label).
# Runs the followers-config image with NODETYPE=yarn, joined to the
# resourcemanager's network namespace and mounting the volumes of
# yarn-resourcemanager-data.
sl-yarn-resourcemanager-config:
  image: rancher/hadoop-followers-config:v0.3.5
  environment:
    NODETYPE: 'yarn'
  volumes_from:
    - yarn-resourcemanager-data
  net: 'container:yarn-resourcemanager'
# Data sidekick of yarn-resourcemanager. Runs /bootstrap-local.sh with
# networking disabled and provides the /etc/hadoop and /tmp volumes mounted by
# the resourcemanager containers.
yarn-resourcemanager-data:
  image: rancher/hadoop-base:v0.3.5
  net: none
  volumes:
    # Named volume; the ${cluster} prefix comes from the "cluster" question.
    - '${cluster}-yarn-resourcemanager-config:/etc/hadoop'
    - '/tmp'
  labels:
    # Quoted: label values are strings, and a bare `true` is a YAML boolean.
    io.rancher.container.start_once: 'true'
  command: '/bootstrap-local.sh'
# Yarn resourcemanager. Publishes port 8088 and reaches the namenode through
# the "namenode" link alias.
yarn-resourcemanager:
  image: rancher/hadoop-base:v0.3.5
  volumes_from:
    - yarn-resourcemanager-data
  ports:
    - '8088:8088'
  links:
    - 'namenode-primary:namenode'
  labels:
    io.rancher.sidekicks: yarn-resourcemanager-config,sl-yarn-resourcemanager-config,yarn-resourcemanager-data
    io.rancher.container.hostname_override: container_name
    # Hard anti-affinity against this same service, datanodes and
    # nodemanagers; hard affinity to the namenode.
    io.rancher.scheduler.affinity:container_label_ne: io.rancher.stack_service.name=$${stack_name}/$${service_name},io.rancher.stack_service.name=$${stack_name}/datanode,io.rancher.stack_service.name=$${stack_name}/yarn-nodemanager
    io.rancher.scheduler.affinity:container_label: io.rancher.stack_service.name=$${stack_name}/namenode-primary
  # `exec` replaces the wrapper shell with the resourcemanager process once
  # the 30-second delay has elapsed, matching how datanode and
  # yarn-nodemanager launch their daemons.
  command: 'su -c "sleep 30 && exec /usr/local/hadoop-2.7.1/bin/yarn resourcemanager" yarn'

View File

@ -0,0 +1,100 @@
# Catalog metadata and the questions whose answers are substituted for the
# ${variable} references used in the compose files.
.catalog:
  name: 'Hadoop + Yarn'
  version: '2.7.1-rancher1'
  description: 'Hadoop + Yarn'
  minimum_rancher_version: v0.46.0
  questions:
    # Prefix for the named config volumes (see the *-data services).
    - variable: 'cluster'
      label: 'Cluster Name'
      description: 'Name for the stack volumes'
      type: 'string'
      default: 'hadoop'
      required: true
    # HDFS settings.
    - variable: 'dfs_replication'
      label: 'Default DFS Replica Count'
      description: 'Default number of HDFS replicas'
      type: 'int'
      default: '3'
      required: true
    # Yarn settings.
    - variable: 'yarn_node_manager_cpu_vcores'
      label: 'Yarn Nodemanager CPU vcores'
      description: 'yarn.nodemanager.resource.cpu-vcores value'
      type: 'int'
      default: '8'
      required: true
    - variable: 'yarn_node_manager_resource_memory'
      label: 'Yarn Nodemanager Memory Value'
      description: 'yarn.nodemanager.resource.memory-mb value'
      type: 'int'
      default: '8192'
      required: true
    - variable: 'yarn_minimum_allocation'
      label: 'Yarn Minimum Memory allocation'
      description: 'yarn.scheduler.minimum-allocation-mb value'
      type: 'int'
      default: '1024'
      required: true
    # MapReduce settings.
    - variable: 'mapreduce_map_memory'
      label: 'Mapreduce Map Memory'
      description: 'mapreduce.map.memory.mb'
      type: 'int'
      default: '1024'
      required: true
    - variable: 'mapreduce_reduce_memory'
      label: 'Mapreduce Reduce Memory'
      description: 'mapreduce.reduce.memory.mb'
      type: 'int'
      default: '2048'
      required: true
    - variable: 'mapreduce_map_java_opts'
      label: 'Mapreduce Map Java Opts'
      description: 'mapreduce.map.java.opts'
      type: 'string'
      default: '-Xmx768m'
      required: true
    - variable: 'mapreduce_reduce_java_opts'
      label: 'Mapreduce Reduce Java Opts'
      description: 'mapreduce.reduce.java.opts'
      type: 'string'
      default: '-Xmx1536m'
      required: true
# Single namenode instance. Its metadata is anchored as `hdfs_metadata` so
# that the datanode service can alias the same settings.
namenode-primary:
  scale: 1
  metadata: &hdfs_metadata
    core-site:
      hadoop.proxyuser.hue.hosts: '*'
      hadoop.proxyuser.hue.groups: '*'
    hdfs-site:
      # Filled in from the "dfs_replication" question.
      dfs.replication: '${dfs_replication}'
      dfs.webhdfs.enabled: 'true'
# Datanode service: starts at scale 1 and aliases the metadata anchored on
# namenode-primary.
datanode:
  metadata: *hdfs_metadata
  scale: 1
# Single resourcemanager instance. Its metadata is anchored as `yarn_metadata`
# and aliased by the jobhistory-server and yarn-nodemanager services.
yarn-resourcemanager:
  scale: 1
  metadata: &yarn_metadata
    core-site:
      hadoop.proxyuser.hue.hosts: "*"
      hadoop.proxyuser.hue.groups: "*"
    hdfs-site:
      # The Hadoop property is `dfs.replication` (dot, not hyphen), matching
      # the key used in namenode-primary's hdfs-site metadata.
      dfs.replication: "${dfs_replication}"
      dfs.webhdfs.enabled: "true"
    yarn-site:
      # Filled in from the yarn_* questions.
      yarn.nodemanager.resource.cpu-vcores: "${yarn_node_manager_cpu_vcores}"
      yarn.nodemanager.resource.memory-mb: "${yarn_node_manager_resource_memory}"
      yarn.scheduler.minimum-allocation-mb: "${yarn_minimum_allocation}"
      yarn.nodemanager.aux-services: "mapreduce_shuffle"
      yarn.log-aggregation-enable: "true"
      yarn.log-aggregation.retain-seconds: 10800
      yarn.log-aggregation.retain-check-interval-seconds: 3600
    mapred-site:
      # Filled in from the mapreduce_* questions.
      mapreduce.map.memory.mb: "${mapreduce_map_memory}"
      mapreduce.reduce.memory.mb: "${mapreduce_reduce_memory}"
      mapreduce.map.java.opts: "${mapreduce_map_java_opts}"
      mapreduce.reduce.java.opts: "${mapreduce_reduce_java_opts}"
# Job history server: starts at scale 1 and aliases the metadata anchored on
# yarn-resourcemanager.
jobhistory-server:
  metadata: *yarn_metadata
  scale: 1
# Nodemanager service: starts at scale 1 and aliases the metadata anchored on
# yarn-resourcemanager.
yarn-nodemanager:
  metadata: *yarn_metadata
  scale: 1

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 38 KiB

View File

@ -0,0 +1,5 @@
# Catalog entry metadata for the Hadoop + Yarn template.
name: "Hadoop + Yarn"
version: 2.7.1-rancher1
category: Big Data
description: |
  (Experimental - Requires 2+ hosts) Hadoop + Yarn big data tools