From 39e745d6bd723a4fc2fa7cc962213e5b4c67cece Mon Sep 17 00:00:00 2001
From: James Oliver
Date: Mon, 21 Mar 2016 05:14:29 -0700
Subject: [PATCH] add hadoop

---
 templates/hadoop/0/README.md            |  24 ++++
 templates/hadoop/0/docker-compose.yml   | 166 ++++++++++++++++++++++++
 templates/hadoop/0/rancher-compose.yml  | 100 ++++++++++++++
 templates/hadoop/catalogIcon-hadoop.svg |  38 ++++++
 templates/hadoop/config.yml             |   5 +
 5 files changed, 333 insertions(+)
 create mode 100644 templates/hadoop/0/README.md
 create mode 100644 templates/hadoop/0/docker-compose.yml
 create mode 100644 templates/hadoop/0/rancher-compose.yml
 create mode 100644 templates/hadoop/catalogIcon-hadoop.svg
 create mode 100644 templates/hadoop/config.yml

diff --git a/templates/hadoop/0/README.md b/templates/hadoop/0/README.md
new file mode 100644
index 0000000..8fc5ed5
--- /dev/null
+++ b/templates/hadoop/0/README.md
@@ -0,0 +1,24 @@
+# Hadoop + Yarn (Experimental)
+
+
+### Info:
+
+This template will install Apache Hadoop 2.7.1 and Yarn on the Rancher network. It is recommended that Hadoop be installed on instances with 8+ GB of RAM. This image also makes use of 'named' volumes and requires Docker 1.9.x (ideally 1.9.1). One Hadoop cluster can be deployed per environment. Additional nodes can be added to the cluster; removing nodes is not currently supported.
+
+### Using
+
+Select Hadoop from the Rancher Catalog. Common HDFS options and Yarn/MapReduce memory options are available to set.
+
+Once the values are set, and the cluster is deployed:
+
+On the hosts running the following services you can access:
+
+* HDFS manager on: `namenode-primary:50070`.
+* Yarn Resource manager is accessible via `yarn-resourcemanager:8088`
+
+Your default HDFS filesystem URL is `hdfs://namenode-primary:8020` (Only available on Rancher Network)
+
+
+
+
+
diff --git a/templates/hadoop/0/docker-compose.yml b/templates/hadoop/0/docker-compose.yml
new file mode 100644
index 0000000..0885515
--- /dev/null
+++ b/templates/hadoop/0/docker-compose.yml
@@ -0,0 +1,166 @@
+bootstrap-hdfs:
+  image: rancher/hadoop-base:v0.3.5
+  labels:
+    io.rancher.container.start_once: true
+  command: 'su -c "sleep 20 && exec /bootstrap-hdfs.sh" hdfs'  # runs 5s after the namenode's own 15s delay
+  net: "container:namenode-primary"  # sidekick shares the namenode container's network
+  volumes_from:
+    - namenode-primary-data
+sl-namenode-config:
+  image: rancher/hadoop-followers-config:v0.3.5
+  net: "container:namenode-primary"
+  environment:
+    NODETYPE: "hdfs"
+  volumes_from:
+    - namenode-primary-data
+namenode-config:
+  image: rancher/hadoop-config:v0.3.5
+  net: "container:namenode-primary"
+  volumes_from:
+    - namenode-primary-data
+namenode-primary:
+  image: rancher/hadoop-base:v0.3.5
+  command: 'su -c "sleep 15 && /usr/local/hadoop-2.7.1/bin/hdfs namenode" hdfs'
+  volumes_from:
+    - namenode-primary-data
+  ports:
+    - '50070:50070'  # HDFS manager UI (see README)
+  labels:
+    io.rancher.sidekicks: namenode-config,sl-namenode-config,bootstrap-hdfs,namenode-primary-data
+    io.rancher.container.hostname_override: container_name
+    io.rancher.scheduler.affinity:container_label_soft: io.rancher.stack_service.name=$${stack_name}/yarn-resourcemanager,io.rancher.stack_service.name=$${stack_name}/jobhistory-server
+    io.rancher.scheduler.affinity:container_label_ne: io.rancher.stack_service.name=$${stack_name}/datanode
+namenode-primary-data:
+  image: rancher/hadoop-base:v0.3.5
+  volumes:
+    - '${cluster}-namenode-primary-config:/etc/hadoop'  # named volume, prefixed with the "cluster" answer
+    - '/tmp'
+  net: none
+  labels:
+    io.rancher.container.start_once: true
+  command: '/bootstrap-local.sh'
+
+
+datanode-config:
+  image: rancher/hadoop-config:v0.3.5
+  net: "container:datanode"
+  volumes_from:
+    - datanode-data
+datanode-data:
+  image: rancher/hadoop-base:v0.3.5
+  net: none
+  volumes:
+    - '${cluster}-datanode-config:/etc/hadoop'
+    - '/tmp'
+  labels:
+    io.rancher.container.start_once: true
+  command: '/bootstrap-local.sh'
+datanode:
+  image: rancher/hadoop-base:v0.3.5
+  volumes_from:
+    - datanode-data
+  labels:
+    io.rancher.sidekicks: datanode-config,datanode-data
+    io.rancher.container.hostname_override: container_name
+    io.rancher.scheduler.affinity:container_label_ne: io.rancher.stack_service.name=$${stack_name}/$${service_name}
+    io.rancher.scheduler.affinity:container_label_soft_ne: io.rancher.stack_service.name=$${stack_name}/namenode-primary,io.rancher.stack_service.name=$${stack_name}/yarn-resourcemanager
+  links:
+    - 'namenode-primary:namenode'
+  command: 'su -c "sleep 45 && exec /usr/local/hadoop-2.7.1/bin/hdfs datanode" hdfs'
+
+yarn-nodemanager-config:
+  image: rancher/hadoop-config:v0.3.5
+  net: "container:yarn-nodemanager"
+  volumes_from:
+    - yarn-nodemanager-data
+yarn-nodemanager-data:
+  image: rancher/hadoop-base:v0.3.5
+  net: none
+  volumes:
+    - '${cluster}-yarn-nodemanager-config:/etc/hadoop'
+    - '/tmp'
+  labels:
+    io.rancher.container.start_once: true
+  command: '/bootstrap-local.sh'
+yarn-nodemanager:
+  image: rancher/hadoop-base:v0.3.5
+  volumes_from:
+    - yarn-nodemanager-data
+  ports:
+    - '8042:8042'
+  labels:
+    io.rancher.container.hostname_override: container_name
+    io.rancher.sidekicks: yarn-nodemanager-config,yarn-nodemanager-data
+    io.rancher.scheduler.affinity:container_label_ne: io.rancher.stack_service.name=$${stack_name}/$${service_name}
+    io.rancher.scheduler.affinity:container_label_soft_ne: io.rancher.stack_service.name=$${stack_name}/namenode-primary,io.rancher.stack_service.name=$${stack_name}/yarn-resourcemanager,io.rancher.stack_service.name=$${stack_name}/jobhistory-server
+    io.rancher.scheduler.affinity:container_label: io.rancher.stack_service.name=$${stack_name}/datanode
+  links:
+    - 'namenode-primary:namenode'
+    - 'yarn-resourcemanager:yarn-rm'
+  command: 'su -c "sleep 45 && exec /usr/local/hadoop-2.7.1/bin/yarn nodemanager" yarn'
+
+jobhistory-server-config:
+  image: rancher/hadoop-config:v0.3.5
+  net: "container:jobhistory-server"
+  volumes_from:
+    - jobhistory-server-data
+jobhistory-server-data:
+  image: rancher/hadoop-base:v0.3.5
+  net: none
+  volumes:
+    - '${cluster}-jobhistory-config:/etc/hadoop'
+    - '/tmp'
+  labels:
+    io.rancher.container.start_once: true
+  command: '/bootstrap-local.sh'
+jobhistory-server:
+  image: rancher/hadoop-base:v0.3.5
+  volumes_from:
+    - jobhistory-server-data
+  links:
+    - 'namenode-primary:namenode'
+    - 'yarn-resourcemanager:yarn-rm'
+  ports:
+    - '10020:10020'
+    - '19888:19888'
+  labels:
+    io.rancher.sidekicks: jobhistory-server-config,jobhistory-server-data
+    io.rancher.container.hostname_override: container_name
+    io.rancher.scheduler.affinity:container_label: io.rancher.stack_service.name=$${stack_name}/yarn-resourcemanager,io.rancher.stack_service.name=$${stack_name}/namenode-primary
+  command: 'su -c "sleep 45 && /usr/local/hadoop-2.7.1/bin/mapred historyserver" mapred'
+
+yarn-resourcemanager-config:
+  image: rancher/hadoop-config:v0.3.5
+  net: "container:yarn-resourcemanager"
+  volumes_from:
+    - yarn-resourcemanager-data
+sl-yarn-resourcemanager-config:
+  image: rancher/hadoop-followers-config:v0.3.5
+  net: "container:yarn-resourcemanager"
+  environment:
+    NODETYPE: "yarn"
+  volumes_from:
+    - yarn-resourcemanager-data
+yarn-resourcemanager-data:
+  image: rancher/hadoop-base:v0.3.5
+  net: none
+  volumes:
+    - '${cluster}-yarn-resourcemanager-config:/etc/hadoop'
+    - '/tmp'
+  labels:
+    io.rancher.container.start_once: true
+  command: '/bootstrap-local.sh'
+yarn-resourcemanager:
+  image: rancher/hadoop-base:v0.3.5
+  volumes_from:
+    - yarn-resourcemanager-data
+  ports:
+    - '8088:8088'  # Yarn Resource manager UI (see README)
+  links:
+    - 'namenode-primary:namenode'
+  labels:
+    io.rancher.sidekicks: yarn-resourcemanager-config,sl-yarn-resourcemanager-config,yarn-resourcemanager-data
+    io.rancher.container.hostname_override: container_name
+    io.rancher.scheduler.affinity:container_label_ne: io.rancher.stack_service.name=$${stack_name}/$${service_name},io.rancher.stack_service.name=$${stack_name}/datanode,io.rancher.stack_service.name=$${stack_name}/yarn-nodemanager
+    io.rancher.scheduler.affinity:container_label: io.rancher.stack_service.name=$${stack_name}/namenode-primary
+  command: 'su -c "sleep 30 && /usr/local/hadoop-2.7.1/bin/yarn resourcemanager" yarn'
diff --git a/templates/hadoop/0/rancher-compose.yml b/templates/hadoop/0/rancher-compose.yml
new file mode 100644
index 0000000..3bb1a14
--- /dev/null
+++ b/templates/hadoop/0/rancher-compose.yml
@@ -0,0 +1,100 @@
+.catalog:
+  name: "Hadoop + Yarn"
+  version: "2.7.1-rancher1"
+  description: "Hadoop + Yarn"
+  minimum_rancher_version: v0.46.0
+  questions:
+    - variable: "cluster"  # used as the prefix of the named volumes in docker-compose.yml
+      label: "Cluster Name"
+      description: "Name for the stack volumes"
+      required: true
+      default: "hadoop"
+      type: "string"
+    - variable: "dfs_replication"
+      description: "Default number of HDFS replicas"
+      label: "Default DFS Replica Count"
+      required: true
+      type: "int"
+      default: "3"
+    - variable: "yarn_node_manager_cpu_vcores"
+      description: "yarn.nodemanager.resource.cpu-vcores value"
+      label: "Yarn Nodemanager CPU vcores"
+      default: "8"
+      type: "int"
+      required: true
+    - variable: "yarn_node_manager_resource_memory"
+      description: "yarn.nodemanager.resource.memory-mb value"
+      label: "Yarn Nodemanager Memory Value"
+      default: "8192"
+      type: "int"
+      required: true
+    - variable: "yarn_minimum_allocation"
+      description: "yarn.scheduler.minimum-allocation-mb value"
+      label: "Yarn Minimum Memory allocation"
+      default: "1024"
+      type: "int"
+      required: true
+    - variable: "mapreduce_map_memory"
+      description: "mapreduce.map.memory.mb"
+      label: "Mapreduce Map Memory"
+      default: "1024"
+      type: "int"
+      required: true
+    - variable: "mapreduce_reduce_memory"
+      description: "mapreduce.reduce.memory.mb"
+      label: "Mapreduce Reduce Memory"
+      default: "2048"
+      type: "int"
+      required: true
+    - variable: "mapreduce_map_java_opts"
+      description: "mapreduce.map.java.opts"
+      label: "Mapreduce Map Java Opts"
+      default: "-Xmx768m"
+      type: "string"
+      required: true
+    - variable: "mapreduce_reduce_java_opts"
+      description: "mapreduce.reduce.java.opts"
+      label: "Mapreduce Reduce Java Opts"
+      default: "-Xmx1536m"
+      type: "string"
+      required: true
+namenode-primary:
+  scale: 1
+  metadata: &hdfs_metadata  # reused by datanode through the alias below
+    core-site:
+      hadoop.proxyuser.hue.hosts: "*"
+      hadoop.proxyuser.hue.groups: "*"
+    hdfs-site:
+      dfs.replication: "${dfs_replication}"
+      dfs.webhdfs.enabled: "true"
+datanode:
+  scale: 1
+  metadata: *hdfs_metadata
+yarn-resourcemanager:
+  scale: 1
+  metadata: &yarn_metadata  # reused by jobhistory-server and yarn-nodemanager
+    core-site:
+      hadoop.proxyuser.hue.hosts: "*"
+      hadoop.proxyuser.hue.groups: "*"
+    hdfs-site:
+      dfs.replication: "${dfs_replication}"  # same property key as in hdfs_metadata
+      dfs.webhdfs.enabled: "true"
+    yarn-site:
+      yarn.nodemanager.resource.cpu-vcores: "${yarn_node_manager_cpu_vcores}"
+      yarn.nodemanager.resource.memory-mb: "${yarn_node_manager_resource_memory}"
+      yarn.scheduler.minimum-allocation-mb: "${yarn_minimum_allocation}"
+      yarn.nodemanager.aux-services: "mapreduce_shuffle"
+      yarn.log-aggregation-enable: "true"
+      yarn.log-aggregation.retain-seconds: 10800
+      yarn.log-aggregation.retain-check-interval-seconds: 3600
+    mapred-site:
+      mapreduce.map.memory.mb: "${mapreduce_map_memory}"
+      mapreduce.reduce.memory.mb: "${mapreduce_reduce_memory}"
+      mapreduce.map.java.opts: "${mapreduce_map_java_opts}"
+      mapreduce.reduce.java.opts: "${mapreduce_reduce_java_opts}"
+jobhistory-server:
+  scale: 1
+  metadata: *yarn_metadata
+yarn-nodemanager:
+  scale: 1
+  metadata: *yarn_metadata
diff --git a/templates/hadoop/catalogIcon-hadoop.svg b/templates/hadoop/catalogIcon-hadoop.svg
new file mode 100644
index 0000000..0979794
--- /dev/null
+++ b/templates/hadoop/catalogIcon-hadoop.svg
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/templates/hadoop/config.yml b/templates/hadoop/config.yml
new file mode 100644
index 0000000..270f772
--- /dev/null
+++ b/templates/hadoop/config.yml
@@ -0,0 +1,5 @@
+name: Hadoop + Yarn
+description: |
+  (Experimental - Requires 2+ hosts) Hadoop + Yarn big data tools
+version: 2.7.1-rancher1
+category: Big Data