一、硬件准备
基础配置:
操作系统 64位
CPU (英特尔)Intel(R) I3处理器
内存 8.00 GB ( 1600 MHz)
硬盘剩余空间 50G
流畅配置:
操作系统 64位
CPU (英特尔)Intel(R) I5处理器或以上配置
内存 16.00 GB ( 1600 MHz)
硬盘剩余空间 100G
二、软件环境准备
虚拟机 VMWare
操作系统 RedHat6.6 64位
JDK jdk-7u25-linux-i586.tar.gz(注意:i586 为 32 位安装包,与上文要求的 64 位操作系统不匹配;64 位系统建议使用 jdk-7u25-linux-x64.tar.gz)
远程连接 CRT
hadoop生态系统 hadoop-2.6.0-cdh5.5.2.tar.gz
三、安装步骤
第一步应该是同步时间,同步后需要重启;否则运行 MR 任务时会卡住不动,时间不同步还会导致运行 MR 时报错
检查防火墙和selinux是否关闭
1.vim /etc/hosts (3节点都修改,可以把初始内容都删掉后添加如下内容)
192.168.121.132 h202
192.168.121.131 h201
192.168.121.130 h200
reboot (3节点都重启)
或者修改主机名:RedHat/CentOS 6 需修改 /etc/sysconfig/network 中的 HOSTNAME(vim /etc/hostname 仅适用于 7 及以上版本)
2. 3台机器 创建hadoop 用户
useradd hadoop
passwd hadoop (然后按提示输入两次密码,例如 123456;passwd 命令的参数是用户名而不是密码)
3.安装JDK (3台都安装)
root@h202:/usr# tar -zxvf jdk-7u25-linux-i586.tar.gz
root@h202:/usr# scp -r jdk1.7.0_25/ root@h201:/usr/
root@h202:/usr# scp -r jdk1.7.0_25/ root@h200:/usr/
root@h202:/usr# vi /etc/profile(三台都改)
1 export JAVA_HOME=/usr/jdk1.7.0_25 2 export JRE_HOME=${JAVA_HOME}/jre 3 export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib 4 export PATH=${JAVA_HOME}/bin:$PATH
root@h202:/usr# source /etc/profile (使环境变量生效)
4.安装ssh 证书
root@h202:/usr# su - hadoop (3个节点必须得先切换到hadoop用户)
hadoop@h202:~$ ssh-keygen -t rsa
hadoop@h201:~$ ssh-keygen -t rsa
hadoop@h200:~$ ssh-keygen -t rsa
hadoop@h202:~$ ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub h202
hadoop@h202:~$ ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub h201
hadoop@h202:~$ ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub h200
hadoop@h201:~$ ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub h202
hadoop@h201:~$ ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub h201
hadoop@h201:~$ ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub h200
hadoop@h200:~$ ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub h202
hadoop@h200:~$ ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub h201
hadoop@h200:~$ ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub h200
5.安装hadoop-2.6.0-cdh5.5.2
hadoop@h202:~$ tar -zxvf hadoop-2.6.0-cdh5.5.2.tar.gz
hadoop@h202:~$ vi .bash_profile
1 export JAVA_HOME=/usr/jdk1.7.0_25 2 export JRE_HOME=${JAVA_HOME}/jre 3 export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib 4 export PATH=${JAVA_HOME}/bin:$PATH 5 6 HADOOP_HOME=/home/hadoop/hadoop-2.6.0-cdh5.5.2 7 HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop 8 PATH=$HADOOP_HOME/bin:$PATH 9 export HADOOP_HOME HADOOP_CONF_DIR PATH
hadoop@h202:~$ source .bash_profile (使环境变量生效,注意文件名须与上一步编辑的 .bash_profile 一致)
6.修改core-site.xml
hadoop@h202:~$ cd hadoop-2.6.0-cdh5.5.2/etc/hadoop
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2/etc/hadoop$ vi core-site.xml
1 <property> 2 <name>fs.defaultFS</name> 3 <value>hdfs://h202:9000</value> 4 <description>NameNode URI.</description> 5 </property> 6 7 <property> 8 <name>io.file.buffer.size</name> 9 <value>131072</value> 10 <description>Size of read/write buffer used inSequenceFiles.</description> 11 </property>
7.编辑hdfs-site.xml
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2/etc/hadoop$ cd /home/hadoop/hadoop-2.6.0-cdh5.5.2
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2$ mkdir -p dfs/name
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2$ mkdir -p dfs/data
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2$ mkdir -p dfs/namesecondary
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2$ cd etc/hadoop
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2/etc/hadoop$ vi hdfs-site.xml
1 <property> 2 <name>dfs.namenode.secondary.http-address</name> 3 <value>h202:50090</value> 4 <description>The secondary namenode http server address andport.</description> 5 </property> 6 7 <property> 8 <name>dfs.namenode.name.dir</name> 9 <value>file:///home/hadoop/hadoop-2.6.0-cdh5.5.2/dfs/name</value> 10 <description>Path on the local filesystem where the NameNodestores the namespace and transactions logs persistently.</description> 11 </property> 12 13 <property> 14 <name>dfs.datanode.data.dir</name> 15 <value>file:///home/hadoop/hadoop-2.6.0-cdh5.5.2/dfs/data</value> 16 <description>Comma separated list of paths on the local filesystemof a DataNode where it should store its blocks.</description> 17 </property> 18 19 <property> 20 <name>dfs.namenode.checkpoint.dir</name> 21 <value>file:///home/hadoop/hadoop-2.6.0-cdh5.5.2/dfs/namesecondary</value> 22 <description>Determines where on the local filesystem the DFSsecondary name node should store the temporary images to merge. If this is acomma-delimited list of directories then the image is replicated in all of thedirectories for redundancy.</description> 23 </property> 24 25 <property> 26 <name>dfs.replication</name> 27 <value>2</value> 28 </property>
8.编辑mapred-site.xml
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2/etc/hadoop$ cp mapred-site.xml.template mapred-site.xml
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2/etc/hadoop$ vi mapred-site.xml
1 <property> 2 <name>mapreduce.framework.name</name> 3 <value>yarn</value> 4 <description>Theruntime framework for executing MapReduce jobs. Can be one of local, classic oryarn.</description> 5 </property> 6 7 <property> 8 <name>mapreduce.jobhistory.address</name> 9 <value>h202:10020</value> 10 <description>MapReduce JobHistoryServer IPC host:port</description> 11 </property> 12 13 <property> 14 <name>mapreduce.jobhistory.webapp.address</name> 15 <value>h202:19888</value> 16 <description>MapReduce JobHistoryServer Web UI host:port</description> 17 </property>
*****
属性“mapreduce.framework.name”表示执行mapreduce任务所使用的运行框架,默认为local,需要将其改为“yarn”
*****
9.编辑yarn-site.xml
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2/etc/hadoop$ vi yarn-site.xml
1 <property> 2 <name>yarn.resourcemanager.hostname</name> 3 <value>h202</value> 4 <description>The hostname of theRM.</description> 5 </property> 6 7 <property> 8 <name>yarn.nodemanager.aux-services</name> 9 <value>mapreduce_shuffle</value> 10 <description>Shuffle service that needs to be set for Map Reduceapplications.</description> 11 </property>
10.
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2/etc/hadoop$ vi hadoop-env.sh
export JAVA_HOME=/usr/jdk1.7.0_25
11.
[hadoop@h202 hadoop]$ vi slaves
h201
h200
12.
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2/etc/hadoop$ cd
hadoop@h202:~$ scp -r ./hadoop-2.6.0-cdh5.5.2/ hadoop@h201:/home/hadoop/
hadoop@h202:~$ scp -r ./hadoop-2.6.0-cdh5.5.2/ hadoop@h200:/home/hadoop/
========================================================
WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
报上面的警告:
去网址http://dl.bintray.com/sequenceiq/sequenceiq-bin/hadoop-native-64-2.6.0.tar下载hadoop-native-64-2.6.0.tar(这个包是64位的,所以只对64位的Linux操作系统好使,32位的不行)
hadoop@h202:~$ tar -xvf hadoop-native-64-2.6.0.tar -C hadoop-2.6.0-cdh5.5.2/lib/native/
第二种方法:直接在log4j日志中去除告警信息
在hadoop-2.6.0-cdh5.5.2/etc/hadoop/log4j.properties文件中添加
log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
验证:
开启
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2$ bin/hdfs namenode -format //namenode 格式化
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2$ sbin/start-all.sh
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2$ jps
2260 ResourceManager
1959 NameNode
2121 SecondaryNameNode
2559 Jps
hadoop@h201:~/hadoop-2.6.0-cdh5.5.2$ jps
1889 NodeManager
2038 Jps
1788 DataNode
hadoop@h200:~/hadoop-2.6.0-cdh5.5.2$ jps
1889 NodeManager
2038 Jps
1788 DataNode
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2$ bin/hadoop fs -ls /
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2$ bin/hadoop fs -mkdir /aaa
关闭
hadoop@h202:~/hadoop-2.6.0-cdh5.5.2$ sbin/stop-all.sh
hadoop启动过程:NameNode Secondary NameNode DataNode
单节点开启:sbin/hadoop-daemon.sh start namenode (注意:hadoop-daemon.sh 位于 sbin 目录而非 bin 目录)
单节点关闭:sbin/hadoop-daemon.sh stop namenode