Hadoop Single-Node Deployment on CentOS 7

Preparation

Download Hadoop (version 2.7.5):

http://archive.apache.org/dist/hadoop/core/
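
On a headless server the tarball can be fetched directly; the URL below follows the archive's usual layout for this release (a sketch, verify the exact path before relying on it):

wget http://archive.apache.org/dist/hadoop/core/hadoop-2.7.5/hadoop-2.7.5.tar.gz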

Disable the firewall:

# Stop the firewall
systemctl stop firewalld

# Keep the firewall from starting at boot
systemctl disable firewalld
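
To confirm the service is really down:

# Prints "inactive" once the service is stopped
systemctl is-active firewalld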

Edit the hosts file so the hostname Hadoop uses maps to the machine's real IP address (not 127.0.0.1):

# vim /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6

10.0.0.19  vsr119
10.1.0.19  sr119
10.1.0.31  sr131
10.0.0.29  vsr129
10.1.0.29  sr129
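
Resolution can be verified before going further:

# Both should resolve sr131 to 10.1.0.31
getent hosts sr131
ping -c 1 sr131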

Install the JDK

# Extract (JDK 1.8 or later)
tar xf /opt/jdk-8u202-linux-x64.tar.gz

# Configure environment variables (as your own user). The path below assumes
# the extracted JDK directory was renamed to /home/jiangchun/jdk1.8.
$ vim .bashrc
# JAVA_HOME
export JAVA_HOME=/home/jiangchun/jdk1.8
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH

# Reload environment variables
source .bashrc

# Verify (the version printed will match whichever JDK you installed)
$ java -version
java version "1.8.0_161"
Java(TM) SE Runtime Environment (build 1.8.0_161-b12)
Java HotSpot(TM) 64-Bit Server VM (build 25.161-b12, mixed mode)

Install Hadoop

# Extract
tar xf hadoop-2.7.5.tar.gz

# Configure environment variables
vim .bashrc
export HADOOP_HOME=/home/jiangchun/hadoop-2.7.5
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin

# Reload environment variables
source .bashrc

# Verify
$ hadoop version
Hadoop 2.7.5
Subversion https://shv@git-wip-us.apache.org/repos/asf/hadoop.git -r 18065c2b6806ed4aa6a3187d77cbe21bb3dba075
Compiled by kshvachk on 2017-12-16T01:06Z
Compiled with protoc 2.5.0
From source with checksum 9f118f95f47043332d51891e37f736e9
This command was run using /home/jiangchun/hadoop-2.7.5/share/hadoop/common/hadoop-common-2.7.5.jar

Configure Hadoop

1. Configure HDFS

hadoop-env.sh

# vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/hadoop-env.sh

# Set the JDK path
# The java implementation to use.
export JAVA_HOME=/home/jiangchun/jdk1.8

core-site.xml

# fs.defaultFS: the default filesystem URI; the NameNode (NN) runs on this node (sr131)
# vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/core-site.xml

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://sr131:9000</value>
    </property>
</configuration>

hdfs-site.xml

<configuration>
    <!-- Where the NameNode stores its metadata -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/home/jiangchun/hadoop-2.7.5/dfs/name</value>
    </property>
    <!-- DataNode block storage, spread across the eight mounted data disks -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/mnt/DP_disk1/tpcds/dfs,/mnt/DP_disk2/tpcds/dfs,/mnt/DP_disk3/tpcds/dfs,/mnt/DP_disk4/tpcds/dfs,/mnt/DP_disk5/tpcds/dfs,/mnt/DP_disk6/tpcds/dfs,/mnt/DP_disk7/tpcds/dfs,/mnt/DP_disk8/tpcds/dfs</value>
    </property>
    <!-- Disable HDFS permission checks (convenient on a test cluster) -->
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <!-- Single node, so one replica is enough -->
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <!-- Generous timeouts and thread limits for heavy benchmark workloads -->
    <property>
        <name>dfs.datanode.socket.write.timeout</name>
        <value>600000</value>
    </property>
    <!--
    <property>
        <name>dfs.socket.timeout</name>
        <value>0</value>
    </property>
    -->
    <property>
        <name>dfs.datanode.max.transfer.threads</name>
        <value>4096000</value>
    </property>
    <property>
        <name>dfs.datanode.directoryscan.throttle.limit.ms.per.sec</name>
        <value>1000</value>
    </property>
    <property>
        <name>dfs.datanode.handler.count</name>
        <value>40</value>
    </property>
    <property>
        <name>dfs.client.socket-timeout</name>
        <value>300000</value>
    </property>
    <!-- Deprecated alias of dfs.datanode.max.transfer.threads, kept from the original config -->
    <property>
        <name>dfs.datanode.max.xcievers</name>
        <value>8192</value>
    </property>
</configuration>
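
The directories referenced above must exist and be writable by the user running Hadoop; a minimal sketch, run after the data disks are mounted (next section) and assuming the jiangchun user from this walkthrough:

# NameNode metadata directory
mkdir -p /home/jiangchun/hadoop-2.7.5/dfs/name

# DataNode directory on each of the eight data disks
for i in $(seq 1 8); do
    mkdir -p /mnt/DP_disk${i}/tpcds/dfs
done
chown -R jiangchun:jiangchun /mnt/DP_disk*/tpcds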

slaves

# vim slaves    # enter this host's name as registered in the hosts file
sr131

Mount the data disks

# vim mount.sh
mount -t ext4 -o noatime,nodiratime /dev/sdi1 /mnt/DP_disk1
mount -t ext4 -o noatime,nodiratime /dev/sdb1 /mnt/DP_disk2
mount -t ext4 -o noatime,nodiratime /dev/sdc1 /mnt/DP_disk3
mount -t ext4 -o noatime,nodiratime /dev/sdd1 /mnt/DP_disk4
mount -t ext4 -o noatime,nodiratime /dev/sde1 /mnt/DP_disk5
mount -t ext4 -o noatime,nodiratime /dev/sdf1 /mnt/DP_disk6
mount -t ext4 -o noatime,nodiratime /dev/sdg1 /mnt/DP_disk7
mount -t ext4 -o noatime,nodiratime /dev/sdh1 /mnt/DP_disk8

mount -o dax  /dev/pmem0    /mnt/pmem0
mount -o dax  /dev/pmem1    /mnt/pmem1
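
These mount commands do not survive a reboot. To persist them, equivalent /etc/fstab entries can be added instead; a sketch for the first data disk and the first pmem device (repeat for the rest; UUID= entries from blkid are more robust than raw device names, and the pmem filesystem type is assumed to be ext4 here):

/dev/sdi1    /mnt/DP_disk1   ext4   noatime,nodiratime   0 0
/dev/pmem0   /mnt/pmem0      ext4   dax                  0 0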

Start HDFS

# Format the NameNode before first use. If it has been formatted before, stop the Hadoop processes and delete the old data directories before formatting again
hdfs namenode -format

# Start the NameNode
hadoop-daemon.sh start namenode

# Start the DataNode
hadoop-daemon.sh start datanode

# Verify: list the JVM processes
jps

# 84609 Jps
# 84242 NameNode
# 84471 DataNode

Visit the CentOS machine's IP address plus the port (50070 by default) in a browser to reach the HDFS web UI, e.g. http://sr131:50070
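
The command line gives a quicker sanity check than the browser:

# Report capacity and live DataNodes (should show 1)
hdfs dfsadmin -report

# Round-trip a directory through HDFS
hdfs dfs -mkdir -p /tmp/smoketest
hdfs dfs -ls /tmp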

2. Configure YARN

yarn-env.sh

# vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/yarn-env.sh
Use the defaults.

yarn-site.xml

# vim yarn-site.xml

<configuration>

<!-- Site specific YARN configuration properties -->

    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>sr131</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <!-- Tolerate nearly full disks before marking them unhealthy -->
    <property>
        <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
        <value>99</value>
    </property>
    <!-- Memory (MB) and vcores this NodeManager offers; size these to your hardware -->
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <!--<value>786432</value>-->
        <value>1715472</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-vcores</name>
        <!--<value>120</value>-->
        <value>96</value>
    </property>
    <property>
        <name>yarn.scheduler.minimum-allocation-vcores</name>
        <value>1</value>
    </property>
    <!-- Container scratch space, spread across the same eight data disks -->
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/mnt/DP_disk1/tpcds/yarn,/mnt/DP_disk2/tpcds/yarn,/mnt/DP_disk3/tpcds/yarn,/mnt/DP_disk4/tpcds/yarn,/mnt/DP_disk5/tpcds/yarn,/mnt/DP_disk6/tpcds/yarn,/mnt/DP_disk7/tpcds/yarn,/mnt/DP_disk8/tpcds/yarn</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.log.retain-seconds</name>
        <value>25920000</value>
    </property>
    <property>
        <name>yarn.log.server.url</name>
        <value>http://sr131:19888/jobhistory/logs/</value>
    </property>
    <!-- Disable physical/virtual memory checks so large containers are not killed -->
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>

</configuration>
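
The memory and vcore figures above come from the original author's large benchmark host (about 1.6 TB of RAM and 96 cores) and should be resized to your own machine; two quick checks give sensible upper bounds:

# Total RAM in MB; leave headroom for the OS when setting
# yarn.nodemanager.resource.memory-mb
free -m

# Core count; an upper bound for the vcore settings
nproc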

Start YARN (HDFS must already be running)

# Start the ResourceManager
yarn-daemon.sh start resourcemanager

# Start the NodeManager
yarn-daemon.sh start nodemanager

# List the JVM processes
jps

# 1604 DataNode
# 1877 ResourceManager
# 3223 Jps
# 1468 NameNode
# 2172 NodeManager

Visit the CentOS machine's IP address plus the port (8088 by default) in a browser to reach the YARN web UI, e.g. http://sr131:8088
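
As with HDFS, the CLI confirms the NodeManager registered without opening a browser:

# Should list one node (sr131) in RUNNING state
yarn node -list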

3. Configure MapReduce

mapred-env.sh

# vim mapred-env.sh

Use the defaults.
If that does not work, set the JDK path:
export JAVA_HOME=/home/jiangchun/jdk1.8

mapred-site.xml

# Copy the template
cp /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml.template /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml

# Edit
vim /home/jiangchun/hadoop-2.7.5/etc/hadoop/mapred-site.xml
<configuration>
    <!-- Run MapReduce jobs on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
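
With HDFS, YARN, and MapReduce all configured, a bundled example job exercises the whole stack. The yarn.log.server.url set earlier points at the MapReduce JobHistory server (port 19888), which must be started separately; both commands below use scripts and jars that ship with Hadoop 2.7.5:

# Start the JobHistory server (serves http://sr131:19888)
mr-jobhistory-daemon.sh start historyserver

# Smoke test: estimate pi with 2 maps of 10 samples each
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.5.jar pi 2 10
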
Original article: https://www.cnblogs.com/baicai37/p/13684280.html