3 Hadoop Installation: Standalone and Pseudo-Distributed Modes

Standalone Mode
This is the default mode: the stock configuration runs Hadoop standalone.
Installing hadoop-client pulls in all of the dependent packages:
yum install hadoop-client
Configuration files
All configuration files live under /etc/hadoop/conf.
Note: in standalone mode there is no real HDFS; the local filesystem stands in for it.
Verify with the bundled grep MapReduce example (a note on re-running follows the commands):
mkdir input
cp /etc/hadoop/conf/*.xml input
hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples-2.6.0-cdh5.12.1.jar grep input output 'dfs[a-z.]+'
cat output/*
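MapReduce refuses to start a job whose output directory already exists, so clean up before a re-run. A minimal sketch (and since fs.defaultFS defaults to file:/// in this mode, hadoop fs commands operate on the local filesystem):
rm -rf output    # otherwise the job aborts with FileAlreadyExistsException
hadoop fs -ls /  # with fs.defaultFS unset this lists the local root, same as `ls /`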
---

[root@hadoop1 ~]# yum install hadoop-client
[root@hadoop1 ~]# cd /usr/lib/
[root@hadoop1 lib]# ll
drwxr-xr-x  10 root root  4096 May 13 10:37 hadoop
drwxr-xr-x  10 root root  4096 May 13 10:37 hadoop-0.20-mapreduce
drwxr-xr-x   7 root root  4096 May 13 10:37 hadoop-hdfs
drwxr-xr-x   6 root root 12288 May 13 10:37 hadoop-mapreduce
drwxr-xr-x   7 root root  4096 May 13 10:37 hadoop-yarn
[root@hadoop1 lib]# cd /etc/hadoop
[root@hadoop1 hadoop]# ll
total 8
lrwxrwxrwx 1 root root   29 May 13 10:37 conf -> /etc/alternatives/hadoop-conf
lrwxrwxrwx 1 root root   10 May 13 10:37 conf.dist -> conf.empty
drwxr-xr-x 2 root root 4096 May 13 10:37 conf.empty
drwxr-xr-x 2 root root 4096 May 13 10:37 conf.impala
[root@hadoop1 hadoop]# update-alternatives --display hadoop-conf
hadoop-conf - status is auto.
 link currently points to /etc/hadoop/conf.empty
/etc/hadoop/conf.empty - priority 10
/etc/hadoop/conf.impala - priority 5
Current `best' version is /etc/hadoop/conf.empty.
[root@hadoop1 hadoop]# cd conf.empty/
[root@hadoop1 conf.empty]# ll
total 104
-rw-r--r-- 1 root root  4436 Nov 22 14:09 capacity-scheduler.xml
-rw-r--r-- 1 root root  1335 Nov 22 14:09 configuration.xsl
-rw-r--r-- 1 root root   318 Nov 22 14:09 container-executor.cfg
-rw-r--r-- 1 root root   904 Nov 22 14:09 core-site.xml
-rw-r--r-- 1 root root  3032 Nov 22 14:09 fair-scheduler.xml
-rw-r--r-- 1 root root  2598 Nov 22 14:09 hadoop-metrics2.properties
-rw-r--r-- 1 root root  2490 Nov 22 14:09 hadoop-metrics.properties
-rw-r--r-- 1 root root  9683 Nov 22 14:09 hadoop-policy.xml
-rw-r--r-- 1 root root  1039 Nov 22 14:09 hdfs-site.xml
-rw-r--r-- 1 root root 12601 Nov 22 14:09 log4j.properties
-rw-r--r-- 1 root root  4113 Nov 22 14:09 mapred-queues.xml.template
-rw-r--r-- 1 root root   904 Nov 22 14:09 mapred-site.xml
-rw-r--r-- 1 root root   758 Nov 22 14:09 mapred-site.xml.template
-rw-r--r-- 1 root root    10 Nov 22 14:09 slaves
-rw-r--r-- 1 root root  2316 Nov 22 14:09 ssl-client.xml.example
-rw-r--r-- 1 root root  2697 Nov 22 14:09 ssl-server.xml.example
-rw-r--r-- 1 root root  4567 Nov 22 14:09 yarn-env.sh
-rw-r--r-- 1 root root  2296 Nov 22 14:09 yarn-site.xml
[root@hadoop1 tmp]# mkdir input
[root@hadoop1 tmp]# cd input/
[root@hadoop1 input]# cp /etc/hadoop/conf/*.xml .
[root@hadoop1 input]# cd ..
[root@hadoop1 tmp]# hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar grep input output "dfs[a-z.]+"
19/05/17 10:53:20 INFO mapred.Task: Task attempt_local1615788708_0002_r_000000_0 is allowed to commit now
19/05/17 10:53:20 INFO output.FileOutputCommitter: Saved output of task 'attempt_local1615788708_0002_r_000000_0' to file:/tmp/output/_temporary/0/task_local1615788708_0002_r_000000
19/05/17 10:53:20 INFO mapred.LocalJobRunner: reduce > reduce
19/05/17 10:53:20 INFO mapred.Task: Task 'attempt_local1615788708_0002_r_000000_0' done.
19/05/17 10:53:20 INFO mapred.LocalJobRunner: Finishing task: attempt_local1615788708_0002_r_000000_0
19/05/17 10:53:20 INFO mapred.LocalJobRunner: reduce task executor complete.
19/05/17 10:53:21 INFO mapreduce.Job: Job job_local1615788708_0002 running in uber mode : false
19/05/17 10:53:21 INFO mapreduce.Job:  map 100% reduce 100%
19/05/17 10:53:21 INFO mapreduce.Job: Job job_local1615788708_0002 completed successfully
19/05/17 10:53:21 INFO mapreduce.Job: Counters: 30
    File System Counters
        FILE: Number of bytes read=55192
        FILE: Number of bytes written=1338184
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Map input records=2
        Map output records=2
        Map output bytes=47
        Map output materialized bytes=57
        Input split bytes=108
        Combine input records=0
        Combine output records=0
        Reduce input groups=1
        Reduce shuffle bytes=57
        Reduce input records=2
        Reduce output records=2
        Spilled Records=4
        Shuffled Maps =1
        Failed Shuffles=0
        Merged Map outputs=1
        GC time elapsed (ms)=38
        Total committed heap usage (bytes)=270680064
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters 
        Bytes Read=161
    File Output Format Counters 
        Bytes Written=47
[root@hadoop1 tmp]# ll -l
total 88
drwxr-xr-x  3 root root  4096 May 17 10:52 hadoop-root
drwxr-xr-x  2 root root  4096 May 17 10:53 hsperfdata_root
drwxr-xr-x  2 root root  4096 May 17 10:50 input
drwx------. 2 root root  4096 Apr 15 17:51 keyring-sO07mP
drwx------  2 gdm  gdm   4096 Apr 26 17:12 orbit-gdm
drwxr-xr-x  2 root root  4096 May 17 10:53 output
drwx------. 2 root root  4096 Apr 15 17:51 pulse-zf8TjJesGYwd
drwx------  2 gdm  gdm   4096 Apr 26 17:12 pulse-zSmklD09U96Z
-rw-------  1 root root 55359 May 13 10:34 yum_save_tx-2019-05-13-10-34HPxvyy.yumtx
[root@hadoop1 tmp]# cd output/
[root@hadoop1 output]# ll
total 4
-rw-r--r-- 1 root root 35 May 17 10:53 part-r-00000
-rw-r--r-- 1 root root  0 May 17 10:53 _SUCCESS
[root@hadoop1 output]# cat part-r-00000 
1    dfsadmin
1    dfs.namenode.name.dir
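The _0002 suffix in the job id above is expected: the grep example actually chains two MapReduce jobs, a search job followed by a sort job that orders the matches by frequency, and the log shown here is from that second job.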

Pseudo-Distributed Mode
RPM packages for each daemon role:
HDFS NameNode: yum install hadoop-hdfs-namenode
HDFS SecondaryNameNode: yum install hadoop-hdfs-secondarynamenode
HDFS DataNode: yum install hadoop-hdfs-datanode
YARN ResourceManager: yum install hadoop-yarn-resourcemanager
YARN NodeManager: yum install hadoop-yarn-nodemanager
MapReduce: yum install hadoop-mapreduce
Configuration files
All configuration files live under /etc/hadoop/conf.
Note: on a single node the HDFS block replication factor must be set to 1 (the default is 3).
The main files (a quick way to verify their effective values follows this list):
core-site.xml: the base configuration for the whole cluster
hdfs-site.xml: HDFS configuration
mapred-site.xml: MapReduce configuration
yarn-site.xml: YARN configuration
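Once the files are edited, hdfs getconf reports the configuration values a client actually resolves; a quick sanity check (the expected values assume the settings described above):
hdfs getconf -confKey fs.defaultFS     # expect hdfs://localhost:9000 once core-site.xml is in place
hdfs getconf -confKey dfs.replication  # expect 1 after the hdfs-site.xml change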

https://hadoop.apache.org/docs/r2.7.7/hadoop-project-dist/hadoop-common/SingleCluster.html
Pseudo-Distributed Operation
[root@hadoop1 ~]# yum install hadoop-hdfs-namenode
yum install hadoop-hdfs-secondarynamenode
yum install hadoop-hdfs-datanode
yum install hadoop-yarn-resourcemanager
yum install hadoop-yarn-nodemanager
yum install hadoop-mapreduce
[root@hadoop1 ~]# service hadoop-hdfs-namenode status
Hadoop namenode is not running                             [FAILED]
[root@hadoop1 ~]# ll /etc/init
init/      init.conf  init.d/    inittab    
[root@hadoop1 ~]# ll /etc/init.d/hadoop-*
-rwxr-xr-x 1 root root 4617 Nov 22 14:10 /etc/init.d/hadoop-hdfs-datanode
-rwxr-xr-x 1 root root 5381 Nov 22 14:10 /etc/init.d/hadoop-hdfs-namenode
-rwxr-xr-x 1 root root 4468 Nov 22 14:10 /etc/init.d/hadoop-hdfs-secondarynamenode
-rwxr-xr-x 1 root root 4487 Nov 22 14:10 /etc/init.d/hadoop-yarn-nodemanager
-rwxr-xr-x 1 root root 4447 Nov 22 14:10 /etc/init.d/hadoop-yarn-resourcemanager
[root@hadoop1 hadoop]# cp -R conf.empty conf.pseudo
[root@hadoop1 hadoop]# cd conf.pseudo/
[root@hadoop1 conf.pseudo]# vim core-site.xml 
[root@hadoop1 conf.pseudo]# vim hdfs-site.xml
[root@hadoop1 conf.pseudo]# vim mapred-site.xml
[root@hadoop1 conf.pseudo]# vim yarn-site.xml
--core-site.xml
<configuration>
   <property>
       <name>fs.defaultFS</name>
       <value>hdfs://localhost:9000</value>
   </property>
</configuration>
--hdfs-site.xml
<configuration>
  <property>
     <name>dfs.replication</name>
     <value>1</value>
  </property>
</configuration>
--mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
--yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
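Before pointing the alternatives link at conf.pseudo, it can be worth confirming the edited files are still well-formed XML; a small sketch using xmllint (from libxml2, assumed to be installed):
xmllint --noout /etc/hadoop/conf.pseudo/*.xml && echo "all config files well-formed"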
[root@hadoop1 hadoop]# update-alternatives --install /etc/hadoop/conf hadoop-conf /etc/hadoop/conf.pseudo 50
[root@hadoop1 hadoop]# update-alternatives --display hadoop-conf
hadoop-conf - status is auto.
 link currently points to /etc/hadoop/conf.pseudo
/etc/hadoop/conf.empty - priority 10
/etc/hadoop/conf.impala - priority 5
/etc/hadoop/conf.pseudo - priority 50
Current `best' version is /etc/hadoop/conf.pseudo.
## priority 50 (highest, so auto mode selects conf.pseudo)
[root@hadoop1 hadoop]# ll /etc/hadoop/conf
lrwxrwxrwx 1 root root 29 May 17 11:18 /etc/hadoop/conf -> /etc/alternatives/hadoop-conf
[root@hadoop1 hadoop]# ll /etc/alternatives/hadoop-conf/
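If auto mode ever selects the wrong directory, update-alternatives can pin one explicitly; a sketch:
update-alternatives --set hadoop-conf /etc/hadoop/conf.pseudo   # switches to manual mode and pins this directory
update-alternatives --auto hadoop-conf                          # returns to priority-based selection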

Initialize HDFS
The NameNode must be formatted before first use:
hadoop namenode -format
Start the services
service hadoop-hdfs-namenode start
service hadoop-hdfs-datanode start
service hadoop-yarn-resourcemanager start
service hadoop-yarn-nodemanager start
Verify the services
Web UI:
NameNode: http://{hostname}:50070
ResourceManager: http://{hostname}:8088
Command line (a couple of extra sanity checks are sketched after this list):
HDFS: hadoop fs -mkdir /user && hadoop fs -mkdir /user/{username}
MapReduce: hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar
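Two quick checks once the daemons are up (a sketch; both commands are part of the standard Hadoop CLI):
sudo -u hdfs hdfs dfsadmin -report   # DataNode count and capacity as seen by the NameNode
yarn node -list                      # NodeManagers registered with the ResourceManager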
[root@hadoop1 hadoop]# hadoop namenode -h

[root@hadoop1 hadoop]# su hdfs
bash-4.1$ id
uid=494(hdfs) gid=491(hdfs) groups=491(hdfs),501(hadoop)
bash-4.1$ hadoop namenode -format
19/05/17 11:24:40 INFO namenode.FSImageFormatProtobuf: Image file /tmp/hadoop-hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 of size 320 bytes saved in 0 seconds .
19/05/17 11:24:40 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
19/05/17 11:24:40 INFO util.ExitUtil: Exiting with status 0
19/05/17 11:24:40 INFO namenode.NameNode: SHUTDOWN_MSG: 
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at hadoop1/192.168.19.69
************************************************************/
bash-4.1$ cd /tmp/hadoop-hdfs/
bash-4.1$ ls -l
total 4
drwxr-xr-x 3 hdfs hdfs 4096 May 17 11:24 dfs
bash-4.1$ cd dfs/name/current/
bash-4.1$ ls -l
total 16
-rw-r--r-- 1 hdfs hdfs 320 May 17 11:24 fsimage_0000000000000000000
-rw-r--r-- 1 hdfs hdfs  62 May 17 11:24 fsimage_0000000000000000000.md5
-rw-r--r-- 1 hdfs hdfs   2 May 17 11:24 seen_txid
-rw-r--r-- 1 hdfs hdfs 201 May 17 11:24 VERSION
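Note that the metadata landed under /tmp: dfs.namenode.name.dir was left at its default of file://${hadoop.tmp.dir}/dfs/name, and hadoop.tmp.dir defaults to /tmp/hadoop-${user.name}, which may be wiped on reboot. For anything beyond a throwaway test, point it at a persistent location in hdfs-site.xml (the path below is only an example):
  <property>
     <name>dfs.namenode.name.dir</name>
     <value>file:///var/lib/hadoop-hdfs/cache/hdfs/dfs/name</value>
  </property>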
Startup error
[hadoop@hadoop1 ~]$ sudo service hadoop-hdfs-namenode restart
Error: JAVA_HOME is not set and could not be found.
Failed to stop Hadoop namenode. Return value: 1.           [FAILED]
Error: JAVA_HOME is not set and could not be found.
Failed to start Hadoop namenode. Return value: 3           [FAILED]
[root@hadoop1 hadoop]# java -version
java version "1.8.0_191"
Java(TM) SE Runtime Environment (build 1.8.0_191-b12)
Java HotSpot(TM) 64-Bit Server VM (build 25.191-b12, mixed mode)
[hadoop@hadoop1 jdk]$ export
declare -x HOSTNAME="hadoop1"
declare -x JAVA_HOME="/opt/jdk"
declare -x LANG="en_US.UTF-8"
[root@hadoop1 conf.pseudo]# source hadoop-env.sh
[root@hadoop1 conf.pseudo]# vim /etc/default/hadoop
export JAVA_HOME=/opt/jdk
[root@hadoop1 conf.pseudo]# source /etc/default/hadoop
[root@hadoop1 conf.pseudo]# service hadoop-hdfs-namenode restart
no namenode to stop
Stopped Hadoop namenode:                                   [  OK  ]
starting namenode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-namenode-hadoop1.out
Started Hadoop namenode:                                   [  OK  ]
Logs
[root@hadoop1 conf.pseudo]# tail -n 20  /var/log/hadoop-hdfs/hadoop-hdfs-namenode-hadoop1.log 
2019-05-17 14:19:40,920 INFO org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor: Rescanning after 30000 milliseconds
2019-05-17 14:19:40,920 INFO org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor: Scanned 0 directive(s) and 0 block(s) in 0 millisecond(s).
2019-05-17 14:20:10,920 INFO org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor: Rescanning after 30001 milliseconds
[root@hadoop1 conf.pseudo]# jps
16742 NameNode
17047 Jps
Start the remaining services one by one
[root@hadoop1 conf.pseudo]# service hadoop-hdfs-datanode start
starting datanode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-datanode-hadoop1.out
Started Hadoop datanode (hadoop-hdfs-datanode):            [  OK  ]
[root@hadoop1 conf.pseudo]# service hadoop-yarn-resourcemanager start
starting resourcemanager, logging to /var/log/hadoop-yarn/yarn-yarn-resourcemanager-hadoop1.out
Started Hadoop resourcemanager:                            [  OK  ]
[root@hadoop1 conf.pseudo]# service hadoop-yarn-nodemanager start
starting nodemanager, logging to /var/log/hadoop-yarn/yarn-yarn-nodemanager-hadoop1.out
Started Hadoop nodemanager:                                [  OK  ]
[root@hadoop1 conf.pseudo]# 
[root@hadoop1 ~]# 
[root@hadoop1 ~]# jps
17458 NodeManager
16742 NameNode
17192 ResourceManager
17081 DataNode
17583 Jps
Web access
http://*:50070/dfshealth.html#tab-overview
[root@hadoop1 ~]# w3m http://*:50070
[root@hadoop1 ~]# w3m http://*:8088
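If w3m is unavailable, curl works for a headless check; the NameNode web server also exposes metrics through its /jmx servlet (a sketch):
curl -s "http://localhost:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo"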

[root@hadoop1 ~]# sudo -u hdfs hadoop fs -ls /
Error: JAVA_HOME is not set and could not be found.
[root@hadoop1 ~]# cat >> /etc/default/hadoop-hdfs-datanode << EOF
export JAVA_HOME=/opt/jdk
EOF
[root@hadoop1 ~]# cat >> /etc/default/hadoop-hdfs-namenode << EOF
export JAVA_HOME=/opt/jdk
EOF

cat >> /etc/default/hadoop-0.20-mapreduce << EOF
export JAVA_HOME=/opt/jdk
EOF

cat >> /etc/default/hadoop-hdfs-secondarynamenode << EOF
export JAVA_HOME=/opt/jdk
EOF

cat >> /etc/default/hadoop-yarn-resourcemanager << EOF
export JAVA_HOME=/opt/jdk
EOF
cat >> /etc/default/hadoop-yarn-nodemanager << EOF
export JAVA_HOME=/opt/jdk
EOF
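Appending JAVA_HOME to every /etc/default file works, but a common consolidation is to set it once in the active configuration directory's hadoop-env.sh, which the hadoop launcher scripts source if present (a sketch, assuming /opt/jdk is the JDK path; the file is created if it does not exist):
echo 'export JAVA_HOME=/opt/jdk' >> /etc/hadoop/conf/hadoop-env.sh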

[root@hadoop1 ~]# source /etc/default/hadoop-hdfs-namenode
[root@hadoop1 ~]# source /etc/default/hadoop-hdfs-datanode 
[root@hadoop1 ~]# source /etc/default/hadoop-hdfs-secondarynamenode 
[root@hadoop1 ~]# source /etc/default/hadoop-yarn-resourcemanager 
[root@hadoop1 ~]# source /etc/default/hadoop-yarn-nodemanager 
[root@hadoop1 ~]# source /etc/default/hadoop-0.20-mapreduce 
[root@hadoop1 ~]# sudo -u hdfs hadoop fs -ls /
Error: JAVA_HOME is not set and could not be found.
(Note: sudo -u hdfs still fails even after the source commands, most likely because sudo resets the environment by default, so JAVA_HOME exported in the root shell is not passed through; running hadoop directly as root now works.)
[root@hadoop1 ~]# hadoop fs -ls /
[root@hadoop1 ~]# hadoop fs -mkdir -p /user/hadoop
mkdir: Permission denied: user=root, access=WRITE, inode="/":hdfs:supergroup:drwxr-xr-x
[root@hadoop1 ~]# su hdfs
bash-4.1$ hadoop fs -ls /
bash-4.1$ hadoop fs -mkdir -p /user/hadoop
bash-4.1$ hadoop fs -chown hadoop:hadoop /user/hadoop
bash-4.1$ hadoop fs -mkdir /user/hadoop/input
bash-4.1$ hadoop fs -put /etc/hadoop/conf/*.xml /user/hadoop/input/
bash-4.1$ hadoop fs -ls /user/hadoop/input
Found 7 items
-rw-r--r--   1 hdfs hadoop       4436 2019-05-17 15:03 /user/hadoop/input/capacity-scheduler.xml
-rw-r--r--   1 hdfs hadoop       1010 2019-05-17 15:03 /user/hadoop/input/core-site.xml
-rw-r--r--   1 hdfs hadoop       3032 2019-05-17 15:03 /user/hadoop/input/fair-scheduler.xml
-rw-r--r--   1 hdfs hadoop       9683 2019-05-17 15:03 /user/hadoop/input/hadoop-policy.xml
-rw-r--r--   1 hdfs hadoop        987 2019-05-17 15:03 /user/hadoop/input/hdfs-site.xml
-rw-r--r--   1 hdfs hadoop       1009 2019-05-17 15:03 /user/hadoop/input/mapred-site.xml
-rw-r--r--   1 hdfs hadoop       1015 2019-05-17 15:03 /user/hadoop/input/yarn-site.xml

bash-4.1$  hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar grep /user/hadoop/input /user/hadoop/output "dfs[a-z.]+"
19/05/17 15:05:44 INFO mapred.Task: Task:attempt_local1573914124_0002_r_000000_0 is done. And is in the process of commiting
19/05/17 15:05:44 INFO mapred.LocalJobRunner: 
19/05/17 15:05:44 INFO mapred.Task: Task attempt_local1573914124_0002_r_000000_0 is allowed to commit now
19/05/17 15:05:44 INFO mapred.FileOutputCommitter: Saved output of task 'attempt_local1573914124_0002_r_000000_0' to hdfs://localhost:9000/user/hadoop/output
19/05/17 15:05:44 INFO mapred.LocalJobRunner: reduce > reduce
19/05/17 15:05:44 INFO mapred.Task: Task 'attempt_local1573914124_0002_r_000000_0' done.
19/05/17 15:05:45 INFO mapred.JobClient:  map 100% reduce 100%
19/05/17 15:05:45 INFO mapred.JobClient: Job complete: job_local1573914124_0002
19/05/17 15:05:45 INFO mapred.JobClient: Counters: 26
19/05/17 15:05:45 INFO mapred.JobClient:   File System Counters
19/05/17 15:05:45 INFO mapred.JobClient:     FILE: Number of bytes read=581599
19/05/17 15:05:45 INFO mapred.JobClient:     FILE: Number of bytes written=1254598
19/05/17 15:05:45 INFO mapred.JobClient:     FILE: Number of read operations=0
19/05/17 15:05:45 INFO mapred.JobClient:     FILE: Number of large read operations=0
19/05/17 15:05:45 INFO mapred.JobClient:     FILE: Number of write operations=0
19/05/17 15:05:45 INFO mapred.JobClient:     HDFS: Number of bytes read=42630
19/05/17 15:05:45 INFO mapred.JobClient:     HDFS: Number of bytes written=315
19/05/17 15:05:45 INFO mapred.JobClient:     HDFS: Number of read operations=80
19/05/17 15:05:45 INFO mapred.JobClient:     HDFS: Number of large read operations=0
19/05/17 15:05:45 INFO mapred.JobClient:     HDFS: Number of write operations=20
19/05/17 15:05:45 INFO mapred.JobClient:   Map-Reduce Framework
19/05/17 15:05:45 INFO mapred.JobClient:     Map input records=2
19/05/17 15:05:45 INFO mapred.JobClient:     Map output records=2
19/05/17 15:05:45 INFO mapred.JobClient:     Map output bytes=41
19/05/17 15:05:45 INFO mapred.JobClient:     Input split bytes=115
19/05/17 15:05:45 INFO mapred.JobClient:     Combine input records=0
19/05/17 15:05:45 INFO mapred.JobClient:     Combine output records=0
19/05/17 15:05:45 INFO mapred.JobClient:     Reduce input groups=1
19/05/17 15:05:45 INFO mapred.JobClient:     Reduce shuffle bytes=0
19/05/17 15:05:45 INFO mapred.JobClient:     Reduce input records=2
19/05/17 15:05:45 INFO mapred.JobClient:     Reduce output records=2
19/05/17 15:05:45 INFO mapred.JobClient:     Spilled Records=4
19/05/17 15:05:45 INFO mapred.JobClient:     CPU time spent (ms)=0
19/05/17 15:05:45 INFO mapred.JobClient:     Physical memory (bytes) snapshot=0
19/05/17 15:05:45 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=0
19/05/17 15:05:45 INFO mapred.JobClient:     Total committed heap usage (bytes)=260194304
19/05/17 15:05:45 INFO mapred.JobClient:   org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter
19/05/17 15:05:45 INFO mapred.JobClient:     BYTES_READ=57
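To read the result back from HDFS (this step is not in the original transcript):
hadoop fs -cat /user/hadoop/output/part*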
Original article: https://www.cnblogs.com/yhq1314/p/10882372.html