[Original work by Fei Yuanxing] One-click installation script for a fully distributed Hadoop 2.7.6 cluster - a complete solution

Found a bug? Feedback welcome, I don't mind: feiyuanxing@gmail.com


#!/bin/bash
#@author: feiyuanxing [If you're clumsy through and through, then work hard through and through]
#@date: 2017-01-05
#@E-Mail: feiyuanxing@gmail.com
#@TARGET: One-click installation of Hadoop 2.7.6 on 64-bit CentOS
#@CopyRight: This script is released under the Future Star open-source license (http://feiyuanxing.com/kaiyuanxieyi/kaiyuanxieyi.html)

# Notes:
# 0. Prerequisites:
#    0.1 The JAVA environment variables are already configured
#    0.2 Passwordless SSH login is already configured
# 1. Run as root. The software is installed under the work user; on Linux a large disk is usually mounted at /home.
# 2. The software is installed in /home/work/local/hadoop/hadoop. Why so deep?
#    2.1 local under work holds all software; the hadoop directory also hosts the other software of the big-data ecosystem.
#    2.2 Hadoop's data lives in /home/work/data; because it grows large, this directory can be mounted on a dedicated disk.
#
####################################################################################
# Customizable settings
# Hadoop ecosystem directory
INSTALL_HOME=/home/work/local/hadoop

# Data directory
DATA_HOME=/home/work/data
# Hadoop directory
ROOT_HOME=${INSTALL_HOME}/hadoop_tmp

# NameNode, usually the master (this machine); defaults to master
NAMENODE=master
# SecondaryNameNode, usually the first slave; defaults to slave01
SECONDARYNAMENODE=slave01
# Hadoop tarball
HADOOP_VERSION=hadoop-2.7.6.tar.gz
####################################################################################
# Hadoop ecosystem data paths
hadoop_logs=${DATA_HOME}/hadoop_logs
hadoop_tmp=${DATA_HOME}/hadoop_tmp
hadoop_data=${DATA_HOME}/hadoop_data
hadoop_name=${DATA_HOME}/hadoop_name
# Directory this script lives in
SCRIPT_HOME=$(cd `dirname $0`; pwd)

mkdir -p ${INSTALL_HOME} && cd ${INSTALL_HOME}
mkdir -p ${ROOT_HOME} && mkdir -p ${hadoop_logs} && mkdir -p ${hadoop_tmp} && mkdir -p ${hadoop_data} && mkdir -p ${hadoop_name}

#chown -R work:work /home/work/data/hadoop

# Cluster machine info
INSTALL_IP_ARRAY=()
INSTALL_HOSTNAME_ARRAY=()


# TODO: should first check whether the user already exists
function add_work_user(){
    adduser -d /home/work work
    passwd work
}

# TODO: should first check whether a JDK is present
function install_jdk(){
    # A JDK is present
    echo $JAVA_HOME
}


# Download Hadoop
function download_hadoop(){
    echo "(2-1/6) Fetching the package: ${INSTALL_HOME}/${HADOOP_VERSION}"
    if [ ! -f ${INSTALL_HOME}/${HADOOP_VERSION} ] ;then
        echo "Starting download"
        wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.7.6/${HADOOP_VERSION}
    fi
    echo "(2-2/6) Package in place: ${INSTALL_HOME}/${HADOOP_VERSION}"
}

function configuration_ssh(){
    # Passwordless login for this machine: generate an RSA key with an empty
    # passphrase and authorize it ("y" answers the overwrite prompt if a key exists)
    echo y | ssh-keygen -q -t rsa -N "" -f ~/.ssh/id_rsa
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
    chmod 700 ~/.ssh
    chmod 600 ~/.ssh/authorized_keys
}

function readFileToArray(){
    echo "(1/6) Reading the config file: ${SCRIPT_HOME}/hostip_hadoop.txt"
    if [ ! -f "${SCRIPT_HOME}"/hostip_hadoop.txt ];then
        echo "Please list the Hadoop cluster machines in [hostip_hadoop.txt] in this directory"
        echo "#USERNAME@IP:PORT,PASSWD (the first entry must be the master, usually this machine)
root@191.168.1.1:22,123456
root@191.168.1.2:22,123456" > ${SCRIPT_HOME}/hostip_hadoop.txt
        exit;
    else
        INSTALL_IP_ARRAY=($(cat "${SCRIPT_HOME}"/hostip_hadoop.txt | grep -v "^#"))
        INSTALL_HOSTNAME_ARRAY=($(cat "${SCRIPT_HOME}"/hostip_hadoop.txt | grep -v "^#" | awk -F '@' '{print $2}' | awk -F ':' '{print $1}'))
    fi

    #for(( i=0 ; i<${#INSTALL_IP_ARRAY[@]} ; i++)) do
    #    echo "array entry: ${INSTALL_IP_ARRAY[i]}"
    #done;
}

function configure_hadoop(){

    echo "(3-1/6) Unpacking the package:"
    # (For a self-extracting build the final file would be install_hadoop.bin, with all.tar.gz appended (>>) to it)
    #tail -c $size install_hadoop.bin > all.tar.gz
    rm -rf hadoop hadoop_tmp
    tar -zxf ${HADOOP_VERSION}
    #pwd
    mv hadoop-2.7.6 hadoop_tmp && cd hadoop_tmp

    echo "(3-2/6) Configuring the Hadoop environment variables:"
    hadoop_home_before=`cat /etc/profile | grep "HADOOP_HOME"`

    if [ -z "${hadoop_home_before}" ] ;then
        echo "HADOOP_HOME is not set yet, adding it..."
        sed -i '/HADOOP_HOME/d' /etc/profile
        # Hadoop ends up in ${INSTALL_HOME}/hadoop on every machine (see the scp below)
        echo "export HADOOP_HOME=${INSTALL_HOME}/hadoop" >> /etc/profile
        echo 'export PATH=$PATH:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin' >> /etc/profile
        # Make the variables take effect
        source /etc/profile
    fi

    echo "(4/6) Writing the basic Hadoop configuration:"
    # Patch the Hadoop config files with sed
    jdk_home=`echo $JAVA_HOME`
    if [ -z "${jdk_home}" ] ;then
        echo "Exiting: please configure a JDK first"
        exit
    fi

    # 1. hadoop-env.sh: replace the literal ${JAVA_HOME} placeholder with the real path
    sed -i "s!\${JAVA_HOME}!${jdk_home}!g" ${ROOT_HOME}/etc/hadoop/hadoop-env.sh
    # 2. core-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>fs.defaultFS</name> \t\t<value>hdfs://${NAMENODE}:9000</value> \t</property> \t<property> \t\t<name>hadoop.tmp.dir</name> \t\t<value>file:${hadoop_tmp}</value> \t</property>" ${ROOT_HOME}/etc/hadoop/core-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>io.file.buffer.size</name> \t\t<value>131072</value> \t</property>" ${ROOT_HOME}/etc/hadoop/core-site.xml
    # 3. hdfs-site.xml (hadoop_data for the DataNode, hadoop_name for the NameNode)
    sed -i "/<configuration>/a\ <property> \t\t<name>dfs.webhdfs.enabled</name> \t\t<value>true</value> \t</property>" ${ROOT_HOME}/etc/hadoop/hdfs-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>dfs.replication</name> \t\t<value>2</value> \t</property>" ${ROOT_HOME}/etc/hadoop/hdfs-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>dfs.datanode.data.dir</name> \t\t<value>file:${hadoop_data}</value> \t</property>" ${ROOT_HOME}/etc/hadoop/hdfs-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>dfs.namenode.name.dir</name> \t\t<value>file:${hadoop_name}</value> \t</property>" ${ROOT_HOME}/etc/hadoop/hdfs-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>dfs.namenode.secondary.http-address</name> \t\t<value>${SECONDARYNAMENODE}:9001</value> \t</property>" ${ROOT_HOME}/etc/hadoop/hdfs-site.xml
    # 4. mapred-site.xml
    cp ${ROOT_HOME}/etc/hadoop/mapred-site.xml.template ${ROOT_HOME}/etc/hadoop/mapred-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>mapreduce.jobhistory.webapp.address</name> \t\t<value>${NAMENODE}:19888</value> \t</property>" ${ROOT_HOME}/etc/hadoop/mapred-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>mapreduce.jobhistory.address</name> \t\t<value>${NAMENODE}:10020</value> \t</property>" ${ROOT_HOME}/etc/hadoop/mapred-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>mapreduce.framework.name</name> \t\t<value>yarn</value> \t</property>" ${ROOT_HOME}/etc/hadoop/mapred-site.xml
    # 5. yarn-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>yarn.resourcemanager.webapp.address</name> \t\t<value>${NAMENODE}:8088</value> \t</property>" ${ROOT_HOME}/etc/hadoop/yarn-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>yarn.resourcemanager.admin.address</name> \t\t<value>${NAMENODE}:8033</value> \t</property>" ${ROOT_HOME}/etc/hadoop/yarn-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>yarn.resourcemanager.resource-tracker.address</name> \t\t<value>${NAMENODE}:8031</value> \t</property>" ${ROOT_HOME}/etc/hadoop/yarn-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>yarn.resourcemanager.scheduler.address</name> \t\t<value>${NAMENODE}:8030</value> \t</property>" ${ROOT_HOME}/etc/hadoop/yarn-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>yarn.resourcemanager.address</name> \t\t<value>${NAMENODE}:8032</value> \t</property>" ${ROOT_HOME}/etc/hadoop/yarn-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> \t\t<value>org.apache.hadoop.mapred.ShuffleHandler</value> \t</property>" ${ROOT_HOME}/etc/hadoop/yarn-site.xml
    sed -i "/<configuration>/a\ <property> \t\t<name>yarn.nodemanager.aux-services</name> \t\t<value>mapreduce_shuffle</value> \t</property>" ${ROOT_HOME}/etc/hadoop/yarn-site.xml
    # 6. slaves file (the IPs parsed from hostip_hadoop.txt, one per line)
    > ${ROOT_HOME}/etc/hadoop/slaves
    for(( i=0 ; i<${#INSTALL_HOSTNAME_ARRAY[@]} ; i++)) do
        echo "${INSTALL_HOSTNAME_ARRAY[i]}" >> ${ROOT_HOME}/etc/hadoop/slaves
    done;

    echo "(5/6) Shipping Hadoop to the other machines"
    for line in ${INSTALL_IP_ARRAY[@]}
    do
        user=`echo ${line} | awk -F "@" '{print $1}'`
        ip=`echo ${line} | cut -d "@" -f 2 | cut -d ":" -f 1`
        port=`echo ${line} | cut -d "@" -f 2 | cut -d ":" -f 2 | cut -d "," -f 1`
        passwd=`echo ${line} | cut -d "," -f 2`
        echo ${user} - ${ip} - ${port} - ${passwd}
        ssh -p ${port} -o StrictHostKeyChecking=no "${user}@${ip}" "rm -rf ${DATA_HOME}/hadoop_*;rm -rf ${INSTALL_HOME}/hadoop;mkdir -p ${INSTALL_HOME}/hadoop;mkdir -p ${hadoop_logs} && mkdir -p ${hadoop_tmp} && mkdir -p ${hadoop_data} && mkdir -p ${hadoop_name}"
        #echo "scp -P ${port} -o StrictHostKeyChecking=no -rpq ${ROOT_HOME} "${user}"@"${ip}":${INSTALL_HOME}"
        scp -P ${port} -o StrictHostKeyChecking=no -rpq ${ROOT_HOME}/* "${user}"@"${ip}":${INSTALL_HOME}/hadoop
        ssh -p ${port} -o StrictHostKeyChecking=no "${user}@${ip}" "chown -R work:work ${INSTALL_HOME}"
    done

    echo "(6/6) Hadoop configured successfully"

}

# Format HDFS and start the services
function start_hadoop(){
    # Format the NameNode
    hdfs namenode -format
    echo "Format finished... starting the services"
    # Start HDFS first
    start-dfs.sh
    # Then start YARN
    start-yarn.sh

    # Check whether everything came up
    jps
}

# Read the cluster machine list
readFileToArray
#add_work_user
download_hadoop
#configuration_ssh
configure_hadoop
#start_hadoop
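To use the script, place it on the master next to a hostip_hadoop.txt listing every cluster machine (the script writes a template on first run and exits). A minimal sketch of a session, assuming the script is saved as install_hadoop.sh (the post does not fix the filename):

    # hostip_hadoop.txt -- USERNAME@IP:PORT,PASSWD, one machine per line;
    # the first entry must be the master (usually this machine)
    root@191.168.1.1:22,123456
    root@191.168.1.2:22,123456

    # run as root on the master; a JDK and passwordless SSH to every machine
    # must already be in place (see the prerequisites at the top of the script)
    bash install_hadoop.sh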
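The sed one-liners simply append a property block right after the <configuration> tag, so the block added last ends up first in the file. With the default settings, the generated core-site.xml should be equivalent to the following (in the real file each property sits on a single tab-separated line):

    <configuration>
        <property>
            <name>io.file.buffer.size</name>
            <value>131072</value>
        </property>
        <property>
            <name>fs.defaultFS</name>
            <value>hdfs://master:9000</value>
        </property>
        <property>
            <name>hadoop.tmp.dir</name>
            <value>file:/home/work/data/hadoop_tmp</value>
        </property>
    </configuration>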
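After uncommenting start_hadoop (or running its commands by hand), a quick sanity check, assuming the master is also listed in hostip_hadoop.txt and therefore runs a DataNode:

    # on the master: list the running Hadoop daemons
    jps
    # typically: NameNode, ResourceManager, DataNode, NodeManager, Jps
    # (the SecondaryNameNode runs on slave01 with the defaults above)

    # confirm that HDFS sees all DataNodes and accepts writes
    hdfs dfsadmin -report
    hdfs dfs -mkdir -p /tmp/smoketest && hdfs dfs -ls /tmp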
Original post: https://www.cnblogs.com/feiyuanxing/p/9779237.html