Spark 2.1.0 Cluster Installation

JDK installation

http://www.cnblogs.com/xiaojf/p/6568426.html

Scala 2.11 installation

http://www.cnblogs.com/xiaojf/p/6568432.html

Hadoop 2.7 installation

http://www.cnblogs.com/xiaojf/p/6629351.html

Begin the Spark 2.1.0 installation

Extract

[root@m1 jar]# tar zxvf spark-2.1.0-bin-hadoop2.7.tgz -C ../

Rename

[root@m1 jar]# cd ..
[root@m1 soft]# ll
total 32
drwxr-xr-x. 10 root   root   4096 Mar 27 07:26 hadoop
drwxr-xr-x.  2 root   root   4096 Mar 27 07:15 jar
drwxr-xr-x.  8 root   root   4096 Dec 12 16:50 jdk
drwxr-xr-x.  7 root   root   4096 Mar 22 05:46 kafka
drwxrwxr-x.  6 root   root   4096 Mar  4  2016 scala-2.11.8
drwxr-xr-x. 12 xiaojf xiaojf 4096 Dec 15 18:18 spark-2.1.0-bin-hadoop2.7
drwxr-xr-x.  5 root   root   4096 Mar 27 07:42 tmp
drwxr-xr-x. 10 root   root   4096 Aug 23  2016 zookeeper-3.4.9
[root@m1 soft]# mv spark-2.1.0-bin-hadoop2.7 spark
[root@m1 soft]# ll
total 32
drwxr-xr-x. 10 root   root   4096 Mar 27 07:26 hadoop
drwxr-xr-x.  2 root   root   4096 Mar 27 07:15 jar
drwxr-xr-x.  8 root   root   4096 Dec 12 16:50 jdk
drwxr-xr-x.  7 root   root   4096 Mar 22 05:46 kafka
drwxrwxr-x.  6 root   root   4096 Mar  4  2016 scala-2.11.8
drwxr-xr-x. 12 xiaojf xiaojf 4096 Dec 15 18:18 spark
drwxr-xr-x.  5 root   root   4096 Mar 27 07:42 tmp
drwxr-xr-x. 10 root   root   4096 Aug 23  2016 zookeeper-3.4.9

Configure environment variables

[root@m1 soft]# vi /etc/profile

Append the following two lines:

export SPARK_HOME=/usr/local/soft/spark
export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH

Then reload the profile:

[root@m1 soft]# source /etc/profile
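A quick check that the variables took effect (the echo output simply reflects the export above):

[root@m1 soft]# echo $SPARK_HOME
/usr/local/soft/spark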

Edit the configuration files

[root@m1 soft]# cd /usr/local/soft/spark/conf/
[root@m1 conf]# pwd
/usr/local/soft/spark/conf
[root@m1 conf]# ll
total 32
-rw-r--r--. 1 xiaojf xiaojf  987 Dec 15 18:18 docker.properties.template
-rw-r--r--. 1 xiaojf xiaojf 1105 Dec 15 18:18 fairscheduler.xml.template
-rw-r--r--. 1 xiaojf xiaojf 2025 Dec 15 18:18 log4j.properties.template
-rw-r--r--. 1 xiaojf xiaojf 7313 Dec 15 18:18 metrics.properties.template
-rw-r--r--. 1 xiaojf xiaojf  865 Dec 15 18:18 slaves.template
-rw-r--r--. 1 xiaojf xiaojf 1292 Dec 15 18:18 spark-defaults.conf.template
-rwxr-xr-x. 1 xiaojf xiaojf 3960 Dec 15 18:18 spark-env.sh.template
[root@m1 conf]# cp log4j.properties.template log4j.properties
[root@m1 conf]# cp slaves.template slaves
[root@m1 conf]# cp spark-defaults.conf.template spark-defaults.conf
[root@m1 conf]# cp spark-env.sh.template spark-env.sh

Edit spark-defaults.conf

[root@m1 conf]# vi spark-defaults.conf
spark.eventLog.enabled           true
spark.eventLog.dir              hdfs://m1:9000/historyserverforSpark
spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
spark.yarn.historyServer.address   m1:18080
spark.history.fs.logDirectory      hdfs://m1:9000/historyserverforSpark
spark.driver.memory              5g
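Note that spark.eventLog.dir and spark.history.fs.logDirectory point at HDFS, so the directory must exist before any application runs with event logging enabled. Once HDFS is up (see the startup section below), create it:

[root@m1 conf]# hdfs dfs -mkdir -p /historyserverforSpark

Also note that start-all.sh does not launch the history server; it is started separately with sbin/start-history-server.sh and serves its UI on port 18080 by default, which matches the spark.yarn.historyServer.address setting above.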

Edit spark-env.sh

[root@m1 conf]# vi spark-env.sh
export HADOOP_HOME=/usr/local/soft/hadoop
export HADOOP_CONF_DIR=/usr/local/soft/hadoop/etc/hadoop
export JAVA_HOME=/usr/local/soft/jdk
export SCALA_HOME=/usr/local/soft/scala-2.11.8
export SPARK_MASTER_IP=m1
export SPARK_WORKER_MEMORY=1G
export SPARK_EXECUTOR_MEMORY=1G
export SPARK_DRIVER_MEMORY=1G
export SPARK_WORKER_CORES=4
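One caveat: in Spark 2.x, SPARK_MASTER_IP is deprecated (the startup scripts still honor it but print a warning). The preferred variable is:

export SPARK_MASTER_HOST=m1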

Edit slaves

[root@m1 conf]# vi slaves
s1
s2
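start-all.sh reaches the workers over SSH, so passwordless SSH from m1 to s1 and s2 is required. If it was not already set up during the Hadoop install, something like:

[root@m1 conf]# ssh-keygen -t rsa
[root@m1 conf]# ssh-copy-id root@s1
[root@m1 conf]# ssh-copy-id root@s2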

Distribute Spark to the cluster nodes

[root@m1 soft]# scp -r spark root@s1:/usr/local/soft/
[root@m1 soft]# scp -r spark root@s2:/usr/local/soft/
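The /etc/profile changes so far exist only on m1. They are not strictly required for the daemons to start (the sbin scripts invoke the workers with absolute paths), but to run Spark commands on the workers the same two export lines should be added there too, e.g. by appending them by hand or, if it is safe to overwrite the workers' /etc/profile:

[root@m1 soft]# scp /etc/profile root@s1:/etc/profile
[root@m1 soft]# scp /etc/profile root@s2:/etc/profile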

Start

[root@m1 soft]# cd /usr/local/soft/spark/sbin/
[root@m1 sbin]# ll
total 92
-rwxr-xr-x. 1 xiaojf xiaojf 2803 Dec 15 18:18 slaves.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1429 Dec 15 18:18 spark-config.sh
-rwxr-xr-x. 1 xiaojf xiaojf 5688 Dec 15 18:18 spark-daemon.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1262 Dec 15 18:18 spark-daemons.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1190 Dec 15 18:18 start-all.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1272 Dec 15 18:18 start-history-server.sh
-rwxr-xr-x. 1 xiaojf xiaojf 2050 Dec 15 18:18 start-master.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1877 Dec 15 18:18 start-mesos-dispatcher.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1423 Dec 15 18:18 start-mesos-shuffle-service.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1279 Dec 15 18:18 start-shuffle-service.sh
-rwxr-xr-x. 1 xiaojf xiaojf 3151 Dec 15 18:18 start-slave.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1527 Dec 15 18:18 start-slaves.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1857 Dec 15 18:18 start-thriftserver.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1478 Dec 15 18:18 stop-all.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1056 Dec 15 18:18 stop-history-server.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1080 Dec 15 18:18 stop-master.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1227 Dec 15 18:18 stop-mesos-dispatcher.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1084 Dec 15 18:18 stop-mesos-shuffle-service.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1067 Dec 15 18:18 stop-shuffle-service.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1557 Dec 15 18:18 stop-slave.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1064 Dec 15 18:18 stop-slaves.sh
-rwxr-xr-x. 1 xiaojf xiaojf 1066 Dec 15 18:18 stop-thriftserver.sh
[root@m1 sbin]# ./start-all.sh 
starting org.apache.spark.deploy.master.Master, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.master.Master-1-m1.out
s1: starting org.apache.spark.deploy.worker.Worker, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-s1.out
s2: starting org.apache.spark.deploy.worker.Worker, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-s2.out

Check the processes

[root@m1 sbin]# jps
28881 Master
18017 Kafka
28948 Jps
3742 QuorumPeerMain

At this point Hadoop has not been started yet, so start Hadoop first and then start Spark again.
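Notice that the Master started above no longer appears in the jps output below: the standalone daemons were stopped in between. That (elided) step would be:

[root@m1 sbin]# ./stop-all.sh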

[root@m1 sbin]# /usr/local/soft/hadoop/sbin/start-all.sh 
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [m1]
m1: starting namenode, logging to /usr/local/soft/hadoop/logs/hadoop-root-namenode-m1.out
s2: starting datanode, logging to /usr/local/soft/hadoop/logs/hadoop-root-datanode-s2.out
s1: starting datanode, logging to /usr/local/soft/hadoop/logs/hadoop-root-datanode-s1.out
Starting secondary namenodes [m1]
m1: starting secondarynamenode, logging to /usr/local/soft/hadoop/logs/hadoop-root-secondarynamenode-m1.out
starting yarn daemons
starting resourcemanager, logging to /usr/local/soft/hadoop/logs/yarn-root-resourcemanager-m1.out
s2: starting nodemanager, logging to /usr/local/soft/hadoop/logs/yarn-root-nodemanager-s2.out
s1: starting nodemanager, logging to /usr/local/soft/hadoop/logs/yarn-root-nodemanager-s1.out
[root@m1 sbin]# jps
29504 ResourceManager
18017 Kafka
29347 SecondaryNameNode
29194 NameNode
29790 Jps
3742 QuorumPeerMain
[root@m1 sbin]# /usr/local/soft/spark/sbin/start-all.sh 
starting org.apache.spark.deploy.master.Master, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.master.Master-1-m1.out
s2: starting org.apache.spark.deploy.worker.Worker, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-s2.out
s1: starting org.apache.spark.deploy.worker.Worker, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-s1.out
[root@m1 sbin]# jps
29504 ResourceManager
18017 Kafka
29347 SecondaryNameNode
29816 Master
29194 NameNode
29885 Jps
3742 QuorumPeerMain
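The Worker processes run on s1 and s2, not on m1, so a quick cross-check (assuming jps is on the PATH for non-interactive shells on the workers):

[root@m1 sbin]# ssh s1 jps
[root@m1 sbin]# ssh s2 jps

Each should list a Worker process, and the Master web UI at http://m1:8080 should show both workers as ALIVE.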

Open spark-shell to test

[root@m1 sbin]# spark-shell 
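Note that without a --master argument (and with no spark.master set in spark-defaults.conf above), the shell runs in local mode. To actually exercise the standalone cluster:

[root@m1 sbin]# spark-shell --master spark://m1:7077

A minimal sanity check once the scala> prompt appears:

scala> sc.parallelize(1 to 1000).sum
res0: Double = 500500.0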

Done.

Original article: https://www.cnblogs.com/xiaojf/p/6637469.html