jdk安装
http://www.cnblogs.com/xiaojf/p/6568426.html
scala2.11 安装
http://www.cnblogs.com/xiaojf/p/6568432.html
hadoop2.7 安装
http://www.cnblogs.com/xiaojf/p/6629351.html
开始spark2.1.0安装
解压
[root@m1 jar]# tar zxvf spark-2.1.0-bin-hadoop2.7.tgz -C ../
重命名
[root@m1 jar]# cd .. [root@m1 soft]# ll total 32 drwxr-xr-x. 10 root root 4096 Mar 27 07:26 hadoop drwxr-xr-x. 2 root root 4096 Mar 27 07:15 jar drwxr-xr-x. 8 root root 4096 Dec 12 16:50 jdk drwxr-xr-x. 7 root root 4096 Mar 22 05:46 kafka drwxrwxr-x. 6 root root 4096 Mar 4 2016 scala-2.11.8 drwxr-xr-x. 12 xiaojf xiaojf 4096 Dec 15 18:18 spark-2.1.0-bin-hadoop2.7 drwxr-xr-x. 5 root root 4096 Mar 27 07:42 tmp drwxr-xr-x. 10 root root 4096 Aug 23 2016 zookeeper-3.4.9 [root@m1 soft]# mv spark-2.1.0-bin-hadoop2.7 spark [root@m1 soft]# ll total 32 drwxr-xr-x. 10 root root 4096 Mar 27 07:26 hadoop drwxr-xr-x. 2 root root 4096 Mar 27 07:15 jar drwxr-xr-x. 8 root root 4096 Dec 12 16:50 jdk drwxr-xr-x. 7 root root 4096 Mar 22 05:46 kafka drwxrwxr-x. 6 root root 4096 Mar 4 2016 scala-2.11.8 drwxr-xr-x. 12 xiaojf xiaojf 4096 Dec 15 18:18 spark drwxr-xr-x. 5 root root 4096 Mar 27 07:42 tmp drwxr-xr-x. 10 root root 4096 Aug 23 2016 zookeeper-3.4.9
配置环境变量
[root@m1 soft]# vi /etc/profile    # 在文件末尾添加下面两行，保存退出后再执行 source 使其生效
[root@m1 soft]# source /etc/profile
export SPARK_HOME=/usr/local/soft/spark
export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH
修改配置文件
[root@m1 soft]# cd /usr/local/soft/spark/conf/ [root@m1 conf]# pwd /usr/local/soft/spark/conf [root@m1 conf]# ll total 32 -rw-r--r--. 1 xiaojf xiaojf 987 Dec 15 18:18 docker.properties.template -rw-r--r--. 1 xiaojf xiaojf 1105 Dec 15 18:18 fairscheduler.xml.template -rw-r--r--. 1 xiaojf xiaojf 2025 Dec 15 18:18 log4j.properties.template -rw-r--r--. 1 xiaojf xiaojf 7313 Dec 15 18:18 metrics.properties.template -rw-r--r--. 1 xiaojf xiaojf 865 Dec 15 18:18 slaves.template -rw-r--r--. 1 xiaojf xiaojf 1292 Dec 15 18:18 spark-defaults.conf.template -rwxr-xr-x. 1 xiaojf xiaojf 3960 Dec 15 18:18 spark-env.sh.template [root@m1 conf]# cp log4j.properties.template log4j.properties [root@m1 conf]# cp slaves.template slaves [root@m1 conf]# cp spark-defaults.conf.template spark-defaults.conf [root@m1 conf]# cp spark-env.sh.template spark-env.sh
修改 spark-defaults.conf
[root@m1 conf]# vi spark-defaults.conf
spark.eventLog.enabled           true
spark.eventLog.dir               hdfs://m1:9000/historyserverforSpark
spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
spark.yarn.historyServer.address m1:18080
spark.history.fs.logDirectory    hdfs://m1:9000/historyserverforSpark
spark.driver.memory              5g
修改 spark-env.sh
[root@m1 conf]# vi spark-env.sh
export HADOOP_HOME=/usr/local/soft/hadoop
export HADOOP_CONF_DIR=/usr/local/soft/hadoop/etc/hadoop
export JAVA_HOME=/usr/local/soft/jdk
export SCALA_HOME=/usr/local/soft/scala-2.11.8
export SPARK_MASTER_IP=m1
export SPARK_WORKER_MEMORY=1G
export SPARK_EXECUTOR_MEMORY=1G
export SPARK_DRIVER_MEMORY=1G
export SPARK_WORKER_CORES=4
注意：SCALA_HOME 必须与实际目录名一致（前文目录列表中为 scala-2.11.8，并未重命名）；另外 Spark 2.x 中 SPARK_MASTER_IP 已被 SPARK_MASTER_HOST 取代，建议改用后者。
修改 slaves
[root@m1 conf]# vi slaves    # 文件内容为各 worker 节点主机名，每行一个：
s1
s2
分发代码到集群节点
[root@m1 soft]# scp -r spark root@s1:/usr/local/soft/ [root@m1 soft]# scp -r spark root@s2:/usr/local/soft/
启动
[root@m1 soft]# cd /usr/local/soft/spark/sbin/ [root@m1 sbin]# ll total 92 -rwxr-xr-x. 1 xiaojf xiaojf 2803 Dec 15 18:18 slaves.sh -rwxr-xr-x. 1 xiaojf xiaojf 1429 Dec 15 18:18 spark-config.sh -rwxr-xr-x. 1 xiaojf xiaojf 5688 Dec 15 18:18 spark-daemon.sh -rwxr-xr-x. 1 xiaojf xiaojf 1262 Dec 15 18:18 spark-daemons.sh -rwxr-xr-x. 1 xiaojf xiaojf 1190 Dec 15 18:18 start-all.sh -rwxr-xr-x. 1 xiaojf xiaojf 1272 Dec 15 18:18 start-history-server.sh -rwxr-xr-x. 1 xiaojf xiaojf 2050 Dec 15 18:18 start-master.sh -rwxr-xr-x. 1 xiaojf xiaojf 1877 Dec 15 18:18 start-mesos-dispatcher.sh -rwxr-xr-x. 1 xiaojf xiaojf 1423 Dec 15 18:18 start-mesos-shuffle-service.sh -rwxr-xr-x. 1 xiaojf xiaojf 1279 Dec 15 18:18 start-shuffle-service.sh -rwxr-xr-x. 1 xiaojf xiaojf 3151 Dec 15 18:18 start-slave.sh -rwxr-xr-x. 1 xiaojf xiaojf 1527 Dec 15 18:18 start-slaves.sh -rwxr-xr-x. 1 xiaojf xiaojf 1857 Dec 15 18:18 start-thriftserver.sh -rwxr-xr-x. 1 xiaojf xiaojf 1478 Dec 15 18:18 stop-all.sh -rwxr-xr-x. 1 xiaojf xiaojf 1056 Dec 15 18:18 stop-history-server.sh -rwxr-xr-x. 1 xiaojf xiaojf 1080 Dec 15 18:18 stop-master.sh -rwxr-xr-x. 1 xiaojf xiaojf 1227 Dec 15 18:18 stop-mesos-dispatcher.sh -rwxr-xr-x. 1 xiaojf xiaojf 1084 Dec 15 18:18 stop-mesos-shuffle-service.sh -rwxr-xr-x. 1 xiaojf xiaojf 1067 Dec 15 18:18 stop-shuffle-service.sh -rwxr-xr-x. 1 xiaojf xiaojf 1557 Dec 15 18:18 stop-slave.sh -rwxr-xr-x. 1 xiaojf xiaojf 1064 Dec 15 18:18 stop-slaves.sh -rwxr-xr-x. 1 xiaojf xiaojf 1066 Dec 15 18:18 stop-thriftserver.sh [root@m1 sbin]# ./start-all.sh starting org.apache.spark.deploy.master.Master, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.master.Master-1-m1.out s1: starting org.apache.spark.deploy.worker.Worker, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-s1.out s2: starting org.apache.spark.deploy.worker.Worker, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-s2.out
查看进程
[root@m1 sbin]# jps 28881 Master 18017 Kafka 28948 Jps 3742 QuorumPeerMain
这个时候还没有启动hadoop,所以先启动hadoop,再启动spark
[root@m1 sbin]# /usr/local/soft/hadoop/sbin/start-all.sh This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh Starting namenodes on [m1] m1: starting namenode, logging to /usr/local/soft/hadoop/logs/hadoop-root-namenode-m1.out s2: starting datanode, logging to /usr/local/soft/hadoop/logs/hadoop-root-datanode-s2.out s1: starting datanode, logging to /usr/local/soft/hadoop/logs/hadoop-root-datanode-s1.out Starting secondary namenodes [m1] m1: starting secondarynamenode, logging to /usr/local/soft/hadoop/logs/hadoop-root-secondarynamenode-m1.out starting yarn daemons starting resourcemanager, logging to /usr/local/soft/hadoop/logs/yarn-root-resourcemanager-m1.out s2: starting nodemanager, logging to /usr/local/soft/hadoop/logs/yarn-root-nodemanager-s2.out s1: starting nodemanager, logging to /usr/local/soft/hadoop/logs/yarn-root-nodemanager-s1.out [root@m1 sbin]# jps 29504 ResourceManager 18017 Kafka 29347 SecondaryNameNode 29194 NameNode 29790 Jps 3742 QuorumPeerMain
[root@m1 sbin]# /usr/local/soft/spark/sbin/start-all.sh starting org.apache.spark.deploy.master.Master, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.master.Master-1-m1.out s2: starting org.apache.spark.deploy.worker.Worker, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-s2.out s1: starting org.apache.spark.deploy.worker.Worker, logging to /usr/local/soft/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-s1.out [root@m1 sbin]# jps 29504 ResourceManager 18017 Kafka 29347 SecondaryNameNode 29816 Master 29194 NameNode 29885 Jps 3742 QuorumPeerMain
打开spark-shell 测试
[root@m1 sbin]# spark-shell
完成