Dream------spark--spark集群的环境搭建

1、下载安装scala
 
2、解压下载后的文件,配置环境变量:编辑/etc/profile文件,添加如下内容:
 
# Scala install directory; its bin/ is placed at the front of PATH.
SCALA_HOME=/root/scala-2.11.6
PATH=$SCALA_HOME/bin:$PATH
export SCALA_HOME PATH
 
运行source /etc/profile使环境变量的修改立即生效
 
3、配置conf/spark-env.sh环境变量
 
进入$SPARK_HOME/conf 目录, 将 spark-env.sh.template 重命名为 spark-env.sh,修改 spark-env.sh,添加如下内容
 
# Settings added to conf/spark-env.sh.
export JAVA_HOME=/usr/java/jdk1.8.0_45

# Master host/port and per-worker resources.
export SPARK_MASTER_IP=wls01
#export SPARK_MASTER_IP=localhost
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_CORES=1
export SPARK_WORKER_INSTANCES=1
export SPARK_WORKER_MEMORY=1g

# The variable must be named HADOOP_HOME exactly: HADOOP_CONF_DIR and
# YARN_CONF_DIR below are built from it, and a misspelled name would leave
# both pointing at /etc/hadoop.
export HADOOP_HOME=/root/hadoop-2.6.0
#export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=FILESYSTEM -Dspark.deploy.recoveryDirectory=/nfs/spark/recovery"

export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop

# Spark install directory, assembly jar, and bin/ at the front of PATH.
export SPARK_HOME=/root/spark-1.4.1-bin-hadoop2.6
export SPARK_JAR=/root/spark-1.4.1-bin-hadoop2.6/lib/spark-assembly-1.4.1-hadoop2.6.0.jar
export PATH=$SPARK_HOME/bin:$PATH
 
 
4、修改 $SPARK_HOME/conf/slaves 文件,每行写一个 worker 节点的主机名:
wls02
wls03
 
5、拷贝到其他机器
 
scp scala-2.11.6.tgz root@wls02:/root
scp scala-2.11.6.tgz root@wls03:/root
scp spark-1.4.1-bin-hadoop2.6.zip root@wls03:/root
scp spark-1.4.1-bin-hadoop2.6.zip root@wls02:/root
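(先在 wls02、wls03 上解压安装包,再把配置文件拷贝到每一台 worker 上)
scp spark-1.4.1-bin-hadoop2.6/conf/* root@wls02:/root/spark-1.4.1-bin-hadoop2.6/conf/
scp spark-1.4.1-bin-hadoop2.6/conf/* root@wls03:/root/spark-1.4.1-bin-hadoop2.6/conf/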
 
 
6、修改权限(不修改貌似也行,因为本身就是可执行文件)
bin目录下
chmod a+x spark-shell
chmod a+x spark-submit
chmod a+x spark-class
chmod a+x compute-classpath.sh
 
sbin目录下
chmod a+x *.sh
 
7、启动
cd到sbin目录下
 
./start-all.sh
 
 
运行(集群模式,其他模式官网写的很清楚,直接运行就可以)
重点就是class,是自己的类,jars有mysql的驱动和项目打的jar包
 
  (官网的例子)
./bin/spark-submit --class org.apache.spark.examples.SparkPi \
    --master yarn-cluster \
    --num-executors 3 \
    --driver-memory 4g \
    --executor-memory 2g \
    --executor-cores 1 \
    --queue thequeue \
    lib/spark-examples*.jar \
    10
(自己写的推荐)
./bin/spark-submit --class com.tuijian.SparkToJDBC \
    --master yarn-cluster \
    --num-executors 3 \
    --driver-memory 1g \
    --executor-memory 1g \
    --executor-cores 1 \
    --jars /root/founderRecommend.jar \
    lib/mysql*.jar
 
启动后可以在浏览器中访问 http://wls01:8080 查看 Spark Master 的 Web UI。
 
也可以把所有环境变量直接写在 /etc/profile 里。下面是一份完整的 /etc/profile 示例(注意其中的版本和路径与上文不同):
# /etc/profile
 
# System wide environment and startup programs, for login setup
# Functions and aliases go in /etc/bashrc
###########java
# Append the protobuf library directory to the loader search path.
# ${VAR:+...} adds the "existing:" prefix only when LD_LIBRARY_PATH is
# already non-empty; a plain "$LD_LIBRARY_PATH:/dir" would start with ':'
# when it is unset, and the dynamic loader reads that empty entry as the
# current working directory.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/usr/local/protobuf/lib
export JAVA_HOME=/usr/soft/jdk1.7.0_71
PATH=$PATH:$JAVA_HOME/bin
#CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib:/usr/soft/jdk1.7.0_71/lib/tools.jar
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
#############hadoop
# All Hadoop component homes point at the same install directory.
HADOOP_INSTALL=/usr/hadoopsoft/hadoop-2.5.2
HADOOP_HOME=$HADOOP_INSTALL
HADOOP_MAPRED_HOME=$HADOOP_INSTALL
HADOOP_COMMON_HOME=$HADOOP_INSTALL
HADOOP_HDFS_HOME=$HADOOP_INSTALL
YARN_HOME=$HADOOP_INSTALL
HADOOP_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
export HADOOP_INSTALL HADOOP_HOME
export HADOOP_MAPRED_HOME HADOOP_COMMON_HOME HADOOP_HDFS_HOME
export YARN_HOME HADOOP_CONF_DIR

# Hadoop's bin and sbin directories are appended to PATH, in that order.
export PATH=$PATH:$HADOOP_INSTALL/bin:$HADOOP_INSTALL/sbin
 
###########scala / maven / ant / mahout / findbugs
# Tool install directories. Every *_HOME is exported (not only FINDBUGS_HOME)
# so that programs started from this shell can read them as well as the
# PATH entries derived from them.
export SCALA_HOME=/usr/soft/scala-2.10.4
export MVN_HOME=/usr/soft/apache-maven-3.1.1
export ANT_HOME=/usr/soft/apache-ant-1.9.4
export MAHOUT_HOME=/root/mahout-mahout-0.9

PATH=$PATH:$SCALA_HOME/bin:$MVN_HOME/bin:$ANT_HOME/bin:$MAHOUT_HOME/bin
export FINDBUGS_HOME=/usr/soft/findbugs-3.0.0
PATH=$PATH:$FINDBUGS_HOME/bin

export PATH
# JVM options handed to Maven through its MAVEN_OPTS variable.
export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
###############################
# Spark settings: install directory, master URL, assembly jar; Spark's bin/
# is placed at the front of PATH.
SPARK_HOME=/usr/hadoopsoft/spark-1.3.1-bin-hadoop2.4
MASTER=spark://spark001:7077
SPARK_JAR=/usr/hadoopsoft/spark-1.3.1-bin-hadoop2.4/lib/spark-assembly-1.3.1-hadoop2.4.0.jar
PATH=$SPARK_HOME/bin:$PATH
export SPARK_HOME MASTER SPARK_JAR PATH
 
 
 
# It's NOT a good idea to change this file unless you know what you
# are doing. It's much better to create a custom.sh shell script in
# /etc/profile.d/ to make custom changes to your environment, as this
# will prevent the need for merging in future updates.
 
# pathmunge DIR [after]
# Add DIR to PATH unless PATH already contains it as a complete entry.
# DIR is prepended by default; when the second argument is the literal
# word "after" it is appended instead.
pathmunge () {
    # Wrapping PATH in colons lets one pattern match first, middle and last entries.
    case ":${PATH}:" in
        *:"$1":*) return 0 ;;   # already present: leave PATH untouched
    esac
    case "$2" in
        after) PATH=$PATH:$1 ;;
        *)     PATH=$1:$PATH ;;
    esac
}
 
 
# Fill in the identity variables, but only when /usr/bin/id is executable.
if [ -x /usr/bin/id ]; then
    # ksh workaround: set EUID and UID ourselves when EUID is empty.
    [ -n "$EUID" ] || {
        EUID=$(id -u)
        UID=$(id -ru)
    }
    USER=$(id -un)
    LOGNAME=$USER
    MAIL=/var/spool/mail/$USER
fi
 
# Path manipulation: when EUID is exactly "0" the sbin directories are
# prepended to PATH; otherwise they are appended.
case "$EUID" in
    0)
        pathmunge /sbin
        pathmunge /usr/sbin
        pathmunge /usr/local/sbin
        ;;
    *)
        pathmunge /usr/local/sbin after
        pathmunge /usr/sbin after
        pathmunge /sbin after
        ;;
esac
 
# Host name as reported by /bin/hostname (its error output is discarded).
HOSTNAME=$(/bin/hostname 2>/dev/null)
HISTSIZE=1000
# An existing "ignorespace" setting becomes "ignoreboth"; any other value
# is replaced by "ignoredups".
case "$HISTCONTROL" in
    ignorespace) HISTCONTROL=ignoreboth ;;
    *)           HISTCONTROL=ignoredups ;;
esac

export PATH USER LOGNAME MAIL HOSTNAME HISTSIZE HISTCONTROL
 
# By default, we want umask to get set. This sets it for login shell
# Current threshold for system reserved uid/gids is 200
# You could check uidgid reservation validity in
# /usr/share/doc/setup-*/uidgid file
#
# A UID above 199 whose group name equals the user name gets umask 002;
# every other case gets 022. "$UID" is quoted so that an empty value still
# reaches the test as one argument, and the test simply fails into the
# 022 branch.
if [ "$UID" -gt 199 ] && [ "$(id -gn)" = "$(id -un)" ]; then
    umask 002
else
    umask 022
fi
 
# Source every readable /etc/profile.d/*.sh. When the shell's option flags
# ($-) contain "i" the script's output is left alone; otherwise stdout and
# stderr are discarded.
for i in /etc/profile.d/*.sh ; do
    [ -r "$i" ] || continue
    case "$-" in
        *i*) . "$i" ;;
        *)   . "$i" >/dev/null 2>&1 ;;
    esac
done

# Remove the loop variable and the pathmunge helper from the shell.
unset i
unset -f pathmunge
 
 
 
 
 
 
 
 
原文地址:https://www.cnblogs.com/wangliansong/p/4740370.html