Spark 之开发代码与 pom 配置

1.spark on yarn

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._



/**
 * Word-count job for spark-on-yarn deployment.
 *
 * Reads a text file from `args(0)`, counts whitespace-separated words and
 * writes the (word, count) pairs to `args(1)`.
 *
 * On YARN the master, app name and resource settings are supplied by
 * `spark-submit`, so the SparkContext is created without an explicit
 * SparkConf here.
 *
 * NOTE: the object name keeps the original spelling ("SaprkOnYarn") because
 * it is referenced externally via `spark-submit --class`.
 */
object SaprkOnYarn {
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // Usage error: report it and exit with a non-zero status so the
      // launcher can tell the job failed (exit 0 would look like success).
      println("usage: SaprkOnYarn <input path> <output path>")
      System.exit(1)
    }

    // Configuration comes from spark-submit / YARN, not from code.
    val sc = new SparkContext()

    // Input file as an RDD of lines.
    val file = sc.textFile(args(0))

    // Mark the RDD for caching; it backs the transformation chain below.
    file.cache()

    // Classic word count: split each line on single spaces, pair every
    // word with 1, then sum the counts per word.
    val counts = file.flatMap(_.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)

    counts.saveAsTextFile(args(1))
  }
}

2.spark on standalone

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

/**
 * Word-count job for a standalone Spark cluster.
 *
 * Reads a text file from `args(0)`, counts whitespace-separated words and
 * writes the (word, count) pairs to `args(1)`.
 *
 * Master URL, Spark home and app name are hard-coded for the standalone
 * deployment this example targets; adjust them for your cluster.
 */
object SsdTest {
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // Usage error: exit non-zero so the caller can detect the failure.
      println("usage: SsdTest <input path> <output path>")
      System.exit(1)
    }

    val conf = new SparkConf()
    conf.setMaster("spark://192.168.122.213:7077")                  // standalone master URL
    conf.setSparkHome("/usr/local/spark/spark-1.4.1-bin-hadoop2.6") // Spark install path on the workers
    conf.setAppName("StandaloneSparktest")
    // BUG FIX: "SPARK_EXECUTOR_MEMORY" is an environment-variable name and is
    // silently ignored by SparkConf; the correct configuration key is
    // "spark.executor.memory".
    conf.set("spark.executor.memory", "1g")

    val sc = new SparkContext(conf)

    // Input file as an RDD of lines.
    val file = sc.textFile(args(0))

    // Mark the RDD for caching; it backs the transformation chain below.
    file.cache()

    // Word count: split each line on single spaces, pair every word with 1,
    // then sum the counts per word.
    val counts = file.flatMap(_.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)

    counts.saveAsTextFile(args(1))
  }
}

3.pom 文件配置

<repositories>
    <repository>
        <id>akka-repository</id>
        <url>http://repo.akka.io/releases</url>
    </repository>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
    <repository>
        <id>jboss</id>
        <url>http://repository.jboss.org/nexus/content/groups/public-jboss</url>
    </repository>
    <repository>
        <id>sonatype-snapshots</id>
        <url>http://oss.sonatype.org/content/repositories/snapshots/</url>
    </repository>
</repositories>

<build>
    <sourceDirectory>src/</sourceDirectory>
    <testSourceDirectory>src/</testSourceDirectory>

    <plugins>
        <plugin>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <executions>
                <execution>
                    <goals>
                        <goal>compile</goal>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
            <configuration>
                <scalaVersion>2.10.3</scalaVersion>
            </configuration>
        </plugin>

        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.2</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <filters>
                            <filter>
                                <artifact>*:*</artifact>
                                <excludes>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                        <transformers>

                            <transformer
                                    implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>reference.conf</resource>
                            </transformer>

                            <transformer
                                    implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                            </transformer>

                        </transformers>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

<dependencies>
    <!--spark-->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.5.0-cdh5.3.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-tools_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-assembly_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-repl_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-catalyst_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-network-common_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>

    <!--spark on yarn-->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-yarn_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-network-yarn_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>

    <!--spark-sql-->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-hive_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-hive-thriftserver_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>

    <!--spark-streaming-->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-flume_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-flume-sink_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka_2.10</artifactId>
        <version>1.2.0-cdh5.3.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka-assembly_2.10</artifactId>
        <version>1.3.0-cdh5.4.0</version>
    </dependency>
</dependencies>

仅此献给努力的你我!


原文地址:https://www.cnblogs.com/chaoren399/p/4765181.html