Word count in Scala

package com.oracle.scala

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

/**
 * WordCount example
 */
object WordCount {
  def main(args: Array[String]): Unit = {
    // Create a SparkConf: the runtime configuration for Spark
    val sparkConf = new SparkConf().setAppName("WordCount")
      .setMaster("local")

    // Instantiate a SparkContext from the SparkConf
    val sc = new SparkContext(sparkConf)
    
    // Load the local text file into an RDD of lines
    // (backslashes must be escaped in a Scala string literal)
    val linesRdd = sc.textFile("C:\\Users\\Desktop\\csd\\wordcount.txt")
    
    // Split each line into words on the space delimiter
    val wordsRdd = linesRdd.flatMap(_.split(" "))
    
    // Map each word to a (word, 1) key-value pair
    val pairRdd = wordsRdd.map((_, 1))
    
    // Aggregate the values for identical keys to get per-word counts
    val resultRdd = pairRdd.reduceByKey(_ + _)
    
    // collect() is an action, so it triggers the job
    val results = resultRdd.collect()

    for (result <- results) {
      println(result)
    }
    sc.stop()
  }
}
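
For reference, the same pipeline can be expressed as a single chain of transformations. This is a minimal sketch using the same calls as the listing above; the input path is the same placeholder and would need to point at a real file on your machine:

// Condensed equivalent: chain the transformations and print each (word, count) pair
sc.textFile("C:\\Users\\Desktop\\csd\\wordcount.txt")
  .flatMap(_.split(" "))
  .map((_, 1))
  .reduceByKey(_ + _)
  .collect()
  .foreach(println)

Because the master is set to "local", the job runs inside a single local JVM, so the example can be executed straight from an IDE without a Spark cluster.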

Original article: https://www.cnblogs.com/TendToBigData/p/10501309.html