Spark Getting Started Example: WordCount

File layout (shown as a screenshot in the original post)
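
As a hypothetical illustration (the original screenshot is not reproduced here), assume the project root contains a datas directory holding a file such as word.txt:

Hello Spark
Hello Scala

sc.textFile("datas") then reads every file under that directory, line by line.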

Code 1

package com.xiao.spark.core.wc

import org.apache.spark.{SparkConf, SparkContext}

object Spark01_WoldCount {
  def main(args: Array[String]): Unit = {
    // Establish the connection to the Spark framework
    val conf = new SparkConf().setMaster("local").setAppName("WordCount")
    val sc = new SparkContext(conf)

    // Business logic
    // Read the files line by line
    val lines = sc.textFile("datas")

    // Split each line into words
    // Flattening: break the whole (a line) into individual elements (words)
    val words = lines.flatMap(_.split(" "))

    // Group the words so they can be counted
    // e.g. hello -> (hello, hello, hello)
    val wordGroup = words.groupBy(word => word)

    // Convert the structure: (word, list) => (word, count)
    val wordCount = wordGroup.map {
      case (word, list) => (word, list.size)
    }

    // Print the result
    wordCount.foreach(println)

    // Close the connection
    sc.stop()
  }
}
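
The groupBy-then-map shape is easier to see on plain Scala collections first; a minimal sketch, independent of Spark:

val demo = List("Hello", "Spark", "Hello")
// groupBy yields Map(Hello -> List(Hello, Hello), Spark -> List(Spark))
val grouped = demo.groupBy(word => word)
// convert each group to (word, count): Map(Hello -> 2, Spark -> 1)
val counts = grouped.map { case (word, list) => (word, list.size) }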

Run result:
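
With the hypothetical word.txt above, the console output would look like the following (tuple order may vary, since the data is unordered):

(Hello,2)
(Spark,1)
(Scala,1)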

Code 2

Code 2 swaps the groupBy-then-map step for reduceByKey, which merges the values of identical keys with the supplied function and so sums the counts directly.

package com.xiao.spark.core.wc

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object Spark02_WoldCount {
  def main(args: Array[String]): Unit = {
    // Establish the connection to the Spark framework
    val conf = new SparkConf().setMaster("local").setAppName("WordCount")
    val sc = new SparkContext(conf)

    // Business logic
    // Read the files line by line
    val lines = sc.textFile("datas")

    // Split each line into words
    // Flattening: break the whole (a line) into individual elements (words)
    val words = lines.flatMap(_.split(" "))

    // Convert the structure: word => (word, 1)
    val wordToOne: RDD[(String, Int)] = words.map(word => (word, 1))

    // Sum the counts of identical keys
    val wordCount: RDD[(String, Int)] = wordToOne.reduceByKey(_ + _)

    // Print the result
    wordCount.foreach(println)

    // Close the connection
    sc.stop()
  }
}

Run result:
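
As a follow-up sketch (not from the original post): reduceByKey pre-aggregates values that share a key before the shuffle, so it moves far less data than groupBy. The whole job can also be written as a single chain:

sc.textFile("datas")
  .flatMap(_.split(" "))
  .map((_, 1))
  .reduceByKey(_ + _)
  .collect()
  .foreach(println)

collect() pulls the results back to the driver, which is fine for demo-sized data.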

Original post: https://www.cnblogs.com/yangxiao-/p/14295326.html