// Scala word count


package my.bigdata.scala08

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.io.Source


/** scala word count
* Created by lq on 2017/8/7.
*/
object Task2 {

  /** File read by every word-count variant; resolved against the working directory. */
  private val InputPath = "myfile.txt"

  /** Regex matching a run of one or more whitespace characters. */
  private val WordSeparator = "\\s+"

  /**
   * Splits a piece of text into words.
   *
   * `String.split` yields a leading `""` when the text starts with whitespace and a single `""`
   * for an empty line. Those tokens are not words, so they are dropped here.
   *
   * @param text one line (or token) of input
   * @return the non-empty whitespace-separated words, in order of appearance
   */
  private def tokenize(text: String): List[String] =
    text.split(WordSeparator).filter(_.nonEmpty).toList

  /**
   * Opens [[InputPath]], hands its line iterator to `body`, and always closes the file.
   *
   * The iterator is backed by the open file, so `body` must consume it completely
   * (`foreach`, `foldLeft`, `toList`, ...) before returning; a lazily mapped iterator that
   * escapes this method can no longer be read.
   *
   * @param body computation over the lines of the file
   * @tparam A result type of the computation
   * @return whatever `body` returns
   */
  private def withLines[A](body: Iterator[String] => A): A = {
    val source = Source.fromFile(InputPath)
    try body(source.getLines()) finally source.close()
  }

  /**
   * Basic variant: an external `var` holding an immutable map, updated via `map.getOrElse`.
   *
   * Prints one `(word,count)` trace line per word read, then the final map.
   */
  def scalaWC0(): Unit = {
    val in = new java.util.Scanner(new java.io.File(InputPath))
    val words = new ArrayBuffer[String]
    try {
      // Scanner.next() already returns whitespace-delimited, non-empty tokens,
      // so no further splitting is required.
      while (in.hasNext()) {
        words += in.next()
      }
    } finally in.close()

    var map = Map[String, Int]()
    for (key <- words) {
      val count = map.getOrElse(key, 0) + 1
      map += (key -> count)
      // Trace the count that was actually stored.
      println(key -> count)
    }
    print(map)
  }

  /**
   * Basic variant: nested loops over lines and words, updating an external mutable map
   * via `getOrElse`.
   *
   * Prints one `(word,count)` tuple per distinct word.
   */
  def scalaWC1(): Unit = {
    val resMap = new mutable.HashMap[String, Long]()
    withLines { lines =>
      for (line <- lines; word <- tokenize(line)) {
        resMap.put(word, resMap.getOrElse(word, 0L) + 1L)
      }
    }
    resMap.foreach(println(_))
  }

  /**
   * Traversal variant: `flatMap` the lines into words and update an external mutable map
   * for each one.
   *
   * The traversal uses `foreach` because `Iterator.map` is lazy: a mapped iterator that is
   * never consumed runs none of its side effects, which would leave the map empty.
   */
  def scalaWC5(): Unit = {
    val resMap = new mutable.HashMap[String, Long]()
    withLines { lines =>
      lines.flatMap(line => tokenize(line)).foreach { word =>
        resMap += ((word, resMap.getOrElse(word, 0L) + 1L))
      }
    }
    println(resMap)
  }

  /**
   * Traversal variant: `foldLeft` seeded with an externally declared mutable map.
   *
   * The accumulator passed to the folding function is that same map instance, so the
   * external map holds the result once the fold has finished.
   */
  def scalaWC4(): Unit = {
    val resMap = new mutable.HashMap[String, Long]()
    withLines { lines =>
      lines.flatMap(line => tokenize(line)).foldLeft(resMap) { (acc, word) =>
        acc += ((word, acc.getOrElse(word, 0L) + 1L))
      }
    }
    println(resMap)
  }

  /**
   * Advanced variant without any external variable: `foldLeft` receives a fresh map as its
   * seed and a function that folds each word into the map and returns it, so the fold
   * itself evaluates to the finished map.
   */
  def scalaWC41(): Unit = {
    val res = withLines { lines =>
      lines.flatMap(line => tokenize(line)).foldLeft(mutable.Map[String, Long]()) { (counts, word) =>
        counts += ((word, counts.getOrElse(word, 0L) + 1L))
      }
    }
    println(res)
  }

  /**
   * Advanced variant without any external variable: pair every word with 1, group the
   * pairs by word, and sum each group with `foldLeft`.
   */
  def scalaWC2(): Unit = {
    // Materialise the pairs while the file is still open.
    val pairs = withLines(lines => lines.flatMap(line => tokenize(line)).map((_, 1)).toList)
    val res: Map[String, Int] = pairs.groupBy(_._1).map { case (word, occurrences) =>
      (word, occurrences.foldLeft(0)((sum, pair) => sum + pair._2))
    }
    println(res)
  }

  /**
   * Advanced variant without any external variable: pair every word with 1, group the
   * pairs by word, and merge each group into a single `(word, total)` pair with
   * `reduceLeft`.
   */
  def scalaWC3(): Unit = {
    // Materialise the pairs while the file is still open.
    val pairs = withLines(lines => lines.flatMap(line => tokenize(line)).map((_, 1)).toList)
    // Groups produced by groupBy are never empty, so reduceLeft always has an element.
    val res: Map[String, Int] = pairs.groupBy(_._1).map { case (_, occurrences) =>
      occurrences.reduceLeft((total, next) => (total._1, total._2 + next._2))
    }
    println(res)
  }

  /**
   * Entry point; runs the `foldLeft`-based variant [[scalaWC41]].
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    scalaWC41()
  }

  /*
   Summary: approaches to implementing word count
   1. A loop + an external map variable + the map's getOrElse lookup.
   2. Collection methods only: reach the result through a chain of transformations.
   3. Collection methods that traverse their elements include map, filter, foreach,
      reduceLeft and foldLeft; any of them combined with an external map variable and
      getOrElse can implement word count (lazy ones such as Iterator.map only run once
      the result is consumed).
   */
}
 

// Original article: https://www.cnblogs.com/rocky-AGE-24/p/7301847.html