spark APP升级

2017-08-14,涛哥,共享女友,360应用,v1.0
2017-08-14,涛哥,共享女友,360应用,v1.2
2017-08-14,涛哥,共享女友,360应用,v1.3
2017-09-14,涛哥,同城交友,360应用,v1.0
2017-09-14,涛哥,同城交友,360应用,v1.0
2017-09-14,涛哥,同城交友,360应用,v1.3
2017-09-14,涛哥,同城交友,360应用,v1.5
2017-08-15,涛哥,约吗,360应用,v1.3


1条结果数据: 2017-08-14,涛哥,共享女友,360应用,v1.0,v1.3
需求1:求出最大升级情况(按 日期、用户、应用、渠道 分组,取每组的最低版本和最高版本)
package com.bw.homework

import scala.io.Source

/**
  * Reads app-upgrade records from `tao.txt` and prints, for every
  * (date, user, app, channel) group, the lowest and the highest version seen.
  *
  * Each input line is expected to hold five comma-separated fields, e.g.
  * `2017-08-14,涛哥,共享女友,360应用,v1.0`.
  */
object TaoGe {

  /** Grouping key: (date, user, app, channel). */
  private type Key = (String, String, String, String)

  /** Minimum number of comma-separated fields a usable record must have. */
  private val FieldCount = 5

  /**
    * Orders version strings by their dot-separated numeric components, so that
    * "v1.10" is newer than "v1.2" (plain string comparison gets this wrong).
    *
    * Any leading non-digit prefix such as "v" is ignored. Numeric components
    * sort before non-numeric ones; ties fall back to plain string comparison
    * so the ordering stays total.
    */
  private val versionOrdering: Ordering[String] = new Ordering[String] {
    private def isAsciiDigit(c: Char): Boolean = c >= '0' && c <= '9'

    private def components(version: String): List[String] =
      version.dropWhile(c => !isAsciiDigit(c)).split('.').toList

    private def isNumber(s: String): Boolean = s.nonEmpty && s.forall(isAsciiDigit)

    private def compareComponent(x: String, y: String): Int =
      (isNumber(x), isNumber(y)) match {
        case (true, true)   => BigInt(x) compare BigInt(y)
        case (true, false)  => -1
        case (false, true)  => 1
        case (false, false) => x compareTo y
      }

    override def compare(a: String, b: String): Int =
      components(a)
        .zipAll(components(b), "0", "0") // "1" and "1.0" compare as equal components
        .map { case (x, y) => compareComponent(x, y) }
        .find(_ != 0)
        .getOrElse(a compareTo b)
  }

  /**
    * Program entry point.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val source = Source.fromFile("tao.txt")
    // Materialise the lines before closing the file; blank or truncated lines
    // are skipped instead of failing later with ArrayIndexOutOfBoundsException.
    val records: List[Array[String]] =
      try source.getLines().map(_.split(",")).filter(_.length >= FieldCount).toList
      finally source.close()

    // Versions per group, in file order. A strict `map` is used instead of
    // `mapValues`, which is a lazy view in 2.12 and deprecated in 2.13.
    val versionsByKey: Map[Key, List[String]] =
      records
        .groupBy(r => (r(0), r(1), r(2), r(3)))
        .map { case (key, rows) => (key, rows.map(row => row(4))) }

    // Requirement 2: every consecutive (from, to) pair in file order.
    // groupBy never yields an empty group, so `tail` is safe here.
    val res7: List[(Key, (String, String))] =
      versionsByKey.toList.flatMap { case (key, versions) =>
        versions.zip(versions.tail).map(step => (key, step))
      }
    // res7.foreach(println)
    println("-----------*****************-------------")

    // Requirement 1: lowest and highest version of each group.
    val res8: Map[Key, (String, String)] =
      versionsByKey.map { case (key, versions) =>
        (key, (versions.min(versionOrdering), versions.max(versionOrdering)))
      }
    res8.foreach(println)
  }

}
-----------*****************-------------
((2017-08-14,涛哥,共享女友,360应用),(v1.0,v1.3))
((2017-09-14,涛哥,同城交友,360应用),(v1.0,v1.5))
((2017-08-15,涛哥,约吗,360应用),(v1.3,v1.3))




需求2:
2017-08-14,涛哥,共享女友,360应用,v1.0,v1.2
2017-08-14,涛哥,共享女友,360应用,v1.2,v1.3
求出每次升级情况
res7 的打印结果(取消代码中 res7.foreach(println) 的注释即可看到):
((2017-08-14,涛哥,共享女友,360应用),(v1.0,v1.2))
((2017-08-14,涛哥,共享女友,360应用),(v1.2,v1.3))
((2017-09-14,涛哥,同城交友,360应用),(v1.0,v1.0))
((2017-09-14,涛哥,同城交友,360应用),(v1.0,v1.3))
((2017-09-14,涛哥,同城交友,360应用),(v1.3,v1.5))



(二)另一个案例
package com.bw.spark

import scala.io.{BufferedSource, Source}


/**
  * Reads tab-separated upgrade records and prints every upgrade step per group,
  * followed by the lowest and highest version of each group.
  *
  * Records are grouped by fields 0 and 1, and the version is read from field 3.
  * NOTE(review): the input file is not shown here; the notes in `main` suggest
  * fields 0 and 1 are user and app — confirm against the data.
  */
object jishi {

  /** Grouping key built from fields 0 and 1 of a record. */
  private type Key = (String, String)

  /** Minimum number of tab-separated fields a usable record must have. */
  private val FieldCount = 4

  /**
    * Orders version strings by their dot-separated numeric components, so that
    * "v1.10" is newer than "v1.2" (plain string comparison gets this wrong).
    *
    * Any leading non-digit prefix such as "v" is ignored. Numeric components
    * sort before non-numeric ones; ties fall back to plain string comparison
    * so the ordering stays total.
    */
  private val versionOrdering: Ordering[String] = new Ordering[String] {
    private def isAsciiDigit(c: Char): Boolean = c >= '0' && c <= '9'

    private def components(version: String): List[String] =
      version.dropWhile(c => !isAsciiDigit(c)).split('.').toList

    private def isNumber(s: String): Boolean = s.nonEmpty && s.forall(isAsciiDigit)

    private def compareComponent(x: String, y: String): Int =
      (isNumber(x), isNumber(y)) match {
        case (true, true)   => BigInt(x) compare BigInt(y)
        case (true, false)  => -1
        case (false, true)  => 1
        case (false, false) => x compareTo y
      }

    override def compare(a: String, b: String): Int =
      components(a)
        .zipAll(components(b), "0", "0") // "1" and "1.0" compare as equal components
        .map { case (x, y) => compareComponent(x, y) }
        .find(_ != 0)
        .getOrElse(a compareTo b)
  }

  /**
    * Program entry point.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    /**
      * 1. Produce every upgrade step for each user and each app, e.g.
      * 张三 腾讯视频 v1.3 v1.4
      * 张三 腾讯视频 v1.4 v1.6
      * 张三 腾讯视频 v1.6 v1.9
      */
    // Backslashes must be escaped: "\i", "\s" and "\j" are invalid escape
    // sequences and stop the file from compiling.
    val source = Source.fromFile("D:\\ideaworkspace\\spark01\\jishi")
    // Materialise the lines before closing the file; blank or truncated lines
    // are skipped instead of failing later with ArrayIndexOutOfBoundsException.
    val records: List[Array[String]] =
      try source.getLines().map(_.split("\t")).filter(_.length >= FieldCount).toList
      finally source.close()

    // Versions per group, in file order. A strict `map` is used instead of
    // `mapValues`, which is a lazy view in 2.12 and deprecated in 2.13.
    val versionsByKey: Map[Key, List[String]] =
      records
        .groupBy(r => (r(0), r(1)))
        .map { case (key, rows) => (key, rows.map(row => row(3))) }

    // Every consecutive (from, to) pair in file order.
    // groupBy never yields an empty group, so `tail` is safe here.
    val upgradeSteps: List[(Key, (String, String))] =
      versionsByKey.toList.flatMap { case (key, versions) =>
        versions.zip(versions.tail).map(step => (key, step))
      }
    upgradeSteps.foreach(println)
    println("-----------*****************-------------")

    // Lowest and highest version of each group.
    val versionRange: Map[Key, (String, String)] =
      versionsByKey.map { case (key, versions) =>
        (key, (versions.min(versionOrdering), versions.max(versionOrdering)))
      }
    versionRange.foreach(println)
  }
}


(三)
package com.bw.homework

import scala.io.Source
//2017-09-14,涛哥,同城交友,360应用,v1.0
/**
  * Reads app-upgrade records from `tao.txt` and prints two reports:
  * the lowest/highest version of every (date, user, app, channel) group,
  * then every upgrade step between consecutive distinct versions of that group.
  *
  * Each input line is expected to hold five comma-separated fields, e.g.
  * `2017-09-14,涛哥,同城交友,360应用,v1.0`.
  */
object TaoGe2 {

  /** Grouping key: (date, user, app, channel). */
  private type Key = (String, String, String, String)

  /** Minimum number of comma-separated fields a usable record must have. */
  private val FieldCount = 5

  /**
    * Orders version strings by their dot-separated numeric components, so that
    * "v1.10" is newer than "v1.2" (plain string comparison gets this wrong).
    *
    * Any leading non-digit prefix such as "v" is ignored. Numeric components
    * sort before non-numeric ones; ties fall back to plain string comparison
    * so the ordering stays total.
    */
  private val versionOrdering: Ordering[String] = new Ordering[String] {
    private def isAsciiDigit(c: Char): Boolean = c >= '0' && c <= '9'

    private def components(version: String): List[String] =
      version.dropWhile(c => !isAsciiDigit(c)).split('.').toList

    private def isNumber(s: String): Boolean = s.nonEmpty && s.forall(isAsciiDigit)

    private def compareComponent(x: String, y: String): Int =
      (isNumber(x), isNumber(y)) match {
        case (true, true)   => BigInt(x) compare BigInt(y)
        case (true, false)  => -1
        case (false, true)  => 1
        case (false, false) => x compareTo y
      }

    override def compare(a: String, b: String): Int =
      components(a)
        .zipAll(components(b), "0", "0") // "1" and "1.0" compare as equal components
        .map { case (x, y) => compareComponent(x, y) }
        .find(_ != 0)
        .getOrElse(a compareTo b)
  }

  /**
    * Program entry point.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val source = Source.fromFile("tao.txt")
    // Materialise the records before closing the file; blank or truncated lines
    // are skipped instead of failing with ArrayIndexOutOfBoundsException.
    val records: List[(Key, String)] =
      try {
        source.getLines()
          .map(_.split(",")) // split on commas
          .filter(_.length >= FieldCount)
          .map(f => ((f(0), f(1), f(2), f(3)), f(4)))
          .toList
      } finally source.close()

    // Versions per group, in file order. A strict `map` is used instead of
    // `mapValues`, which is a lazy view in 2.12 and deprecated in 2.13.
    val versionsByKey: Map[Key, List[String]] =
      records
        .groupBy(_._1)
        .map { case (key, rows) => (key, rows.map(_._2)) }

    // Report 1: lowest and highest version of each group.
    val maxAndMin: Map[Key, (String, String)] =
      versionsByKey.map { case (key, versions) =>
        (key, (versions.min(versionOrdering), versions.max(versionOrdering)))
      }
    maxAndMin.foreach(println)

    println("==================================")

    // Report 2: consecutive pairs of the distinct versions in ascending order,
    // e.g. v1.0 v1.1 v1.2 -> (v1.0,v1.1), (v1.1,v1.2).
    // A group with a single distinct version is reported as (v, v).
    val allVersion: Map[Key, List[(String, String)]] =
      versionsByKey.map { case (key, versions) =>
        val ordered = versions.distinct.sorted(versionOrdering)
        val steps = ordered match {
          case only :: Nil => List((only, only))
          case _           => ordered.zip(ordered.drop(1))
        }
        (key, steps)
      }

    val result: List[(Key, (String, String))] =
      allVersion.toList.flatMap { case (key, steps) => steps.map(step => (key, step)) }
    result.foreach(println)
  }
}
原文地址:https://www.cnblogs.com/wxk161640207382/p/11189204.html