10-4 集合之应用

单词统计

hello nihao
shi jie
shijie
hello jie
en en en 
hao hao
hao en
scala> val lst = io.Source.fromFile("e:/1.txt").getLines.toList.flatMap(_.split(" +")).groupBy(s=>s).mapValues(_.length)
lst: scala.collection.immutable.Map[String,Int] = Map(shi -> 1, en -> 4, jie -> 2, shijie -> 1, hao -> 3, hello -> 2, nihao -> 1)

scala> lst.foreach(println)
(shi,1)
(en,4)
(jie,2)
(shijie,1)
(hao,3)
(hello,2)
(nihao,1)

分解

//读取文件
scala> val it1 = io.Source.fromFile("e:/1.txt")
it1: scala.io.BufferedSource = non-empty iterator
//获取所有行的迭代器
scala> val it2 = it1.getLines
it2: Iterator[String] = non-empty iterator
//将其转为列表
scala> val lst1 = it2.toList
lst1: List[String] = List(hello nihao, shi jie, shijie, hello jie, "en en en ", hao hao, hao en)
//压平,生成字符串类型的列表,而不是数组类型的列表
scala> val lst2 = lst1.flatMap(_.split(" +"))
lst2: List[String] = List(hello, nihao, shi, jie, shijie, hello, jie, en, en, en, hao, hao, hao, en)
//按元素进行分组
scala> val map1 = lst2.groupBy(s=>s)
map1: scala.collection.immutable.Map[String,List[String]] =
      Map(shi -> List(shi),
en -> List(en, en, en, en),
jie -> List(jie, jie),
shijie -> List(shijie),
hao -> List(hao, hao, hao),
hello -> List(hello, hello),
nihao -> List(nihao))
//对 v 进行转换
scala> val map2 = map1.mapValues(_.length)
map2: scala.collection.immutable.Map[String,Int] = Map(shi -> 1, en -> 4, jie -> 2, shijie -> 1, hao -> 3, hello -> 2, nihao -> 1)

最高气温

1971 42
1921 41
1902 -20
1993 19
1938 -12
1958 -10
1902 300
1918 45
1951 -22
1936 44
1955 -33
1995 -18
........
// Read every line of the temperature file; each line is "<year> <temperature>".
// The Source is closed as soon as the lines have been materialised, so the
// file handle is not leaked.
val lst1 = {
  val src = io.Source.fromFile("e:/t.txt")
  try src.getLines.toList finally src.close()
}
// Parse each line into a (year, temperature) pair, splitting the line only once.
val lst2 = lst1.map { s =>
  val fields = s.split(" ")
  (fields(0).toInt, fields(1).toInt)
}
// Highest temperature of each year, as (year, max) pairs sorted by year ascending.
val lst3 = lst2.groupBy(_._1).mapValues(_.map(_._2).max).toList.sortBy(_._1)

求最大值、最小值、平均值

// For every year print (year, (max, min, mean)) in ascending year order.
// The temperatures of a year are projected once and reused for all three figures.
lst2.groupBy(_._1).mapValues { records =>
  val temps = records.map(_._2)
  (temps.max, temps.min, temps.sum * 1.0 / records.size)
}.toList.sortBy(_._1).foreach(println)

 年份升序、温度降序

// Print every (year, temperature) record ordered by year ascending and,
// within the same year, by temperature descending.
lst2.sorted(Ordering.Tuple2(Ordering.Int, Ordering.Int.reverse)).foreach(println)

商家评价标签

import java.util.regex.Pattern

import scala.collection.mutable
import scala.collection.JavaConverters._
/*
抽取用户对商家的评价
要求:
    全局按照每个商家的最大评价的数量降序排列
    按照每种评价的个数,对评价列表降序排序
 */
/**
 * Extracts the review tags users attached to each merchant and prints a ranking.
 *
 * Each usable input line has the form "<merchantId>TAB<json>". For every merchant
 * the five most frequent tags are kept, ordered by tag count descending; merchants
 * are then ordered by the count of their most frequent tag, descending.
 */
object 作业 {
    /** How many of the most frequent tags are kept per merchant. */
    private val TopN = 5

    /**
     * Captures the raw contents of the "values" array of the contentTags entry.
     * Triple-quoted so the embedded double quotes and backslashes need no escaping;
     * compiled once instead of once per input line.
     */
    private val ContentTags =
        Pattern.compile(""".*"extInfoList":\[\{"title":"contentTags","values":\[(.+?)\]""")

    def main(args: Array[String]): Unit = {
//        mm1()
        mm2()
    }

    /** Approach 1: pull the tag string out of the JSON text with a regular expression. */
    def mm1(): Unit =
        report("e:/temptags.txt") { json =>
            val m = ContentTags.matcher(json)
            if (m.find()) m.group(1) else ""   // no tags -> empty string
        }

    /** Approach 2: parse the JSON with JSONUtil and join the tags with commas. */
    def mm2(): Unit =
        // Forward slashes: backslashes in a normal string literal are escape
        // sequences ("\t" is a tab, "\s" is not a valid escape at all).
        report("E:/studynode/徐/文件/temptags.txt") { json =>
            JSONUtil.parseJson(json).asScala.mkString(",")   // no tags -> empty string
        }

    /**
     * Reads the file at `path`, counts tags per merchant and prints one
     * (merchantId, List((tag, count), ...)) tuple per merchant.
     *
     * @param path        file to read, decoded as UTF-8
     * @param extractTags turns the JSON column of a line into a comma-separated
     *                    tag string; an empty string means "no tags"
     */
    private def report(path: String)(extractTags: String => String): Unit = {
        val source = scala.io.Source.fromFile(path, "utf8")
        val tagged =
            try {
                source.getLines()
                    .map(_.split("\t"))
                    // Lines without a second column (e.g. blank lines) are skipped
                    // instead of failing on fields(1).
                    .collect { case fields if fields.length > 1 => (fields(0), extractTags(fields(1))) }
                    .filter(_._2 != "")    // drop records that carry no tags
                    .toList                // materialise before the source is closed
            } finally source.close()

        val topTagsByMerchant = tagged.groupBy(_._1).toList.map { case (merchantId, rows) =>
            val counts = rows
                .flatMap(_._2.split(","))   // explode the tag strings into single tags
                .groupBy(identity)
                .toList
                .map { case (tag, occurrences) => (tag, occurrences.size) }
                .sortBy(-_._2)              // most frequent tag first
            (merchantId, counts.take(TopN))
        }

        topTagsByMerchant
            .sortBy { case (_, tags) =>
                // A tag string made only of commas splits to nothing, so the list
                // may be empty; such a merchant ranks last with a count of 0.
                val best = tags.headOption.map(_._2).getOrElse(0)
                -best
            }
            .foreach(println)
    }
}
//方式2、pom中引入依赖
<dependencies>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.47</version>
    </dependency>
</dependencies>
//方式2、解析json工具类
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

import java.util.ArrayList;
import java.util.List;

/**
 * Utility for reading review tags out of a merchant's JSON record.
 * Not instantiable; use the static method.
 */
public class JSONUtil {
    private JSONUtil(){}

    /**
     * Collects the entries of the "values" array of every "extInfoList" element
     * whose "title" is "contentTags".
     *
     * Exceptions thrown by the JSON parser for malformed input are not caught here.
     *
     * @param line text of one JSON object
     * @return the tags in the order they appear; an empty list (never null) when
     *         the input is blank or carries no contentTags values
     */
    public static List<String> parseJson(String line) {

        List<String> list = new ArrayList<String>();

        JSONObject jsonObject = JSON.parseObject(line);
        if (jsonObject == null) {
            // parseObject yields null for null/blank text
            return list;
        }

        JSONArray extInfoList = jsonObject.getJSONArray("extInfoList");
        if (extInfoList == null) {
            return list;
        }

        for (Object o : extInfoList) {
            if (!(o instanceof JSONObject)) {
                // ignore elements that are not JSON objects (null, string, number, ...)
                continue;
            }
            JSONObject jo = (JSONObject) o;

            // Constant first: an element without a "title" must not raise an NPE.
            if (!"contentTags".equals(jo.get("title"))) {
                continue;
            }

            JSONArray values = jo.getJSONArray("values");
            if (values == null) {
                continue;
            }
            for (Object value : values) {
                if (value != null) {
                    list.add(value.toString());
                }
            }
        }
        return list;

    }
}
渐变 --> 突变
原文地址:https://www.cnblogs.com/lybpy/p/9742598.html