使用 Spark 计算流量上下基线

/**
 * Computes and prints an upper and lower traffic baseline for every
 * (ip, port) pair and time-of-day window found in the input file.
 *
 * Each non-blank input line is expected to look like
 * `ip|port|startTime|endTime|value`, where both timestamps are
 * `<date> <time>` separated by a single space. Records are grouped first by
 * `ip_port` and then by `<startTime's time>_<endTime's time>`. For every group
 * the baselines are `mean + 1.960 * sqrt(variance)` and
 * `mean - 1.960 * sqrt(variance)`, with mean and variance taken from
 * `Statistics.colStats`.
 *
 * Results and progress are only printed to stdout; nothing is returned.
 * A non-blank line that does not match the expected layout still fails with
 * the exception raised by the indexing or `toDouble` call.
 *
 * @param sc Spark context used to read `/test/baseLineTestData.txt` and to run
 *           the per-group statistics.
 */
def baseLine(sc: SparkContext): Unit = {
  println("--------------------baseLine start--------------------")

  // ip_port -> (startTime_endTime -> every value observed for that window).
  // HashMap here is the mutable variant already used by this code.
  val samples = HashMap[String, HashMap[String, collection.mutable.ArrayBuffer[Double]]]()

  // The file is pulled to the driver and grouped locally.
  val lines = sc.textFile("/test/baseLineTestData.txt").collect()

  // Blank lines (e.g. a trailing newline in the file) carry no record; skip them.
  lines.filter(_.trim.nonEmpty).foreach { line =>
    println("--------------------data1.foreach start--------------------")
    val parts = line.split('|')
    val ip = parts(0)
    val port = parts(1)
    val startTime = parts(2)
    val endTime = parts(3)
    val sun = parts(4).toDouble

    println("ip:" + ip)
    println("port:" + port)
    println("startTime:" + startTime)
    println("endTime:" + endTime)
    println("sun:" + sun)

    val key1 = ip + "_" + port
    println("key1:" + key1)

    // Only the time-of-day part is kept so the same window on different
    // dates lands in the same group.
    val key2 = startTime.split(" ")(1) + "_" + endTime.split(" ")(1)
    println("key2:" + key2)

    if (samples.contains(key1)) {
      println("--------------------map is not null--------------------")
    } else {
      println("--------------------map is null--------------------")
    }

    // getOrElseUpdate creates the missing level on demand, so the first value
    // of a new ip_port and the first value of a new time window are both
    // recorded instead of being dropped or raising NoSuchElementException.
    val windows =
      samples.getOrElseUpdate(key1, HashMap[String, collection.mutable.ArrayBuffer[Double]]())
    val values = windows.getOrElseUpdate(key2, collection.mutable.ArrayBuffer[Double]())
    values += sun
  }

  println("--------------------get data is end--------------------")

  samples.foreach { case (resultKey1, windows) =>
    println("--------------------Statistics start --------------------")
    println("resultKey1:" + resultKey1)

    windows.foreach { case (resultKey2, values) =>
      println("resultKey2:" + resultKey2)

      // colStats works on an RDD of vectors; each value becomes a 1-element vector.
      val vectors = values.map(v => Vectors.dense(v)).toSeq
      val summary: MultivariateStatisticalSummary = Statistics.colStats(sc.parallelize(vectors))

      println("--------------------mean:" + summary.mean + " --------------------")
      println("--------------------variance:" + summary.variance + " --------------------")

      val mean = summary.mean(0)
      val variance = summary.variance(0)
      println("--------------------mean apply 0:" + mean + " --------------------")
      println("--------------------variance apply 0:" + variance + " --------------------")

      // 1.960 is the multiplier applied to the standard deviation on both sides of the mean.
      val spread = 1.960 * Math.sqrt(variance)
      val upbase = mean + spread
      val downbase = mean - spread
      println("------------------- " + upbase + " ---------- " + downbase)
      println("ip port:" + resultKey1 + ",time:" + resultKey2 + ",upbase:" + upbase + ",downbase:" + downbase)
    }
  }

  println("--------------------baseLine end --------------------")
}

需求:计算某个 IP 的某个端口在每天同一时间段(例如 14:02–14:07)的流量上下基线。上基线 = 均值 + 1.96 × 标准差,下基线 = 均值 − 1.96 × 标准差。

数据样例(每行格式:IP|端口|开始时间|结束时间|流量):

192.168.10.110|8080|2015-10-14 14:02|2015-10-14 14:07|3210981
192.168.10.110|8080|2015-10-13 14:02|2015-10-13 14:07|3210881
192.168.10.110|8080|2015-10-12 14:02|2015-10-12 14:07|3210781
192.168.10.110|8080|2015-10-11 14:02|2015-10-11 14:07|3210681
192.168.10.110|8080|2015-10-10 14:02|2015-10-10 14:07|3210581
192.168.10.110|8080|2015-10-09 14:02|2015-10-09 14:07|3210481
192.168.10.110|8080|2015-10-08 14:02|2015-10-08 14:07|3210381
192.168.10.110|8080|2015-10-07 14:02|2015-10-07 14:07|3210281
192.168.10.110|8080|2015-10-06 14:02|2015-10-06 14:07|3210181
192.168.10.110|8080|2015-10-05 14:02|2015-10-05 14:07|3210081
192.168.10.110|8080|2015-10-04 14:02|2015-10-04 14:07|3219981
192.168.10.110|8080|2015-10-03 14:02|2015-10-03 14:07|3218981
192.168.10.110|8080|2015-10-02 14:02|2015-10-02 14:07|3217981
192.168.10.110|8080|2015-10-01 14:02|2015-10-01 14:07|3216981
192.168.10.110|8080|2015-09-30 14:02|2015-09-30 14:07|3215981
192.168.10.110|8080|2015-09-29 14:02|2015-09-29 14:07|3214981
192.168.10.110|8080|2015-09-28 14:02|2015-09-28 14:07|3213981
192.168.10.110|8080|2015-09-27 14:02|2015-09-27 14:07|3212981
192.168.10.110|8080|2015-09-26 14:02|2015-09-26 14:07|3211981
192.168.10.110|8080|2015-09-25 14:02|2015-09-25 14:07|3220981
192.168.10.110|8080|2015-09-24 14:02|2015-09-24 14:07|3230981
192.168.10.110|8080|2015-09-23 14:02|2015-09-23 14:07|3240981
192.168.10.110|8080|2015-09-22 14:02|2015-09-22 14:07|3250981
192.168.10.110|8080|2015-09-21 14:02|2015-09-21 14:07|3260981
192.168.10.110|8080|2015-09-20 14:02|2015-09-20 14:07|3270981
192.168.10.110|8080|2015-09-19 14:02|2015-09-19 14:07|3280981
192.168.10.110|8080|2015-09-18 14:02|2015-09-18 14:07|3290981
192.168.10.110|8080|2015-09-17 14:02|2015-09-17 14:07|3210982
192.168.10.110|8080|2015-09-16 14:02|2015-09-16 14:07|3210983
192.168.10.110|8080|2015-09-15 14:02|2015-09-15 14:07|3210984
192.168.10.110|8080|2015-09-14 14:02|2015-09-14 14:07|3210985
192.168.10.110|8080|2015-09-13 14:02|2015-09-13 14:07|3210986
192.168.10.110|8080|2015-09-12 14:02|2015-09-12 14:07|3210987
192.168.10.110|8080|2015-09-11 14:02|2015-09-11 14:07|3210988
192.168.10.110|8080|2015-09-10 14:02|2015-09-10 14:07|3110989
192.168.10.110|8080|2015-09-09 14:02|2015-09-09 14:07|3210981
192.168.10.110|8080|2015-09-07 14:02|2015-09-07 14:07|3310981
192.168.10.110|8080|2015-09-06 14:02|2015-09-06 14:07|3410981
192.168.10.110|8080|2015-09-05 14:02|2015-09-05 14:07|2510981
192.168.10.110|8081|2015-10-14 14:02|2015-10-14 14:07|2210981
192.168.10.110|8081|2015-10-13 14:02|2015-10-13 14:07|2210881
192.168.10.110|8081|2015-10-12 14:02|2015-10-12 14:07|2210781
192.168.10.110|8081|2015-10-11 14:02|2015-10-11 14:07|2210681
192.168.10.110|8081|2015-10-10 14:02|2015-10-10 14:07|2210581
192.168.10.110|8081|2015-10-09 14:02|2015-10-09 14:07|2210481
192.168.10.110|8081|2015-10-08 14:02|2015-10-08 14:07|2210381
192.168.10.110|8081|2015-10-07 14:02|2015-10-07 14:07|2210281
192.168.10.110|8081|2015-10-06 14:02|2015-10-06 14:07|2210181
192.168.10.110|8081|2015-10-05 14:02|2015-10-05 14:07|2210081
192.168.10.110|8081|2015-10-04 14:02|2015-10-04 14:07|2219981
192.168.10.110|8081|2015-10-03 14:02|2015-10-03 14:07|2218981
192.168.10.110|8081|2015-10-02 14:02|2015-10-02 14:07|2217981
192.168.10.110|8081|2015-10-01 14:02|2015-10-01 14:07|2216981
192.168.10.110|8081|2015-09-30 14:02|2015-09-30 14:07|2215981
192.168.10.110|8081|2015-09-29 14:02|2015-09-29 14:07|2214981
192.168.10.110|8081|2015-09-28 14:02|2015-09-28 14:07|2213981
192.168.10.110|8081|2015-09-27 14:02|2015-09-27 14:07|2212981
192.168.10.110|8081|2015-09-26 14:02|2015-09-26 14:07|2211981
192.168.10.110|8081|2015-09-25 14:02|2015-09-25 14:07|2220981
192.168.10.110|8081|2015-09-24 14:02|2015-09-24 14:07|2230981
192.168.10.110|8081|2015-09-23 14:02|2015-09-23 14:07|2240981
192.168.10.110|8081|2015-09-22 14:02|2015-09-22 14:07|2250981
192.168.10.110|8081|2015-09-21 14:02|2015-09-21 14:07|2260981
192.168.10.110|8081|2015-09-20 14:02|2015-09-20 14:07|2270981
192.168.10.110|8081|2015-09-19 14:02|2015-09-19 14:07|2280981
192.168.10.110|8081|2015-09-18 14:02|2015-09-18 14:07|2290981
192.168.10.110|8081|2015-09-17 14:02|2015-09-17 14:07|2210982
192.168.10.110|8081|2015-09-16 14:02|2015-09-16 14:07|2210983
192.168.10.110|8081|2015-09-15 14:02|2015-09-15 14:07|2210984
192.168.10.110|8081|2015-09-14 14:02|2015-09-14 14:07|2210985
192.168.10.110|8081|2015-09-13 14:02|2015-09-13 14:07|2210986
192.168.10.110|8081|2015-09-12 14:02|2015-09-12 14:07|2210987
192.168.10.110|8081|2015-09-11 14:02|2015-09-11 14:07|2210988
192.168.10.110|8081|2015-09-10 14:02|2015-09-10 14:07|2110989
192.168.10.110|8081|2015-09-09 14:02|2015-09-09 14:07|2210981
192.168.10.110|8081|2015-09-07 14:02|2015-09-07 14:07|2310981
192.168.10.110|8081|2015-09-06 14:02|2015-09-06 14:07|2410981
192.168.10.110|8081|2015-09-05 14:02|2015-09-05 14:07|2510981

  

原文地址:https://www.cnblogs.com/qq27271609/p/4883086.html