GraphX的三大图算法

1. PageRank

http://blog.csdn.net/hguisu/article/details/7996185

2. Connected Components

3. Triangle Counting

例子:

users.txt

1,BarackObama,Barack Obama
2,ladygaga,Goddess of Love
3,jeresig,John Resig
4,justinbieber,Justin Bieber
6,matei_zaharia,Matei Zaharia
7,odersky,Martin Odersky
8,anonsys

followers.txt

2 1
4 1
1 2
6 3
7 3
7 6
6 7
3 7

算法实战:

package main.scala

import org.apache.spark.graphx.{GraphLoader, PartitionStrategy}
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demonstrates three GraphX algorithms on a small follower graph:
  * PageRank, Connected Components and Triangle Counting.
  *
  * The graph is read from an edge-list file (`followers.txt`, one
  * "srcId dstId" pair per line) and the results are joined with the user
  * names read from `users.txt` (comma-separated: id,username[,full name]).
  */
object graphx_algorism {
  // NOTE(review): presumably points Hadoop at a local winutils install on a
  // Windows development machine — confirm before running elsewhere.
  System.setProperty("hadoop.home.dir","E:/zhuangji/winutil/")

  // Input files shared by all three algorithm sections.
  private val FollowersPath = "E:/Java_WS/ScalaDemo/data/followers.txt"
  private val UsersPath = "E:/Java_WS/ScalaDemo/data/users.txt"

  /**
    * Runs the three algorithms in sequence and prints their results to stdout.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    // spark.cores.max caps the total cores an application may request on a
    // cluster; with a local[2] master the thread count comes from the master URL.
    val conf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("graph_algorism")
      .set("spark.cores.max", "10")
    val sc = new SparkContext(conf)

    try {
      // Build the graph from the edge-list file.
      val graph = GraphLoader.edgeListFile(sc, FollowersPath)

      // (vertexId, username) pairs; blank lines are skipped so that a trailing
      // empty line in the file does not fail on "".toLong.
      val users = sc.textFile(UsersPath)
        .filter(_.trim.nonEmpty)
        .map { line =>
          val fields = line.split(",")
          (fields(0).toLong, fields(1))
        }

      // 1. PageRank
      // The argument is the convergence tolerance: iteration stops once the
      // ranks change by less than this amount (smaller => more accurate).
      val ranks = graph.pageRank(0.001).vertices
      ranks.collect().foreach(println)
      val ranksByUsername = users.join(ranks).map {
        case (_, (username, rank)) => (username, rank)
      }
      println(ranksByUsername.collect().mkString("\n"))

      // 2. Connected Components
      // Each vertex is labelled with the lowest vertex id in its component.
      val cc = graph.connectedComponents().vertices
      // Print the elements; println on the array itself only shows its identity.
      cc.collect().foreach(println)
      val ccByUsername = users.join(cc).map {
        case (_, (username, component)) => (username, component)
      }
      println(ccByUsername.collect().mkString("\n"))

      // 3. Triangle Count
      // Per the GraphX guide, triangleCount expects edges in canonical
      // orientation (srcId < dstId) and a graph partitioned via partitionBy.
      val graphT = GraphLoader
        .edgeListFile(sc, FollowersPath, canonicalOrientation = true)
        .partitionBy(PartitionStrategy.RandomVertexCut)
      val triCounts = graphT.triangleCount().vertices
      val triCountByUsername = users.join(triCounts).map {
        case (_, (username, tc)) => (username, tc)
      }
      println(triCountByUsername.collect().mkString("\n"))
    } finally {
      // Release Spark resources even if one of the jobs above fails.
      sc.stop()
    }
  }
}

  

原文地址:https://www.cnblogs.com/skyEva/p/5916931.html