PageRank 算法入门

http://blog.csdn.net/midgard/article/details/7061721

这篇文章很不错。

 1 #include <vector>
 2 #include <set>
 3 #include <string>
 4 #include <iostream>
 5 
 6 using namespace std;
 7 
 8 // use graph store webpage, weight representlink times
 9 class Node {
10 public:
11     explicit Node(string name, double pr = 1) :
12             name_(name), page_rank_(pr) {
13     }
14 
15     ~Node() {
16         linkin_nodes_.clear();
17     }
18     void InsertLinkdInNode(Node* node) {
19         //如果没有链接
20         if (linkin_nodes_.find(node) == linkin_nodes_.end()) {
21             linkin_nodes_.insert(node);
22         }
23         node->InsertLinkOutNode(this);
24     }
25 
26     void InsertLinkOutNode(Node* node) {
27         //如果没有链接
28         if (linkout_nodes_.find(node) == linkout_nodes_.end()) {
29             linkout_nodes_.insert(node);
30         }
31     }
32 
33     void InsertLinkdInNode1(Node* node) {
34         //如果没有链接
35         if (linkin_nodes_.find(node) == linkin_nodes_.end()) {
36             linkin_nodes_.insert(node);
37         }
38 
39     }
40 
41     void InsertLinkOutNode1(Node* node) {
42         //如果没有链接
43         if (linkout_nodes_.find(node) == linkout_nodes_.end()) {
44             linkout_nodes_.insert(node);
45         }
46         node->InsertLinkdInNode1(this);
47     }
48 
49     double GetPageRank() {
50         return page_rank_;
51     }
52 
53     void SetPageRank(double pr) {
54         page_rank_ = pr;
55     }
56 
57     double CalcRank() {
58         double pr = 0;
59         set<Node*>::const_iterator citr = linkin_nodes_.begin();
60         for (; citr != linkin_nodes_.end(); ++citr) {
61             Node * node = *citr;
62             pr += node->GetPageRank() / node->GetOutBoundNum();
63         }
64         return pr;
65     }
66 
67     size_t GetOutBoundNum() {
68         return linkout_nodes_.size();
69     }
70 
71     size_t GetInBoundNum() {
72         return linkin_nodes_.size();
73     }
74 
75     void PrintNode() {
76         cout << "Node:" << name_ << " 's pagerank is: " << page_rank_ << endl;
77     }
78 private:
79     string name_;
80     set<Node*> linkin_nodes_;
81     set<Node*> linkout_nodes_;
82     double page_rank_;
83 };
84 
85 class PageRank {
86 public:
87     PageRank(double q = 0.85);
88     ~PageRank(void);
89     void Calc(vector<Node*> & nodes, int n);
90     double Calc(Node* node);
91     void PrintPageRank(vector<Node*> & nodes);
92 private:
93     double q_; //阻尼系数
94 };
 1 #include "PageRank.h"
 2 #include <iostream>
 3 
 4 PageRank::PageRank(double q) :
 5         q_(q) {
 6     // q_ must < 1
 7 }
 8 
 9 PageRank::~PageRank(void) {
10 }
11 
12 // 迭代计算n次
13 void PageRank::Calc(vector<Node*> & nodes, int n) {
14     for (int i = 0; i < n; ++i) {
15         vector<Node*>::const_iterator citr = nodes.begin();
16         for (; citr != nodes.end(); ++citr) {
17             Node * node = *citr;
18             Calc(node);
19         }
20     }
21 }
22 
23 void PageRank::PrintPageRank(vector<Node*> & nodes) {
24     double total_pr = 0;
25     vector<Node*>::const_iterator citr = nodes.begin();
26     for (; citr != nodes.end(); ++citr) {
27         Node * node = *citr;
28         node->PrintNode();
29         total_pr += node->GetPageRank();
30     }
31     cout << "Total PR:" << total_pr << endl;
32 }
33 
34 double PageRank::Calc(Node * node) {
35     double pr = node->CalcRank();
36     if (pr < 0.00000000000000000000001 && pr > -0.00000000000000000000001) //pr == 0
37             {
38         pr = 1 - q_;
39     } else {
40         pr = pr * q_ + 1 - q_;
41     }
42     node->SetPageRank(pr);
43     return pr;
44 }
 1 #include <iostream>
 2 #include <vector>
 3 #include <string>
 4 #include <map>
 5 #include "PageRank.h"
 6 
 7 using namespace std;
 8 
 9 void InitGraph(vector<Node*> & nodes) { // 邻接表存储方式
10                                         // example 1
11     Node * a = new Node("A");
12     Node * b = new Node("B");
13     Node * c = new Node("C");
14     Node * d = new Node("D");
15     nodes.push_back(a);
16     nodes.push_back(b);
17     nodes.push_back(c);
18     nodes.push_back(d);
19     // link in node
20     // a <- b, c, d
21     a->InsertLinkdInNode(b);
22     a->InsertLinkdInNode(c);
23     a->InsertLinkdInNode(d);
24     // b <- d
25     d->InsertLinkOutNode1(b);
26 //b->InsertLinkdInNode(d);
27     // c <- b, d
28     c->InsertLinkdInNode(b);
29     c->InsertLinkdInNode(d);
30 }
31 
32 void TestPageRank() {
33     // build graph
34     vector<Node*> nodes;
35     InitGraph(nodes);
36     PageRank pr;
37     // 迭代计算5次 pagerank
38     pr.Calc(nodes, 80);
39     pr.PrintPageRank(nodes);
40 }
41 
42 int main(int argc, const char ** argv) {
43     TestPageRank();
44     return 0;
45 }
原文地址:https://www.cnblogs.com/kakamilan/p/2654273.html