皮尔逊相关度评价(Pearson Correlation Score)

 1 def sim_pearson(prefs, person1, person2):
 2     si = {}
 3 
 4     for it in prefs[person1]:
 5         if it in prefs[person2]:
 6             si[it] = 1
 7 
 8     n = len(si)
 9     if n == 0: return 0
10 
11     sum1 = sum([prefs[person1][it] for it in si])
12     sum2 = sum([prefs[person2][it] for it in si])
13 
14     square_sum1 = sum([prefs[person1][it] ** 2 for it in si])
15     square_sum2 = sum([prefs[person2][it] ** 2 for it in si])
16 
17     p_sum = sum([prefs[person1][it] * prefs[person2][it] for it in si])
18 
19     num = p_sum - (sum1 * sum2 / n)
20     den = sqrt((square_sum1 - sum1 ** 2 / n) * (square_sum2 - sum2 ** 2 / n))
21     if den == 0: return 0
22 
23     r = num / den
24     return r
25 
# Demo: print the Pearson score for two people in `critics`
# (`critics` is not defined in this snippet; it must be provided elsewhere).
# The parenthesized single-argument form works on both Python 2 and 3.
print(sim_pearson(critics, 'Lisa Rose', 'Gene Seymour'))
原文地址:https://www.cnblogs.com/tuwenmin/p/3026202.html