# Euclidean distance

# A dictionary of movie critics and their ratings of a small
# set of movies

from math import sqrt

# Nested dictionary of preferences: critic name -> {movie title: rating}.
# Titles must be spelled identically for every critic, because ratings are
# matched between critics by exact dictionary key.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}


#Returns a distance-based similarity score for person1 and person2
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for two people.

    The score is ``1 / (1 + d)`` where ``d`` is the sum of squared rating
    differences over the items both people have rated (the squared distance;
    no square root is taken).  Identical ratings on every shared item give
    1.0, and the score shrinks towards 0 as the ratings diverge.

    Args:
        prefs: Mapping of person -> {item: numeric rating}.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        0 if the two people share no rated items, otherwise a float in the
        range (0, 1].

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``prefs``.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]

    # Collect the shared items once; they drive both the emptiness check
    # and the distance sum.
    shared_items = [item for item in ratings1 if item in ratings2]

    # If they have no ratings in common, return 0.
    if not shared_items:
        return 0

    # Add up the squares of all the differences.
    sum_of_squares = sum(
        (ratings1[item] - ratings2[item]) ** 2 for item in shared_items
    )

    # Use a float numerator so that all-integer ratings are not truncated
    # to 0 by Python 2 integer division.
    return 1.0 / (1 + sum_of_squares)




if __name__ == "__main__":
    # Example of a direct rating lookup:
    #   critics['Lisa Rose']['Lady in the Water']

    # Demo: similarity between two critics.  A parenthesised single-argument
    # print behaves the same as a statement on Python 2 and as a function
    # call on Python 3.
    print(sim_distance(critics, 'Lisa Rose', 'Gene Seymour'))

  

# Original article: https://www.cnblogs.com/lxgeek/p/2239627.html