机器学习:k-近邻(kNN)算法

程序清单一:

from numpy import *
import operator

def creatDataSet():
    """Build the four-point toy data set used by the kNN example.

    Returns:
        A tuple ``(group, labels)``. ``group`` is a 4x2 array holding one
        2-D point per row, and ``labels`` is a list with the class label
        ("A" or "B") of the corresponding row.
    """
    # Keep every point next to its label so the pairing is obvious.
    samples = [
        ([1.0, 1.1], "A"),
        ([1.0, 1.0], "A"),
        ([0, 0], "B"),
        ([0, 0.1], "B"),
    ]
    group = array([point for point, _ in samples])
    labels = [tag for _, tag in samples]
    return group, labels
    
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Distances are Euclidean. Intermediate results are printed so that the
    individual steps of the algorithm can be followed.

    Args:
        inX: The point to classify, a sequence with one value per feature.
        dataSet: The known samples, one row per sample. May be a numpy
            array or a nested sequence (it is converted with ``asarray``).
        labels: Class labels, ``labels[i]`` belonging to row ``i`` of
            ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the ``k`` nearest samples. On
        a tie, the label that was encountered first (i.e. the one with the
        nearer sample) wins, because the sort is stable.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number
            of rows in ``dataSet``.
    """
    dataSet = asarray(dataSet)
    dataSetSize = dataSet.shape[0]

    # Repeat inX once per sample row so it can be subtracted row-wise.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5

    # Row indices ordered from the nearest sample to the farthest one.
    sortedDistIndicis = distances.argsort()
    print(sortedDistIndicis)

    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicis[i]]
        print(voteIlabel)
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
        print(classCount)
    print(classCount)

    # dict.iteritems() exists only on Python 2; items() works on 2 and 3.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    print(sortedClassCount)
    print(sortedClassCount[0][0])
    return sortedClassCount[0][0]
# Demo: build the sample data, then classify the point (0, 0) using its
# three nearest neighbours.
group, labels = creatDataSet()
classify0([0, 0], group, labels, 3)

源代码中的 print 语句仅用于输出中间结果,便于理解代码的执行过程。

numpy 库中 tile 函数的用法:http://www.cnblogs.com/zibu1234/p/4210521.html

sorted:http://blog.163.com/zhuandi_h/blog/static/1802702882012111284632184/

python iteritems(),itemgetter(),sorted():http://blog.csdn.net/u013713637/article/details/39521187 (注:iteritems() 仅存在于 Python 2,在 Python 3 中应改用 items()。)

原文地址:https://www.cnblogs.com/jackzone/p/6079395.html