Python 实现简单 kNN

注释写得很清楚了，借此熟悉了一下 Python 的一些基本语法和 NumPy 中的一些操作。

 1 from numpy import *
 2 import operator
 3 
 4 def createDataSet():
 5     # generate the samples and labels.
 6     group = array([[1.0,1.1], [1.0,1.0], [0,0], [0,0.1]])
 7     labels = ['A', 'A', 'B', 'B']
 8     print group
 9     return group, labels
10 
def classify(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest samples.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify; its length must match the number
            of columns of ``dataSet``.
        dataSet: 2-D numpy array with one training sample per row.
        labels: Sequence where ``labels[i]`` is the label of ``dataSet[i]``.
        k: Number of nearest samples that vote. Must satisfy
            ``1 <= k <= dataSet.shape[0]``.

    Returns:
        The label occurring most often among the k nearest samples.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number
            of samples.
    """
    # Broadcasting subtracts inX from every sample row, so there is no need
    # to materialise a tiled copy of inX first.
    diffMat = asarray(inX) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)   # per-row sum of squared differences
    distances = sqDistances ** 0.5             # Euclidean distance to each sample

    # Sample indices ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k nearest samples: {label: vote count}.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    # The sort is stable, so among tied labels the one met first while
    # iterating the dict comes out on top.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
27 
# Demo: build the toy data set and classify one query point using its
# single nearest neighbour (k=1).
dataSet, labels = createDataSet()
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(classify([-100.0, -100.1], dataSet, labels, 1))
原文地址:https://www.cnblogs.com/kirai/p/5656630.html