# K近邻算法 (k-nearest neighbors algorithm, kNN)

import  numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn import datasets

# Toy training set: ten 2-D samples, one [feature_0, feature_1] pair per row.
raw_data_X = [
    [3.393533211, 2.331273381],
    [3.110073483, 1.781539638],
    [1.343808831, 3.368360954],
    [3.582294042, 4.679179110],
    [2.280362439, 2.866990263],
    [7.423436942, 4.696522875],
    [5.745051997, 3.533989803],
    [9.172168622, 2.511101045],
    [7.792783481, 3.424088941],
    [7.939820817, 0.791637231],
]
# Class label of each row above: the first five rows are class 0,
# the last five are class 1.
raw_data_y = [0] * 5 + [1] * 5

# NumPy versions of the raw lists, so the rest of the script can use
# boolean masks and element-wise arithmetic on them.
X_train, y_train = np.array(raw_data_X), np.array(raw_data_y)

# --- Exploratory output: inspect the training matrix and NumPy indexing ---
print(X_train)
# NOTE(review): a scalar `True` index is NumPy advanced indexing, not a
# column selector -- presumably `X_train[:, 1]` was intended; confirm
# before changing, since it would alter the printed output.
print(X_train[True, 1])
print("888888888")  # visual separator in the console output
class0_mask = y_train == 0
# Second feature of every class-0 sample.
print(X_train[class0_mask, 1])
print("eeeeeeeeeeeee")  # visual separator in the console output

# Scatter each class in its own colour: class 0 in green, class 1 in red.
for label, colour in ((0, 'g'), (1, 'r')):
    members = X_train[y_train == label]
    plt.scatter(members[:, 0], members[:, 1], color=colour)
# plt.show()  -- disabled, so the figure is not displayed at this point
print(X_train)
# Query sample: the 2-D point that is plotted against the training data.
x = np.array([8.093607318, 3.365731514])

# Training samples per class (class 0 green, class 1 red) ...
for cls, marker_colour in ((0, 'g'), (1, 'r')):
    in_class = y_train == cls
    plt.scatter(X_train[in_class, 0], X_train[in_class, 1], color=marker_colour)
# ... plus the query sample in blue, then display the figure.
plt.scatter(*x, color='b')
plt.show()

from collections import Counter
from math import sqrt

# Euclidean distance from the query sample `x` to every training sample,
# kept as a plain list of Python floats (one entry per row of X_train).
distances = [sqrt(((row - x) ** 2).sum()) for row in X_train]
print(distances)

# Training-set row indices ordered from the closest sample to the farthest.
nearest = np.argsort(distances)
print(nearest)
print(nearest[:5])

# Labels of the k closest training samples.
# NOTE: k is even, so a tie between labels is possible; Counter.most_common
# orders equal counts by first occurrence, so the label seen first among
# the nearest neighbours would win such a tie.
k = 6
topK_y = list(y_train[nearest[:k]])
print(topK_y)

# Majority vote: the most frequent label among the k neighbours is the
# predicted class for `x`.
votes = Counter(topK_y)
print(votes)
predict_y, _ = votes.most_common(1)[0]
print(predict_y)
# 原文地址 (original article): https://www.cnblogs.com/heguoxiu/p/10135647.html