借鉴了老师的代码,但是没有做封装。
效果还是挺好的:平均准确率(average accuracy)> 90%。
from collections import Counter
from random import shuffle

import numpy as np


def read(path: str) -> tuple:
    """Load a comma-separated data set whose last column is the class label.

    Blank lines are skipped. Every column except the last is parsed as a
    float feature; the last column is mapped to an integer class id, assigned
    in order of first appearance starting at 1.

    Args:
        path: Path of the text file to read.

    Returns:
        A 6-tuple ``(X, y, feature_count, sample_count, class_map,
        class_count)`` where ``X`` is a list of feature lists, ``y`` the list
        of integer class ids and ``class_map`` maps label text to class id.
        ``feature_count`` is 0 when the file holds no samples.

    Raises:
        ValueError: If a feature column cannot be parsed as a float.
    """
    X, y = [], []
    class_map = {}
    # The context manager closes the file even if a row fails to parse.
    with open(path, "r") as f:
        for row in f:
            row = row.strip()
            if not row:
                continue
            *features, label = row.split(",")
            X.append([float(item) for item in features])
            # The default is evaluated before insertion, so a new label gets
            # the next free id (1, 2, 3, ...).
            y.append(class_map.setdefault(label, len(class_map) + 1))
    feature_count = len(X[0]) if X else 0
    return X, y, feature_count, len(y), class_map, len(class_map)


def randomIdArray(n: int) -> list:
    """Return the integers ``0 .. n-1`` in random order."""
    array = list(range(n))
    shuffle(array)
    return array


def splitData(X, y, n, m, rate: float) -> tuple:
    """Randomly split the samples into a training part and a test part.

    Args:
        X: Sequence of feature vectors.
        y: Sequence of labels, parallel to ``X``.
        n: Number of samples to split.
        m: Unused; kept so existing callers keep working.
        rate: Fraction of the samples that goes to the training part. Must
            satisfy ``rate <= 1.0`` and ``rate * n > 1``.

    Returns:
        ``(train_X, train_y, train_size, test_X, test_y, test_size)``.

    Raises:
        ValueError: If ``rate`` is outside the accepted range.
    """
    if not (1 < rate * n and rate <= 1.0):
        # Raise instead of returning a short tuple: the success path returns
        # six values, so a four-value error return could never be unpacked.
        raise ValueError(
            "Please input rate (0,1] and ensure train size is not empty!"
        )
    train_size = int(rate * n)
    order = randomIdArray(n)
    train_ids, test_ids = order[:train_size], order[train_size:]
    train_X = [X[i] for i in train_ids]
    train_y = [y[i] for i in train_ids]
    test_X = [X[i] for i in test_ids]
    test_y = [y[i] for i in test_ids]
    return train_X, train_y, train_size, test_X, test_y, n - train_size


def KNN(K: int, X, y, n, class_size, query_X=None):
    """Classify points by majority vote among their K nearest neighbours.

    Distances are Euclidean. Neighbours at equal distance keep their order in
    ``X``; when several labels share the highest vote count, the label whose
    first voter is nearest wins.

    Args:
        K: Number of neighbours that vote. Clamped to the number of
            reference samples.
        X: Reference feature vectors.
        y: Labels of the reference samples, parallel to ``X``.
        n: Number of leading samples of ``X``/``y`` used as references.
        class_size: Unused; kept so existing callers keep working.
        query_X: Points to classify. When omitted, the points of ``X`` are
            classified against themselves (each point is then one of its own
            neighbours).

    Returns:
        A list with one predicted label per query point. A prediction is
        ``-1`` when no neighbour can vote (``K <= 0`` or no references).
    """
    queries = X if query_X is None else query_X
    if len(queries) == 0:
        return []

    labels = list(y[:n])
    k = min(K, len(labels))
    if k <= 0:
        return [-1] * len(queries)

    # Convert once instead of once per (query, reference) pair.
    references = np.asarray(X[:n], dtype=float)
    predict_y = []
    for query in np.asarray(queries, dtype=float):
        distances = np.linalg.norm(references - query, axis=1)
        # A stable sort keeps equally distant references in input order.
        nearest = np.argsort(distances, kind="stable")[:k]
        votes = Counter(labels[i] for i in nearest)
        # max() returns the first key holding the highest count, i.e. the
        # label that entered the vote earliest among tied labels.
        predict_y.append(max(votes, key=votes.get))
    return predict_y


def KNN_training(train_X, train_y, train_size, min_K, learing_rate,
                 class_sz=None):
    """Pick the K with the best accuracy on the training set.

    Candidates are ``min_K, min_K + 2, ...`` below
    ``int(train_size * learing_rate)``. On equal accuracy the smallest K
    wins.

    NOTE: the training points are classified against themselves, so every
    point is one of its own nearest neighbours and the printed accuracies
    are optimistic.

    Args:
        train_X: Training feature vectors.
        train_y: Training labels, parallel to ``train_X``.
        train_size: Number of training samples.
        min_K: Smallest candidate K; also returned when there is no
            candidate at all.
        learing_rate: Upper bound of the candidate range as a fraction of
            ``train_size`` (parameter name kept as is for compatibility).
        class_sz: Forwarded to ``KNN``, which ignores it. Optional so the
            function no longer depends on a module-level global.

    Returns:
        The selected K.
    """
    best_K, best_accuracy = min_K, -1.0
    for K in range(min_K, int(train_size * learing_rate), 2):
        predict_train_y = KNN(K, train_X, train_y, train_size, class_sz)
        hits = sum(
            1
            for truth, guess in zip(train_y[:train_size], predict_train_y)
            if truth == guess
        )
        accuracy = hits / train_size
        print("For train set K =", K, "accuracy =", accuracy)
        # Strict comparison keeps the smallest K among equally good ones.
        if accuracy > best_accuracy:
            best_K, best_accuracy = K, accuracy
    print("Training done K =", best_K)
    return best_K


def predict(K, test_X, test_y, test_size, class_sz, train_X=None,
            train_y=None):
    """Classify the test set, print the accuracy and return it.

    Args:
        K: Number of neighbours that vote.
        test_X: Test feature vectors.
        test_y: True test labels, parallel to ``test_X``.
        test_size: Number of test samples that are scored.
        class_sz: Forwarded to ``KNN``, which ignores it.
        train_X: Training feature vectors used as neighbours.
        train_y: Training labels, parallel to ``train_X``.

    Returns:
        The fraction of the first ``test_size`` test samples that were
        classified correctly.

    When ``train_X``/``train_y`` are omitted the test points are classified
    against each other using their own labels. That is kept only for
    existing callers: it leaks the true labels into the prediction.
    """
    if train_X is None or train_y is None:
        predict_test_y = KNN(K, test_X, test_y, test_size, class_sz)
    else:
        predict_test_y = KNN(K, train_X, train_y, len(train_X), class_sz,
                             query_X=test_X)
    hits = sum(
        1
        for truth, guess in zip(test_y[:test_size], predict_test_y)
        if truth == guess
    )
    accuracy = hits / test_size
    print("For test set K =", K, "accuracy =", accuracy)
    return accuracy


if __name__ == '__main__':
    X, y, m, n, class_map, class_sz = read("iris.data")
    train_X, train_y, train_size, test_X, test_y, test_size = splitData(
        X, y, n, m, 0.75)
    K = KNN_training(train_X, train_y, train_size, 5, 0.15, class_sz)
    # Score the test set against the training data, not against itself.
    predict(K, test_X, test_y, test_size, class_sz, train_X, train_y)