INT104-lab11 [聚类] [iris数据集] [K-means Algorithm]

[K-means Algorithm][3D子图]

由于 K-means 算法的初始聚类中心是随机选取的,

所以结果可能较差,甚至会出现运行时错误(RE,例如某个簇为空时求均值会除以零)!

  1 import numpy as np
  2 import random
  3 import matplotlib.pyplot as plt
  4 from sklearn.manifold import TSNE
  5 
  6 
  7 def read(path: str) -> list:
  8     with open(path, "r") as f:
  9         text = f.readlines()
 10         D = []
 11         for row in text:
 12             features = str.split(row, ",")
 13             X = []
 14             for feature in features:
 15                 X.append(feature)
 16             if len(X) == 5:
 17                 D.append(X)
 18     return D
 19 
 20 
 21 def init(D) -> tuple:
 22     n, m = len(D), len(D[0]) - 1
 23     X, Y = [], []
 24     for i in range(n):
 25         x = []
 26         for j in range(m):
 27             x.append(float(D[i][j]))
 28         X.append(x)
 29         Y.append(str.split(D[i][m], "
")[0])
 30     return X, Y, n, m
 31 
 32 
 33 def randomDataset(seed: int, D) -> list:
 34     # set the random seed then the order is fixed and random
 35     # random.seed(seed)
 36     random.shuffle(D)
 37     return D
 38 
 39 
 40 def setPoints(K: int, X: list, n: int, m: int) -> tuple:
 41     minValues, maxValues = [], []
 42     for j in range(m):
 43         maxValue, minValue = -10000.0, 10000.0
 44         for i in range(n):
 45             maxValue = max(maxValue, X[i][j])
 46             minValue = min(minValue, X[i][j])
 47         maxValues.append(maxValue)
 48         minValues.append(minValue)
 49     P = []
 50     for i in range(K):
 51         X = []
 52         for j in range(m):
 53             X.append(random.uniform(minValues[j], maxValues[j]))
 54         P.append(X)
 55     return minValues, maxValues, P
 56 
 57 
 58 def getType(a, b, c):
 59     if a < b and a < b:
 60         return 1
 61     if b < a and b < c:
 62         return 2
 63     return 3
 64 
 65 
 66 eps = 1e-3
 67 
 68 
 69 def compare(P, newP, K, m):
 70     for i in range(K):
 71         for j in range(m):
 72             if abs(P[i][j] - newP[i][j]) > eps:
 73                 return False
 74     return True
 75 
 76 
 77 def G(A, m):
 78     n = len(A)
 79     sumA = [0 for _ in range(m)]
 80     for i in range(n):
 81         for j in range(m):
 82             sumA[j] += A[i][j]
 83     return [(x / n) for x in sumA]
 84 
 85 
 86 def getNewP(X, dis, n, m):
 87     A, B, C = [], [], []
 88     for i in range(n):
 89         if dis[i][0] == 1:
 90             A.append(X[i])
 91         elif dis[i][0] == 2:
 92             B.append(X[i])
 93         else:
 94             C.append(X[i])
 95     return [G(A, m), G(B, m), G(C, m)]
 96 
 97 
 98 def K_means_algorithm(K: int, X: list, Y: list, n: int, m: int):
 99     minValues, maxValues, P = setPoints(K, X, n, m)
100     K_distances = []
101 
102     print(minValues)
103     print(maxValues)
104     print(np.array(P))
105 
106     while True:
107 
108         for i in range(n):
109             dis1 = euclideanDistance(P[0], X[i], m)
110             dis2 = euclideanDistance(P[1], X[i], m)
111             dis3 = euclideanDistance(P[2], X[i], m)
112             Type = getType(dis1, dis2, dis3)
113             K_distances.append([Type, dis1, dis2, dis3])
114 
115         newP = getNewP(X, K_distances, n, m)
116 
117         if compare(P, newP, K, m):
118             break
119         P = newP
120         print(np.array(newP))
121         print("Yes")
122     return P, K_distances
123 
124 
def similarity(A, B, m) -> float:
    """Return the cosine similarity of two vectors.

    Computed as the dot product divided by the product of the Euclidean
    norms, over the first ``m`` coordinates only.

    Args:
        A: First vector.
        B: Second vector.
        m: Number of leading coordinates to use.

    Returns:
        The cosine of the angle between ``A`` and ``B``.  If either vector
        is all zeros the denominator is zero and the result is undefined;
        that case is not handled here.
    """
    dot = sum(A[i] * B[i] for i in range(m))
    norm_a = np.sqrt(sum(A[i] * A[i] for i in range(m)))
    norm_b = np.sqrt(sum(B[i] * B[i] for i in range(m)))
    return dot / (norm_a * norm_b)
134 
135 
def euclideanDistance(A, B, m) -> float:
    """Return the Euclidean distance between two points.

    Args:
        A: First point.
        B: Second point.
        m: Number of leading coordinates to use.

    Returns:
        The square root of the summed squared coordinate differences.
    """
    squared = sum((A[i] - B[i]) * (A[i] - B[i]) for i in range(m))
    return np.sqrt(squared)
141 
142 
def _scatter_groups(ax, embedding, groups):
    """Draw one 3-D scatter series per group of row indices, in order."""
    for indices in groups:
        # An explicit int dtype keeps indexing valid for an empty group.
        points = embedding[np.asarray(indices, dtype=int)]
        ax.scatter(points[:, 0], points[:, 1], points[:, 2])


def answer(X, Y, dis, n):
    """Plot the clustering next to the true species in a 3-D t-SNE view.

    The samples are embedded into three dimensions with t-SNE.  The left
    subplot ("answer") draws them grouped by the cluster label in
    ``dis[i][0]`` (1, 2, anything else); the right subplot ("data") draws
    them grouped by ``Y[i]`` ("Iris-setosa", "Iris-versicolor", anything
    else).  The figure is shown and "Showing done" is printed afterwards.

    Args:
        X: Samples.
        Y: Species label per sample.
        dis: Per-sample records whose first element is the cluster label.
        n: Number of samples to plot.
    """
    embedding = TSNE(n_components=3).fit_transform(np.array(X))

    # Row indices per group; three series are always drawn, even if empty.
    by_cluster = ([], [], [])
    by_species = ([], [], [])
    for i in range(n):
        label = dis[i][0]
        if label == 1:
            by_cluster[0].append(i)
        elif label == 2:
            by_cluster[1].append(i)
        else:
            by_cluster[2].append(i)

        if Y[i] == "Iris-setosa":
            by_species[0].append(i)
        elif Y[i] == "Iris-versicolor":
            by_species[1].append(i)
        else:
            by_species[2].append(i)

    fig = plt.figure(figsize=(12, 6), facecolor='w')

    # Left: clusters found by the algorithm.
    ax1 = fig.add_subplot(121, projection='3d')
    ax1.set_title('answer')
    _scatter_groups(ax1, embedding, by_cluster)

    # Right: the labelled species.
    ax2 = fig.add_subplot(122, projection='3d')
    ax2.set_title('data')
    _scatter_groups(ax2, embedding, by_species)

    plt.show()
    print("Showing done")
196 
197 
def main():
    """Run the demo: load ``iris.data``, cluster it, print and plot the result."""
    dataset = read("iris.data")
    dataset = randomDataset(17, dataset)
    X, Y, n, m = init(dataset)
    P, dis = K_means_algorithm(3, X, Y, n, m)

    print("Done!")
    print(np.array(P))

    # answer() shows the figure itself, so no extra plt.show() is needed.
    answer(X, Y, dis, n)


if __name__ == '__main__':
    main()

~~Jason_liu O(∩_∩)O
原文地址:https://www.cnblogs.com/JasonCow/p/14819150.html