C均值算法实现

### 总结

1、NumPy 数组不适合逐个追加元素（np.append 每次都会复制整个数组），因此先创建一个空的 list，向其中添加元素，最后再用 np.array 将它转换为数组。

2、直接写 center = center_ 只会让两个名字引用同一个数组对象，此后修改其中一个，另一个也会随之改变，二者永远相等；需要独立副本时应使用 np.copy。

### 源程序

import numpy as np
import matplotlib.pyplot as plt

# Sample set: twenty 2-D integer points, one [x, y] pair per row.
dataset = np.array(
    [
        # eight points whose coordinates lie in 0..3
        [0, 0], [1, 0], [0, 1], [1, 1], [2, 1], [1, 2], [2, 2], [3, 2],
        # twelve points whose coordinates lie in 6..9
        [6, 6], [7, 6], [8, 6], [6, 7], [7, 7], [8, 7], [9, 7],
        [7, 8], [8, 8], [9, 8], [8, 9], [9, 9],
    ]
)

# Figure 1: the raw samples before clustering. Unpacking the transposed
# array passes the x column and the y column as the two scatter arguments.
plt.figure(1)
plt.scatter(*dataset.T)

# C-means (k-means) clustering of `dataset` into two clusters.
#
# When the loop ends, C0 / C1 hold the samples assigned to centre 0 / centre 1
# and `center` holds the converged centres.
C = 2  # number of centres seeded; the assignment step below handles exactly two
# Seed the centres with the first C samples. astype(float) returns a copy, so
# storing fractional means never touches `dataset`.
center = dataset[0:C, :].astype(float)
center_ = np.copy(center)  # separate buffer for the newly computed centres
while True:

    # Assignment step (Euclidean distance): a sample joins cluster 0 only when
    # it is strictly closer to centre 0; ties go to cluster 1.
    dist_to_0 = np.linalg.norm(dataset - center[0], axis=1)
    dist_to_1 = np.linalg.norm(dataset - center[1], axis=1)
    nearer_0 = dist_to_0 < dist_to_1
    # Boolean masks keep the two-column shape even for an empty cluster, so
    # later column slicing such as C0[:, 0] keeps working.
    C0 = dataset[nearer_0]
    C1 = dataset[~nearer_0]

    # Update step: move each centre to the mean of its members. An empty
    # cluster keeps its previous centre; taking the mean of nothing would
    # yield NaN and the convergence test below could then never succeed.
    if len(C0) > 0:
        center_[0] = C0.mean(axis=0)
    if len(C1) > 0:
        center_[1] = C1.mean(axis=0)

    # Converged once an update leaves every centre exactly where it was.
    if np.array_equal(center, center_):
        break

    # Copy, do not alias: `center = center_` would bind both names to one
    # array, making the comparison above trivially true on the next pass and
    # ending the loop before the centres have actually settled.
    center = center_.copy()

# Figure 2: the final partition, cluster 0 drawn in blue and cluster 1 in red.
plt.figure(2)
for members, colour in ((C0, 'b'), (C1, 'r')):
    plt.scatter(members[:, 0], members[:, 1], marker='o', color=colour)
plt.show()