用sklearn.cluster对列表数据聚类

一、按聚类数进行聚类（需先导入：import numpy as np 以及 from sklearn.cluster import AgglomerativeClustering）

def list_cluster(data, n_cluster):
    """Split a flat collection of numbers into ``n_cluster`` groups.

    The values are treated as one-dimensional points and grouped with
    Ward-linkage agglomerative clustering.

    Relies on ``np`` (numpy) and ``AgglomerativeClustering``
    (from ``sklearn.cluster``) being available in the enclosing module.

    Args:
        data: Iterable of numbers to cluster.
        n_cluster: Number of clusters to produce.

    Returns:
        A list of ``n_cluster`` lists. The i-th list holds the input values
        that were assigned cluster label ``i``, in their input order, as
        plain Python numbers.
    """
    # scikit-learn expects a 2-D (n_samples, n_features) array, so wrap each
    # value in its own one-feature row.
    points = np.array([[value] for value in data])

    # No ``affinity=`` argument: it was deprecated in scikit-learn 1.2 and
    # removed in 1.4 (renamed to ``metric``). Ward linkage only supports the
    # Euclidean metric, which is the default, so omitting the argument works
    # on both old and new releases.
    labels = AgglomerativeClustering(
        n_clusters=n_cluster, linkage='ward'
    ).fit_predict(points)

    values = points[:, 0]
    # ``tolist()`` yields native Python numbers instead of numpy scalars, so
    # the result prints as e.g. [1, 2, 3] rather than [np.int64(1), ...].
    return [values[labels == label].tolist() for label in range(n_cluster)]

# Example: split the sample list into 2 clusters and print the groups.
print(list_cluster([1,2,3,53,1,23], 2))

输出：

[[1, 2, 3, 1, 23], [53]]

# Example: the same sample list, this time split into 3 clusters.
print(list_cluster([1,2,3,53,1,23], 3))

输出：

[[1, 2, 3, 1], [53], [23]]

二、按差值聚类（先排序，相邻元素之差不超过 maxgap 的归为同一组）：

def list_cluster_gap(data, maxgap):
    """Group numbers so that neighbouring values in a group differ by at most ``maxgap``.

    The values are sorted in ascending order and then scanned once; a new
    group is started whenever the gap to the previous value exceeds
    ``maxgap``.

    Args:
        data: Iterable of numbers. It is not modified.
        maxgap: Largest allowed difference between two consecutive sorted
            values that still belong to the same group.

    Returns:
        A list of groups (lists), each in ascending order, with the groups
        themselves ordered by value. Returns an empty list for empty input.
    """
    # sorted() works on any iterable and leaves the caller's data untouched
    # (the in-place list.sort() would reorder the caller's list).
    ordered = sorted(data)
    if not ordered:
        return []

    groups = [[ordered[0]]]
    for value in ordered[1:]:
        current = groups[-1]
        # Values are ascending, so the difference is never negative and no
        # abs() is needed.
        if value - current[-1] <= maxgap:
            current.append(value)
        else:
            groups.append([value])
    return groups

# Example: group the sample list, allowing a gap of up to 20 between neighbours.
print(list_cluster_gap([1,2,3,53,1,23], 20))

输出：

[[1, 1, 2, 3, 23], [53]]