# DBSCAN -- Python implementation
# -*- coding: utf-8 -*-  
from matplotlib.pyplot import *
from collections import defaultdict
import random
import json
def dist(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two entries of each point are read, so points that
    carry extra trailing entries are accepted as well.
    """
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    squared_length = delta_x ** 2 + delta_y ** 2
    return squared_length ** 0.5


# Read point coordinates from Paris_points.json and keep at most `index`
# of them as [latitude, longitude] pairs.
all_points = []
index = 1000
# Use the built-in json library to load the file; the context manager makes
# sure the file handle is closed again once parsing is done.
with open("Paris_points.json") as json_file:
    flickr_data = json.load(json_file)
# Slicing (instead of indexing up to `index`) tolerates files that contain
# fewer than `index` points.
latitudes = flickr_data['latitudes'][:index]
longitudes = flickr_data['longitudes'][:index]
for latitude, longitude in zip(latitudes, longitudes):
    all_points.append([latitude, longitude])

# Clustering parameters:
#   E      -- distance threshold; two points are neighbours when their
#             distance is <= E
#   minPts -- neighbour-count threshold used to decide whether a point is
#             a core point
E = 0.001
minPts = 7


# Alternative test data: randomly generate 100 Cartesian coordinates.
# When testing with this data, use E = 8 and minPts = 8.
#all_points = []
# for i in range(100):
#     randCoord = [random.randint(1,50),random.randint(1,50)]
#     if not randCoord in all_points:
#         all_points.append(randCoord)


# Find the core points.
#
# Every point gets a third entry (index 2) that holds its cluster label,
# initialised to 0.  A point is classified as a core point when more than
# minPts points of the data set lie within distance E of it; the point
# itself is part of that count because its distance to itself is 0.
other_points = []
core_points = []
plotted_points = []
for candidate in all_points:
    candidate.append(0)    # initial label 0
    neighbour_count = sum(
        1 for neighbour in all_points if dist(neighbour, candidate) <= E
    )

    if neighbour_count <= minPts:
        other_points.append(candidate)
        continue

    core_points.append(candidate)
    plotted_points.append(candidate)
    


# Find the border points: non-core points that lie within distance E of at
# least one core point.
#
# Each non-core point is tested once, so a point that is close to several
# core points is recorded a single time instead of once per nearby core
# point.
border_points = []
for other in other_points:
    if any(dist(core, other) <= E for core in core_points):
        border_points.append(other)
        plotted_points.append(other)



# Clustering step: give every core point, and every point within distance E
# of a core point, a cluster label.
#
# Index 2 of each point holds its label; 0 means "not labelled yet".  Each
# still-unlabelled core point starts a new cluster, which is then grown
# through every core point it reaches, so that a chain of neighbouring core
# points always ends up in one cluster regardless of the order in which the
# points are stored.  Non-core points receive a label but do not extend the
# cluster.
cluster_label = 0
# Identity-based lookup: points are lists (unhashable) and distinct points
# may compare equal, so membership is tracked by object id.
core_ids = set(id(core) for core in core_points)

for seed in core_points:
    if seed[2] != 0:
        continue    # already absorbed into an earlier cluster
    cluster_label += 1
    seed[2] = cluster_label

    frontier = [seed]
    while frontier:
        current = frontier.pop()
        for candidate in plotted_points:
            if candidate[2] == 0 and dist(candidate, current) <= E:
                candidate[2] = cluster_label
                if id(candidate) in core_ids:
                    frontier.append(candidate)


# Once every point carries its label, gather the points of each cluster.
# cluster_list maps a label to a pair of lists: [first coordinates,
# second coordinates] of the points with that label.
cluster_list = defaultdict(lambda: [[], []])
for member in plotted_points:
    firsts, seconds = cluster_list[member[2]]
    firsts.append(member[0])
    seconds.append(member[1])

# Plot format strings, one per cluster.
markers = [
    '+', '*', '.',
    'd', '^', 'v',
    '>', '<', 'p',
]
#markers = ['b.','g.','r.','c.','m.','y.','k.']


# Figure 1: plot every input point.
figure(1)
allx = [plot_point[0] for plot_point in all_points]
ally = [plot_point[1] for plot_point in all_points]
plot(allx, ally, "r.")
title("".join([
    "total points=", str(len(all_points)),
    " E =", str(E),
    " Min Points=", str(minPts),
]))


# Figure 2: plot the clustered points, one marker style per cluster.
figure(2)
# Parenthesised form prints the same text under Python 2 and is valid
# syntax under Python 3 as well.
print(cluster_list)
for position, label in enumerate(cluster_list):
    cluster = cluster_list[label]
    # Cycle through the whole marker list (including its first entry) when
    # there are more clusters than markers.
    plot(cluster[0], cluster[1], markers[position % len(markers)])
title(str(len(cluster_list)) + " clusters created with E = "+ str(E) + " Min Points=" + str(minPts))

# Figure 3: plot the noise points, i.e. the points that are neither core
# points nor border points, then display all figures.
figure(3)
noise_points = [
    point for point in all_points
    if not (point in core_points or point in border_points)
]
noisex = [point[0] for point in noise_points]
noisey = [point[1] for point in noise_points]
plot(noisex, noisey, "x")

title("".join([
    "noise Points = ", str(len(noise_points)),
    " E =", str(E),
    " Min Points=", str(minPts),
]))
#axis((0,60,0,60))
show()
# Original article: https://www.cnblogs.com/GDUT-xiang/p/5714110.html