MaxDICluster

import pickle
import numpy as np
import pandas as pd
import networkx as nx
import geopandas as gpd
import scipy.sparse as sp
import matplotlib.pyplot as plt
from scipy.spatial import cKDTree
import xml.etree.ElementTree as et  # cElementTree was removed in Python 3.9
from joblib import Parallel, delayed
from shapely.geometry import Point, MultiLineString, Polygon
from sklearn.metrics.pairwise import pairwise_kernels, rbf_kernel
from sklearn.neighbors import kneighbors_graph, radius_neighbors_graph

# Build a COO sparse-matrix view (row, col, data arrays) of G's edges.
def get_coo_matrix_from_G(G, weight=None):
    from scipy.sparse import coo_matrix
    row = np.array([u for u, _ in G.edges(data=False)])
    col = np.array([v for _, v in G.edges(data=False)])
    if weight is not None:
        data = np.array([w['weight'] for _, __, w in G.edges(data=True)])  # assumes a 'weight' edge attribute
    else:
        data = np.ones(shape=row.shape, dtype=float)  # unweighted: every edge weighs 1
    coo_mat = coo_matrix((data, (row, col)), dtype=float)  # np.float was removed in NumPy >= 1.24
    row = coo_mat.row
    col = coo_mat.col
    data = coo_mat.data
    return row, col, data

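# # Example (illustrative sketch; G_demo is a hypothetical name, and integer node
# # labels 0..n-1 are assumed, as everywhere else in this module):
# G_demo = nx.Graph()
# G_demo.add_edges_from([(0, 1, {'weight': 2.0}), (0, 2, {'weight': 0.5}), (1, 2, {'weight': 1.0})])
# row, col, data = get_coo_matrix_from_G(G_demo, weight='weight')
# # row = [0 0 1], col = [1 2 2], data = [2.0 0.5 1.0]
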
def get_Vigidegi(row, col, data, ndq):
    deg_ndq = {}  # degrees of the nodes in ndq
    nodes = []
    weights = []
    for nd in ndq:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)  # np.int was removed in NumPy >= 1.24
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    for nd in ndq:  # drop neighbours that lie inside ndq itself
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
    # V_G = np.sum(data) * 2.0
    deg_i = deg_ndq
    gi = len(nodes)  # number of edges leaving the module
    Vi = np.sum(weights)  # module volume
    return Vi, gi, deg_i

def get_Vgdeg(row, col, data, ndq_a, ndq_b):
    ndq = ndq_a + ndq_b
    L_X, L_Y, L_XY = len(ndq_a), len(ndq_b), len(ndq)
    deg_ndq = {}  # degrees of the nodes in ndq
    nodes_a, weights_a = [], []
    nodes_b, weights_b = [], []
    nodes, weights = [], []
    for nd in ndq[:L_X]:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    nodes_a += nodes
    weights_a += weights
    for nd in ndq[L_X:]:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    nodes_b += nodes[len(nodes_a):len(nodes)]
    weights_b += weights[len(weights_a):len(weights)]
    for nd in ndq[:L_X]:
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
        for _ in range(nodes_a.count(nd)):
            nodes_a.remove(nd)
    for nd in ndq[L_X:]:
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
        for _ in range(nodes_b.count(nd)):
            nodes_b.remove(nd)
    # V_G = np.sum(data) * 2.0
    deg_ij = deg_ndq
    deg_i = {nd: deg_ndq[nd] for nd in ndq_a}
    deg_j = {nd: deg_ndq[nd] for nd in ndq_b}
    gi = len(nodes_a)
    Vi = np.sum(weights_a)
    gj = len(nodes_b)
    Vj = np.sum(weights_b)
    gij = len(nodes)
    Vij = np.sum(weights)
    return Vi, Vj, Vij, gi, gj, gij, deg_i, deg_j, deg_ij

def deltDI_ij(row, col, data, ndq_a, ndq_b):
    ndq = ndq_a + ndq_b
    L_X, L_Y, L_XY = len(ndq_a), len(ndq_b), len(ndq)
    deg_ndq = {}  # degrees of the nodes in ndq
    nodes_a, weights_a = [], []
    nodes_b, weights_b = [], []
    nodes, weights = [], []
    for nd in ndq[:L_X]:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    nodes_a += nodes
    weights_a += weights
    for nd in ndq[L_X:]:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    nodes_b += nodes[len(nodes_a):len(nodes)]
    weights_b += weights[len(weights_a):len(weights)]

    for nd in ndq[:L_X]:
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
        for _ in range(nodes_a.count(nd)):
            nodes_a.remove(nd)
    for nd in ndq[L_X:]:
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
        for _ in range(nodes_b.count(nd)):
            nodes_b.remove(nd)
    # deg_ij = deg_ndq
    # deg_i = {nd: deg_ndq[nd] for nd in ndq_a}
    # deg_j = {nd: deg_ndq[nd] for nd in ndq_b}
    V_G = np.sum(data) * 2.0
    g_i = len(nodes_a)
    V_i = np.sum(weights_a)
    g_j = len(nodes_b)
    V_j = np.sum(weights_b)
    g_ij = len(nodes)
    V_ij = np.sum(weights)

    if V_i < 1e-5:
        V_i = 1.0
    if V_j < 1e-5:
        V_j = 1.0
    if V_ij < 1e-5:
        V_ij = 1.0
    if V_G < 1e-5:
        V_G = 1.0

    delt = (-(V_i - g_i) * np.log2(V_i) - (V_j - g_j) * np.log2(V_j)
            + (V_ij - g_ij) * np.log2(V_ij)
            + (V_i + V_j - V_ij - g_i - g_j + g_ij) * np.log2(V_G))

    return delt / V_G

def parts_DI(row, col, data, ndq):
    deg_ndq = {}  # degrees of the nodes in ndq
    nodes = []
    weights = []
    for nd in ndq:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    for nd in ndq:
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
    V_G = np.sum(data) * 2.0
    # deg_i = deg_ndq
    gi = len(nodes)
    Vi = np.sum(weights)
    V_div = Vi / V_G
    V_div_hat = (Vi - gi) / V_G

    return 0.0 if V_div < 1e-5 else -V_div_hat * np.log2(V_div)

def producer(N):
    # pop the first group off N and pair it with each remaining group
    ndi = N[0]
    N.remove(ndi)
    for ndj in N:
        yield list(ndi), list(ndj)

def partition_producer(partition):
    for L in partition:
        yield L

def pLog2p(p_i, eps=1e-10):
    # p * log2(p), treating entries with p < eps as contributing 0
    # (they are set to 1 in place, so the product vanishes)
    p_i[p_i < eps] = 1.0
    return p_i * np.log2(p_i)

def Log2p(p_i, eps=1e-10):
    # log2(p), clamping entries with p < eps to 1 (log = 0) to avoid -inf
    p_i[p_i < eps] = 1.0
    return np.log2(p_i)

# oneStruInforEntropy
def get_oneStruInforEntropy(G, weight=None):
    G_du = G.degree(weight=weight)
    G_volume = sum(G_du[index_node] for index_node in G.nodes)
    G_pu_dic = {index_node: G_du[index_node] * 1.0 / (1.0 * G_volume) for index_node in G.nodes}
    G_pu = [G_pu_dic[index_node] for index_node in G.nodes]
    # Shannon entropy of the degree distribution
    HP_G_Shonnon = sum(pLog2p(np.array(G_pu))) * (-1.0)
    return HP_G_Shonnon

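# # Example (sketch, assuming any graph with integer node labels; the karate club
# # graph shipped with NetworkX is used here purely for illustration):
# G_demo = nx.karate_club_graph()
# print(get_oneStruInforEntropy(G_demo))  # one-dimensional structural entropy, in bits
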
# StruInforEntropy
def StruInforEntropy(G, partition, weight=None):
    # nodes = G.nodes
    # n = G.number_of_nodes()
    # m = G.number_of_edges()
    sub_set = partition.copy()
    degree = G.degree(weight=weight)
    G_volume = sum(degree[index_node] for index_node in G.nodes)
    Vij, gij, deg_ij = [], [], []
    for ind in range(len(sub_set)):
        sub_degree = 0
        dij = []
        for node in sub_set[ind]:
            sub_degree += degree[node]
            dij.append(degree[node])
        gj_c = nx.cut_size(G, sub_set[ind], weight=weight)
        Vij.append(sub_degree)
        gij.append(gj_c)
        deg_ij.append(np.array(dij))
    gij = np.array(gij, dtype=float)
    Vij = np.array(Vij, dtype=float)
    p_i = [deg_ij[i] / Vij[i] for i in range(len(Vij))]
    pLogp = [pLog2p(pi, eps=1e-10) for pi in p_i]
    sum_pLogp = np.array([np.sum(plp) for plp in pLogp], dtype=float)

    first = np.sum((-1.0) * Vij / G_volume * sum_pLogp)
    second = np.sum((-1.0) * gij / G_volume * Log2p(Vij / G_volume))

    HG = first + second

    return HG, Vij, gij, deg_ij

# one-dimensional StruInforEntropy over a partition
def get_oneStruInforEntropy_Partition(G, partition, weight=None):
    # nodes = G.nodes
    # n = G.number_of_nodes()
    # m = G.number_of_edges()
    sub_set = partition.copy()
    degree = G.degree(weight=weight)
    G_volume = sum(degree[index_node] for index_node in G.nodes)
    Vij, gij, deg_ij = [], [], []
    for ind in range(len(sub_set)):
        sub_degree = 0
        dij = []
        for node in sub_set[ind]:
            sub_degree += degree[node]
            dij.append(degree[node])
        gj_c = nx.cut_size(G, sub_set[ind], weight=weight)
        Vij.append(sub_degree)
        gij.append(gj_c)
        deg_ij.append(np.array(dij))
    gij = np.array(gij, dtype=float)
    Vij = np.array(Vij, dtype=float)
    p_i = [deg_ij[i] / Vij[i] for i in range(len(Vij))]
    pLogp = [pLog2p(pi, eps=1e-10) for pi in p_i]
    sum_pLogp = np.array([np.sum(plp) for plp in pLogp], dtype=float)

    first = np.sum((-1.0) * Vij / G_volume * sum_pLogp)
    second = np.sum((-1.0) * pLog2p(Vij / G_volume))

    oneHG = first + second

    return oneHG, Vij, gij, deg_ij

# # test: 2D-StruInforEntropy
# partition = [{0, 1, 2, 3}, {4, 5, 6, 7, 8}, {9, 10, 11, 12, 13}]
# HP_G, Vj, g_j, Gj_deg = StruInforEntropy(G, partition)

def delt_Xij(Xi, Xj, G, weight=None):
    Xij = Xi + Xj
    sub_set = [Xi, Xj, list(set(Xij))]
    degree = G.degree(weight=weight)
    G_volume = sum(degree[index_node] for index_node in G.nodes)
    Vij, gij = [], []
    for ind in range(len(sub_set)):
        sub_degree = 0
        for node in sub_set[ind]:
            sub_degree += degree[node]
        gj_c = nx.cut_size(G, sub_set[ind], weight=weight)
        Vij.append(sub_degree)
        gij.append(gj_c)
    gij = np.array(gij)
    Vij = np.array(Vij)
    g_i, g_j, g_ij = gij[0], gij[1], gij[2]
    V_i, V_j, V_ij = Vij[0], Vij[1], Vij[2]
    log_Vij = Log2p(Vij, eps=1e-10)
    delt_G_Pij = 1.0 / G_volume * ((V_i - g_i) * log_Vij[0] +
                                   (V_j - g_j) * log_Vij[1] -
                                   (V_ij - g_ij) * log_Vij[2] +
                                   (g_i + g_j - g_ij) * np.log2(G_volume + 1e-10))
    return delt_G_Pij

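# # Worked example (sketch; G_demo is a hypothetical toy graph of two triangles
# # joined by a bridge). A positive delt_Xij means merging Xi and Xj decreases
# # the two-dimensional structural entropy, so the merge is worthwhile:
# G_demo = nx.Graph()
# G_demo.add_edges_from([(0, 1), (1, 2), (0, 2), (3, 4), (4, 5), (3, 5), (2, 3)])
# print(delt_Xij([0], [1], G_demo))  # > 0: nodes in the same triangle should merge
# print(delt_Xij([0], [4], G_demo))  # <= 0: nodes across the bridge should not
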
def delt_RXij(Xi, Xj, G, weight=None):
    Xij = Xi + Xj
    sub_set = [Xi, Xj, list(set(Xij))]
    # sub_set = [[0, 1], [2, 3], [0, 1, 2, 3]]
    # n = G.number_of_nodes()
    # m = G.number_of_edges()
    degree = G.degree(weight=weight)
    G_volume = sum(degree[index_node] for index_node in G.nodes) + 1e-10
    Vij, gij = [], []
    for ind in range(len(sub_set)):
        sub_degree = 0
        for node in sub_set[ind]:
            sub_degree += degree[node]
        gj_c = nx.cut_size(G, sub_set[ind], weight=weight)
        Vij.append(sub_degree)
        gij.append(gj_c)
    g_i, g_j, g_ij = gij[0], gij[1], gij[2]
    V_i, V_j, V_ij = Vij[0], Vij[1], Vij[2]
    # log_Vij = Log2p(Vij, eps=1e-10)

    RP_i = -(V_i - g_i) / G_volume * np.log2(V_i / G_volume + 1e-10)
    RP_j = -(V_j - g_j) / G_volume * np.log2(V_j / G_volume + 1e-10)
    RP_ij = -(V_ij - g_ij) / G_volume * np.log2(V_ij / G_volume + 1e-10)

    delt_G_RPij = RP_i + RP_j - RP_ij

    return delt_G_RPij

def doWhile(G, NodeA, weight=None):
    count = 0
    L = len(NodeA)
    for Xi in NodeA:
        NodeB = NodeA.copy()
        NodeB.remove(Xi)
        for Xj in NodeB:
            delt_ij = delt_Xij(Xi, Xj, G, weight=weight)  # entropy decrease of merging Xi and Xj
            if delt_ij > 0:
                return True
    #         if delt_ij < 0:
    #             count += 1
    # if count - L * (L - 1) != 0:
    #     return True
    return False

def doRWhile(G, NodeA, weight=None):
    count = 0
    L = len(NodeA)
    for Xi in NodeA:
        NodeB = NodeA.copy()
        NodeB.remove(Xi)
        for Xj in NodeB:
            delt_ij = delt_RXij(Xi, Xj, G, weight=weight)  # entropy change of merging Xi and Xj
            if delt_ij <= 0:
                return True
    #         if delt_ij > 0:
    #             count += 1
    # if count - L * (L - 1) != 0:
    #     return True
    return False

def doWhile_js(row, col, data, NodeA):
    count = 0
    L = len(NodeA)
    for Xi in NodeA:
        NodeB = NodeA.copy()
        NodeB.remove(Xi)
        for Xj in NodeB:
            delt_ij = deltDI_ij(row, col, data, ndq_a=Xi, ndq_b=Xj)  # DI change of merging Xi and Xj
            if delt_ij <= 0:
                return True
    #         if delt_ij > 0:
    #             count += 1
    # if count - L * (L - 1) != 0:
    #     return True
    return False

###################################### Main algorithm min2DStruInforEntropyPartition -- start ###########################################
def min2DStruInforEntropyPartition(G, weight=None):
    # Input; algorithm body -- start
    print("Partition by min2DHG ..........")
    nodes = list(G.nodes())
    nodes.sort()  # sort node IDs in ascending order
    global NodeA
    NodeA = [[node] for node in nodes]
    print("Init-Input:", NodeA)  # input data
    doWhileFlg = True
    NodeA.reverse()  # reverse order
    while doWhileFlg:
        Xi = NodeA.pop()
        Nj = NodeA.copy()
        delt_max = 0
        Xj_m = None
        for Xj in Nj:
            delt_ij = delt_Xij(Xi, Xj, G, weight=weight)  # entropy decrease of merging Xi and Xj
            if delt_ij > 0 and delt_ij > delt_max:
                Xj_m = Xj
                delt_max = delt_ij
        if Xj_m in Nj and Xj_m is not None:
            Nj.remove(Xj_m)
            Xij = Xi + Xj_m
            Nj.insert(0, Xij)
            # print('Xi:', Xi, '+ Xj:', Xj_m, '-->', Xij, ' delt_ij_HG:', delt_max)
        elif Xj_m is None:
            Nj.insert(0, Xi)  # insert at the front
        NodeA = Nj
        doWhileFlg = doWhile(G, NodeA, weight=weight)  # whether to continue the loop
        # print(NodeA)
    # print('Output:', NodeA)
    sub_set = NodeA.copy()  # final result
    # Output: NodeA; algorithm body -- end

    # sort
    results = []
    for sb_result in sub_set:
        sb_result.sort()
        results.append(sb_result)
    results.sort()
    print('Output:', results)
    return results
###################################### Main algorithm min2DStruInforEntropyPartition -- end ###########################################

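# # Usage sketch (hypothetical toy graph, not from the original tests): two
# # triangles joined by a bridge should come out as two modules.
# G_demo = nx.Graph()
# G_demo.add_edges_from([(0, 1), (1, 2), (0, 2), (3, 4), (4, 5), (3, 5), (2, 3)])
# partition = min2DStruInforEntropyPartition(G_demo)  # e.g. [[0, 1, 2], [3, 4, 5]]
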
###################################### Main algorithm maxDIPartition -- start ###########################################
def maxDIPartition(G, weight=None, pk_partion=None):
    # Input; algorithm body -- start
    print("Partition by maxDI ..........")
    nodes = list(G.nodes())
    nodes.sort()  # sort node IDs in ascending order
    global NodeA
    if pk_partion is None:
        nodes = list(G.nodes())
        nodes.sort()  # sort node IDs in ascending order
        NodeA = [[node] for node in nodes]
    else:
        NodeA = pk_partion
    print("Init-Input:", NodeA)  # input data
    doWhileFlg = True
    NodeA.reverse()  # reverse order
    while doWhileFlg:
        Xi = NodeA.pop()
        Nj = NodeA.copy()
        delt_min = 0
        Xj_m = None
        for Xj in Nj:
            delt_ij = delt_RXij(Xi, Xj, G, weight=weight)  # entropy change of merging Xi and Xj
            if delt_ij < 0 and delt_ij < delt_min:
                Xj_m = Xj
                delt_min = delt_ij
        if Xj_m in Nj and Xj_m is not None:
            Nj.remove(Xj_m)
            Xij = Xi + Xj_m
            Nj.insert(0, Xij)
            # print('Xi:', Xi, '+ Xj:', Xj_m, '-->', Xij, ' delt_RXij:', delt_min)
        elif Xj_m is None:
            Nj.insert(0, Xi)  # insert at the front
        NodeA = Nj
        doWhileFlg = doRWhile(G, NodeA, weight=weight)  # whether to continue the loop
        # print(NodeA)
    # print('Output:', NodeA)
    sub_set = NodeA.copy()  # final result
    # Output: NodeA; algorithm body -- end

    # sort
    results = []
    for sb_result in sub_set:
        sb_result.sort()
        results.append(sb_result)
    results.sort()
    print('Output:', results)
    return results
###################################### Main algorithm maxDIPartition -- end ###########################################

###################################### Main algorithm maxDI (optimized) -- start ###########################################
def maxDI(G, weight=None, pk_partion=None):
    # Input; algorithm body -- start
    print("Partition by maxDI ..........")
    row, col, data = get_coo_matrix_from_G(G, weight=weight)
    nodes = list(G.nodes())
    nodes.sort()  # sort node IDs in ascending order
    global NodeA
    if pk_partion is None:
        nodes = list(G.nodes())
        nodes.sort()  # sort node IDs in ascending order
        NodeA = [[node] for node in nodes]
    else:
        NodeA = pk_partion
    print("Init-Input:", NodeA)  # input data
    doWhileFlg = True
    NodeA.reverse()  # reverse order
    while doWhileFlg:
        Xi = NodeA.pop()
        Nj = NodeA.copy()
        delt_min = 0
        Xj_m = None
        for Xj in Nj:
            delt_ij = deltDI_ij(row, col, data, ndq_a=Xi, ndq_b=Xj)  # DI change of merging Xi and Xj
            if delt_ij < 0 and delt_ij < delt_min:
                Xj_m = Xj
                delt_min = delt_ij
        if Xj_m in Nj and Xj_m is not None:
            Nj.remove(Xj_m)
            Xij = Xi + Xj_m
            Nj.insert(0, Xij)
            # print('Xi:', Xi, '+ Xj:', Xj_m, '-->', Xij, ' delt_RXij:', delt_min)
        elif Xj_m is None:
            Nj.insert(0, Xi)  # insert at the front
        NodeA = Nj
        doWhileFlg = doWhile_js(row, col, data, NodeA)  # whether to continue the loop
        # print(NodeA)
    # print('Output:', NodeA)
    sub_set = NodeA.copy()  # final result
    # Output: NodeA; algorithm body -- end

    # sort
    results = []
    for sb_result in sub_set:
        sb_result.sort()
        results.append(sb_result)
    results.sort()
    print('Output:', results)
    return results
###################################### Main algorithm maxDI (optimized) -- end ###########################################

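# # Usage sketch for the COO-based variant (same hypothetical toy graph as above).
# # maxDI works on the sparse edge arrays instead of repeated NetworkX queries, so
# # it is the cheaper entry point; a pk_partion (e.g. from get_pk_DBSCAN below) can
# # seed it with coarser initial groups.
# G_demo = nx.Graph()
# G_demo.add_edges_from([(0, 1), (1, 2), (0, 2), (3, 4), (4, 5), (3, 5), (2, 3)])
# partition = maxDI(G_demo)  # e.g. [[0, 1, 2], [3, 4, 5]]
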
###################################### Main algorithm maxDI, iterative optimization (Parallel) -- start ###########################################
def iter_maxDI(G, iter_max=100, pk_partion=None, weight=None, n_jobs=4, verbose=0):
    # iter_max = 100
    # n_jobs = 4
    row, col, data = get_coo_matrix_from_G(G, weight=weight)
    global N
    if pk_partion is None:
        nodes = list(G.nodes())
        nodes.sort()  # sort node IDs in ascending order
        N = [[node] for node in nodes]
    else:
        N = pk_partion
    out = Parallel(n_jobs=n_jobs, verbose=verbose)(delayed(parts_DI)(row, col, data, P) for P in partition_producer(N))
    # print('(iter:%d ---> DI:%.2f bits)' % (0, np.sum(out)))
    DI = np.zeros(iter_max + 1)
    DI[0] = float('%.3f' % np.sum(out))
    print('(iter:%d ---> DI:%.3f bits)' % (0, DI[0]))
    for iter in range(iter_max):
        ndi = N[0]
        out = Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(deltDI_ij)(row, col, data, ndi, ndj) for ndi, ndj in producer(N))
        out_min = min(out)
        if out_min < 0:
            ndj = N[out.index(out_min)]
            N.remove(ndj)
            N.append(ndi + ndj)
        elif ndi not in N:
            N.append(ndi)
        out = Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(parts_DI)(row, col, data, P) for P in partition_producer(N))
        DI[iter + 1] = float('%.3f' % np.sum(out))
        if (iter + 1) % 50 == 0:
            print('(iter:%d ---> DI:%.3f bits)' % (iter + 1, DI[iter + 1]))
        # if (iter + 1) >= 2000 and np.var(DI[-2000:-1], ddof=1) < 1e-10:  # sample variance (divides by N - 1)
        #     DI = DI[:iter + 2]
        #     break

    # sort results
    results = []
    for sb_result in N:
        sb_result.sort()
        results.append(sb_result)
    results.sort()

    return results, DI
###################################### Main algorithm maxDI, iterative optimization (Parallel) -- end ###########################################

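# # Usage sketch (hypothetical toy graph): the parallel variant caps the number of
# # merge iterations and returns the decoding-information trace DI alongside the
# # partition; iter_max and n_jobs here are illustrative values.
# G_demo = nx.Graph()
# G_demo.add_edges_from([(0, 1), (1, 2), (0, 2), (3, 4), (4, 5), (3, 5), (2, 3)])
# results, DI = iter_maxDI(G_demo, iter_max=10, n_jobs=2)
# print(results, DI[-1])
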
# get DI by Parallel
def get_DI_Parallel(G, partion=None, weight=None, n_jobs=4, verbose=0):
    row, col, data = get_coo_matrix_from_G(G, weight=weight)
    out = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(parts_DI)(row, col, data, P) for P in partition_producer(partion))
    DI = float('%.3f' % np.sum(out))
    return DI

# get DI by maxDIPartition
def get_DI(G, weight=None):
    print("G: number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
    results = maxDIPartition(G, weight=weight)
    twoHG, Vj, g_j, Gj_deg = StruInforEntropy(G, partition=results, weight=weight)
    oneHG = get_oneStruInforEntropy(G, weight=weight)
    DI = oneHG - twoHG  # decoding information: one-dimensional minus two-dimensional entropy
    DI = float('%.3f' % DI)
    return DI

# n, m = G.number_of_nodes(), G.number_of_edges()
# Eg_List = [(u, v, w['weight']) for (u, v, w) in G.edges(data=True)]
def create_weight_graph_txt_file(G, filename='InputGraph'):
    with open(filename + '.txt', mode='w') as file:
        file.write('{0}\n'.format(G.number_of_nodes()))
        for (u, v, w) in G.edges(data=True):
            file.write('{0} {1} {2}\n'.format(u, v, w['weight']))

# Plot Graph
def show_graph(G, weight='weight', with_labels=False, save=False, filename='./graph.svg'):
    # test: outer point
    print("G: number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
    # pos = nx.spring_layout(G)  # set layout
    pos = nx.kamada_kawai_layout(G)
    # nx.draw(G, pos=pos, node_size=300, with_labels=with_labels, node_color='red')
    nodecolor = G.degree(weight=weight)  # higher degree -> bigger node and darker edge color
    nodecolor2 = pd.DataFrame(nodecolor)  # convert to a DataFrame
    nodecolor3 = nodecolor2.iloc[:, 1]  # second column holds the degree values
    edgecolor = range(G.number_of_edges())  # edge colors

    nx.draw(G,
            pos,
            with_labels=with_labels,
            node_size=nodecolor3 * 6 * 10,
            node_color=nodecolor3 * 5,
            edge_color=edgecolor)

    if save:
        plt.savefig(filename, dpi=600, transparent=True)
    plt.show()

# label_result
def label_result(nsamples, result):
    y_result = np.zeros((nsamples,)) - 1  # -1 marks noise points (nodes absent from result)
    cls = 0
    for p in result:
        y_result[p] = cls
        cls += 1
    return y_result

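# # Example (sketch): map a partition back to per-sample labels; nodes that never
# # appear in `result` keep the noise label -1.
# # label_result(6, [[0, 1, 2], [3, 4, 5]]) -> array([0., 0., 0., 1., 1., 1.])
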
# Decomposition
def XDecomposition(X, n_components=2):
    from sklearn.decomposition import PCA
    (m, n) = X.shape
    if n_components >= n:
        return X
    else:
        pca = PCA(n_components=n_components, svd_solver='full')
        pca.fit(X)
        X_trans = pca.transform(X)
        # pca.explained_variance_ratio_
        # pca.singular_values_
        return X_trans

# Add node attributes
def set_node_attributes(G, X):
    attr = {}
    for node in G.nodes(data=False):
        attr[node] = {'x_' + str(j): X[node, j] for j in range(X.shape[1])}
    nx.set_node_attributes(G, attr)
    return G

# Add part_nodes attributes
def set_part_node_attributes(G, nodelist, X):
    attr = {}
    for node in nodelist:
        attr[node] = {'x_' + str(j): X[node, j] for j in range(X.shape[1])}
    nx.set_node_attributes(G, attr)
    return G

# update node_attributes
def update_node_attributes(G, X, node_new):
    attr = {}
    attr[node_new] = {'x_' + str(j): X[node_new, j] for j in range(X.shape[1])}
    nx.set_node_attributes(G, attr)
    return G

# query_GraphND_cKDTree
def query_GraphND_cKDTree(G, points, num_feature):
    # X --> m x n; points --> k x n
    # points = np.array([X, Y]).T
    nodes = {'x_' + str(j): nx.get_node_attributes(G, 'x_' + str(j)) for j in range(num_feature)}
    nodes_pd = pd.DataFrame(nodes)
    data = nodes_pd[['x_' + str(j) for j in range(num_feature)]]

    tree = cKDTree(data=data, compact_nodes=True, balanced_tree=True)

    dist, idx = tree.query(points, k=1)
    node = nodes_pd.iloc[idx].index

    return node, dist

# get_poly_from_osm_circleline
def get_poly_from_osm_circleline(circleline_osmfile=r"../osm/beijing_car_seg_6ring road.osm", toepsg=2436):  # toepsg == 3857; 2436
    # The exact Beijing 6th-ring road is in beijing_car_seg_6ring road.osm (the drivable road
    # network within and near the 6th ring; its road graph is connected):
    # https://www.cnblogs.com/jeshy/p/14763489.html
    # beijing_car_seg_6ring road.osm as the input data
    # root = et.parse(r"./beijing_car_seg_6ring road.osm").getroot()
    root = et.parse(circleline_osmfile).getroot()
    nodes = {}
    for node in root.findall('node'):
        nodes[int(node.attrib['id'])] = (float(node.attrib['lon']), float(node.attrib['lat']))
    edges = []
    for way in root.findall('way'):
        element_nd = way.findall('nd')
        if len(element_nd) > 2:
            for i in range(len(element_nd) - 1):
                node_s, node_e = int(element_nd[i].attrib['ref']), int(element_nd[i + 1].attrib['ref'])
                path = (node_s, node_e)
                edges.append(path)
        else:
            node_s, node_e = int(element_nd[0].attrib['ref']), int(element_nd[1].attrib['ref'])
            path = (node_s, node_e)
            edges.append(path)
    edge = edges[0]
    E = []
    E.append(edge)
    edges.remove(edge)
    point_s, point_e = nodes[E[0][0]], nodes[E[0][1]]
    Point_lists = []
    Point_lists.append(list(point_s))
    Point_lists.append(list(point_e))
    while len(edges) > 0:  # chain the way segments head-to-tail into one ring
        (node_f_s, node_f_e) = E[-1]
        for (node_n_s, node_n_e) in edges:
            if node_f_e == node_n_s:
                E.append((node_n_s, node_n_e))
                point_f_e = nodes[node_n_e]
                Point_lists.append(list(point_f_e))
                # print((node_n_s, node_n_e))
                edges.remove((node_n_s, node_n_e))
                break
    # Points.pop()
    # print(E[0], '...', E[-2], E[-1])
    # print(Point_lists[0], '...', Point_lists[-2], Point_lists[-1])

    road_line_arr = np.array(Point_lists)  # as a 2-D coordinate array
    sixc_ring_poly = Polygon(road_line_arr)  # Polygon

    gpoly = gpd.GeoSeries(sixc_ring_poly, crs="EPSG:4326")  # the {'init': ...} crs dict is deprecated

    if toepsg is not None:
        gpoly = gpoly.to_crs(epsg=toepsg)
    print('output gpoly.crs:', gpoly.crs)

    # poly = gpoly[0]
    # print('area(km*km):', poly.area / 1.0e6)
    # print('length(km):', poly.length / 1.0e3)

    return road_line_arr, sixc_ring_poly, gpoly

# X2GDF
def X2GDF(X, input_crs="EPSG:2436", output_crs="EPSG:4326", plot=True):
    # input_crs = "EPSG:2436"  # input_crs = "EPSG:4326"
    # output_crs = "EPSG:4326"  # output_crs = "EPSG:2436"
    coords, IDs = [], []
    for i in range(X.shape[0]):
        coords.append(Point(X[i, 0], X[i, 1]))  # (X, Y)
        IDs.append("%d" % i)
    data = {'ID': IDs, 'geometry': coords}
    gdf = gpd.GeoDataFrame(data, crs=input_crs)
    # print(gdf.crs)  # inspect the CRS (coordinate reference system)
    if output_crs is not None:
        gdf = gdf.to_crs(crs=output_crs)  # (Lon, Lat); to_crs returns a copy, it does not modify in place

    if plot:
        gdf.plot()
        plt.show()

    return gdf
# GDF2X
def GDF2X(gdata, to_epsg=2436, plot=True):
    print('input gdf crs:', gdata.crs)  # CRS of the input data ---> epsg == 4326 (WGS_84)
    gdata = gdata.to_crs(epsg=to_epsg)  # epsg == 2436 (X, Y), 4326 (lon, lat)
    print('output X crs:', gdata.crs)
    # print(gdata.columns.values)  # column names
    # print(gdata.head())  # first 5 rows
    if plot:
        gdata.plot()
        plt.show()
    geo = gdata.geometry.values
    X0, Y0 = geo.x, geo.y
    X = np.zeros(shape=(X0.shape[0], 2), dtype=float)
    for i in range(X0.shape[0]):
        X[i, 0], X[i, 1] = X0[i], Y0[i]
    return X

# LonLat2XY
def LonLat2XY(X, input_crs="EPSG:4326", output_crs="EPSG:3857", shpfile_name="point_shp.shp"):
    # index = ['node_' + str(node) for node in range(X.shape[0])]
    # columns = ['column_' + str(col) for col in range(X.shape[1])]
    # df = pd.DataFrame(X, index=index, columns=columns)
    coords, IDs = [], []
    for i in range(X.shape[0]):
        coords.append(Point(X[i, 0], X[i, 1]))  # (X, Y)
        IDs.append("%d" % i)

    data = {'ID': IDs, 'geometry': coords}
    gdf = gpd.GeoDataFrame(data, crs=input_crs)

    print('input crs', gdf.crs)  # inspect the CRS
    if output_crs is not None:
        gdf = gdf.to_crs(crs=output_crs)  # to_crs returns a copy, it does not modify in place

    print('output crs', gdf.crs)  # output_crs = "EPSG:3857" or "EPSG:2436"
    if shpfile_name is not None:  # shpfile = "point_shap.shp"
        gdf.to_file(filename=shpfile_name)
        print('shpfile is saved')

    geo = gdf.geometry.values
    X0, Y0 = geo.x, geo.y
    X = np.zeros(shape=(X0.shape[0], 2), dtype=float)
    for i in range(X0.shape[0]):
        X[i, 0], X[i, 1] = X0[i], Y0[i]

    return X

# get_partion_result_outline
def get_partion_result_outline(X, partion_result, input_crs="EPSG:2436", output_crs=None, shpfile="partion_outline_shp.shp", save_shp=True, plot=True):
    import alphashape
    # input_crs = "EPSG:2436"
    # output_crs = None
    # shpfile = "partion_outline_shp.shp"
    # save_shp = True
    # plot = True
    Polygs, Numbers, IDs = [], [], []
    for i in range(len(partion_result)):
        X_uniques = np.unique(X[partion_result[i]], axis=0)  # drop duplicate rows
        if X_uniques.shape[0] > 2:
            gdf_alpha_shape = alphashape.alphashape(X2GDF(X_uniques, input_crs=input_crs, output_crs=output_crs, plot=None))
            polyg = gdf_alpha_shape.geometry.values  # GeoDataFrame array
            Polygs.append(polyg[0])
            Numbers.append(len(partion_result[i]))
            IDs.append("%d" % i)
    data = {'ID': IDs, 'num': Numbers, 'geometry': Polygs}
    gdf = gpd.GeoDataFrame(data, crs=input_crs)

    if output_crs is not None:
        gdf = gdf.to_crs(crs=output_crs)

    if save_shp:
        gdf.to_file(shpfile)

    if plot:
        gdf.plot()
        plt.show()

    return gdf

# X2Shpfile
def X2Shpfile(X, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs="EPSG:4326"):
    # input_crs = "EPSG:2436"  # input_crs = "EPSG:4326"
    # output_crs = "EPSG:4326"  # output_crs = "EPSG:2436"
    # shpfile = "X_shp.shp"
    coords, IDs = [], []
    for i in range(X.shape[0]):
        coords.append(Point(X[i, 0], X[i, 1]))  # (X, Y)
        IDs.append("%d" % i)
    data = {'ID': IDs, 'geometry': coords}
    gdf = gpd.GeoDataFrame(data, crs=input_crs)
    # print(gdf.crs)  # inspect the CRS
    if output_crs is not None:
        gdf = gdf.to_crs(crs=output_crs)  # (Lon, Lat)
    gdf.to_file(shpfile)

# Edge2Shpfile
def Edge2Shpfile(G, X, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs="EPSG:4326"):
    # input_crs = "EPSG:2436"  # input_crs = "EPSG:4326"
    # output_crs = "EPSG:4326"  # output_crs = "EPSG:2436"
    # shpfile = "edge_shp.shp"
    coords, weights, edgeIDs = [], [], []
    if data:
        edges = [(u, v, w['weight']) for u, v, w in G.edges(data=data)]
    else:
        edges = [(u, v, 1.0) for u, v in G.edges(data=data)]
    id = 0
    for u, v, w in edges:
        edgeIDs.append(id)
        coords.append((([X[u, 0], X[u, 1]]), ([X[v, 0], X[v, 1]])))
        weights.append(w)
        id += 1
    data = {'edgeID': edgeIDs, 'weight': weights, 'geometry': MultiLineString(coords)}
    gdf = gpd.GeoDataFrame(data, crs=input_crs)
    # print(gdf.crs)  # inspect the CRS
    if output_crs is not None:
        gdf = gdf.to_crs(crs=output_crs)
    gdf.to_file(shpfile)

# FixingGraph
def FixingGraph(G, X, merage_components=False):
    # Add node attributes
    G = set_node_attributes(G, X)
    node_index = [nd for nd in G.nodes]
    if G.number_of_nodes() < X.shape[0]:
        print("Graph Fixing (Connecting the Isolated Points by distances)......... ")
        # Fixed Graph
        node_all_index = [nd for nd in range(X.shape[0])]
        node_non_index = list(set(node_all_index) - set(node_index))
        for ndi in node_non_index:
            ptq = X[ndi:ndi + 1, :]
            indices, distances = query_GraphND_cKDTree(G, points=ptq, num_feature=X.shape[1])
            add_edge = [(ndi, indices[0], {'weight': 1.0 / (distances[0] + 1e-5)})]
            G.add_edges_from(add_edge)
            # update the new node attributes
            G = update_node_attributes(G, X, node_new=ndi)
            node_index.append(ndi)
        print("G(Fixed): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
    # connected components in G
    if merage_components:
        connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
        Len_C = len(connected_components)
        print('Graph Components Numbers: ', Len_C)
        print("Graph Components Connecting ......... ")
        while Len_C >= 2:
            first_components = connected_components[0]
            # first_sub_graph
            first_sub_graph = G.subgraph(first_components).copy()

            # remove_nodes_from first_sub_graph for G
            G.remove_nodes_from(first_components)

            index_v = [nd for nd, data in first_sub_graph.nodes(data=True)]
            points = X[index_v, :]

            index_us, distances = query_GraphND_cKDTree(G, points=points, num_feature=X.shape[1])

            index_us, distances = list(index_us), list(distances)
            min_index = distances.index(min(distances))  # index of the minimum distance
            v, u, dis = index_v[min_index], index_us[min_index], distances[min_index]
            bridge_edge = [(v, u, {'weight': 1.0 / (dis + 1e-5)})]
            G.add_edges_from(bridge_edge)

            # update the new node attributes
            G = update_node_attributes(G, X, node_new=v)

            # add the subgraph to G
            sub_graph_edges = [edge for edge in first_sub_graph.edges(data=True)]
            G.add_edges_from(sub_graph_edges)
            nodelist = [node for node in G.nodes(data=False)]
            G = set_part_node_attributes(G, nodelist, X)

            connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
            Len_C = len(connected_components)
        print("G(Finally): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    return G

# getKNNGraph
def getKNNGraph(X, n_neighbors=10, threshold=2.25, merage_components=False, save=True, shpfile=False):
    # threshold  ########### setup parameter ###########
    if save:
        with open('X.pkl', 'wb') as output:
            pickle.dump(X, output)

        # from sklearn.preprocessing import StandardScaler
        # X_trans = StandardScaler().fit_transform(X)
        # output = open('trans_X.pkl', 'wb')
        # pickle.dump(X_trans, output)
        # output.close()
        print("X is saved")

    knn_graph = kneighbors_graph(X, n_neighbors=n_neighbors, mode='distance', include_self=False, n_jobs=-1)
    # knn_graph = kneighbors_graph(X_trans, n_neighbors=n_neighbors, mode='distance', include_self=False, n_jobs=8)
    graph = knn_graph
    affinity = 0.5 * (graph + graph.T)
    edges_list, node_index = [], []
    G = nx.Graph()
    if hasattr(affinity, 'tocoo'):
        A = affinity.tocoo()  # csr_matrix --> coo
        # edges_list = [(i, j, {'weight': 1.0/v}) for i, j, v in zip(A.row, A.col, A.data)]  # coo matrix
        for i, j, v in zip(A.row, A.col, A.data):
            edge = (i, j, {'weight': 1.0 / (v + 1e-5)})
            edges_list.append(edge)
            node_index.append(i)
            node_index.append(j)
    G.add_edges_from(edges_list)
    node_index = list(set(node_index))
    print("knnG(all kneighbors): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    print("Graph Edge Removing ......... ")
    for node in node_index:
        neighbors = [nd for nd in nx.neighbors(G, node)]
        if len(neighbors) > 1:
            X_i = X[node, :]
            dis_dict = {}
            for nd in neighbors:
                delt_X_k = X_i - X[nd, :]
                dis = np.linalg.norm(delt_X_k)
                dis_dict[nd] = dis
            dis_order = sorted(dis_dict.items(), key=lambda x: x[1], reverse=False)  # sort by ascending distance
            (min_id, min_dis), (max_id, max_dis) = dis_order[0], dis_order[-1]
            if threshold >= min_dis and max_dis >= threshold:
                for nd in neighbors:
                    if dis_dict[nd] > threshold:  # remove edges longer than the threshold
                        rm_edge = [(node, nd)]
                        G.remove_edges_from(rm_edge)
            elif threshold < min_dis:
                for nd in neighbors:  # remove all
                    rm_edge = [(node, nd)]
                    G.remove_edges_from(rm_edge)
                add_edge = [(node, min_id, {'weight': 1.0 / min_dis})]  # keep only the nearest neighbor
                G.add_edges_from(add_edge)
            else:  # threshold >= max_dis
                pass  # keep all
    print("knnG(edgermove): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    # Add node attributes
    G = set_node_attributes(G, X)
    if G.number_of_nodes() < X.shape[0]:
        print("Graph Fixing ......... ")
        # Fixed Graph
        node_all_index = [nd for nd in range(X.shape[0])]
        node_non_index = list(set(node_all_index) - set(node_index))
        for ndi in node_non_index:
            ptq = X[ndi:ndi + 1, :]
            indices, distances = query_GraphND_cKDTree(G, points=ptq, num_feature=X.shape[1])
            add_edge = [(ndi, indices[0], {'weight': 1.0 / (distances[0] + 1e-5)})]
            G.add_edges_from(add_edge)
            # update the new node attributes
            G = update_node_attributes(G, X, node_new=ndi)
            node_index.append(ndi)
        print("knnG(Fixed): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
        # test
        # Edge2Shpfile(G, X_pca, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs=None)
        # X2Shpfile(X_pca, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs=None)
        # print("Shpfile is saved")
    # connected components in G
    if merage_components:
        connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
        Len_C = len(connected_components)
        print('Graph Components Numbers: ', Len_C)
        print("Graph Components Connecting ......... ")
        while Len_C >= 2:
            first_components = connected_components[0]
            # first_sub_graph
            first_sub_graph = G.subgraph(first_components).copy()

            # remove_nodes_from first_sub_graph for G
            G.remove_nodes_from(first_components)

            index_v = [nd for nd, data in first_sub_graph.nodes(data=True)]
            points = X[index_v, :]

            index_us, distances = query_GraphND_cKDTree(G, points=points, num_feature=X.shape[1])

            index_us, distances = list(index_us), list(distances)
            min_index = distances.index(min(distances))  # index of the minimum distance
            v, u, dis = index_v[min_index], index_us[min_index], distances[min_index]
            bridge_edge = [(v, u, {'weight': 1.0 / (dis + 1e-5)})]
            G.add_edges_from(bridge_edge)

            # update the new node attributes
            G = update_node_attributes(G, X, node_new=v)

            # add the subgraph to G
            sub_graph_edges = [edge for edge in first_sub_graph.edges(data=True)]
            G.add_edges_from(sub_graph_edges)
            nodelist = [node for node in G.nodes(data=False)]
            G = set_part_node_attributes(G, nodelist, X)

            connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
            Len_C = len(connected_components)
        print("knnG(Finally): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    if save:
        with open('knnG.pkl', 'wb') as output:
            pickle.dump(G, output)
        print("knnG is saved")

    if shpfile:
        # X Decomposition
        X_pca = XDecomposition(X, n_components=2)
        Edge2Shpfile(G, X_pca, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs=None)
        X2Shpfile(X_pca, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs=None)
        print("Shpfile is saved")

    return G
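
# # End-to-end sketch (hypothetical data; X_demo and the parameter values are
# # illustrative): build a thresholded KNN graph from two point clouds, partition
# # it with maxDI, then turn the partition into cluster labels.
# X_demo = np.vstack([np.random.randn(50, 2), np.random.randn(50, 2) + 8.0])
# G_demo = getKNNGraph(X_demo, n_neighbors=8, threshold=2.25,
#                      merage_components=True, save=False, shpfile=False)
# partition = maxDI(G_demo)
# labels = label_result(X_demo.shape[0], partition)
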
# getRadiusGraph
def getRadiusGraph(X, radius=2.25, merage_components=False, save=True, shpfile=False):
    # radius  ########### setup parameter ###########
    if save:
        with open('X.pkl', 'wb') as output:
            pickle.dump(X, output)
        # from sklearn.preprocessing import StandardScaler
        # X_trans = StandardScaler().fit_transform(X)
        # output = open('trans_X.pkl', 'wb')
        # pickle.dump(X_trans, output)
        # output.close()
        print("X is saved")

    radius_graph = radius_neighbors_graph(X, radius=radius, mode='distance', include_self=False, n_jobs=-1)
    graph = radius_graph
    affinity = 0.5 * (graph + graph.T)
    edges_list, node_index = [], []
    G = nx.Graph()
    if hasattr(affinity, 'tocoo'):
        A = affinity.tocoo()  # csr_matrix --> coo
        # edges_list = [(i, j, {'weight': 1.0/v}) for i, j, v in zip(A.row, A.col, A.data)]  # coo matrix
        for i, j, v in zip(A.row, A.col, A.data):
            edge = (i, j, {'weight': 1.0 / (v + 1e-5)})
            edges_list.append(edge)
            node_index.append(i)
            node_index.append(j)
    G.add_edges_from(edges_list)
    node_index = list(set(node_index))
    print("radiusG(all radius neighbors): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    # Add node attributes
    G = set_node_attributes(G, X)
    if G.number_of_nodes() < X.shape[0]:
        print("Graph Fixing ......... ")
        # Fixed Graph
        node_all_index = [nd for nd in range(X.shape[0])]
        node_non_index = list(set(node_all_index) - set(node_index))
        for ndi in node_non_index:
            ptq = X[ndi:ndi + 1, :]
            indices, distances = query_GraphND_cKDTree(G, points=ptq, num_feature=X.shape[1])
            add_edge = [(ndi, indices[0], {'weight': 1.0 / (distances[0] + 1e-5)})]
            G.add_edges_from(add_edge)
            # update the new node attributes
            G = update_node_attributes(G, X, node_new=ndi)
            node_index.append(ndi)
        print("radiusG(Fixed): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
        # Edge2Shpfile(G, X_pca, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs=None)
        # X2Shpfile(X_pca, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs=None)
        # print("Shpfile is saved")

    # connected components in G
    if merage_components:
        connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
        Len_C = len(connected_components)
        print('Graph Components Numbers: ', Len_C)
        print("Graph Components Connecting ......... ")
        while Len_C >= 2:
            first_components = connected_components[0]
            # first_sub_graph
            first_sub_graph = G.subgraph(first_components).copy()

            # remove_nodes_from first_sub_graph for G
            G.remove_nodes_from(first_components)

            index_v = [nd for nd, data in first_sub_graph.nodes(data=True)]
            points = X[index_v, :]

            index_us, distances = query_GraphND_cKDTree(G, points=points, num_feature=X.shape[1])

            index_us, distances = list(index_us), list(distances)
            min_index = distances.index(min(distances))  # index of the minimum distance
            v, u, dis = index_v[min_index], index_us[min_index], distances[min_index]
            bridge_edge = [(v, u, {'weight': 1.0 / (dis + 1e-5)})]
            G.add_edges_from(bridge_edge)

            # update the new node attributes
            G = update_node_attributes(G, X, node_new=v)

            # add the subgraph to G
            sub_graph_edges = [edge for edge in first_sub_graph.edges(data=True)]
            G.add_edges_from(sub_graph_edges)
            nodelist = [node for node in G.nodes(data=False)]
            G = set_part_node_attributes(G, nodelist, X)

            connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
            Len_C = len(connected_components)
        print("radiusG(Finally): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    if save:
        with open('radiusG.pkl', 'wb') as output:
            pickle.dump(G, output)
        print("radiusG is saved")

    if shpfile:
        # X Decomposition
        X_pca = XDecomposition(X, n_components=2)
        Edge2Shpfile(G, X_pca, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs=None)
        X2Shpfile(X_pca, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs=None)
        print("Shpfile is saved")

    return G
# getRBFKernelsGraph
def getRBFKernelsGraph(X, gamma=1., threshold=5., merage_components=False, n_jobs=-1, shpfile=False):
    params = {}
    params['gamma'] = gamma
    K = pairwise_kernels(X, metric='rbf', filter_params=True, n_jobs=n_jobs, **params)  # a kernel matrix K
    # K.tocoo()
    Kcsr = sp.csr_matrix(K)
    K = Kcsr.tocoo()
    print('A kernel matrix K: ', K.shape)
    # K(x, y) = exp(-gamma ||x-y||^2) ----> gamma = sigma^-2; sigma^2 is the variance of the Gaussian kernel.
    affinity = 0.5 * (K + K.T)
    edges_list, node_index = [], []
    if hasattr(affinity, 'tocoo'):
        A = affinity.tocoo()
        # edges_list = [(i, j, {'weight': (v + 1e-5)}) for i, j, v in zip(A.row, A.col, A.data) if
        #               v >= threshold and i < j]  # coo matrix
        for i, j, v in zip(A.row, A.col, A.data):  # coo matrix
            if v >= threshold and i < j:
                edges_list.append((i, j, {'weight': v}))
                node_index.append(i)
                node_index.append(j)
    else:
        A = affinity
        (m, _) = A.shape
        # edges_list = [(i, j, {'weight': (A[i, j] + 1e-5)}) for i in range(m) for j in range(m) if
        #               A[i, j] >= threshold and i < j]
        for i in range(m):
            for j in range(m):
                if A[i, j] >= threshold and i < j:
                    edges_list.append((i, j, {'weight': A[i, j]}))
                    node_index.append(i)
                    node_index.append(j)

    G = nx.Graph()
    G.add_edges_from(edges_list)
    node_index = list(set(node_index))
    print("RBFKernelsGraph: number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    # Add node attributes
    G = set_node_attributes(G, X)
    if G.number_of_nodes() < X.shape[0]:
        print("RBFKernelsGraph Fixing (Isolated Points) ......... ")
        # Fixed Graph
        node_all_index = [nd for nd in range(X.shape[0])]
        node_non_index = list(set(node_all_index) - set(node_index))
        for ndi in node_non_index:
            ptq = X[ndi:ndi + 1, :]
            indices, distances = query_GraphND_cKDTree(G, points=ptq, num_feature=X.shape[1])
            pty = X[indices[0]:indices[0] + 1, :]
            rbf = rbf_kernel(X=ptq, Y=pty, gamma=gamma)
            # add_edge = [(ndi, indices[0], {'weight': 1.0 / (distances[0] + 1e-5)})]
            # add_edge = [(ndi, indices[0], {'weight': rbf[0][0]})]
            w = np.sqrt(np.log(rbf[0][0] + 1e-5) / (-gamma)) / (distances[0] + 1e-5)
            add_edge = [(ndi, indices[0], {'weight': w})]
            G.add_edges_from(add_edge)
            # update the new node attributes
            G = update_node_attributes(G, X, node_new=ndi)
            node_index.append(ndi)
        print("RBFKernelsGraph(Fixed): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(),
                                                                                        G.number_of_edges()))
    # connected components in G
    if merage_components:
        connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
        Len_C = len(connected_components)
        print('RBFKernelsGraph Components Numbers: ', Len_C)
        print("RBFKernelsGraph Components Connecting ......... ")
        while Len_C >= 2:
            first_components = connected_components[0]
            # first_sub_graph
            first_sub_graph = G.subgraph(first_components).copy()

            # remove_nodes_from first_sub_graph for G
            G.remove_nodes_from(first_components)

            index_v = [nd for nd, data in first_sub_graph.nodes(data=True)]
            points = X[index_v, :]

            index_us, distances = query_GraphND_cKDTree(G, points=points, num_feature=X.shape[1])

            index_us, distances = list(index_us), list(distances)
            min_index = distances.index(min(distances))  # index of the minimum distance
            v, u, dis = index_v[min_index], index_us[min_index], distances[min_index]

            ptv, ptu = X[v:v + 1, :], X[u:u + 1, :]
            rbf = rbf_kernel(X=ptv, Y=ptu, gamma=gamma)
            w = np.sqrt(np.log(rbf[0][0] + 1e-5) / (-gamma)) / (dis + 1e-5)
            bridge_edge = [(v, u, {'weight': w})]
            # bridge_edge = [(v, u, {'weight': 1.0 / (dis + 1e-5)})]  # only distance

            # add bridge edge
            G.add_edges_from(bridge_edge)

            # update the new node attributes
            G = update_node_attributes(G, X, node_new=v)

            # add the subgraph to G
            sub_graph_edges = [edge for edge in first_sub_graph.edges(data=True)]
            G.add_edges_from(sub_graph_edges)
            nodelist = [node for node in G.nodes(data=False)]
            G = set_part_node_attributes(G, nodelist, X)

            connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
            Len_C = len(connected_components)
        print("RBFKernelsGraph(Finally): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(),
                                                                                          G.number_of_edges()))

    if shpfile:
        X_pca = XDecomposition(X, n_components=2)
        if X_pca.shape[1] <= X.shape[1]:
            Edge2Shpfile(G, X_pca, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs=None)
            X2Shpfile(X_pca, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs=None)
            print("Shpfile is saved")

    return G

1305 # Priori Knowledge 1
1306 def get_pk_MiniBatchKMeans(X, nodes, n_clusters=2, init_size=2, batch_size=500, n_init=10, max_no_improvement=10, verbose=0):
1307     # # # # # # # # # # # # # # Priori Knowledges # # # # # # # # # # # # # # #
1308     from sklearn.cluster import MiniBatchKMeans
1309     from sklearn import metrics
1310     # Priori Knowledge 1
1311     # n_clusters = 2542
1312     mbkm = MiniBatchKMeans(init='k-means++', n_clusters=n_clusters,
1313                            batch_size=batch_size, n_init=n_init,
1314                            init_size=init_size,
1315                            max_no_improvement=max_no_improvement, verbose=verbose).fit(X)
1316     labels_ = mbkm.labels_
1317     print("X -> Silhouette score: %0.3f" % metrics.silhouette_score(X, labels_))
1318     print("X -> CH score: %0.3f" % metrics.calinski_harabasz_score(X, labels_))
1319     pk_partion = []
1320     if -1 in labels_:
1321         index = np.where(labels_ == -1)[0]
1322         pk_p = nodes[index].tolist()
1323         for pk in pk_p:
1324             pk_partion.append([pk])
1325         n_clusters = len(set(labels_)) - 1
1326         # print('n_clusters', n_clusters)
1327         for lab in range(0, n_clusters, 1):
1328             index = np.where(labels_ == lab)[0]
1329             pk_p = nodes[index].tolist()
1330             pk_partion.append(pk_p)
1331     else:
1332         n_clusters = len(set(labels_))
1333         for lab in range(0, n_clusters, 1):
1334             index = np.where(labels_ == lab)[0]
1335             pk_p = nodes[index].tolist()
1336             pk_partion.append(pk_p)
1337     # sort
1338     results = []
1339     for sb_result in pk_partion:
1340         sb_result.sort()
1341         results.append(sb_result)
1342     results.sort()
1343     pk_partion = results
1344 
1345     output = open('mbkm_pk_partion.pkl', 'wb')
1346     pickle.dump(pk_partion, output)
1347     output.close()
1348 
1349     return pk_partion
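     # Usage sketch for the prior above (illustrative values; assumes X and the
     # node ids of an already-built graph G):
     # nodes = np.array(list(G.nodes), dtype=int)
     # pk_partion = get_pk_MiniBatchKMeans(X, nodes=nodes, n_clusters=50, init_size=50)
     # pk_partion is a sorted list of node-id lists (one per cluster), also pickled
     # to 'mbkm_pk_partion.pkl'.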
1350 # Priori Knowledge 2
1351 def get_pk_DBSCAN(X, nodes, eps=0.02, min_samples=25):
1352     # Priori Knowledge 2
1353     from sklearn.cluster import DBSCAN
1354     from sklearn import metrics
1355     db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
1356     labels_ = db.labels_
1357     print("X -> Silhouette score: %0.3f" % metrics.silhouette_score(X, labels_))
1358     print("X -> CH score: %0.3f" % metrics.calinski_harabasz_score(X, labels_))
1359     pk_partion = []
1360     if -1 in labels_:
1361         index = np.where(labels_ == -1)[0]
1362         pk_p = nodes[index].tolist()
1363         for pk in pk_p:
1364             pk_partion.append([pk])
1365         n_clusters = len(set(labels_)) - 1
1366         for lab in range(0, n_clusters, 1):
1367             index = np.where(labels_ == lab)[0]
1368             pk_p = nodes[index].tolist()
1369             pk_partion.append(pk_p)
1370     else:
1371         n_clusters = len(set(labels_))
1372         print('n_clusters', n_clusters)
1373         for lab in range(0, n_clusters, 1):
1374             index = np.where(labels_ == lab)[0]
1375             pk_p = nodes[index].tolist()
1376             pk_partion.append(pk_p)
1377     # sort
1378     results = []
1379     for sb_result in pk_partion:
1380         sb_result.sort()
1381         results.append(sb_result)
1382     results.sort()
1383     pk_partion = results
1384     # print(pk_partion)
1385     # print(len(pk_partion))
1386 
1387     output = open('db_pk_partion.pkl', 'wb')
1388     pickle.dump(pk_partion, output)
1389     output.close()
1390 
1391     return pk_partion
1392 
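     # Note on the prior above: DBSCAN marks noise with label -1; the -1 branch
     # turns each noise point into its own singleton cluster before the regular
     # clusters are appended, mirroring get_pk_MiniBatchKMeans.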
1393 # get_X_fromSHP
1394 def get_X_fromSHP(filename=r'./park/BJ_park_m.shp', epsg4326_srid=2436, plot=True):
1395     gdata = gpd.read_file(filename=filename, bbox=None, mask=None, rows=None)  # read a vector file from disk
1396     # gdata = gpd.read_file(filename=r'C:\Users\jeshy\Documents\ArcGIS\J50.gdb', layer='BOUA')  # read vector data from a .gdb
1397     print('Input geodata crs:', gdata.crs)  # inspect the data's projection (coordinate reference system) ---> epsg == 4326 (WGS_84)
1398     gdata = gdata.to_crs(epsg=epsg4326_srid)  # epsg == 2436 (X, Y)
1399     print('Output geodata crs:', gdata.crs)
1400     print('Column names', gdata.columns.values)  # column names
1401     # print(gdata.head())  # view the first 5 rows
1402     if plot:
1403         gdata.plot()
1404         plt.show()  # show the plot
1405 
1406     geo = gdata.geometry.values
1407     X0, Y0 = geo.x, geo.y
1408     X = np.zeros(shape=(X0.shape[0], 2), dtype=float)
1409     for i in range(X0.shape[0]):
1410         X[i, 0], X[i, 1] = X0[i],  Y0[i]
1411 
1412     print('n_samples:', X0.shape[0])
1413     output = open('X.pkl', 'wb')
1414     pickle.dump(X, output)
1415     output.close()
1416     print('X.pkl is saved')
1417 
1418     from sklearn.preprocessing import StandardScaler
1419     X_trans = StandardScaler().fit_transform(X)
1420     output = open('trans_X.pkl', 'wb')
1421     pickle.dump(X_trans, output)
1422     output.close()
1423     print('trans_X.pkl is saved')
1424 
1425     return X
1426 # get_G_X_fromOSM
1427 def get_G_X_fromOSM(osmfile=r'./osm/bj_six_ring.osm', simplify=True, gexf_save=True, show_graph=False):
1428     from networkx import Graph
1429     from osmnx import graph_from_xml, plot_graph, settings, config
1430     # settings and config
1431     utn = settings.useful_tags_node
1432     oxna = settings.osm_xml_node_attrs
1433     oxnt = settings.osm_xml_node_tags
1434     utw = settings.useful_tags_way
1435     oxwa = settings.osm_xml_way_attrs
1436     oxwt = settings.osm_xml_way_tags
1437     utn = list(set(utn + oxna + oxnt))
1438     utw = list(set(utw + oxwa + oxwt))
1439     crs = settings.default_crs
1440     config(all_oneway=True, useful_tags_node=utn, useful_tags_way=utw, default_crs=crs)
1441 
1442     root = et.parse(osmfile).getroot()
1443     nodes_dict = {}
1444     for node in root.findall('node'):
1445         nodes_dict[int(node.attrib['id'])] = (float(node.attrib['lon']), float(node.attrib['lat']))
1446 
1447     M = graph_from_xml(osmfile, simplify=simplify)
1448     # print('G: number_of_nodes:', M.number_of_nodes(), 'number_of_edges:', M.number_of_edges())
1449 
1450     # plot_graph
1451     if show_graph:
1452         plot_graph(M,
1453                     figsize=(16, 16),
1454                     bgcolor=None,
1455                     node_color="#999999", node_size=15, node_alpha=None, node_edgecolor="none",
1456                     edge_color="#999999", edge_linewidth=1, edge_alpha=None,
1457                     dpi=600,
1458                     save=True, filepath='./osm.png')
1459 
1460     # Convert M to Graph G
1461     G = nx.Graph()
1462     G.add_edges_from(Graph(M).edges())
1463     print("G: number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
1464 
1465     # write_gexf
1466     if gexf_save:
1467         nx.write_gexf(G, 'osm.gexf')
1468 
1469     # relabel node id
1470     mapping, index, X = {}, 0, []
1471     for node in G.nodes:
1472         mapping[node] = index
1473         lon, lat = nodes_dict[node][0], nodes_dict[node][1]
1474         X.append(np.array([lon, lat], dtype=float))
1475         index += 1
1476     X = np.array(X, dtype=float)
1477     G = nx.relabel_nodes(G, mapping)
1478 
1479     return G, X
1480 
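     # Usage sketch (the .osm path is the default above):
     # G, X = get_G_X_fromOSM(osmfile=r'./osm/bj_six_ring.osm', simplify=True)
     # Nodes are relabelled 0..n-1, so row i of X holds the (lon, lat) of node i.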
1481 def get_initial_partition(pk_partion):
1482     initial_partition = {nd: clus for clus in range(len(pk_partion)) for nd in pk_partion[clus]}
1483     return initial_partition
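 # Example: get_initial_partition([[0, 1], [2, 3, 4]]) -> {0: 0, 1: 0, 2: 1, 3: 1, 4: 1},
 # i.e. a node-id -> cluster-index dict, the form Infomap accepts as an initial partition.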
1484 def findPrioriKnowledges(G, initial_partition=None, edge_data_flg=False):
1485     import infomap
1486     infomapX = infomap.Infomap("--two-level")
1487     # print("Building Infomap network from a NetworkX graph...")
1488 
1489     if edge_data_flg:
1490         for v, u, w in G.edges(data=edge_data_flg):
1491             e = (v, u, w['weight'])
1492             infomapX.addLink(*e)
1493     else:
1494         for e in G.edges(data=edge_data_flg):
1495             infomapX.addLink(*e)
1496 
1497     # print("Find communities with Infomap...")
1498     infomapX.run(initial_partition=initial_partition, silent=True)
1499     # print("Found {} modules with codelength: {}".format(infomapX.numTopModules(), infomapX.codelength))
1500     communities = {}
1501     for node in infomapX.iterLeafNodes():
1502         communities[node.physicalId] = node.moduleIndex()
1503     nx.set_node_attributes(G, values=communities, name='community')
1504     return infomapX.numTopModules(), communities
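 # Note: the "--two-level" flag restricts Infomap to a flat (two-level) partition,
 # which is what the prior needs; the detected communities are also written back
 # onto G as the 'community' node attribute.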
1505 # color map from http://colorbrewer2.org/
1506 def drawGraph(G):
1507     import matplotlib.colors as colors
1508     # position map
1509     # pos = nx.spring_layout(G)
1510     pos = nx.kamada_kawai_layout(G)
1511     # community ids
1512     communities = [v for k, v in nx.get_node_attributes(G, 'community').items()]
1513     numCommunities = max(communities) + 1
1514     # color map from http://colorbrewer2.org/
1515     cmapLight = colors.ListedColormap(['#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#b3de69','#fccde5','#d9d9d9','#bc80bd','#ccebc5','#ffed6f'], 'indexed',
1516                                       numCommunities)
1517     cmapDark = colors.ListedColormap(['#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928'], 'indexed', numCommunities)
1518     # Draw edges
1519     nx.draw_networkx_edges(G, pos)
1520     # Draw nodes
1521     nodeCollection = nx.draw_networkx_nodes(G,
1522                                             pos=pos,
1523                                             node_color=communities,
1524                                             cmap=cmapLight)
1525     # Set node border color to the darker shade
1526     darkColors = [cmapDark(v) for v in communities]
1527     nodeCollection.set_edgecolor(darkColors)
1528     # Draw node labels
1529     for n in G.nodes():
1530         plt.annotate(n,
1531                      xy=pos[n],
1532                      textcoords='offset points',
1533                      horizontalalignment='center',
1534                      verticalalignment='center',
1535                      xytext=[0, 0],
1536                      color=cmapDark(communities[n]))
1537     plt.axis('off')
1538     # plt.savefig("karate.png")
1539     plt.show()
1540 # Priori Knowledge 3
1541 def get_PKResult(G, initial_partition=None, gshow=False, edge_data_flg=False):
1542     # print("Partition by Infomap ..........")
1543     numModules, pks = findPrioriKnowledges(G, initial_partition=initial_partition, edge_data_flg=edge_data_flg)
1544     if gshow:
1545         drawGraph(G)
1546     list_values = [val for val in pks.values()]
1547     list_nodes = [node for node in pks.keys()]
1548     print('pk sorting .......')
1549     result = []
1550     for ndpa in range(numModules):
1551         nodes_part_index = np.argwhere(np.array(list_values) == ndpa).ravel()
1552         nodes_part = list(np.array(list_nodes)[nodes_part_index])
1553         nodes_part.sort()
1554         result.append(nodes_part)
1555     result.sort()
1556     # print("pk size:", len(result))
1557     # print("pk result:", result)
1558 
1559     # output = open('info_pk_partion.pkl', 'wb')
1560     # pickle.dump(result, output)
1561     # output.close()
1562 
1563     return result
1564 
1565 
1566 # MaxDIClustering Main Class
1567 class MaxDIClustering():
1568     def __init__(self, affinity='radius',
1569                  n_neighbors=10,  # getKNNGraph
1570                  threshold=2.25,  # getKNNGraph, getRBFKernelsGraph
1571                  radius=2.25,  # getRadiusGraph
1572                  gamma=0.25,
1573                  # getRBFKernelsGraph # K(x, y) = exp(-gamma ||x-y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
1574                  merage_components=True,  # getKNNGraph, getRadiusGraph, getRBFKernelsGraph
1575                  pk='1',  # -- PK
1576                  n_clusters=2542, init_size=2542,  # get_pk_MiniBatchKMeans -- PK
1577                  eps=1000, min_samples=50,  # get_pk_DBSCAN -- PK
1578                  n_jobs=None):
1579         self.affinity = affinity
1580         self.radius = radius
1581         self.n_neighbors = n_neighbors
1582         self.threshold = threshold
1583         self.gamma = gamma  # K(x, y) = exp(-gamma ||x-y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
1584         self.merage_components = merage_components
1585         self.n_clusters = n_clusters
1586         self.init_size = init_size
1587         self.eps = eps
1588         self.min_samples = min_samples
1589         self.pk = pk
1590         self.n_jobs = n_jobs
1591 
1592     def fit(self, X):
1593         if self.affinity == 'radius':
1594             self.G = getRadiusGraph(X, radius=self.radius, merage_components=self.merage_components, save=False,
1595                                     shpfile=False)
1596         elif self.affinity == 'knn':
1597             self.G = getKNNGraph(X, n_neighbors=self.n_neighbors, threshold=self.threshold,
1598                                  merage_components=self.merage_components, save=False, shpfile=False)
1599         elif self.affinity == 'rbf':  # RBFKernelsGraph # K(x, y) = exp(-gamma ||x-y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
1600             self.G = getRBFKernelsGraph(X, gamma=self.gamma, threshold=self.threshold,
1601                                         merage_components=self.merage_components, n_jobs=self.n_jobs, shpfile=False)
1602         else:
1603             raise ValueError("Unknown Graph Type %r" % self.affinity)
1604 
1605         self.nodes = np.array([nd for nd in self.G.nodes], dtype=int)
1606 
1607         # Priori Knowledge 1
1608         if self.pk == '1':
1609             pk_partion_hat = get_pk_MiniBatchKMeans(X, nodes=self.nodes, n_clusters=self.n_clusters,
1610                                                 init_size=self.init_size)
1611             # pk_partion_hat = get_PKResult(self.G, initial_partition=get_initial_partition(pk_partion_hat), gshow=False,
1612             #                               edge_data_flg=True)
1613             self.pk_DI_ = get_DI_Parallel(self.G, partion=pk_partion_hat, weight='weight', n_jobs=self.n_jobs,
1614                                           verbose=0)
1615         # Priori Knowledge 2
1616         elif self.pk == '2':
1617             pk_partion_hat = get_pk_DBSCAN(X, nodes=self.nodes, eps=self.eps, min_samples=self.min_samples)
1618             # pk_partion_hat = get_PKResult(self.G, initial_partition=get_initial_partition(pk_partion_hat), gshow=False,
1619             #                               edge_data_flg=True)
1620             self.pk_DI_ = get_DI_Parallel(self.G, partion=pk_partion_hat, weight='weight', n_jobs=self.n_jobs,
1621                                           verbose=0)
1622         # Priori Knowledge 3
1623         elif self.pk == '3':
1624             pk_partion_hat = get_PKResult(self.G, initial_partition=None, gshow=False, edge_data_flg=True)
1625             self.pk_DI_ = get_DI_Parallel(self.G, partion=pk_partion_hat, weight='weight', n_jobs=self.n_jobs,
1626                                           verbose=0)
1627         # None Priori Knowledge
1628         else:
1629             pk_partion_hat = None
1630 
1631         self.OSIE_ = get_oneStruInforEntropy(self.G, weight='weight')
1632         self.OSIE_ = float('%.3f' % self.OSIE_)
1633 
1634         self.results_, self.DIs_ = iter_maxDI(self.G, iter_max=100, pk_partion=pk_partion_hat, weight='weight',
1635                                              n_jobs=self.n_jobs, verbose=0)
1636         self.DI_ = self.DIs_[-1]
1637 
1638         self.labels_ = label_result(nsamples=X.shape[0], result=self.results_).astype(int)
1639 
1640         self.DI_rate_ = self.DI_ / self.OSIE_
1641 
1642         from sklearn.metrics import silhouette_score, calinski_harabasz_score
1643         self.si_score_ = silhouette_score(X, self.labels_, metric='euclidean')
1644         self.ch_score_ = calinski_harabasz_score(X, self.labels_)
1645 
1646         return self
1647 
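 # Minimal usage sketch for MaxDIClustering (X is an (n_samples, 2) coordinate
 # array; radius=500 is an illustrative value):
 # clus = MaxDIClustering(affinity='radius', radius=500, pk='3', n_jobs=4).fit(X)
 # print(clus.DI_, clus.OSIE_, clus.DI_rate_, clus.si_score_)
 # print(clus.labels_[:10])  # cluster id per sample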
1648 
1649 # # # # # # # # # # # # # # # test and Demonstration # # # # # # # # # # # # # # # #
1650 # test cKDTree and pairwise_kernels Graph
1651 def test():
1652 
1653     # test
1654     output = open('park_X.pkl', 'rb')
1655     X = pickle.load(output)
1656     output.close()
1657     output = open('busstop_X.pkl', 'rb')
1658     X = pickle.load(output)
1659     output.close()
1660 
1661     # m, n = X.shape
1662 
1663     # show
1664     print(X.shape)# (42161, 2)
1665     print(X[[0, 1, 2]])# [[ 451389.26206748 4425262.84838121] .... ]
1666 
1667     # cKDTree
1668     tree = cKDTree(data=X, leafsize=8, compact_nodes=True, balanced_tree=True)
1669     X_query = X[[0, 1, 2]]
1670     dis, index = tree.query(x=X_query, k=1, n_jobs=-1)
1671     X_query_out = X[index]
1672 
1673     print(dis, index, sep='\n')  # [0. 0. 0.]   [0   1   6931]
1674     print(X_query)# [[ 451389.26206748 4425262.84838121] ... ]
1675     print(X_query_out)# [[ 451389.26206748 4425262.84838121] ... ]
1676 
1677     M = 25000  # 42161 --> size is too large.
1678     threshold = 0.998
1679 
1680     params = {}
1681     params['gamma'] = 1
1682     K = pairwise_kernels(X[:M, :], metric='rbf', filter_params=True, n_jobs=-1, **params)# A kernel matrix K
1683 
1684     # K.tocoo()
1685     Kcsr = sp.csr_matrix(K)
1686     K = Kcsr.tocoo()
1687     print('A kernel matrix K: ', K.shape)
1688     # K(x, y) = exp(-gamma ||x-y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
1689 
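     # K should already be symmetric for the RBF kernel; 0.5 * (K + K.T) below
     # only irons out floating-point asymmetry before thresholding.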
1690     affinity = 0.5 * (K + K.T)
1691     edges_list, node_index = [], []
1692     if hasattr(affinity, 'tocoo'):
1693         A = affinity.tocoo()
1694         # edges_list = [(i, j, {'weight': (v + 1e-5)}) for i, j, v in zip(A.row, A.col, A.data) if
1695         #               v >= threshold and i < j]  # coo matrix
1696         for i, j, v in zip(A.row, A.col, A.data):  # coo matrix
1697             if v >= threshold and i < j:
1698                 edges_list.append((i, j, {'weight': v}))
1699                 node_index.append(i)
1700                 node_index.append(j)
1701     # else:
1702     #     A = affinity
1703     #     (m, _) = A.shape
1704     #     # edges_list = [(i, j, {'weight': (A[i, j] + 1e-5)}) for i in range(m) for j in range(m) if
1705     #     #               A[i, j] >= threshold and i < j]
1706     #     for i in range(m):
1707     #         for j in range(m):
1708     #             if A[i, j] >= threshold and i < j:
1709     #                 edges_list.append((i, j, {'weight': A[i, j]}))
1710     #                 node_index.append(i)
1711     #                 node_index.append(j)
1712 
1713     G = nx.Graph()
1714     G.add_edges_from(edges_list)
1715     # node_index = list(set(node_index))
1716     print("KernelsGraph: number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
1717 
1718     # FixingGraph
1719     G = FixingGraph(G, X, merage_components=True)
1720 
1721     # save
1722     output = open('busstop_KernelsGraph_part.pkl', 'wb')
1723     pickle.dump(G, output)
1724     output.close()
1725 # rbf_Clustering by MaxDIClustering
1726 def rbf_Clustering():
1727     # Load the geodata
1728     output = open('park_X.pkl', 'rb')# park_X = get_X_fromSHP(filename=r'./park/BJ_park_m.shp', epsg4326_srid=2436, plot=True)
1729     park_X = pickle.load(output)
1730     output.close()
1731 
1732     output = open('busstop_X.pkl', 'rb')# busstop_X = get_X_fromSHP(filename=r'./busstop/BeijingBusStops_84.shp', epsg4326_srid=2436, plot=True)
1733     busstop_X = pickle.load(output)# (42161, 2) # Maybe MemoryError --> A RBF kernel matrix K: (42161, 42161)!
1734     output.close()
1735 
1736     output = open('./busstop_RBFGraph.pkl', 'rb')
1737     RBFGraph = pickle.load(output)
1738     output.close()
1739 
1740     print('------------------------------Extract Partition Outlines----------------------------------')
1741     # Select any graph extraction method: getRadiusGraph, getKNNGraph, getRBFKernelsGraph
1742     G = getRadiusGraph(X=park_X, radius=500, merage_components=True, save=False, shpfile=False)
1743     partion_result = get_PKResult(G, initial_partition=None, gshow=False, edge_data_flg=True)
1744     outline_gdf = get_partion_result_outline(X=park_X, partion_result=partion_result, input_crs="EPSG:2436",
1745                                              output_crs=None,
1746                                              shpfile="partion_outline_shp.shp", save_shp=True, plot=False)
1747     output = open('Extract_Partion_Outlines_gdf.pkl', 'wb')
1748     pickle.dump(outline_gdf, output)
1749     output.close()
1750     print('------------------------------Extract Partition Outlines End----------------------------------')
1751 
1752     print('------------------------------RBFKernelsGraph----------------------------------')
1753     print('park_X......')
1754     X = park_X
1755     # Clustering by MaxDIClustering
1756     max_di_clus = MaxDIClustering(affinity='rbf',
1757                                   threshold=0.998,  # getKNNGraph, getRBFKernelsGraph
1758                                   gamma=1.0,
1759                                   # getRBFKernelsGraph # K(x, y) = exp(-gamma ||x-y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
1760                                   merage_components=True,  # getKNNGraph, getRadiusGraph, getRBFKernelsGraph
1761                                   pk='3',  # -- PK
1762                                   n_jobs=4).fit(X)
1763     output = open('park_X_max_di_clus_rbf.pkl', 'wb')
1764     pickle.dump(max_di_clus, output)
1765     output.close()
1766     print('DI_', max_di_clus.DI_)
1767     print('OSIE_', max_di_clus.OSIE_)
1768     print('DI_rate_', max_di_clus.DI_rate_)
1769     print('si_score_', max_di_clus.si_score_)
1770     # DI_ 6.775
1771     # OSIE_ 9.205
1772     # DI_rate_ 0.736
1773     # si_score_ 0.299
1774 
1775     print('busstop_X......')
1776     X = busstop_X  # (42161, 2) # MemoryError: Unable to allocate 13.2 GiB for an array with shape (42161, 42161) and data type float64
1777     # # X clip by beijing-6ring-road.osm # #
1778     X_gdf = X2GDF(X, input_crs="EPSG:2436", output_crs=None, plot=None)
1779     # _, __, gpoly_mask = get_poly_from_osm_circleline(circleline_osmfile=r"../osm/beijing_car_seg_6ring road.osm", toepsg=2436)  # (23946, 2) # MemoryError too
1780     _, __, gpoly_mask = get_poly_from_osm_circleline(circleline_osmfile=r"../osm/beijing_5ring.osm", toepsg=2436)
1781     gdata = gpd.clip(gdf=X_gdf, mask=gpoly_mask, keep_geom_type=True)
1782     X = GDF2X(gdata, to_epsg=2436, plot=False)
1783     # Clustering by MaxDIClustering
1784     max_di_clus = MaxDIClustering(affinity='rbf',
1785                                   threshold=0.998,  # getKNNGraph, getRBFKernelsGraph
1786                                   gamma=1.0,
1787                                   # getRBFKernelsGraph # K(x, y) = exp(-gamma ||x-y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
1788                                   merage_components=True,  # getKNNGraph, getRadiusGraph, getRBFKernelsGraph
1789                                   pk='3',  # -- PK
1790                                   n_jobs=4).fit(X)
1791     output = open('busstop_X_max_di_clus_rbf.pkl', 'wb')
1792     pickle.dump(max_di_clus, output)
1793     output.close()
1794     print('DI_', max_di_clus.DI_)
1795     print('OSIE_', max_di_clus.OSIE_)
1796     print('DI_rate_', max_di_clus.DI_rate_)
1797     print('si_score_', max_di_clus.si_score_)
1798     # A kernel matrix K:  (13318, 13318)
1799     # RBFKernelsGraph: number_of_nodes:8278, number_of_edges:15797
1800     # RBFKernelsGraph Fixing (Isolated Points) .........
1801     # RBFKernelsGraph(Fixed): number_of_nodes:13318, number_of_edges:20837
1802     # RBFKernelsGraph Components Numbers:  2271
1803     # RBFKernelsGraph Components Connecting .........
1804     # RBFKernelsGraph(Finally): number_of_nodes:13318, number_of_edges:23107
1805     # DI_ 7.862
1806     # OSIE_ 9.827
1807     # DI_rate_ 0.800
1808     # si_score_ 0.694
1809 
1810     G = RBFGraph  # the RBF graph precomputed in advance on a higher-spec machine
1811     X = busstop_X  # (42161, 2)
1812     # FixingGraph
1813     G = FixingGraph(G, X, merage_components=True)
1814 
1815     pk_partion_hat = get_PKResult(G, initial_partition=None, gshow=False, edge_data_flg=True)
1816     OSIE_ = float('%.3f' % get_oneStruInforEntropy(G, weight='weight'))
1817 
1818     # pk_DI_ = get_DI_Parallel(G, partion=pk_partion_hat, weight='weight', n_jobs=-1, verbose=0)
1819     results_, DI_ = iter_maxDI(G, iter_max=100, pk_partion=pk_partion_hat, weight='weight', n_jobs=-1, verbose=0)
1820 
1821     DI_ = DI_[-1]
1822     labels_ = label_result(nsamples=X.shape[0], result=results_).astype(int)
1823     DI_rate_ = DI_ / OSIE_
1824     from sklearn.metrics import silhouette_score, calinski_harabasz_score
1825     si_score_ = silhouette_score(X, labels_, metric='euclidean')
1826     ch_score_ = calinski_harabasz_score(X, labels_)
1827     print('DI_', DI_)
1828     print('OSIE_', OSIE_)
1829     print('DI_rate_', DI_rate_)
1830     print('si_score_', si_score_)
1831     # DI_ 8.751
1832     # OSIE_ 10.52
1833     # DI_rate_ 0.832
1834     # si_score_ 0.587
1835     print('------------------------------RBFKernelsGraph - end----------------------------------')
1836 # test_StruInforEntropy
1837 def test_StruInforEntropy():
1838     # Unweighted graph, or all weights == 1.0
1839     G = nx.Graph()
1840     edges_list = [(0, 1, {'weight': 1.0}),
1841                   (0, 2, {'weight': 1.0}),  # test: outer point
1842                   (0, 3, {'weight': 1.0}),
1843                   # (0, 4, {'weight': 1.0}),
1844                   (1, 2, {'weight': 1.0}),  # test: outer point
1845                   (1, 3, {'weight': 1.0}),
1846                   (2, 3, {'weight': 1.0}),  # test: outer point
1847                   (2, 4, {'weight': 5.0}),
1848                   (4, 5, {'weight': 1.0}),
1849                   (4, 7, {'weight': 1.0}),
1850                   (5, 6, {'weight': 1.0}),
1851                   (5, 7, {'weight': 1.0}),
1852                   (5, 8, {'weight': 1.0}),
1853                   (6, 8, {'weight': 1.0}),
1854                   (7, 8, {'weight': 1.0}),
1855                   (8, 9, {'weight': 5.0}),
1856                   (9, 10, {'weight': 1.0}),
1857                   (9, 12, {'weight': 1.0}),
1858                   (9, 13, {'weight': 1.0}),
1859                   (10, 11, {'weight': 1.0}),
1860                   (10, 12, {'weight': 1.0}),
1861                   (11, 12, {'weight': 1.0}),
1862                   (11, 13, {'weight': 1.0}),
1863                   (12, 13, {'weight': 1.0})]
1864     G.add_edges_from(edges_list)
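     # The toy graph has three dense blocks {0..3}, {4..8}, {9..13} joined by the
     # heavier bridges (2, 4) and (8, 9), so the natural partition tested further
     # below is [[0, 1, 2, 3], [4, 5, 6, 7, 8], [9, 10, 11, 12, 13]].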
1865 
1866     # plot graph
1867     show_graph(G, with_labels=True)
1868 
1869     # a = [9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
1870     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
1871     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
1872     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
1873     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
1874     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
1875     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
1876     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
1877     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
1878     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
1879     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
1880     #      10.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492]
1881     # total_var = np.var(a)  # population variance (divides by N)
1882     # sample_var = np.var(a, ddof=1)  # sample variance (divides by N - 1)
1883 
1884     # # min2DStruInforEntropyPartition
1885     # results = min2DStruInforEntropyPartition(G, weight='weight')
1886     # print("Partition-Size(by min2DHG):", len(results))
1887     # print("Partition(by min2DHG):", results)
1888     # # 2D-StruInforEntropy
1889     # HG, Vj, g_j, Gj_deg = StruInforEntropy(G, partition=results, weight='weight')
1890     # oneHG = get_oneStruInforEntropy(G, weight='weight')
1891     # print("1DStruInforEntropy:", oneHG)
1892     # print("2DStruInforEntropy:", HG)
1893     # print('--'*15)
1894 
1895     # # maxDI
1896     # results = maxDIPartition(G, weight='weight')  # basic version
1897     # print("Partition-Size(by maxDI):", len(results))
1898     # print("Partition(by maxDI):", results)
1899     # # 2D-StruInforEntropy
1900     # HG, Vj, g_j, Gj_deg = StruInforEntropy(G, partition=results, weight='weight')
1901     # oneHG = get_oneStruInforEntropy(G, weight='weight')
1902     # print("1DStruInforEntropy:", oneHG)
1903     # print("2DStruInforEntropy:", HG)
1904     # print("maxDI:", oneHG-HG)
1905     # oneHG, Vj, g_j, Gj_deg = get_oneStruInforEntropy_Partition(G, partition=results, weight='weight')
1906     # print("1DStruInforEntropy_Partition:", oneHG)
1907 
1908     # print('--'*15)
1909     # results = [[0, 1, 2, 3], [4, 5, 6, 7, 8], [9, 10, 11, 12, 13]]
1910     # print(results)
1911     # HG, Vj, g_j, Gj_deg = StruInforEntropy(G, partition=results, weight='weight')
1912     # print("2DStruInforEntropy:", HG)
1913     # print('Vj', Vj)
1914     # print('g_j', g_j)
1915     # print('--'*15)
1916 
1917     # edges_di = []
1918     # edges = [(u, v, w['weight']) for u, v, w in G.edges(data=True)]
1919     # row, col, data = get_coo_matrix_from_G(G, weight='weight')
1920     # for edge in edges:
1921     #     # DI_ij = delt_RXij(Xi=[edge[0]], Xj=[edge[1]], G=G, weight='weight')
1922     #     DI_ij = deltDI_ij(row, col, data, ndq_a=[edge[0]], ndq_b=[edge[1]])
1923     #     DI_ij = float("%.3f" % (DI_ij))
1924     #     delt_di = ((edge[0], edge[1]), edge[2], DI_ij)
1925     #     edges_di.append(delt_di)
1926     #     print(delt_di)
1927 
1928     # print('--' * 15)
1929     # results, DI = iter_maxDI(G, iter_max=50, pk_partion=None, weight='weight', n_jobs=4, verbose=0)  # parallel speed-up
1930     # print(results)
1931     # print(DI[-1])
1932     # print(DI)
1933     # print('--' * 15)
1934 
1935     # results, DI = get_iter_maxDICluster(G, iter_max=50)  # parallel speed-up
1936     # print(results)
1937     # print(DI[-1])
1938     # print(DI)
1939     # print('--' * 15)
1940 
1941     # maxDI
1942     results = maxDI(G, weight='weight')  # optimized version
1943     print("Partition-Size(by maxDI):", len(results))
1944     # 2D-StruInforEntropy
1945     HG, Vj, g_j, Gj_deg = StruInforEntropy(G, partition=results, weight='weight')
1946     oneHG = get_oneStruInforEntropy(G, weight='weight')
1947     print("1DStruInforEntropy:", float('%.3f' % oneHG))
1948     print("2DStruInforEntropy:", float('%.3f' % HG))
1949     # get_DI by Parallel
1950     DI = get_DI_Parallel(G, partion=results, weight='weight', n_jobs=-1)
1951     print("DI:", DI)
1952     partion_test = [[0, 1, 2, 3], [4, 5, 6, 7, 8], [9, 10, 11, 12, 13]]
1953     DI = get_DI_Parallel(G, partion=partion_test, weight='weight', n_jobs=-1)
1954     print("test-DI:", DI)
1955     row, col, data = get_coo_matrix_from_G(G, weight='weight')
1956     for i in range(len(results)):
1957         for j in range(i+1, len(results)):
1958             ndq_a, ndq_b = results[i], results[j]
1959             DI_ij = deltDI_ij(row, col, data, ndq_a=ndq_a, ndq_b=ndq_b)
1960             DI_ij = float("%.3f" % (DI_ij))
1961             print(ndq_a, '+', ndq_b, '-', ndq_a + ndq_b, 'deltDI=', DI_ij)
1962 
1963     # row, col, data = get_coo_matrix_from_G(G, weight='weight')
1964     # ndq_a = [0, 1, 2, 3]
1965     # ndq_b = [4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
1966     # Vi, Vj, Vij, gi, gj, gij, deg_i, deg_j, deg_ij = get_Vgdeg(row, col, data, ndq_a, ndq_b)
1967     # V_G = np.sum(data) * 2.0
1968     # ndq = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
1969     # Vi, gi, deg_i = get_Vigidegi(row, col, data, ndq)
1970 # typical_MaxDIClustering_Demonstration
1971 def typical_MaxDIClustering_Demonstration():
1972     # Load the geodata
1973     output = open('park_X.pkl', 'rb')# park_X = get_X_fromSHP(filename=r'./park/BJ_park_m.shp', epsg4326_srid=2436, plot=True)
1974     park_X = pickle.load(output)
1975     output.close()
1976 
1977     output = open('busstop_X.pkl', 'rb')# busstop_X = get_X_fromSHP(filename=r'./busstop/BeijingBusStops_84.shp', epsg4326_srid=2436, plot=True)
1978     busstop_X = pickle.load(output)# (42161, 2) # Maybe MemoryError --> A RBF kernel matrix K: (42161, 42161)!
1979     output.close()
1980 
1981     print('------------------------------Extract Partition Outlines----------------------------------')
1982     # Select any graph extraction method: getRadiusGraph, getKNNGraph, getRBFKernelsGraph
1983     G = getRadiusGraph(X=park_X, radius=500, merage_components=True, save=False, shpfile=False)
1984     partion_result = get_PKResult(G, initial_partition=None, gshow=False, edge_data_flg=True)
1985     outline_gdf = get_partion_result_outline(X=park_X, partion_result=partion_result, input_crs="EPSG:2436",
1986                                              output_crs=None,
1987                                              shpfile="partion_outline_shp.shp", save_shp=True, plot=False)
1988     output = open('Extract_Partion_Outlines_gdf.pkl', 'wb')
1989     pickle.dump(outline_gdf, output)
1990     output.close()
1991     print('------------------------------Extract Partition Outlines End----------------------------------')
1992 
1993     # Clustering for demonstration
1994     X = park_X
1995     # Clustering by MaxDIClustering
1996     max_di_clus = MaxDIClustering(affinity='rbf',
1997                                   threshold=0.998,  # other: getKNNGraph, getRBFKernelsGraph
1998                                   gamma=1.0,
1999                                   # for getRBFKernelsGraph # K(x, y) = exp(-gamma ||x-y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
2000                                   merage_components=True,  # other: getKNNGraph, getRadiusGraph, getRBFKernelsGraph
2001                                   pk='3',  # -- PK
2002                                   n_jobs=4).fit(X)
2003     print('DI_', max_di_clus.DI_)
2004     print('OSIE_', max_di_clus.OSIE_)
2005     print('DI_rate_', max_di_clus.DI_rate_)
2006     print('si_score_', max_di_clus.si_score_)
2007 # # # # # # # # # # # # # # # test and Demonstration - end # # # # # # # # # # # # # # # #
2008 
2009 def main():
2010     # # 1. test cKDTree and pairwise_kernels Graph
2011     # test()
2012 
2013     # 2. test_StruInforEntropy
2014     test_StruInforEntropy()
2015 
2016     # # 3. rbf_Clustering by MaxDIClustering
2017     # rbf_Clustering()
2018 
2019     # # 4. typical_MaxDIClustering_Demonstration
2020     # typical_MaxDIClustering_Demonstration()
2021 
2022 if __name__ == '__main__':
2023     main()
Personal study notes.
Original article: https://www.cnblogs.com/jeshy/p/15110393.html