k-Means聚类算法

聚类算法是ML中一个重要分支,一般采用unsupervised learning进行学习,聚类算法分为K-Means, K-Medoids, GMM, Spectral clustering,Ncut五个算法;本文将实现K-eans算法。

K-Means算法:

       1. 将数据分为k个非空子集

       2. 计算每个类中心点(k-means<centroid>中心点是所有点的average),记为seed point

       3. 将每个object聚类到最近seed point

       4. 返回2,当聚类结果不再变化的时候stop

复杂度:

       O(kndt)

       -计算两点间距离:d

       -指定类:O(kn)   ,k是类数

       -迭代次数上限:t

KMeans.h

  1 #include "stdafx.h"
  2 #include <iostream>
  3 using namespace std;
  4 template <typename Type>
  5 class KMeans{
  6 public:
  7     KMeans(const size_t nd =0,const int nk=0,const float precision = 0.0001):m_ndataNumbers(nd),
  8         m_nkNumbers(nk),m_iterations(0),m_datas(NULL),m_center(NULL),m_precision(precision){
  9         m_datas = new Type[m_ndataNumbers];
 10         m_center = new Type[m_nkNumbers];
 11     }
 12     KMeans(Type[],Type[],const size_t ,const int,const float);
 13     ~KMeans(){
 14         delete[]m_datas;
 15         delete[]m_center;
 16     }
 17     
 18     Type* getDatas()const;              // get the datas
 19     Type* getCenter()const;         // get the centers
 20     int iterationTimes()const;      // iteration times 
 21     void kmeans();        // carry out k-means 
 22     void printCenter(); // cout center
 23 private:
 24     float dataDivide(Type* , size_t*);   // data divide
 25     void changeCenters(Type* , size_t*);   //  change centers 
 26 private:
 27     Type *m_datas;
 28     Type *m_center;
 29     const size_t m_ndataNumbers;   //data numbers
 30     const  int m_nkNumbers;   // center numbers
 31     const float m_precision;    // end iteration precision 
 32     int m_iterations;   //    carry out times
 33 };
 34  // initialize the datas and the center
 35 template <typename Type>
 36 KMeans<Type>::KMeans( Type datas[] ,Type center[],const size_t nd,const int nk,const float precision):
 37     m_ndataNumbers(nd),m_nkNumbers(nk),m_iterations(0),m_center(NULL),m_datas(NULL),m_precision(precision){
 38     m_datas = new Type[m_ndataNumbers];
 39     m_center=new Type[m_nkNumbers];
 40     for(size_t i = 0 ; i<m_ndataNumbers ;i++){
 41         m_datas[i] = datas[i];
 42     }
 43     for(int i = 0 ; i<m_nkNumbers ; i++){
 44         m_center[i] = center[i];
 45     }
 46 }
 47  template <typename Type>
 48  Type* KMeans<Type> ::getDatas()const{
 49      return this->m_datas;
 50  }
 51  // get the center
 52  template <typename Type>
 53  Type* KMeans<Type> ::getCenter()const{
 54      return this->m_center;
 55  }
 56  // get iteration times
 57  template<typename Type>
 58  int KMeans<Type>::iterationTimes()const{
 59      return this->m_iterations;
 60  }
 61  // carry out kmeans
 62  template<typename Type>
 63  void KMeans<Type>::kmeans(){
 64      float previous  = 0;   //  
 65      float current = 1;    
 66      size_t *numbers = new size_t[m_nkNumbers];   // record every cluster datas
 67      Type *sumvalues = new Type[m_nkNumbers];   // record every cluster values
 68      while((current-previous)>m_precision){ 
 69          // initialize zero 
 70          for(int i = 0 ; i<m_nkNumbers ; i++){
 71              numbers[i] = 0;
 72              sumvalues[i] =0;
 73          }
 74          previous = current;
 75          current = dataDivide(sumvalues,numbers);
 76          changeCenters(sumvalues,numbers);
 77          m_iterations++;
 78         
 79      }
 80      delete[] numbers;
 81      delete[] sumvalues;  
 82  }
 83  // data divide
 84 template <typename Type>
 85 float KMeans<Type>::dataDivide(Type*sumvalues , size_t*numbers){
 86     float dist = 0.0;
 87     for(size_t i = 0 ; i<m_ndataNumbers ; i++){
 88         float d = sqrt(float(m_datas[i]-m_center[0]));
 89         int pos = 0;
 90         for(int j =1 ; j <m_nkNumbers ; j++){
 91             if(d > sqrt(float(m_datas[i]-m_center[j]))){
 92                 d = sqrt(float(m_datas[i]-m_center[j]));
 93                 pos = j;
 94             }
 95         dist+=d;
 96         sumvalues[pos]+=m_datas[i];
 97         numbers[pos]++;
 98         }
 99     }
100     return dist;
101 }
102     // change the center
103 template<typename Type>
104 void KMeans<Type>::changeCenters(Type*sumvalues , size_t*numbers){
105     for(int i=0 ; i<m_nkNumbers ; i++){
106         if(numbers[i]==0)continue;
107         m_center[i] = sumvalues[i]/numbers[i];
108     }
109 }
110 template<typename Type>
111 void KMeans<Type>::printCenter(){
112     for(int i = 0 ; i<m_nkNumbers ;i++){
113         cout << "center " << i <<":  "<< m_center[i] <<endl;
114     }
115     cout <<endl;
116 }
原文地址:https://www.cnblogs.com/bobo0892/p/4002988.html