K-Modes算法[聚类算法]

聚类算法k-Modes的实现

<?php
/*
 * K-modes algorithm (an implementation of the clustering algorithm).
 */


  7 /*
  8 *获取簇的数目
  9 */
 10 //--------------------------------------------------------------------    
 11 function Category($train)
 12 {
 13     $category = array(NULL);//存放不同的类别
 14     array_splice($category,0,1);
 15     
 16     for($i=1;$i<count($train);$i++)
 17     {
 18         $flags = true;//标志,用于标记将要存入的类别是否已经存在
 19         for($j=0;$j<count($category);$j++)
 20         {
 21             if($category[$j]==$train[$i][count($train[$i])-1])
 22             {
 23                 $flags = false;
 24                  break;
 25             }
 26         }
 27         if($flags)
 28         {
 29             array_push($category,$train[$i][count($train[$i])-1]);
 30         }
 31     }
 32     return $category;
 33 }
 34 //--------------------------------------------------------------------
 35 
 36 /*
 37 *获得初始矩阵M
 38 */
 39 //--------------------------------------------------------------------
 40 function first_M($train)
 41 {
 42     $category = Category($train);
 43     $M = array(NULL);
 44     array_splice($M,0,1);
 45     $num = 1;
 46     for($j=0;$j<count($category);$j++)
 47     {
 48         while($num<count($train))
 49         {
 50             if($train[$num][count($train[$num])-1]==$category[$j])
 51             {
 52                 $temp = $train[$num];
 53                 //print_r($temp);
 54                 array_splice($temp,0,1);
 55                 array_splice($temp,count($temp)-1,1);
 56                 array_push($M,$temp);
 57                 $num++;
 58                 break;
 59             }else{
 60                 $num++;
 61             }
 62         }
 63     }
 64 /*    echo "<pre>";
 65     print_r($M);
 66 */
 67     return $M;
 68 }
 69 //--------------------------------------------------------------------
 70 
 71 
 72 /*
 73 *获得距离dis(ml,ei)
 74 */
 75 //--------------------------------------------------------------------
 76 function dis($array,$e)
 77 {
 78     $temp = $array;
 79     $sum = 0;
 80     for($i=1;$i<count($array)-1;$i++)
 81     {
 82         if($array[$i]!=$e[$i-1])
 83         {
 84             $sum++;
 85         }
 86     }
 87     return $sum;
 88 }
 89 //--------------------------------------------------------------------
 90 
 91 /*
 92 *获得新的矩阵W
 93 */
 94 //--------------------------------------------------------------------
 95 function W($train,$M)
 96 {
 97     $W = array(NULL);
 98 
 99     for($i=1;$i<count($train);$i++)
100     {
101         $flags = true;
102         $min = dis($train[$i],$M[0]);
103         for($j=2;$j<=count($M);$j++)
104         {
105             if(dis($train[$i],$M[$j-1])<$min)
106             {
107                 $min = dis($train[$j],$M[$j-1]);
108             }
109         }
110         
111         for($j=1;$j<=count($M);$j++)
112         {
113             if(dis($train[$i],$M[$j-1])==$min)
114             {
115                 $num = $j;
116                 break;
117             }
118         }
119         for($j=1;$j<=count($M);$j++)
120         {
121             
122             if($j!=$num)
123             {
124                 $W[$j][$i] = 0;
125             }else{
126                  $W[$j][$i] = 1;
127             }
128             
129         }
130     }
131 /*
132     for($i=1;$i<=count($M);$i++)
133     {
134         $flags = true;
135         for($j=2;$j<count($train);$j++)
136         {
137             $flags = true;
138             $min = dis($train[$j],$M[$i-1]);
139             for($k=1;$k<=count($M);$k++)
140             {
141                 if((dis($train[$j],$M[$k-1])<=$min)&&($k!=$i))
142                 {
143                     $flags = false;
144                     break;
145                 }
146             }
147             if($flags)
148             {
149                 $W[$i][$j] = 1;
150             }else $W[$i][$j] = 0;
151         }
152     }
153 */    
154     return $W;
155 }
156 //--------------------------------------------------------------------
157 
158 
//--------------------------------------------------------------------
/**
 * Evaluate the objective F(W, M).
 *
 * Adds up dis($train[$j], $M[$i - 1]) * $W[$i][$j] over every mode number $i
 * (1-based) and every training row $j starting at row 1.
 *
 * @param array $train Rows of the training table.
 * @param array $M     0-indexed list of modes.
 * @param array $W     Membership matrix indexed [mode number][row index].
 * @return int|float The accumulated weighted distance.
 */
function F_W_M($train,$M,$W)
{
    $total = 0;
    $clusterCount = count($M);
    $rowCount = count($train);

    $cluster = 0;
    while ($cluster < $clusterCount) {
        $slot = $cluster + 1; // W is addressed by 1-based mode number
        for ($row = 1; $row < $rowCount; $row++) {
            $total += dis($train[$row], $M[$cluster]) * $W[$slot][$row];
        }
        $cluster++;
    }

    return $total;
}
//--------------------------------------------------------------------


//--------------------------------------------------------------------
/**
 * Compute one row of the new mode matrix M from the rows of one cluster.
 *
 * For every attribute column (all columns except the first and the last one of
 * the cluster's first row) the most frequent value is selected. When several
 * values are equally frequent, the one that appears first in the cluster wins.
 *
 * Fixes:
 *  - an empty cluster now yields an empty mode instead of reading $array[0],
 *    which does not exist (count(null) is a TypeError on PHP 8);
 *  - values are tallied with strict comparison, so "1" and "1.0" are counted
 *    as separate categories.
 *
 * @param array $array Rows belonging to one cluster.
 * @return array Most frequent value per attribute column, 0-indexed.
 */
function New_SingleM($array)
{
    $new_m = array();
    if (!is_array($array) || count($array) === 0) {
        return $new_m; // empty cluster: there is nothing to take a mode of
    }

    $rows = array_values($array);
    $last = is_array($rows[0]) ? count($rows[0]) - 1 : 0;

    for ($i = 1; $i < $last; $i++) {
        // Parallel lists: $values[$k] has been seen $counts[$k] times so far.
        $values = array();
        $counts = array();
        foreach ($rows as $row) {
            $value = (is_array($row) && isset($row[$i])) ? $row[$i] : null;
            $pos = array_search($value, $values, true);
            if ($pos === false) {
                $values[] = $value;
                $counts[] = 1;
            } else {
                $counts[$pos]++;
            }
        }

        // A strict ">" keeps the earliest value when counts are tied.
        $best = 0;
        $distinct = count($counts);
        for ($k = 1; $k < $distinct; $k++) {
            if ($counts[$k] > $counts[$best]) {
                $best = $k;
            }
        }
        $new_m[] = $values[$best];
    }

    return $new_m;
}
//--------------------------------------------------------------------


//--------------------------------------------------------------------
/**
 * Derive the new mode matrix M from the membership matrix W.
 *
 * For every cluster number 1 .. count($W) - 1 the training rows whose entry in
 * $W is (loosely) equal to 1 are gathered, and New_SingleM() turns each group
 * into one mode. Row indices run from 1 to count($W[1]).
 *
 * @param array $train Rows of the training table.
 * @param array $W     Membership matrix indexed [cluster number][row index].
 * @return array One mode per cluster, 0-indexed.
 */
function New_M($train,$W)
{
    $clusters = array();
    $slots = count($W);

    for ($cluster = 1; $cluster < $slots; $cluster++) {
        $members = array();
        $rowLimit = count($W[1]);
        for ($row = 1; $row <= $rowLimit; $row++) {
            if ($W[$cluster][$row] == 1) {
                $members[] = $train[$row];
            }
        }
        $clusters[] = $members;
    }

    return array_map('New_SingleM', $clusters);
}
//--------------------------------------------------------------------


//--------------------------------------------------------------------
/**
 * K-modes clustering.
 *
 * Starts from the modes produced by first_M(), then alternates between
 * re-deriving the modes from the current assignment (New_M) and re-assigning
 * the rows to the nearest mode (W). It stops as soon as the objective F_W_M()
 * no longer decreases.
 *
 * Fix: the re-assignment step used to call W() with an undefined variable
 * ($M2) instead of the freshly computed modes. The loop is now a plain
 * "update modes, update assignment, compare cost" iteration. The cost has to
 * drop strictly for the loop to continue, so it stops once the objective
 * stalls or rises.
 *
 * @param array $train Rows of the training table.
 * @param mixed $m     Output: receives the final mode matrix M.
 * @param mixed $w     Output: receives the final membership matrix W.
 * @return void
 */
function  Kmodes($train,&$m,&$w)
{
    $M = first_M($train);
    $W = W($train, $M);
    $cost = F_W_M($train, $M, $W);

    while (true) {
        $nextM = New_M($train, $W);
        $nextW = W($train, $nextM);
        $nextCost = F_W_M($train, $nextM, $nextW);

        if ($nextCost > $cost) {
            break; // the update would make things worse: keep the current result
        }

        $M = $nextM;
        $W = $nextW;
        if ($nextCost == $cost) {
            break; // objective unchanged: converged
        }
        $cost = $nextCost;
    }

    $m = $M;
    $w = $W;
}
//--------------------------------------------------------------------


//--------------------------------------------------------------------
/**
 * Read a text file into an array of rows.
 *
 * Each line becomes one row, split on single spaces.
 *
 * Fixes:
 *  - a failed read returns an empty array instead of carrying `false` on into
 *    explode();
 *  - Windows (CRLF) and bare CR line endings are normalised, so the last field
 *    of a line no longer keeps a trailing "\r";
 *  - blank lines at the end of the file (typically caused by a final newline)
 *    no longer produce a bogus row containing one empty string.
 *
 * @param string $filename Path of the file to read.
 * @return array List of rows; each row is a list of string fields.
 */
function  getFileContent($filename)
{
    $content = file_get_contents($filename);
    if ($content === false) {
        return array(); // file_get_contents() has already raised a warning
    }

    $content = str_replace(array("\r\n", "\r"), "\n", $content);
    $lines = explode("\n", $content);

    // Drop trailing blank lines only; blank lines elsewhere are kept as rows.
    while (count($lines) > 0 && trim($lines[count($lines) - 1]) === '') {
        array_pop($lines);
    }

    $rows = array();
    foreach ($lines as $line) {
        $rows[] = explode(" ", $line);
    }

    return $rows;
}
//--------------------------------------------------------------------


//--------------------------------------------------------------------
/**
 * Write a two-dimensional array to a text file.
 *
 * One output line is written per element of $result. Every value of a row is
 * followed by a tab character, and each line ends with "\n".
 *
 * Fixes:
 *  - the inner loop ran to `<= count(...)` and read one element past the end
 *    of every row;
 *  - rows are walked with foreach, so rows that do not start at index 0 (the
 *    membership matrix W is indexed from 1) are written completely;
 *  - an element that is not an array (W keeps a NULL placeholder at index 0)
 *    produces an empty line instead of failing in count();
 *  - a failed fopen() no longer leads to fwrite()/fclose() on `false`.
 *
 * @param array  $result   The array to store.
 * @param string $filename Path of the file to write.
 * @return void
 */
function Array_Totxt($result,$filename)
{
    $fp = fopen($filename, 'wb');
    if ($fp === false) {
        return; // fopen() has already raised a warning
    }

    if (is_array($result)) {
        foreach ($result as $row) {
            $line = '';
            if (is_array($row)) {
                foreach ($row as $value) {
                    $line .= $value . "\t";
                }
            }
            fwrite($fp, $line . "\n");
        }
    }

    fclose($fp);
}
//--------------------------------------------------------------------
// Driver: load the training table, run K-modes on it, then dump the final
// mode matrix and the final membership matrix to text files.
$train = getFileContent("train.txt");

$M = null;
$W = null;
Kmodes($train, $M, $W);

foreach (array("M.txt" => $M, "w.txt" => $W) as $outputFile => $matrix) {
    Array_Totxt($matrix, $outputFile);
}

?>

 M矩阵:

W矩阵:

原文地址:https://www.cnblogs.com/minmsy/p/4983902.html