// 数据挖掘之神经网络SOM算法 (Data mining: the SOM, self-organizing map, neural-network algorithm)

/*
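Idea behind the SOM (self-organizing map) neural network: the network has an
input layer and a competitive layer. The input layer receives the samples.
Suppose there are five samples A, B, C, D and E, each a 5-dimensional vector,
and the competitive layer is a 10*10 two-dimensional grid, i.e. 100 neurons,
each of which is also a 5-dimensional vector. The neurons compete for every
input vector and exactly one of them wins: the more similar a neuron is to the
input vector, the stronger it competes. Suppose the neuron at (7,8) is the most
similar one. It then has the right to modify the values of the neurons in the
neighbourhood centred on itself; the closer a neuron lies to the winner, the
more strongly it is affected, i.e. the closer it is pulled toward the winner.
If the winning neuron for sample A is (7,8) and the winning neuron for B is
(6,8), the two neurons are close together, so samples A and B are very similar.
If the winning neuron for C is (1,1), far away from the other two, then C has
low similarity to A and B.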
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <fstream>
#include <iomanip>
#include <iostream>

// Only the standard names this file uses are imported. A blanket
// `using namespace std;` would make the global array `distance` below
// ambiguous with std::distance.
using std::cout;
using std::endl;
using std::ios;
using std::ofstream;
using std::setiosflags;
using std::setprecision;

#define InputLayerNum 35        // length of one input vector (7 x 5 pattern)
#define OutputLayerRow 8        // rows of the competitive layer
#define OutputLayerColumn 12    // columns of the competitive layer
#define total_iteration_Num 80  // upper bound on iterations per pattern
#define error_limit 0.0001      // a node at or below this distance is recorded as a winner
#define efficiency 0.9          // initial learning rate

// Shared loop counters. `l` is also the index of the pattern currently being
// trained: main() sets it, eula_distance() and weight_change() read it.
int i,j,k,l,m,n;

int inputMode[26][7][5];                                     // 26 patterns of 7 x 5 integers
double weight[OutputLayerRow*OutputLayerColumn][InputLayerNum]; // one weight vector per node, row-major
int current_iteration_num=0;
double study_efficiency=efficiency;
double distance[OutputLayerRow*OutputLayerColumn];           // accumulated squared distance per node
int neighbor_width=OutputLayerColumn;
int neighbor_height=OutputLayerRow;
int row[OutputLayerRow],column[OutputLayerColumn];           // rows / columns that hold a recorded winner
int flag[OutputLayerRow][OutputLayerColumn];                 // 100 marks a node already recorded as a winner
int temp_row,temp_column;                                    // best node found by the latest search
int winner_row,winner_column;                                // centre of the current neighbourhood
double min_distance=1000.0;

/****************************************************/
// Reset the accumulated distance of every node to 0.
/****************************************************/
void init_distance(){
    for(int node=0;node<OutputLayerRow*OutputLayerColumn;node++)
        ::distance[node]=0.0;
}

/****************************************************/
// Accumulate the squared Euclidean distance between pattern `l` and every
// unflagged node inside the neighbourhood window around
// (winner_row, winner_column), and remember the closest node in
// (temp_row, temp_column) when it beats min_distance.
// After the first iteration, a node whose distance is within error_limit is
// recorded in row[] / column[] and flagged so later searches skip it.
/****************************************************/
void eula_distance(){
    // Neighbourhood window centred on the current winner, clamped to the grid.
    int ttLow=winner_column-neighbor_width/2;
    int ttUp=winner_column+neighbor_width/2;
    int ppLow=winner_row-neighbor_height/2;
    int ppUp=winner_row+neighbor_height/2;
    if(ttLow<0) ttLow=0;
    if(ttUp>=OutputLayerColumn) ttUp=OutputLayerColumn-1;
    if(ppLow<0) ppLow=0;
    if(ppUp>=OutputLayerRow) ppUp=OutputLayerRow-1;

    for(int r=ppLow;r<=ppUp;r++)
        for(int c=ttLow;c<=ttUp;c++){
            // Fixed: the original tested flag[r][r], i.e. the wrong node.
            if(flag[r][c]==100) continue;

            const int node=r*OutputLayerColumn+c;
            for(int pr=0;pr<7;pr++)
                for(int pc=0;pc<5;pc++){
                    const double diff=inputMode[l][pr][pc]-weight[node][pr*5+pc];
                    ::distance[node]+=diff*diff;
                }

            if(::distance[node]<min_distance){
                min_distance=::distance[node];
                temp_row=r;
                temp_column=c;
            }
        }

    if(current_iteration_num>0 && min_distance<=error_limit){
        row[temp_row]=temp_row;
        // Fixed: the original wrote row[temp_column], which left column[]
        // untouched and overran row[] for temp_column >= OutputLayerRow.
        column[temp_column]=temp_column;
        flag[temp_row][temp_column]=100;
    }
}
/****************************************************/ //调整权值 /****************************************************/ void weight_change(){ int ttLow,ttUp,ppLow,ppUp; winner_row=temp_row; winner_column=temp_column; ttLow=winner_column-neighbor_width/2; ttUp=winner_column+neighbor_width/2; ppLow=winner_row-neighbor_height/2; ppUp=winner_row+neighbor_height/2; if(ttLow<0) ttLow=0; if(ttUp>=OutputLayerColumn) ttUp=OutputLayerColumn-1; if(ppLow<0) ppLow=0; if(ppUp>=OutputLayerRow) ppUp=OutputLayerRow-1; for(i=ppLow;i<=ppUp;i++) for(j=ttLow;j<=ttUp;j++){ if(!(flag[i][j]==100)){ for(m=0;m<7;m++) for(n=0;n<5;n++) weight[i*OutputLayerColumn+j][m*5+n]+= study_efficiency*(inputMode[l][m][n]-weight[i*OutputLayerColumn+j][m*5+n]); } } } /****************************************************/ //调整学习效率以及获胜节点的邻域大小 /****************************************************/ void paraChange(){ study_efficiency=study_efficiency*(1.0-((double)current_iteration_num)/total_iteration_Num); neighbor_width=(int)(neighbor_width*(1.0-((double)current_iteration_num)/total_iteration_Num)); neighbor_height=(int)(neighbor_height*(1.0-((double)current_iteration_num)/total_iteration_Num)); } /****************************************************/ //该函数用于将所有输入模式从文件中读入,并存放在数组inputMode中 //同时进行权值的初始化,采用随机赋值的方法 /****************************************************/ void initialize(){ for(i=0;i<OutputLayerRow;i++) row[i]=100; for(j=0;j<OutputLayerColumn;j++) column[j]=100; for(i=0;i<OutputLayerRow;i++) for(j=0;j<OutputLayerColumn;j++) flag[i][j]=0; FILE *pf=fopen("输入数据.txt","a+"); if(pf==NULL){ cout<<"Can not input file! 
"; exit(0); } for(i=0;i<26;i++) for(j=0;j<7;j++) for(k=0;k<5;k++) fscanf(pf,"%d",&inputMode[i][j][k]); //用于测试是否能够正确读入输入模式 char character[26]; for(i=0;i<26;i++) character[i]=(65+i); ofstream mode("输出数据.txt",ios::out); for(i=0;i<26;i++){ mode<<character[i]<<' '<<endl; for(j=0;j<7;j++){ for(k=0;k<5;k++) mode<<inputMode[i][j][k]<<" "; mode<<" "; } mode<<" "; } //权值随机初始化,采用随机赋值的方法 for(i=0;i<OutputLayerRow;i++) for(j=0;j<OutputLayerColumn;j++) for(k=0;k<InputLayerNum;k++) weight[i*OutputLayerColumn+j][k]=(double)(rand()%101)/100.0; //用于测试是否能够正确初始化权值 ofstream quan("初始权值.txt",ios::out); for(i=0;i<OutputLayerRow;i++) for(j=0;j<OutputLayerColumn;j++){ quan<<" "<<"Node["<<i+1<<"]["<<j+1<<"]"<<" "; for(k=0;k<InputLayerNum;k++){ if(k%5==0) quan<<" "; quan<<setprecision(6)<<setiosflags(ios::fixed)<<weight[i*OutputLayerColumn+j][k]<<" "; } quan<<" "; } } int main(){ int iteration_numbers[26]; int total_num=0; char character[26]; void test_netWork_1(); void test_netWork_2(); for(l=0;l<26;l++){ iteration_numbers[l]=0; character[l]=(65+l); } initialize(); for(l=0;l<26;l++){ winner_row=OutputLayerRow/2; winner_column=OutputLayerColumn/2; while(current_iteration_num<total_iteration_Num){//迭代次数控制 init_distance(); eula_distance(); weight_change(); if(min_distance<=error_limit) break; ++current_iteration_num; paraChange(); } iteration_numbers[l]=current_iteration_num+1; neighbor_width=OutputLayerColumn; //修改邻域的宽度 neighbor_height=OutputLayerRow; //修改邻域的高度 study_efficiency=efficiency; //学习率重置 current_iteration_num=0; //重置迭代次数 min_distance=1000.0; //重置最小距离 } /***********************************/ //输出部分 /***********************************/ for(l=0;l<26;l++) total_num+=iteration_numbers[l]; ofstream iteration_num("迭代次数.txt",ios::out); for(l=0;l<26;l++){ iteration_num<<character[l]<<"迭代"<<iteration_numbers[l]<<"次! "<<endl; if(l==25) iteration_num<<"整个训练过程共迭代"<<total_num<<"次! 
"<<endl; } ofstream all_weight("训练后所有权值.txt",ios::out); ofstream winner_weight("训练后胜出权值.txt",ios::out); for(i=0;i<OutputLayerRow;i++) for(j=0;j<OutputLayerColumn;j++){ printf(" "); all_weight<<" "<<"Node["<<i+1<<"]["<<j+1<<"]"<<" "; for(k=0;k<InputLayerNum;k++){ if(k%5==0){ printf(" "); all_weight<<" "; } if(weight[i*OutputLayerColumn+j][k]>0.9999999) weight[i*OutputLayerColumn+j][k]=1.0; if(weight[i*OutputLayerColumn+j][k]<0.0000001) weight[i*OutputLayerColumn+j][k]=0.0; printf("%f ",weight[i*OutputLayerColumn+j][k]); all_weight<<setprecision(8)<<setiosflags(ios::fixed)<<weight[i*OutputLayerColumn+j][k]<<" "; } } ofstream winner_node("获胜节点.txt",ios::out); for(i=0;i<OutputLayerRow;i++) for(j=0;j<OutputLayerColumn;j++){ if(flag[i][j]==100){ //获胜节点 printf(" "); winner_weight<<" "<<"Node["<<i+1<<"]["<<j+1<<"]"<<" "; for(k=0;k<InputLayerNum;k++){ if(k%5==0){ printf(" "); winner_weight<<" "; } if(weight[i*OutputLayerColumn+j][k]>0.9999999) weight[i*OutputLayerColumn+j][k]=1.0; if(weight[i*OutputLayerColumn+j][k]<0.0000001) weight[i*OutputLayerColumn+j][k]=0.0; printf("%f ",weight[i*OutputLayerColumn+j][k]); winner_weight<<setprecision(8)<<setiosflags(ios::fixed)<<weight[i*OutputLayerColumn+j][k]<<" "; } winner_node<<"Node["<<i+1<<"]["<<j+1<<"]"<<endl; } } printf(" "); test_netWork_1(); test_netWork_2(); return 0; } void test_netWork_1(){ ofstream test1("标准测试.txt",ios::out); char character[26]; for(i=0;i<26;i++) character[i]=(65+i); for(l=0;l<26;l++){ for(i=0;i<OutputLayerRow;i++) for(j=0;j<OutputLayerColumn;j++) distance[i*OutputLayerColumn+j]=0.0; min_distance=1000; for(i=0;i<OutputLayerRow;i++) for(j=0;j<OutputLayerColumn;j++){ for(m=0;m<7;m++) for(n=0;n<5;n++) distance[i*OutputLayerColumn+j]+=pow(inputMode[l][m][n]-weight[i*OutputLayerColumn+j][m*5+n],2); if(distance[i*OutputLayerColumn+j]<min_distance){ min_distance=distance[i*OutputLayerColumn+j]; temp_row=i; temp_column=j; } } test1<<character[l]<<"'s winner is 
Node["<<temp_row+1<<"]["<<temp_column+1<<"]"<<endl<<endl; } } /****************************************************/ //利用非标准数据测试训练后的网络 /****************************************************/ void test_netWork_2(){ ofstream test2("非标准测试.txt",ios::out); char character[26]; FILe *pf=fopen("非标准数据测试.txt","a+"); if(pf==NULL){ cout<<"Can not open input file! "; exit(0); } for(i=0;i<26;i++) for(j=0;j<7;j++) for(k=0;k<5;k++) fscanf(pf,"%d",&inputMode[i][j][k]); for(i=0;i<26;i++) character[i]=(65+i); for(l=0;l<26;l++){ for(i=0;i<OutputLayerRow;i++) for(j=0;j<OutputLayerColumn;j++) distance[i*OutputLayerColumn+j]=0.0; min_distance=1000; for(i=0;i<OutputLayerRow;i++) for(j=0;j<OutputLayerColumn;j++){ for(m=0;m<7;m++) for(n=0;n<5;n++) distance[i*OutputLayerColumn+j]+=pow(inputMode[l][m][n]-weight[i*OutputLayerColumn+j][m*5+n],2); if(distance[i*OutputLayerColumn+j]<min_distance){ min_distance=distance[i*OutputLayerColumn+j]; temp_row=i; temp_column=j; } } test2<<character[l]<<"'s winner is Node["<<temp_row+1<<"]["<<temp_column+1<<"]"<<endl<<endl; } }
// 原文地址 (original article): https://www.cnblogs.com/wust-ouyangli/p/6571900.html