PCA与Softmax分类比较——MNIST与人脸数据集

在PCA人脸识别中,采用三种距离度量(曼哈顿距离、欧氏距离、马氏距离)得到的正确率均可达到100%。

作为对比,单独使用Softmax回归对人脸数据集(40人×每人10张,每张92×112像素)进行分类,正确率为97%。

用PCA对MNIST手写数字(10类×每类500个样本,每个样本28×28像素)进行识别,也可以达到比较高的正确率:使用马氏距离且h=32时正确率为0.93(Softmax回归的正确率为0.85~0.89)。

 1 # coding:utf8
 2 import numpy as np
 3 import os
 4 import sf
 5 import pca
 6 
 7 if __name__ == '__main__':
 8     img=pca.load_img()
 9     test=img
10     print np.mat(img).shape
11     label=[a+1 for a in range(40) for j in range(10)]
12     index=range(400)
13     np.random.shuffle(index)
14     label_=[label[i] for i in index]
15     test_=np.mat([test[i] for i in index])
16     
17     softmax = sf.SoftMax(MAXT=200, step=0.03, landa=0.01)
18     softmax.process_train(np.mat(img),np.array(label),40)
19     softmax.validate(test_,np.array(label_))
20     # correctnum = 390, sumnum = 400, Accuracy:0.97
# coding:utf8
import cPickle
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
  6 
  7 TYPE_NUM=10  # 40
  8 SAMPLE_NUM=500  # 10
  9 
 10 def load_img():
 11     img=[]
 12     for i in range(40):
 13         for j in range(10):
 14             path='att_faces\s'+str(i+1)+'\'+str(j+1)+'.pgm'
 15             a=cv2.imread(path,0)
 16             a=a.flatten()/255.0
 17             img.append(a)
 18     return img
 19 
 20 def dis(A,B,dis_type=0,s=None):
 21     if dis_type==1:  # 欧式距离
 22         return np.sum(np.square(A-B))
 23     elif dis_type==2:  # 马式距离
 24         f=np.sqrt(abs(np.dot(np.dot((A-B),s.I),(A-B).T)))  # h增大时会出现负值
 25         return f.tolist()[0][0]
 26     else:  # 曼哈顿距离
 27         return np.sum(abs(A-B))
 28 
 29 def pca(data,h,dis_type=0):
 30     q,r=np.linalg.qr(data.T)
 31     u,s,v=np.linalg.svd(r.T)
 32     fi=np.dot(q,(v[:h]).T)
 33     y=np.dot(fi.T,data.T)
 34     ym=[np.mean(np.reshape(x,(TYPE_NUM,SAMPLE_NUM)),axis=1) for x in y]
 35     ym=np.reshape(ym,(h,TYPE_NUM))
 36     c=[]
 37     if dis_type==2:# 计算马氏距离的额外处理"
 38         yr=[np.reshape(x,(TYPE_NUM,SAMPLE_NUM)) for x in y]
 39         yr=[[np.array(yr)[j][k] for j in  range(h)]for k in range(TYPE_NUM)]
 40         for k in yr:
 41             k=np.reshape(k,(h,SAMPLE_NUM))
 42             e=np.cov(k)
 43             c.append(e)
 44     return fi,ym,c
 45 
 46 def validate(fi,ym,test,label,dis_type=0,c=None):
 47     ty=np.dot(fi.T,test.T)
 48     correctnum=0
 49     testnum=len(test)
 50     for i in range(testnum):
 51         if dis_type==2:
 52             n=len(ym.T)
 53             dd=[dis(ty.T[i],ym.T[n_],dis_type,np.mat(c[n_])) for n_ in range(n)]
 54         else:
 55             dd=[dis(ty.T[i],yy,dis_type) for yy in ym.T]
 56         if np.argsort(dd)[0]==label[i]:  # mnist中从0开始
 57             correctnum+=1
 58     rate = float(correctnum) / testnum
 59     print "Correctnum = %d, Sumnum = %d" % (correctnum, testnum), "Accuracy:%.2f" % (rate)
 60     return rate
 61 
 62 if __name__ == '__main__':
 63     f = open('mnist.pkl', 'rb')
 64     training_data, validation_data, test_data = cPickle.load(f)
 65     training_inputs = [np.reshape(x, 784) for x in training_data[0]]
 66     data = np.array(training_inputs[:10000])
 67     training_inputs = [np.reshape(x, 784) for x in validation_data[0]]
 68     vdata = np.array(training_inputs[:5000])
 69     f.close()
 70     label=training_data[1][:10000]
 71     c=np.argsort(label)
 72     l=[label[x] for x in c]
 73     d=[data[x] for x in c]
 74     data_new=[]
 75     label_new=[]
 76     temp=-1000
 77     for i in  range(10):   # 将数据整理为10类各500个样本依次排列
 78         id= l.index(i)
 79         if id-temp<500:
 80             print "<500"
 81             break
 82         data_new.append(d[id:id+500])
 83         label_new.append(l[id:id+500])  # PCA中不需要,用于在Softmax中验证数据
 84         temp=id
 85     lb=np.array(label_new).flatten()
 86     data_=[]
 87     for j in data_new:
 88         data_+=j
 89     x_=[2**i for i in range(9)]
 90     d_=['Manhattan Distance','Euclidean Metric', 'Mahalanobis Distance']
 91     for j in range(3):
 92         y_=[]
 93         plt.figure()
 94         for i in range(9):
 95             fi,ym,c=pca.pca(np.mat(data_),h=x_[i],dis_type=j)
 96             y_.append(pca.validate(fi,ym,vdata, validation_data[1][:5000],dis_type=j,c=c))
 97         plt.ylim([0,1.0])
 98         plt.plot(x_,y_)
 99         plt.scatter(x_,y_)
100         plt.xlabel('h')
101         plt.ylabel('Accuracy')
102         plt.title(d_[j])
103     plt.show()

原文地址:https://www.cnblogs.com/qw12/p/6097012.html