Softmax 回归（使用 Theano）

  1 # coding:utf8
  2 import numpy as np
  3 import cPickle
  4 import theano
  5 import os
  6 import theano.tensor as T
  7 
  8 class SoftMax:
  9     def __init__(self,MAXT=50,step=0.15,landa=0):
 10         self.MAXT = MAXT
 11         self.step = step
 12         self.landa = landa  #在此权重衰减项未能提升正确率
 13         
 14     def load_theta(self,datapath):
 15         self.theta = cPickle.load(open(datapath,'rb'))
 16 
 17     def process_train(self,data,label,typenum,batch_size=500):
 18         valuenum=data.shape[1]
 19         batches =  data.shape[0] / batch_size
 20         data = theano.shared(np.asarray(data,dtype=theano.config.floatX))
 21         label = T.cast(theano.shared(np.asarray(label,dtype=theano.config.floatX)), 'int32')
 22         x = T.matrix('x')
 23         y = T.ivector('y')
 24         index = T.lscalar()
 25         theta =  theano.shared(value=0.001*np.zeros((valuenum,typenum),
 26             dtype=theano.config.floatX),
 27             name='theta',borrow=True)
 28         hx=T.nnet.softmax(T.dot(x,theta))
 29         cost =  -T.mean(T.log(hx)[T.arange(y.shape[0]), y]) +0.5*self.landa*T.sum(theta ** 2)  #权重衰减项
 30         g_theta = T.grad(cost, theta)
 31         updates = [(theta, theta - self.step * g_theta)]
 32         train_model = theano.function(
 33         inputs=[index],outputs=cost,updates=updates,givens={
 34             x: data[index * batch_size: (index + 1) * batch_size],
 35             y: label[index * batch_size: (index + 1) * batch_size]
 36         },allow_input_downcast=True
 37         )
 38         lastcostJ = np.inf
 39         stop = False
 40         epoch = 0
 41         costj=[]
 42         while (epoch < self.MAXT) and (not stop):
 43             epoch = epoch + 1
 44             for minibatch_index in xrange(batches):
 45                 costj.append(train_model(minibatch_index))
 46             if np.mean(costj)>=lastcostJ:
 47                 print "costJ is increasing !!!"
 48                 stop=True
 49             else:
 50                 lastcostJ=np.mean(costj)
 51                 print(( 'epoch %i, minibatch %i/%i,averange cost is %f') %
 52                         (epoch,minibatch_index + 1,batches,lastcostJ))
 53         self.theta=theta
 54         if not os.path.exists('data/softmax.pkl'):
 55             f= open("data/softmax.pkl",'wb')
 56             cPickle.dump(self.theta.get_value(),f)
 57             f.close()
 58         return self.theta.get_value()
 59 
 60     def process_test(self,data,label,batch_size=500):
 61         batches = label.shape[0] / batch_size
 62         data = theano.shared(np.asarray(data,dtype=theano.config.floatX))
 63         label = T.cast(theano.shared(np.asarray(label,dtype=theano.config.floatX)), 'int32')
 64         x = T.matrix('x')
 65         y = T.ivector('y')
 66         index = T.lscalar()
 67         hx=T.nnet.softmax(T.dot(x,self.theta))
 68         predict = T.argmax(hx, axis=1)
 69         errors=T.mean(T.neq(predict, y))
 70         test_model = theano.function(
 71         inputs=[index],outputs=errors,givens={
 72             x: data[index * batch_size: (index + 1) * batch_size],
 73             y: label[index * batch_size: (index + 1) * batch_size]
 74         },allow_input_downcast=True
 75         )
 76         test_losses=[]
 77         for minibatch_index in xrange(batches):
 78             test_losses.append(test_model(minibatch_index))
 79         test_score = np.mean(test_losses)
 80         print(( 'minibatch %i/%i, test error of model %f %%') %
 81               (minibatch_index + 1,batches,test_score * 100.))
 82 
 83     def h(self,x):
 84         m = np.exp(np.dot(x,self.theta))
 85         sump = np.sum(m,axis=1)
 86         return m/sump
 87 
 88     def predict(self,x):
 89         return np.argmax(self.h(x),axis=1)
 90 
 91 if __name__ == '__main__':
 92     f = open('mnist.pkl', 'rb')
 93     training_data, validation_data, test_data = cPickle.load(f)
 94     training_inputs = [np.reshape(x, 784) for x in training_data[0]]
 95     data = np.array(training_inputs)
 96     training_inputs = [np.reshape(x, 784) for x in validation_data[0]]
 97     vdata = np.array(training_inputs)
 98     f.close()
 99     softmax = SoftMax()
100     softmax.process_train(data,training_data[1],10)
101     softmax.process_test(vdata,validation_data[1])
102     #minibatch 20/20, test error of model 7.530000 %