Machine Learning: Softmax Classifier (Two Hidden Layers)

This program implements a softmax classifier with two hidden layers. The activation function is the ReLU: f(x) = max(0, x). The forward pass is:
$$f_1 = W_1 x + b_1$$

$$h_1 = \max(0, f_1)$$

$$f_2 = W_2 h_1 + b_2$$

$$h_2 = \max(0, f_2)$$

$$f_3 = W_3 h_2 + b_3$$

$$y_i = \frac{e^{f_{3,i}}}{\sum_j e^{f_{3,j}}}$$
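
As a small aside, here is a minimal MATLAB sketch of the softmax step on a toy score matrix. The max-subtraction is a standard numerical-stability trick added here for illustration; it is not part of the training code below, and the variable names are illustrative:

% toy scores: 2 examples, 3 classes (one row per example)
scores = [2.0 1.0 0.1; 0.5 2.5 0.3];
% subtract each row's maximum before exponentiating to avoid overflow
shifted = scores - repmat(max(scores, [], 2), 1, size(scores, 2));
exp_scores = exp(shifted);
% normalize so each row sums to 1
probs = exp_scores ./ repmat(sum(exp_scores, 2), 1, size(scores, 2));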


function Out=Softmax_Classifier_2(train_x,  train_y, opts)

% setting learning parameters
step_size=opts.step_size;
reg=opts.reg;
batchsize = opts.batchsize;
numepochs = opts.numepochs;
K=opts.class;
h1=opts.hidden_1;
h2=opts.hidden_2;

D=size(train_x, 2);

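% initialize weights with small Gaussian noise and biases with zeros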
W1=0.01*randn(D,h1);
b1=zeros(1,h1);
W2=0.01*randn(h1, h2);
b2=zeros(1,h2);
W3=0.01*randn(h2, K);
b3=zeros(1, K);

loss = zeros(1, numepochs);

num_examples=size(train_x, 1);
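% number of minibatches per epoch (assumes batchsize evenly divides num_examples)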
numbatches = num_examples / batchsize;

for epoch=1:numepochs

     kk = randperm(num_examples);
     loss(epoch)=0;

     tic;

      fprintf('epoch %d:\n', epoch);


     for bat=1:numbatches

         batch_x = train_x(kk((bat - 1) * batchsize + 1 : bat * batchsize), :);
         batch_y = train_y(kk((bat - 1) * batchsize + 1 : bat * batchsize), :);

         %% forward
         f1=batch_x*W1+repmat(b1, batchsize, 1);
         hiddenval_1=max(0, f1);
         f2=hiddenval_1*W2+repmat(b2, batchsize, 1);
         hiddenval_2=max(0, f2);
         scores=hiddenval_2*W3+repmat(b3, batchsize, 1);

         %% the loss
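         % softmax probabilities, then average cross-entropy over the minibatch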
         exp_scores=exp(scores);
         dd=repmat(sum(exp_scores, 2), 1, K);
         probs=exp_scores./dd;
         correct_logprobs=-log(sum(probs.*batch_y, 2));
         data_loss=sum(correct_logprobs)/batchsize;
         reg_loss=0.5*reg*sum(sum(W1.*W1))+0.5*reg*sum(sum(W2.*W2))+0.5*reg*sum(sum(W3.*W3));
         loss(epoch) =loss(epoch)+ data_loss + reg_loss;

         %% back propagation
          % output layer
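          % gradient of softmax + cross-entropy w.r.t. the scores: probs - y, averaged over the batch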
         dscores = probs-batch_y;
         dscores=dscores/batchsize;
         dW3=hiddenval_2'*dscores;
         db3=sum(dscores);

         % hidden layer 2
         dhiddenval_2=dscores*W3';
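         % ReLU backward: gradients pass only where the forward activation was positive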
         mask = hiddenval_2 > 0;
         df_2=dhiddenval_2.*mask;
         dW2=hiddenval_1'*df_2;
         db2=sum(df_2);

         % hidden layer 1
         dhiddenval_1=df_2*W2';
         mask = hiddenval_1 > 0;
         df_1=dhiddenval_1.*mask;
         dW1=batch_x'*df_1;
         db1=sum(df_1);

         %% update
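         % add the gradient of the L2 penalty 0.5*reg*||W||^2, i.e. reg*W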
         dW3=dW3+reg*W3;
         dW2=dW2+reg*W2;
         dW1=dW1+reg*W1;

         W3=W3-step_size*dW3;
         b3=b3-step_size*db3;

         W2=W2-step_size*dW2;
         b2=b2-step_size*db2;

         W1=W1-step_size*dW1;
         b1=b1-step_size*db1;


     end

     loss(epoch)=loss(epoch)/numbatches;

    fprintf('training loss is %f\n', loss(epoch));

    toc;

end

Out.W1=W1;
Out.W2=W2;
Out.W3=W3;

Out.b1=b1;
Out.b2=b2;
Out.b3=b3;

Out.loss=loss;
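
For completeness, a hedged usage sketch on random data: the opts field names match those read at the top of the function, but the sizes and hyperparameter values below are illustrative, not tuned.

% toy data: 100 examples, 20 features, 4 classes (labels one-hot encoded)
train_x = randn(100, 20);
labels = randi(4, 100, 1);
train_y = full(sparse((1:100)', labels, 1, 100, 4));

opts.step_size = 0.1;    % learning rate
opts.reg = 1e-3;         % L2 regularization strength
opts.batchsize = 10;     % must evenly divide the number of examples
opts.numepochs = 50;
opts.class = 4;          % K
opts.hidden_1 = 32;      % h1
opts.hidden_2 = 16;      % h2

Out = Softmax_Classifier_2(train_x, train_y, opts);

% predict by rerunning the forward pass with the learned parameters
n = size(train_x, 1);
h1 = max(0, train_x * Out.W1 + repmat(Out.b1, n, 1));
h2 = max(0, h1 * Out.W2 + repmat(Out.b2, n, 1));
scores = h2 * Out.W3 + repmat(Out.b3, n, 1);
[~, pred] = max(scores, [], 2);    % predicted class per example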

Original post: https://www.cnblogs.com/mtcnn/p/9412461.html