【deep learning学习笔记】注释yusugomori的DA代码 --- dA.cpp -- 训练

说实话，具体的训练公式我没有自己推导，姑且认为他写的代码是对的。总体上看，用的是 BP 的方法。特殊之处在于，输入层和输出层是完完全全的“同一层”。

// Corrupt a binary input vector with masking noise.
// Every unit that is on (1) survives with probability p via a
// Bernoulli draw; units that are off (0) are left untouched.
//   x       -- original 0-1 input vector                (input)
//   tilde_x -- noised 0-1 vector                        (output)
//   p       -- keep probability for active units        (input)
void dA::get_corrupted_input (
					int *x,
					int *tilde_x,
					double p
					) 
{
	for(int i=0; i<n_visible; i++) 
	{
		// a zero stays zero; a one is resampled as Bernoulli(p)
		tilde_x[i] = (x[i] == 0) ? 0 : binomial(1, p);
	}
}

// Encode
// Encode: map a (possibly corrupted) visible vector x to hidden
// activations, y[i] = sigmoid( sum_j W[i][j] * x[j] + hbias[i] ).
//   x -- input from the visible nodes   (input)
//   y -- activations of the hidden nodes (output)
void dA::get_hidden_values (
					int *x,
					double *y
					) 
{
	for(int i=0; i<n_hidden; i++) 
	{
		// pre-activation: start from the bias, add the weighted inputs
		double pre = hbias[i];
		for(int j=0; j<n_visible; j++) 
		{
			pre += W[i][j] * x[j];
		}
		// squash to (0, 1)
		y[i] = sigmoid(pre);
	}
}

// Decode
// Decode: reconstruct the visible layer from the hidden activations,
// z[i] = sigmoid( sum_j W[j][i] * y[j] + vbias[i] ).
// Note W is the same (tied) weight matrix as the encoder, used transposed.
//   y -- activations of the hidden nodes          (input)
//   z -- reconstructed visible probabilities      (output)
void dA::get_reconstructed_input (
					double *y,
					double *z
					) 
{
	for(int i=0; i<n_visible; i++) 
	{
		// pre-activation: bias plus the transposed-weight contribution
		double pre = vbias[i];
		for(int j=0; j<n_hidden; j++) 
		{
			pre += W[j][i] * y[j];
		}
		z[i] = sigmoid(pre);
	}
}

void dA::train (
				int *x,						// the input sample from visiable node
				double lr,					// the learning rate
				double corruption_level		// corruption_level is the probability of noise
				) 
{
	// the auto-encoder networks:
	// input(visible) layer --> hidden layer --> output(visible) layer
	// the input layer is the same as the output layer, the two layers are totally same.
	// we train it by the standard bp algorithm, from output layer to the hidden layer, and to the input layer
	// Here is the whole process: 


	int *tilde_x = new int[n_visible];		// the noise input
	double *y = new double[n_hidden];		// the output of hidden layer
	double *z = new double[n_visible];		// the output of output layer, reconstruction

	double *L_vbias = new double[n_visible];	// temp value for visible bias
	double *L_hbias = new double[n_hidden];		// temp value for hidden bias

	double p = 1 - corruption_level;

	// make the input sample noise by the p probability
	get_corrupted_input(x, tilde_x, p);
	// calculate the output of hidden nodes by the noise input, encode
	get_hidden_values(tilde_x, y);
	// reconstruct the input sample from visible nodes, decode
	get_reconstructed_input(y, z);
  
	// update the bias of visible nodes
	for(int i=0; i<n_visible; i++) 
	{
		// the difference between input sample and the PROBABILITY of reconstructed probability of visible node
		// it's different from RBM that in RBM we calcualte the difference between input sample and 
		// the 0-1 state of the reconstructed visiable node
		// here use the standard bp algorithm, from visible layer to hidden layer
		L_vbias[i] = x[i] - z[i];
		// update the value by the learning rate
		vbias[i] += lr * L_vbias[i] / N;
	}

	// update the bias of hidden nodes
	for(int i=0; i<n_hidden; i++) 
	{
		// propgate the bias from visible nodes
		// here use the standard bp algorithm, from visible layer to hidden layer
		L_hbias[i] = 0;
		for(int j=0; j<n_visible; j++) 
		{
			L_hbias[i] += W[i][j] * L_vbias[j];
		}
		L_hbias[i] *= y[i] * (1 - y[i]);
		hbias[i] += lr * L_hbias[i] / N;
	}
  
	// update the weight of networks
	for(int i=0; i<n_hidden; i++) 
	{
		for(int j=0; j<n_visible; j++) 
		{
			W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
		}
	}

	delete[] L_hbias;
	delete[] L_vbias;
	delete[] z;
	delete[] y;
	delete[] tilde_x;
}

void dA::reconstruct (
				int *x,			// the input sample		-- input
				double *z		// the reconstructed value -- output
				) 
{
	double *y = new double[n_hidden];

	// calculate the output of hidden layer
	get_hidden_values(x, y);
	// reconstruct from hidden layer to visible layer
	get_reconstructed_input(y, z);

	delete[] y;
}


原文地址:https://www.cnblogs.com/xinyuyuanm/p/3206560.html