# CNN-手写数字识别 (CNN handwritten digit recognition)

# 一、用CNN实现手写数字识别 (Part 1: handwritten digit recognition with a CNN)
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_digits

import time
# Print the wall-clock time at start-up; the script prints it again at the very
# end, so the two values bracket the total run time.
print(time.ctime())

# Load scikit-learn's bundled handwritten-digit dataset.
digits = load_digits()
# Features as float32; labels as a float32 column vector (one row per sample).
X_data = digits.data.astype("float32")
Y_data = digits.target.reshape(-1, 1).astype("float32")

def generatebatch(X, Y, n_examples, batch_size, drop_last=True):
    """Yield consecutive mini-batches sliced from X and Y.

    Batches are taken in order, without shuffling, using the same index
    range for both containers.

    Args:
        X: Sliceable container of inputs (e.g. a NumPy array or list).
        Y: Sliceable container of targets, aligned with X.
        n_examples: Number of leading examples to iterate over.
        batch_size: Number of examples per batch.
        drop_last: When True (the default, which matches the historical
            behaviour) a trailing batch holding fewer than ``batch_size``
            examples is skipped.  When False that remainder is yielded
            too, so every one of the ``n_examples`` examples is visited.

    Yields:
        ``(batch_xs, batch_ys)`` tuples.
    """
    n_full = n_examples // batch_size
    for batch_i in range(n_full):
        start = batch_i * batch_size
        end = start + batch_size
        yield X[start:end], Y[start:end]

    # Optionally emit the examples left over after the last full batch.
    tail_start = n_full * batch_size
    if not drop_last and tail_start < n_examples:
        yield X[tail_start:n_examples], Y[tail_start:n_examples]
        
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder

# Min-max scale every pixel feature column using MinMaxScaler's defaults.
scaler = MinMaxScaler()
X_data = scaler.fit_transform(X_data)

# One-hot encode the label column.  ``toarray()`` yields a plain 2-D ndarray;
# ``todense()`` would return a ``numpy.matrix``, a type NumPy no longer
# recommends and whose operators (e.g. ``*``) differ from ndarray semantics.
Y = OneHotEncoder().fit_transform(Y_data).toarray()

# Reshape to image layout (batch, height, width, channels).
X = X_data.reshape(-1, 8, 8, 1)

# Mini-batch size used for mini-batch gradient descent (MBGD).
batch_size = 8

# tf.reset_default_graph clears the default graph stack and resets the global
# default graph, so the layers below are built into an empty graph.
tf.reset_default_graph()
# Input layer: placeholders that are filled through feed_dict at run time.
# tf_X takes batches of 8x8 single-channel images and tf_Y the matching 10-way
# label rows; the leading None leaves the batch dimension unspecified.
tf_X = tf.placeholder(tf.float32, [None, 8, 8, 1])
tf_Y = tf.placeholder(tf.float32, [None, 10])

# Convolution layer 1: create the filter weights and the biases.
# The 4-D weight shape describes the filters: 3x3 filters, each of depth 1
# (an RGB input would need depth 3), 10 filters in total.
conv_filter_w1 = tf.Variable( tf.random_normal([3, 3, 1, 10]) )
# Alternative initialisation left by the original author for reference:
#conv_filter_w1 = tf.get_variable( 'weights', [3, 3, 1, 10], initializer=tf.truncated_normal_initializer(stddev=0.1) )

# One bias per filter, hence 10 values.
conv_filter_b1 = tf.Variable( tf.random_normal([10]) )
#conv_filter_b1 = tf.get_variable( 'biases', [10], initializer=tf.constant_initializer(0.1) )

# tf.nn.conv2d implements the forward pass of a convolution layer.
# Its first argument is the current layer's 4-D node tensor (tf_X here): the
# first dimension indexes the sample within the input batch and the remaining
# three describe one node matrix, so input[0, :, :, :] is the first image,
# input[1, :, :, :] the second, input[2, :, :, :] the third, and so on.
# The second argument supplies the layer's weights; the third is the stride for
# each dimension, a length-4 list whose first and last entries must be 1.
# The last argument is the padding: 'SAME' pads with zeros, 'VALID' adds none.
conv = tf.nn.conv2d( tf_X, conv_filter_w1, strides=[1, 1, 1, 1], padding='SAME')

# tf.nn.bias_add adds the per-filter bias to every spatial position.
# The original author states that plain addition (the commented-out line below)
# must not be used here.
# NOTE(review): broadcasting over the last axis would appear to give the same
# result for this channels-last layout — confirm before relying on either claim.
bias = tf.nn.bias_add( conv, conv_filter_b1 )
#bias = conv + conv_filter_b1

# ReLU activation introduces the non-linearity.
relu_feature_maps1 = tf.nn.relu( bias )
print(relu_feature_maps1)
# Printed output recorded by the original author:
#Tensor("Relu:0", shape=(?, 8, 8, 10), dtype=float32)

# Pooling layer: tf.nn.max_pool implements the forward pass of max pooling and
# takes arguments similar to tf.nn.conv2d.
# ksize is the window size, strides the step, padding whether to zero-pad.
# NOTE(review): max_pool1 is not referenced again in the visible code; the
# second convolution reads relu_feature_maps1 instead — confirm intent.
max_pool1 = tf.nn.max_pool( relu_feature_maps1, ksize=[1,3,3,1], strides=[1,2,2,1], padding='SAME')

print(max_pool1)
# Printed output recorded by the original author:
#Tensor("MaxPool:0", shape=(?, 4,4,10), dtype=float32)

# Convolution layer 2.
# The previous convolution layer has depth 10 (10 output channels), so each
# filter here has depth 10; 5 filters are used.
conv_filter_w2 = tf.Variable( tf.random_normal([3, 3, 10, 5]) )
conv_filter_b2 =  tf.Variable( tf.random_normal([5]) )
# NOTE(review): the input is relu_feature_maps1 rather than max_pool1, so the
# first pooling result is bypassed and the stride of 2 performs the
# down-sampling instead — confirm this is intentional.
conv2 = tf.nn.conv2d(relu_feature_maps1, conv_filter_w2, strides=[1, 2, 2, 1], padding='SAME')

conv_out2 = tf.nn.bias_add( conv2, conv_filter_b2 )
print(conv_out2)
# Printed output recorded by the original author:
#Tensor("BiasAdd_1:0", shape=(?, 4, 4, 5), dtype=float32)

# Batch-normalisation layer followed by the activation layer.
# Batch normalisation (BN) is generally applied before the activation function
# and gives every dimension of the result zero mean and unit variance.
# Normalising keeps the activation inputs in a well-behaved range, and a stable
# input distribution for each layer helps the network train.
# axes: the dimensions to reduce over, given as a list, e.g. [0, 1, 2]; here
#       that leaves one mean and one variance per channel.
# keep_dims: whether the reduced dimensions are kept.
# NOTE(review): the statistics always come from the batch that is fed in (no
# moving averages are visible here), so predictions depend on the batch contents.
# NOTE(review): later TensorFlow 1.x releases spell this argument `keepdims`
# and deprecate `keep_dims` — confirm against the installed version.
batch_mean, batch_var = tf.nn.moments( conv_out2, axes=[0, 1, 2], keep_dims=True )

# Learnable per-channel offset (initialised to 0) and scale (initialised to 1),
# plus the small constant passed as variance_epsilon.
shift = tf.Variable(tf.zeros([5]))
scale = tf.Variable(tf.ones([5]))
epsilon = 1e-3
# Signature: tf.nn.batch_normalization(x, mean, variance, offset, scale, variance_epsilon, name=None)
# Reference used by the original author:
#https://blog.csdn.net/lanchunhui/article/details/70792458

BN_out = tf.nn.batch_normalization( conv_out2, batch_mean, batch_var, shift, scale, epsilon )
print(BN_out)
# Printed output recorded by the original author:
#Tensor("batchnorm/add_1:0", shape=(?, 4, 4, 5), dtype=float32)

# ReLU activation applied to the normalised feature maps.
relu_BN_maps2 = tf.nn.relu(BN_out)

# Pooling layer: 3x3 max pooling with stride 2 and zero padding.
max_pool2 = tf.nn.max_pool( relu_BN_maps2, ksize=[1,3,3,1], strides=[1,2,2,1], padding='SAME' )
print(max_pool2)
# Printed output recorded by the original author:
#Tensor("MaxPool_1:0", shape=(?, 2, 2, 5), dtype=float32)

# Flatten the feature maps: every sample becomes one row of 2*2*5 = 20 values.
max_pool2_flat = tf.reshape( max_pool2, [-1, 2*2*5] )

# Fully connected layer: 20 inputs -> 50 units, followed by ReLU.
fc_w1 = tf.Variable( tf.random_normal([2*2*5,50]) )
fc_b1 =  tf.Variable( tf.random_normal([50]) )
fc_out1 = tf.nn.relu( tf.matmul(max_pool2_flat, fc_w1) + fc_b1 )

# Output layer: 50 units -> 10 class scores, turned into probabilities by softmax.
out_w1 = tf.Variable( tf.random_normal([50,10]) )
out_b1 = tf.Variable( tf.random_normal([10]) )
pred = tf.nn.softmax( tf.matmul(fc_out1, out_w1) + out_b1 )

# Loss: negative mean of tf_Y * log(pred), averaged over every element (batch
# and class axes alike).  pred is clipped to [1e-11, 1.0] first so that log(0)
# cannot occur.
loss = -tf.reduce_mean( tf_Y * tf.log( tf.clip_by_value(pred,1e-11,1.0) ) )

# One Adam update step with learning rate 1e-3.
train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)

# Predicted and true class indices.  tf.argmax is used instead of tf.arg_max,
# an alias that TensorFlow 1.x deprecates; both compute the same result.
y_pred = tf.argmax(pred, 1)
bool_pred = tf.equal(tf.argmax(tf_Y, 1), y_pred)

# Accuracy: fraction of samples whose predicted index matches the label index.
accuracy = tf.reduce_mean(tf.cast(bool_pred, tf.float32))

# Train the network, then report accuracy measured on the same data it was
# trained on.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    n_samples = Y.shape[0]
    for epoch in range(1000):  # 1000 passes over the data
        # One pass of mini-batch gradient descent (MBGD).
        for batch_xs, batch_ys in generatebatch(X, Y, n_samples, batch_size):
            sess.run(train_step, feed_dict={tf_X: batch_xs, tf_Y: batch_ys})

        # Only every 100th epoch reports accuracy over the full dataset.
        if epoch % 100 != 0:
            continue
        res = sess.run(accuracy, feed_dict={tf_X: X, tf_Y: Y})
        print(epoch, res)

    # Predicted class index for every sample, computed as a single batch
    # (original author's note: prediction works on a batch, not a single sample).
    res_ypred = sess.run(y_pred, feed_dict={tf_X: X, tf_Y: Y}).flatten()
    print(res_ypred)

# Cross-check the accuracy with scikit-learn, using the raw label column.
from sklearn.metrics import accuracy_score
print(accuracy_score(Y_data, res_ypred.reshape(-1, 1)))

# Closing timestamp.
print(time.ctime())