TensorFlow实战-VGGNet

  1 from ... import input_data
  2 input_data=data_read()
  3 import tensorflow as tf
  4 
  5 def conv(name,x,w,b):
  6     return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x,w,strides=[1,1,1,1],padding='SAME'),b),name=name)
  7 
  8 def max_pool(name,x,k):
  9     return tf.nn.max_pool(x,ksize=[1,k,k,1],strides=[1,k,k,1],padding='SAME',name=name)
 10 
 11 def fc(name,x,w,b):
 12     return tf.nn.relu(tf.matmul(x,w)+b,name=name)
 13 
 14 def vgg_net(_X,_weights,_biases,keep_prob):
 15         x_shape=_X.get_shape()
 16     _X=tf.reshape(_X,shape=[-1,X_shape[1].value,x_shape[2].value,x_shape[3].value])
 17 
 18     conv1_1=conv('conv1_1',_X,_weights['wc1_1'],_biases['bc1_1'])
 19     conv1_2=conv('conv1_2',conv1_1,_weights['wc1_2'],_biases['bc1_2'])
 20     pool1=max_pool('pool1',conv1_2,k=2)
 21 
 22     conv2_1=conv('conv2_1',pool1,_weights['wc2_1'],_biases['bc2_1'])
 23     conv2_2=conv('conv2_2',conv2_1,_weights['wc2_2'],_biases['bc2_2'])
 24     pool2=max_pool('pool2',conv2_2,k=2)
 25 
 26     conv3_1=conv('conv3_1',pool2,_weights['wc3_1'],_biases['bc3_1'])
 27     conv3_2=conv('conv3_2',conv3_1,_weights['wc3_2'],_biases['bc3_2'])
 28     conv3_3=conv('conv3_3',conv3_2,_weights['wc3_3'],_biases['bc3_3'])
 29     pool3=max_pool('pool3',conv3_3,k=2)
 30 
 31     conv4_1=conv('conv4_1',pool3,_weights['wc4_1'],_biases['bc4_1'])
 32     conv4_2=conv('conv4_2',conv4_1,_weights['wc4_2'],_biases['bc4_2'])
 33     conv4_3=conv('conv4_3',conv4_2,_weights['wc4_3'],_biases['bc4_3'])
 34     pool4=max_pool('pool4',conv4_3,k=2)
 35 
 36     conv5_1=conv('conv5_1',pool4,_weights['wc5_1'],_biases['bc5_1'])
 37     conv5_2=conv('conv5_2',conv5_1,_weights['wc5_2'],_biases['bc5_2'])
 38     conv5_3=conv('conv5_3',conv5_2,_weights['wc5_3'],_biases['bc5_3'])    
 39     pool5=max_pool('pool5',conv5_3,k=2)
 40 
 41     _shape=pool5.get_shape()
 42     flatten=_shape[1].value*_shape[2].value*_shape[3].value
 43     pool5=tf.reshape(pool5,shape=[-1,flatten])    
 44     fc1=fc('fc1',pool5,_weights['fc1'],_biases['fb1'])
 45     fc1=tf.nn.dropout(fc1,keep_prob)
 46 
 47     fc2=fc('fc2',fc1,_weights['fc2'],_biases['fb2'])
 48     fc2=tf.nn.dropout(fc2,keep_prob)
 49 
 50     fc3=fc('fc3',fc2,_weights['fc3'],_biases['fb3'])
 51     fc3=tf.nn.dropout(fc3,keep_prob)
 52 
 53     out=tf.argmax(tf.nn.softmax(fc3),1)
 54     
 55     return out
 56 
 57 learning_rate=0.001
 58 max_iters=200000
 59 batch_size=100
 60 display_step=20
 61 
 62 n_input=224*224*3
 63 n_classes=1000
 64 dropout=0.8
 65 
 66 x=tf.placeholder(tf.float32,[None,n_input])
 67 y=tf.placeholder(tf.float32,[None,n_classes])
 68 keep_prob=tf.placeholder(tf.float32)
 69 
 70 weights={
 71     'wc1_1':tf.Variable(tf.random_normal([3,3,3,64])),
 72     'wc1_2':tf.Variable(tf.random_normal([3,3,64,64])),
 73     'wc2_1':tf.Variable(tf.random_normal([3,3,64,128])),
 74     'wc2_2':tf.Variable(tf.random_normal([3,3,128,128])),
 75     'wc3_1':tf.Variable(tf.random_normal([3,3,128,256])),
 76     'wc3_2':tf.Variable(tf.random_normal([3,3,256,256])),
 77     'wc3_3':tf.Variable(tf.random_normal([3,3,256,256])),
 78     'wc4_1':tf.Variable(tf.random_normal([3,3,256,512])),
 79     'wc4_2':tf.Variable(tf.random_normal([3,3,512,512])),
 80     'wc4_3':tf.Variable(tf.random_normal([3,3,512,512])),
 81     'wc5_1':tf.Variable(tf.random_normal([3,3,512,512])),
 82     'wc5_2':tf.Variable(tf.random_normal([3,3,512,512])),
 83     'wc5_3':tf.Variable(tf.random_normal([3,3,512,512])),
 84     'fc1':tf.Variable(tf.random_normal([7*7*512,4096])),
 85     'fc2':tf.Variable(tf.random_normal([4096,4096])),
 86     'fc3':tf.Variable(tf.random_normal([4096,n_classes]))
 87 }
 88 
 89 biases={
 90     'bc1_1':tf.Variable(tf.random_normal([64])),
 91     'bc1_2':tf.Variable(tf.random_normal([64])),
 92     'bc2_1':tf.Variable(tf.random_normal([128])),
 93     'bc2_2':tf.Variable(tf.random_normal([128])),
 94     'bc3_1':tf.Variable(tf.random_normal([256])),
 95     'bc3_2':tf.Variable(tf.random_normal([256])),
 96     'bc3_3':tf.Variable(tf.random_normal([256])),
 97     'bc4_1':tf.Variable(tf.random_normal([512])),
 98     'bc4_2':tf.Variable(tf.random_normal([512])),
 99     'bc4_3':tf.Variable(tf.random_normal([512])),
100     'bc5_1':tf.Variable(tf.random_normal([512])),
101     'bc5_2':tf.Variable(tf.random_normal([512])),
102     'bc5_3':tf.Variable(tf.random_normal([512]))
103 }
104 
# vgg_net reads dimensions 1..3 of its input, so give the flat placeholder
# its image layout first (224 * 224 * 3 == n_input).
pred = vgg_net(tf.reshape(x, [-1, 224, 224, 3]), weights, biases, keep_prob)

# softmax_cross_entropy_with_logits applies the softmax itself, so `pred` is
# expected to hold raw per-class scores. Keyword arguments are used because
# TensorFlow >= 1.0 rejects positional logits/labels.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Fraction of rows whose highest-scoring class matches the label's arg-max.
correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.initialize_all_variables()
114 
with tf.Session() as sess:
    sess.run(init)
    step = 1

    # Run optimisation steps until step * batch_size reaches max_iters.
    while step * batch_size < max_iters:
        # NOTE(review): `mnist` is not defined anywhere in this listing; it
        # presumably comes from the elided data-loading code at the top.
        # Whatever supplies it must return batches matching the `x` and `y`
        # placeholders -- confirm before running.
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(optimizer,
                 feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout})

        # Must advance inside the loop body, otherwise the loop never ends.
        step += 1

VGGNet：

(1) 由牛津大学计算机视觉组(Visual Geometry Group)和Google DeepMind公司的研究员一起研发。

(2)探索了卷积神经网络的深度与其性能之间的关系，通过反复堆叠3*3的小型卷积核和2*2的最大池化层，构建了16-19层深的卷积神经网络。

(3)取得了ILSVRC2014比赛分类项目的第2 名，定位项目的第1名。

(4)VGG的网络结构：

5段卷积层+3段全连接层
两个3*3的卷积层串联相当于1个5*5的卷积层，即一个像素会跟周围5*5的像素产生关联，感受野大小为5*5
三个3*3的卷积层串联的效果相当于1个7*7的卷积层。
3个串联的3*3的卷积层，比1个7*7的卷积层参数量少，只有后者的(3*3*3)/(7*7)=55%
3个3*3的卷积层比1个7*7的卷积层有更多的非线性变换，前者可以使用三次ReLU激活函数，后者只有一次

(5)VGG训练技巧：

先训练级别A的简单网络，再复用A网络的权重来初始化后面的几个复杂模型，训练收敛的速度更快。
训练时采用multi-scale方法做数据增强，将原始图像缩放到不同尺寸S，然后再随机裁切224*224的图片，这样能增加很多数据量，防止过拟合。
预测时，VGG采用Multi-scale的方法，输入图像为多尺度Q，且对于每个Q在最后一个卷积层使用滑窗的方式进行分类预测，将不同窗口的分类结果平均，再将不同尺寸Q的结果平均得到最后结果。

(6)VGG结论：

LRN层作用不大。
越深的网络效果越好
1*1的卷积也是很有效的，但是没有3*3的卷积好，大一些的卷积核可以学习更大的空间特征。

参考资料：

《TensorFlow实战》，黄文坚、唐源 著