Learning VGGNet: Practice

0 - Dataset

The KTH Animals dataset (19 classes, matching OUTPUT_C below):
http://www.csc.kth.se/~att/Site/Animals.html
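
The data reader in section 1.3 expects one subfolder per class, each holding an original/ directory of JPEGs. The layout looks like this (class names other than cougar are illustrative):

animal_database/
    cougar/
        original/
            4400.jpg
            ...
    moose/
        original/
            ...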

1 - Code

1.1 - Import Packages

import tensorflow as tf
import os, glob
import numpy as np
from skimage import io, transform
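
Note: this code targets TensorFlow 1.x; it relies on placeholders, Sessions, and tf.contrib, none of which exist in TensorFlow 2.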

1.2 - Initialize Parameters

DATA_PATH = "animal_database/"
INPUT_W = 224              # input image width
INPUT_H = 224              # input image height
INPUT_C = 3                # input image channels
OUTPUT_C = 19              # number of classes in the dataset
TRAINING_STEPS = 50
MODEL_SAVE_PATH = "model"
MODEL_NAME = "model.ckpt"
BATCH_SIZE = 64
LEARNING_RATE_BASE = 1e-6
LEARNING_RATE_DECAY = 0.99
MOMENTUM = 0.9
TRAIN_KEEP_PROB = 0.6      # dropout keep probability during training
VAL_KEEP_PROB = 1.0        # no dropout at validation time
TEST_KEEP_PROB = 1.0       # no dropout at test time

1.3 - Build Data Reader

class DCdataset(object):
    def __init__(self, path, w, h, c, ratio=0.8):
        
        def onehot(n):
            l = np.zeros([OUTPUT_C])
            l[n] = 1
            return l

        print("Process images start")
        cate = [path+x for x in os.listdir(path) if os.path.isdir(path+x)]
        x = []
        y = []
        for (i, folder) in enumerate(cate):
            for img_path in glob.glob(folder+"/original/*.jpg"):
                # print("reading the image: %s" % img_path)
                img = io.imread(img_path)
                img = transform.resize(img, (w, h, c))
                x.append(img)
                y.append(i)
        x = np.asarray(x, np.float32)
        y = np.asarray(y, np.int32)
        
        num_example = x.shape[0]
        arr = np.arange(num_example)
        np.random.shuffle(arr)
        x = x[arr]
        y = y[arr]
        x = np.asarray([np.reshape(x_, (w, h, c)) for x_ in x])
        y = np.asarray([onehot(y_) for y_ in y])
        s = int(num_example * ratio)  # train/validation split point (np.int is deprecated)
        self.x_train, self.x_val = x[:s], x[s:]
        self.y_train, self.y_val = y[:s], y[s:]
        self.train_size = s
        self.val_size = num_example - s
        print("Process images end")
    
    def next_batch(self, batch_size):
        arr = np.arange(self.train_size)
        np.random.shuffle(arr)
        arr = arr[:batch_size]
        batch_x = self.x_train[arr]
        batch_y = self.y_train[arr]
        return batch_x, batch_y
    
    def next_val_batch(self, batch_size):
        arr = np.arange(self.val_size)
        np.random.shuffle(arr)
        arr = arr[:batch_size]
        batch_x = self.x_val[arr]
        batch_y = self.y_val[arr]
        return batch_x, batch_y
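
A minimal usage sketch (assuming the dataset has been unpacked to animal_database/ with the layout shown in section 0):

dataset = DCdataset(DATA_PATH, INPUT_W, INPUT_H, INPUT_C)
xs, ys = dataset.next_batch(BATCH_SIZE)
print(xs.shape)  # (64, 224, 224, 3): float32 images, scaled to [0, 1] by skimage's resize
print(ys.shape)  # (64, 19): one-hot labels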

1.4 - Build Network

def conv_op(input_op, name, kh, kw, n_out, dh, dw, p):
    n_in = input_op.get_shape()[-1].value
    
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope+"w",
            shape=[kh, kw, n_in, n_out], dtype=tf.float32,
            # initializer=tf.truncated_normal_initializer(mean=0, stddev=10e-2))
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        conv = tf.nn.conv2d(input_op, kernel, (1, dh, dw, 1), padding="SAME")
        bias_init_val = tf.constant(0.0, shape=[n_out], dtype=tf.float32)
        biases = tf.Variable(bias_init_val, trainable=True, name="b")
        z = tf.nn.bias_add(conv, biases)
        activation = tf.nn.relu(z, name=scope)
        p += [kernel, biases]
        return activation

def fc_op(input_op, name, n_out, p):
    n_in = input_op.get_shape()[-1].value
    
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope+"w",
            shape=[n_in, n_out], dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(0.1, shape=[n_out],
            dtype=tf.float32), name="b")
        activation = tf.nn.relu_layer(input_op, kernel, biases, name=scope)
        p += [kernel, biases]
        return activation

def mpool_op(input_op, name, kh, kw, dh, dw):
    return tf.nn.max_pool(input_op,
            ksize=[1, kh, kw, 1],
            strides=[1, dh, dw, 1],
            padding="SAME",
            name=name)
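
# Optional sanity check of the two building blocks above (the "check_*" names
# are illustrative; run it in a fresh graph, since conv_op creates its weights
# through tf.get_variable and variable names cannot be reused):
check_in = tf.placeholder(tf.float32, [None, 224, 224, 3])
check_conv = conv_op(check_in, name="check_conv", kh=3, kw=3, n_out=64, dh=1, dw=1, p=[])
check_pool = mpool_op(check_conv, name="check_pool", kh=2, kw=2, dh=2, dw=2)
print(check_conv.get_shape())  # (?, 224, 224, 64): SAME padding with stride 1 keeps H and W
print(check_pool.get_shape())  # (?, 112, 112, 64): 2x2 pooling with stride 2 halves H and W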

def inference_op(input_op, keep_prob):
    p = []
    
    conv1_1 = conv_op(input_op, name="conv1_1", kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    conv1_2 = conv_op(conv1_1, name="conv1_2", kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    pool1 = mpool_op(conv1_2, name="pool1", kh=2, kw=2, dh=2, dw=2)
    
    conv2_1 = conv_op(pool1, name="conv2_1", kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    conv2_2 = conv_op(conv2_1, name="conv2_2", kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    pool2 = mpool_op(conv2_2, name="pool2", kh=2, kw=2, dh=2, dw=2)
    
    conv3_1 = conv_op(pool2, name="conv3_1", kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_2 = conv_op(conv3_1, name="conv3_2", kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_3 = conv_op(conv3_2, name="conv3_3", kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    pool3 = mpool_op(conv3_3, name="pool3", kh=2, kw=2, dh=2, dw=2)
    
    conv4_1 = conv_op(pool3, name="conv4_1", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_2 = conv_op(conv4_1, name="conv4_2", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_3 = conv_op(conv4_2, name="conv4_3", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool4 = mpool_op(conv4_3, name="pool4", kh=2, kw=2, dh=2, dw=2)

    conv5_1 = conv_op(pool4, name="conv5_1", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_2 = conv_op(conv5_1, name="conv5_2", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_3 = conv_op(conv5_2, name="conv5_3", kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool5 = mpool_op(conv5_3, name="pool5", kh=2, kw=2, dh=2, dw=2)
    
    shp = pool5.get_shape()
    flattened_shape = shp[1].value * shp[2].value * shp[3].value
    resh1 = tf.reshape(pool5, [-1, flattened_shape], name="resh1")
    
    fc6 = fc_op(resh1, name="fc6", n_out=4096, p=p)
    fc6_drop = tf.nn.dropout(fc6, keep_prob, name="fc6_drop")
    
    fc7 = fc_op(fc6_drop, name="fc7", n_out=4096, p=p)
    fc7_drop = tf.nn.dropout(fc7, keep_prob, name="fc7_drop")
    
    fc8 = fc_op(fc7_drop, name="fc8", n_out=OUTPUT_C, p=p)
    # softmax = tf.nn.softmax(fc8)
    # predictions = tf.argmax(softmax, 1)
    
    return fc8, p
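
For reference, this is the VGG-16 configuration: 13 convolutional layers plus 3 fully connected ones. With a 224x224 input, each of the five 2x2, stride-2 max-poolings halves the spatial resolution (224 -> 112 -> 56 -> 28 -> 14 -> 7), so flattened_shape comes out to 7 * 7 * 512 = 25088 features feeding fc6.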

1.5 - Train

def train():
    x = tf.placeholder(tf.float32, [None, INPUT_W, INPUT_H, INPUT_C], name="x-input")
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_C], name="y-input")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    
    dataset = DCdataset(DATA_PATH, INPUT_W, INPUT_H, INPUT_C)
    
    global_step = tf.Variable(0, trainable=False)
    
    y, p = inference_op(x, keep_prob)
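    # y_ holds one-hot rows, so tf.argmax(y_, 1) recovers the integer class
    # indices that sparse_softmax_cross_entropy_with_logits expects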
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1)))
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(tf.nn.softmax(y), 1), tf.argmax(y_, 1)), tf.float32))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        dataset.train_size / BATCH_SIZE,
        LEARNING_RATE_DECAY
    )
    optimizer = tf.train.MomentumOptimizer(learning_rate, MOMENTUM).minimize(loss, global_step=global_step)
    
    # tf.reset_default_graph()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        
        saver = tf.train.Saver()
        for i in range(TRAINING_STEPS):
            xs, ys = dataset.next_batch(BATCH_SIZE)
            _, loss_value, accuracy_value, step = sess.run([optimizer, loss, accuracy, global_step], 
                feed_dict={x: xs, y_: ys, keep_prob: TRAIN_KEEP_PROB})
            print("After %d training step(s), loss on training batch is %g, accuracy on training batch is %g%%." % (step, loss_value, accuracy_value*100))

            if i % 2 == 0:
                xs, ys = dataset.next_val_batch(BATCH_SIZE)
                # evaluate only: do not run the optimizer on validation data
                loss_value, accuracy_value = sess.run([loss, accuracy],
                    feed_dict={x: xs, y_: ys, keep_prob: VAL_KEEP_PROB})
                print("[Validation] Step %d: validation loss is %g and validation accuracy is %g%%." % (step, loss_value, accuracy_value*100))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
                
train()

1.6 - Test

def test(img_path, model_path):
    
    # start from a clean graph so names from the earlier train() run don't collide
    tf.reset_default_graph()
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(model_path+".meta")
        saver.restore(sess, model_path)
        graph = tf.get_default_graph()
        
        x = graph.get_tensor_by_name("x-input:0")
        keep_prob = graph.get_tensor_by_name("keep_prob:0")
        fc8 = graph.get_tensor_by_name("fc8:0")
        
        img = io.imread(img_path)
        img = transform.resize(img, (INPUT_W, INPUT_H, INPUT_C))
        y = sess.run(fc8, feed_dict={
            x: np.reshape(img, [-1, INPUT_W, INPUT_H, INPUT_C]),
            keep_prob: TEST_KEEP_PROB
        })
        softmax = tf.nn.softmax(y)
        prediction_labels = tf.argmax(softmax, 1)
        probs, pred = sess.run([softmax, prediction_labels])
        print("probabilities: ", probs)
        print("predicted label: ", pred)
img_path = os.path.join(DATA_PATH, "cougar", "original", "4400.jpg")
model_path = os.path.join(MODEL_SAVE_PATH, MODEL_NAME + "-49")
test(img_path, model_path)
Original post: https://www.cnblogs.com/CZiFan/p/9673680.html