Stanford CS20学习笔记

Lecture Note 2

Tensorboard　　P3

Data Structures　　P4

Math Operations　　P6

Data Types　　P7

　　tf native && python native

tensorflow && numpy　　P9

Variables　　P10-14

　　var要先initialize/assign

placeholder　　P15-16

Lecture Note 3

An example of logistic regression　　P3

- How to define a loss function?　　P4-6

- tf.data 导入数据用　　P6-9

- Optimizer　　P9-13

- eg: logistic on MNIST　　P14

Lecture Note 4

Eager：方便在Python中使用TensorFlow

eg: ppt P19-P23　　不用再tf.session.run

自动求导　　P25-28

与传统tf命令的区别　　P32

usage　　P37

Assignment 1

1. Commonly used tensorflow operations

  1 """
  2 Simple exercises to get used to TensorFlow API
  3 You should thoroughly test your code.
  4 TensorFlow's official documentation should be your best friend here
  5 CS20: "TensorFlow for Deep Learning Research"
  6 cs20.stanford.edu
  7 Created by Chip Huyen (chiphuyen@cs.stanford.edu)
  8 """
  9 import os
 10 os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
 11 
 12 import tensorflow as tf
 13 
 14 sess = tf.InteractiveSession()
 15 ###############################################################################
 16 # 1a: Create two random 0-d tensors x and y of any distribution.
 17 # Create a TensorFlow object that returns x + y if x > y, and x - y otherwise.
 18 # Hint: look up tf.cond()
 19 # I do the first problem for you
 20 ###############################################################################
 21 
 22 x = tf.random_uniform([])  # Empty array as shape creates a scalar.
 23 y = tf.random_uniform([])
 24 out = tf.cond(tf.greater(x, y), lambda: x + y, lambda: x - y)
 25 print(sess.run(out))
 26 
 27 ###############################################################################
 28 # 1b: Create two 0-d tensors x and y randomly selected from the range [-1, 1).
 29 # Return x + y if x < y, x - y if x > y, 0 otherwise.
 30 # Hint: Look up tf.case().
 31 ###############################################################################
 32 
 33 # YOUR CODE
 34 x = tf.random_uniform([])
 35 y = tf.random_uniform([])
 36 xdy = lambda x,y: x-y
 37 xpy = lambda x,y: x+y
 38 res = tf.case({tf.less(x,y): lambda: xpy(x,y), tf.greater(x,y): lambda: xdy(x,y)}, default=lambda: 0.00, exclusive=True)
 39 print(sess.run(res))
 40 
 41 ###############################################################################
 42 # 1c: Create the tensor x of the value [[0, -2, -1], [0, 1, 2]] 
 43 # and y as a tensor of zeros with the same shape as x.
 44 # Return a boolean tensor that yields Trues if x equals y element-wise.
 45 # Hint: Look up tf.equal().
 46 ###############################################################################
 47 
 48 # YOUR CODE
 49 x = tf.constant([[0, -2, -1], [0, 1, 2]])
 50 y = tf.zeros_like(x)
 51 res = tf.equal(x,y)
 52 print(sess.run(res))
 53 
 54 ###############################################################################
 55 # 1d: Create the tensor x of value 
 56 # [29.05088806,  27.61298943,  31.19073486,  29.35532951,
 57 #  30.97266006,  26.67541885,  38.08450317,  20.74983215,
 58 #  34.94445419,  34.45999146,  29.06485367,  36.01657104,
 59 #  27.88236427,  20.56035233,  30.20379066,  29.51215172,
 60 #  33.71149445,  28.59134293,  36.05556488,  28.66994858].
 61 # Get the indices of elements in x whose values are greater than 30.
 62 # Hint: Use tf.where().
 63 # Then extract elements whose values are greater than 30.
 64 # Hint: Use tf.gather().
 65 ###############################################################################
 66 
 67 # YOUR CODE
 68 x=tf.constant([[29.05088806,  27.61298943,  31.19073486,  29.35532951], [30.97266006,  26.67541885,  38.08450317,  20.74983215], [34.94445419,  34.45999146,  29.06485367,  36.01657104], [27.88236427,  20.56035233,  30.20379066,  29.51215172], [33.71149445,  28.59134293,  36.05556488,  28.66994858]])
 69 h1=tf.where(tf.greater(x, 30))
 70 print(sess.run(h1))
 71 h2=tf.gather_nd(x,h1)
 72 print(sess.run(h2))
 73 
 74 ###############################################################################
 75 # 1e: Create a diagnoal 2-d tensor of size 6 x 6 with the diagonal values of 1,
 76 # 2, ..., 6
 77 # Hint: Use tf.range() and tf.diag().
 78 ###############################################################################
 79 
 80 # YOUR CODE
 81 ran=tf.range(1,7,1)
 82 dig=tf.diag(ran)
 83 print(sess.run(dig))
 84 
 85 ###############################################################################
 86 # 1f: Create a random 2-d tensor of size 10 x 10 from any distribution.
 87 # Calculate its determinant.
 88 # Hint: Look at tf.matrix_determinant().
 89 ###############################################################################
 90 
 91 # YOUR CODE
 92 x = tf.random_uniform((10,10))
 93 res=tf.matrix_determinant(x)
 94 print(sess.run(res))
 95 
 96 ###############################################################################
 97 # 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
 98 # Return the unique elements in x
 99 # Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
100 ###############################################################################
101 
102 # YOUR CODE
103 x=tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9])
104 y, idx=tf.unique(x)
105 print(sess.run(y))
106 
107 ###############################################################################
108 # 1h: Create two tensors x and y of shape 300 from any normal distribution,
109 # as long as they are from the same distribution.
110 # Use tf.cond() to return:
111 # - The mean squared error of (x - y) if the average of all elements in (x - y) is negative, or
112 # - The sum of absolute value of all elements in the tensor (x - y) otherwise.
113 # Hint: see the Huber loss function in the lecture slides 3.
114 ###############################################################################
115 
116 # YOUR CODE
117 x = tf.random_normal([300])
118 y = tf.random_normal([300])
119 res=tf.cond(tf.reduce_mean(x-y)<0, lambda: tf.reduce_mean(tf.square(x-y)), lambda: tf.reduce_sum(tf.abs(x-y)))
120 print(sess.run(res))

View Code

2. Logistic regression in tensorflow

  1 """ Starter code for simple logistic regression model for MNIST
  2 with tf.data module
  3 MNIST dataset: yann.lecun.com/exdb/mnist/
  4 Created by Chip Huyen (chiphuyen@cs.stanford.edu)
  5 CS20: "TensorFlow for Deep Learning Research"
  6 cs20.stanford.edu
  7 Lecture 03
  8 """
  9 import os
 10 os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
 11 
 12 import numpy as np
 13 import tensorflow as tf
 14 import time
 15 
 16 import utils
 17 
 18 # Define paramaters for the model
 19 learning_rate = 0.01
 20 batch_size = 128
 21 n_epochs = 50
 22 n_train = 60000
 23 n_test = 10000
 24 
 25 # Step 1: Read in data
 26 mnist_folder = 'data/mnist'
 27 utils.download_mnist(mnist_folder)
 28 train, val, test = utils.read_mnist(mnist_folder, flatten=True)
 29 
 30 # Step 2: Create datasets and iterator
 31 # create training Dataset and batch it
 32 train_data = tf.data.Dataset.from_tensor_slices(train)
 33 train_data = train_data.shuffle(30000) # if you want to shuffle your data
 34 train_data = train_data.batch(batch_size)
 35 
 36 # create testing Dataset and batch it
 37 test_data = None
 38 #############################
 39 ########## TO DO ############
 40 test_data = tf.data.Dataset.from_tensor_slices(test)
 41 test_data = test_data.batch(batch_size)
 42 #############################
 43 
 44 
 45 # create one iterator and initialize it with different datasets
 46 iterator = tf.data.Iterator.from_structure(train_data.output_types, 
 47                                            train_data.output_shapes)
 48 img, label = iterator.get_next()
 49 
 50 train_init = iterator.make_initializer(train_data)    # initializer for train_data
 51 test_init = iterator.make_initializer(test_data)    # initializer for train_data
 52 
 53 # Step 3: create weights and bias
 54 # w is initialized to random variables with mean of 0, stddev of 0.01
 55 # b is initialized to 0
 56 # shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w)
 57 # shape of b depends on Y
 58 w, b = None, None
 59 #############################
 60 ########## TO DO ############
 61 w=tf.get_variable("W", shape=(784,10), initializer=tf.random_normal_initializer(mean=0, stddev=1))
 62 b=tf.get_variable("b", shape=(1,10), initializer=tf.zeros_initializer())
 63 #############################
 64 
 65 
 66 # Step 4: build model
 67 # the model that returns the logits.
 68 # this logits will be later passed through softmax layer
 69 logits = None
 70 #############################
 71 ########## TO DO ############
 72 logits=tf.matmul(img,w)+b
 73 #############################
 74 
 75 
 76 # Step 5: define loss function
 77 # use cross entropy of softmax of logits as the loss function
 78 loss = None
 79 #############################
 80 ########## TO DO ############
 81 entropy=tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=label)
 82 loss=tf.reduce_mean(entropy)
 83 #############################
 84 
 85 
 86 # Step 6: define optimizer
 87 # using Adamn Optimizer with pre-defined learning rate to minimize loss
 88 optimizer = None
 89 #############################
 90 # REF: https://blog.csdn.net/mao_xiao_feng/article/details/53382790
 91 ########## TO DO ############
 92 optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
 93 #############################
 94 
 95 
 96 # Step 7: calculate accuracy with test set
 97 preds = tf.nn.softmax(logits)
 98 correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(label, 1))
 99 accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
100 
101 writer = tf.summary.FileWriter('./graphs/logreg', tf.get_default_graph())
102 with tf.Session() as sess:
103    
104     start_time = time.time()
105     sess.run(tf.global_variables_initializer())
106 
107     # train the model n_epochs times
108     for i in range(n_epochs):     
109         sess.run(train_init)    # drawing samples from train_data
110         total_loss = 0
111         n_batches = 0
112         try:
113             while True:
114                 _, l = sess.run([optimizer, loss])
115                 total_loss += l
116                 n_batches += 1
117         except tf.errors.OutOfRangeError:
118             pass
119         print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))
120     print('Total time: {0} seconds'.format(time.time() - start_time))
121 
122     # test the model
123     sess.run(test_init)            # drawing samples from test_data
124     total_correct_preds = 0
125     try:
126         while True:
127             accuracy_batch = sess.run(accuracy)
128             total_correct_preds += accuracy_batch
129     except tf.errors.OutOfRangeError:
130         pass
131 
132     print('Accuracy {0}'.format(total_correct_preds/n_test))
133 writer.close()

View Code

Lecture Note 5

word embedding: Representing a word by means of its neighbors

softmax：预测概率　　Note P3

Model介绍

　　Note P4-7, P9-14

Tensorflow programming的一般模式　　Note P7-8

1. Import data (use tf.data or placeholder)
2. Define weights
3. Define model
4. Define loss function
5. Define optimizer
6. Train the model
　　6.1 Initialize all model variables
　　6.2 Initialize iterator/feed in the training data
　　6.3 Execute the model on training data
　　6.4 Compute cost
　　6.5 Adjust parameter to minimize the cost

Name Scope：用于在tensorboard中给ops归类　　Note P15

VarScope：为了在不同计算图(eg: 不同NN中)复用变量值　　Note P17-19

collection　　Note P20

tf.saver　　Note P21-24

tf.summary　　用于在tensorboard中看变量(eg: loss)的变化情况　　Note P25-27

randomization　　Note P27-29

autodiff　　Note P29

Lecture Note 6

CNN

input image: width * height * depth (图像的长/宽/color [eg: RGB] )　　　eg: Note P2

3 main types of layers:　　Conv Layer / Pooling Layer / Fully-Connected Layer

1. Conv Layer　　　　Note P6 图

　　depth：卷积核（filter）的数量

　　stride：卷积核在input上每次移动的步长

　　zero-padding：在input的边框上补几圈0，以保证卷积前后input/output大小一致

　　summary：Note P9

2. Pooling Layer　　　　Note P13

　　用于降维，常用于conv layer之后。且可以降低overfitting

3. Normalization Layer　　　　Note P14

4. FC(Fully-Connected) Layer　　　　Note P14

5. Convert FC to Conv Layer　　　　Note P14

　　不会qwq

6. Conv Network Architecture　　　　Note P16

Lecture Note 7

1. tf.nn.conv2d　　　　Note P3

2. Def a layer in a function, 便于复用

　　Conv layer　　P4

　　Max pooling　　P6

　　Fully connected　　P7

3. A more simple way: tf.layers　　　　Note P9

Assignment 2

1. CNN for mnist

  1 """ Using convolutional net on MNIST dataset of handwritten digits
  2 MNIST dataset: http://yann.lecun.com/exdb/mnist/
  3 CS 20: "TensorFlow for Deep Learning Research"
  4 cs20.stanford.edu
  5 Chip Huyen (chiphuyen@cs.stanford.edu)
  6 Lecture 07
  7 """
  8 import os
  9 os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
 10 import time 
 11 
 12 import tensorflow as tf
 13 
 14 import utils
 15 
 16 def conv_relu(inputs, filters, k_size, stride, padding, scope_name):
 17     '''
 18     A method that does convolution + relu on inputs
 19     '''
 20     with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
 21         in_channels = inputs.shape[-1]
 22         kernel = tf.get_variable('kernel',
 23                                 [k_size, k_size, in_channels, filters],
 24                                 initializer = tf.truncated_normal_initializer())
 25         biases = tf.get_variable('biases', [filters], 
 26                                 initializer = tf.random_normal_initializer())
 27         conv = tf.nn.conv2d(inputs, kernel, strides=[1, stride, stride, 1], padding=padding)
 28     return tf.nn.relu(conv+biases, name=scope.name)
 29 
 30 def maxpool(inputs, ksize, stride, padding='VALID', scope_name='pool'):
 31     '''A method that does max pooling on inputs'''
 32     with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
 33         pool = tf.nn.max_pool(inputs,
 34                              ksize=[1, ksize, ksize, 1],
 35                              strides=[1, stride, stride, 1],
 36                              padding=padding)
 37     return pool
 38 
 39 def fully_connected(inputs, out_dim, scope_name='fc'):
 40     '''
 41     A fully connected linear layer on inputs
 42     '''
 43     with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
 44         in_dim = inputs.shape[-1]
 45         W = tf.get_variable('weights',
 46                             [in_dim, out_dim],
 47                             initializer=tf.truncated_normal_initializer())
 48         b = tf.get_variable('biases',
 49                             [out_dim],
 50                             initializer=tf.constant_initializer(0.0))
 51         out = tf.matmul(inputs, W) + b
 52     return out
 53 
 54 class ConvNet(object):
 55     def __init__(self):
 56         self.lr = 0.001
 57         self.batch_size = 128
 58         self.keep_prob = tf.constant(0.75)
 59         self.gstep = tf.Variable(0, dtype=tf.int32, 
 60                                 trainable=False, name='global_step')
 61         self.n_classes = 10
 62         self.skip_step = 20
 63         self.n_test = 10000
 64         self.training=False
 65 
 66     def get_data(self):
 67         with tf.name_scope('data'):
 68             train_data, test_data = utils.get_mnist_dataset(self.batch_size)
 69             iterator = tf.data.Iterator.from_structure(train_data.output_types, 
 70                                                    train_data.output_shapes)
 71             img, self.label = iterator.get_next()
 72             self.img = tf.reshape(img, shape=[-1, 28, 28, 1])
 73             # reshape the image to make it work with tf.nn.conv2d
 74 
 75             self.train_init = iterator.make_initializer(train_data)  # initializer for train_data
 76             self.test_init = iterator.make_initializer(test_data)    # initializer for train_data
 77 
 78     def inference_by_functions(self):
 79         '''
 80         Build the model according to the description we've shown in class
 81         Define the model by using the functions above
 82         '''
 83         conv1 = conv_relu(inputs=self.img,
 84                           filters=32,
 85                           k_size=5,
 86                           stride=1,
 87                           padding='SAME',
 88                           scope_name='conv1')
 89         pool1 = maxpool(conv1, 2, 2, 'VALID', 'pool1')
 90         conv2 = conv_relu(inputs=pool1,
 91                           filters=64,
 92                           k_size=5,
 93                           stride=1,
 94                           padding='SAME',
 95                           scope_name='conv2')
 96         pool2 = maxpool(conv2, 2, 2, 'VALID', 'pool2')
 97         feature_dim = pool2.shape[1] * pool2.shape[2] * pool2.shape[3]
 98         pool2 = tf.reshape(pool2, [-1, feature_dim])
 99         fc = tf.nn.relu(fully_connected(pool2, 1024, 'fc'))
100         dropout = tf.layers.dropout(fc, self.keep_prob, name='dropout')
101         self.logits = fully_connected(dropout, self.n_classes, 'logits')
102 
103     def inference_by_layers(self):
104         '''
105         Build the model according to the description we've shown in class
106         Define the model by using tf.layers
107         '''
108         conv1 = tf.layers.conv2d(inputs=self.img,
109                                   filters=32,
110                                   kernel_size=[5, 5],
111                                   padding='SAME',
112                                   activation=tf.nn.relu,
113                                   name='conv1')
114         pool1 = tf.layers.max_pooling2d(inputs=conv1, 
115                                         pool_size=[2, 2], 
116                                         strides=2,
117                                         name='pool1')
118 
119         conv2 = tf.layers.conv2d(inputs=pool1,
120                                   filters=64,
121                                   kernel_size=[5, 5],
122                                   padding='SAME',
123                                   activation=tf.nn.relu,
124                                   name='conv2')
125         pool2 = tf.layers.max_pooling2d(inputs=conv2, 
126                                         pool_size=[2, 2], 
127                                         strides=2,
128                                         name='pool2')
129 
130         feature_dim = pool2.shape[1] * pool2.shape[2] * pool2.shape[3]
131         pool2 = tf.reshape(pool2, [-1, feature_dim])
132         fc = tf.layers.dense(pool2, 1024, activation=tf.nn.relu, name='fc')
133         dropout = tf.layers.dropout(fc, 
134                                     self.keep_prob, 
135                                     training=self.training, 
136                                     name='dropout')
137         self.logits = tf.layers.dense(dropout, self.n_classes, name='logits')
138 
139 
140     def loss(self):
141         '''
142         define loss function
143         use softmax cross entropy with logits as the loss function
144         tf.nn.softmax_cross_entropy_with_logits
145         softmax is applied internally
146         don't forget to compute mean cross all sample in a batch
147         '''
148         with tf.name_scope('loss'):
149             cross_entropy = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.label))
150             self.loss = cross_entropy
151     
152     def optimize(self):
153         '''
154         Define training op
155         using Adam Gradient Descent to minimize cost
156         Don't forget to use global step
157         '''
158         optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.loss, global_step=self.gstep)
159         self.opt = optimizer
160 
161     def summary(self):
162         '''
163         Create summaries to write on TensorBoard
164         Remember to track both training loss and test accuracy
165         '''
166         with tf.name_scope('summaries'):
167             tf.summary.scalar('loss', self.loss)
168             tf.summary.scalar('accuracy', self.accuracy)
169             tf.summary.histogram('histogram loss', self.loss)
170             self.summary_op = tf.summary.merge_all()
171         
172     def eval(self):
173         '''
174         Count the number of right predictions in a batch
175         '''
176         with tf.name_scope('predict'):
177             preds = tf.nn.softmax(self.logits)
178             correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(self.label, 1))
179             self.accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
180 
181     def build(self):
182         '''
183         Build the computation graph
184         '''
185         self.get_data()
186         self.inference_by_layers()
187         self.loss()
188         self.optimize()
189         self.eval()
190         self.summary()
191 
192     def train_one_epoch(self, sess, saver, init, writer, epoch, step):
193         start_time = time.time()
194         sess.run(init) 
195         total_loss = 0
196         n_batches = 0
197         try:
198             while True:
199                 _, l, summaries = sess.run([self.opt, self.loss, self.summary_op])
200                 writer.add_summary(summaries, global_step=step)
201                 if (step + 1) % self.skip_step == 0:
202                     print('Loss at step {0}: {1}'.format(step, l))
203                 step += 1
204                 total_loss += l
205                 n_batches += 1
206         except tf.errors.OutOfRangeError:
207             pass
208         saver.save(sess, 'checkpoints/convnet_starter/mnist-convnet', step)
209         print('Average loss at epoch {0}: {1}'.format(epoch, total_loss/n_batches))
210         print('Took: {0} seconds'.format(time.time() - start_time))
211         return step
212 
213     def eval_once(self, sess, init, writer, epoch, step):
214         start_time = time.time()
215         sess.run(init)
216         total_correct_preds = 0
217         try:
218             while True:
219                 accuracy_batch, summaries = sess.run([self.accuracy, self.summary_op])
220                 writer.add_summary(summaries, global_step=step)
221                 total_correct_preds += accuracy_batch
222         except tf.errors.OutOfRangeError:
223             pass
224 
225         print('Accuracy at epoch {0}: {1} '.format(epoch, total_correct_preds/self.n_test))
226         print('Took: {0} seconds'.format(time.time() - start_time))
227 
228     def train(self, n_epochs):
229         '''
230         The train function alternates between training one epoch and evaluating
231         '''
232         utils.safe_mkdir('checkpoints')
233         utils.safe_mkdir('checkpoints/convnet_starter')
234         writer = tf.summary.FileWriter('./graphs/convnet_starter', tf.get_default_graph())
235 
236         with tf.Session() as sess:
237             sess.run(tf.global_variables_initializer())
238             saver = tf.train.Saver()
239             ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/convnet_starter/checkpoint'))
240             if ckpt and ckpt.model_checkpoint_path:
241                 saver.restore(sess, ckpt.model_checkpoint_path)
242             
243             step = self.gstep.eval()
244 
245             for epoch in range(n_epochs):
246                 step = self.train_one_epoch(sess, saver, self.train_init, writer, epoch, step)
247                 self.eval_once(sess, self.test_init, writer, epoch, step)
248         writer.close()
249 
if __name__ == '__main__':
    # Construct the graph first, then alternate training and evaluation
    # for 15 epochs.
    convnet = ConvNet()
    convnet.build()
    convnet.train(n_epochs=15)

View Code

2. style transfer

  1 """ Implementation in TensorFlow of the paper 
  2 A Neural Algorithm of Artistic Style (Gatys et al., 2016) 
  3 
  4 Created by Chip Huyen (chiphuyen@cs.stanford.edu)
  5 CS20: "TensorFlow for Deep Learning Research"
  6 cs20.stanford.edu
  7 
  8 For more details, please read the assignment handout:
  9 https://docs.google.com/document/d/1FpueD-3mScnD0SJQDtwmOb1FrSwo1NGowkXzMwPoLH4/edit?usp=sharing
 10 """
 11 import os
 12 os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
 13 import time
 14 
 15 import numpy as np
 16 import tensorflow as tf
 17 
 18 import load_vgg
 19 import utils
 20 
 21 def setup():
 22     utils.safe_mkdir('checkpoints')
 23     utils.safe_mkdir('checkpoints/style_transfer')
 24     utils.safe_mkdir('outputs')
 25     utils.safe_mkdir('graphs')
 26 
 27 class StyleTransfer(object):
 28     def __init__(self, content_img, style_img, img_width, img_height):
 29         '''
 30         img_width and img_height are the dimensions we expect from the generated image.
 31         We will resize input content image and input style image to match this dimension.
 32         Feel free to alter any hyperparameter here and see how it affects your training.
 33         '''
 34         self.img_width = img_width
 35         self.img_height = img_height
 36         self.content_img = utils.get_resized_image(content_img, img_width, img_height)
 37         self.style_img = utils.get_resized_image(style_img, img_width, img_height)
 38         self.initial_img = utils.generate_noise_image(self.content_img, img_width, img_height)
 39 
 40         ## create global step (gstep) and hyperparameters for the model
 41         self.content_layer = 'conv4_2'
 42         self.style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
 43         # content_w, style_w: corresponding weights for content loss and style loss
 44         self.content_w = 0.01
 45         self.style_w = 1
 46         # style_layer_w: weights for different style layers. deep layers have more weights
 47         self.style_layer_w = [0.5, 1.0, 1.5, 3.0, 4.0] 
 48         self.gstep = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
 49         self.lr = 2.0
 50 
 51     def create_input(self):
 52         '''
 53         We will use one input_img as a placeholder for the content image, 
 54         style image, and generated image, because:
 55             1. they have the same dimension
 56             2. we have to extract the same set of features from them
 57         We use a variable instead of a placeholder because we're, at the same time, 
 58         training the generated image to get the desirable result.
 59 
 60         Note: image height corresponds to number of rows, not columns.
 61         '''
 62         with tf.variable_scope('input') as scope:
 63             self.input_img = tf.get_variable('in_img', 
 64                                         shape=([1, self.img_height, self.img_width, 3]),
 65                                         dtype=tf.float32,
 66                                         initializer=tf.zeros_initializer())
 67     def load_vgg(self):
 68         '''
 69         Load the saved model parameters of VGG-19, using the input_img
 70         as the input to compute the output at each layer of vgg.
 71 
 72         During training, VGG-19 mean-centered all images and found the mean pixels
 73         to be [123.68, 116.779, 103.939] along RGB dimensions. We have to subtract
 74         this mean from our images.
 75 
 76         '''
 77         self.vgg = load_vgg.VGG(self.input_img)
 78         self.vgg.load()
 79         self.content_img -= self.vgg.mean_pixels
 80         self.style_img -= self.vgg.mean_pixels
 81 
 82     def _content_loss(self, P, F):
 83         ''' Calculate the loss between the feature representation of the
 84         content image and the generated image.
 85         
 86         Inputs: 
 87             P: content representation of the content image
 88             F: content representation of the generated image
 89             Read the assignment handout for more details
 90 
 91             Note: Don't use the coefficient 0.5 as defined in the paper.
 92             Use the coefficient defined in the assignment handout.
 93         '''
 94         self.content_loss = tf.reduce_sum((F - P) ** 2) / (4.0 * P.size)
 95         
 96     def _gram_matrix(self, F, N, M):
 97         """ Create and return the gram matrix for tensor F
 98             Hint: you'll first have to reshape F
 99         """
100         F = tf.reshape(F, (M, N))
101         return tf.matmul(tf.transpose(F), F)
102         
103     def _single_style_loss(self, a, g):
104         """ Calculate the style loss at a certain layer
105         Inputs:
106             a is the feature representation of the style image at that layer
107             g is the feature representation of the generated image at that layer
108         Output:
109             the style loss at a certain layer (which is E_l in the paper)
110 
111         Hint: 1. you'll have to use the function _gram_matrix()
112             2. we'll use the same coefficient for style loss as in the paper
113             3. a and g are feature representation, not gram matrices
114         """
115         N = a.shape[3]                      # number of filters
116         M = a.shape[1] * a.shape[2]         # height times width of the feature map
117         G = self._gram_matrix(g, N, M)
118         A = self._gram_matrix(a, N, M)
119         return tf.reduce_sum((G-A) ** 2 / (4*N*N*M*M))
120         
121     def _style_loss(self, A):
122         """ Calculate the total style loss as a weighted sum 
123         of style losses at all style layers
124         Hint: you'll have to use _single_style_loss()
125         """
126         n_layers = len(self.style_layers)
127         E = [self._single_style_loss(A[i], getattr(self.vgg, self.style_layers[i])) for i in range(n_layers)]
128         self.style_loss = sum([self.style_layer_w[i] * E[i] for i in range(n_layers)])
129 
130     def losses(self):
131         with tf.variable_scope('losses') as scope:
132             with tf.Session() as sess:
133                 # assign content image to the input variable
134                 sess.run(self.input_img.assign(self.content_img)) 
135                 gen_img_content = getattr(self.vgg, self.content_layer)
136                 content_img_content = sess.run(gen_img_content)
137             self._content_loss(content_img_content, gen_img_content)
138 
139             with tf.Session() as sess:
140                 sess.run(self.input_img.assign(self.style_img))
141                 style_layers = sess.run([getattr(self.vgg, layer) for layer in self.style_layers])                              
142             self._style_loss(style_layers)
143 
144             self.total_loss = self.content_w * self.content_loss + self.style_w * self.style_loss
145 
146     def optimize(self):
147         optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.total_loss, global_step=self.gstep)
148         self.opt = optimizer
149 
150     def create_summary(self):
151         with tf.name_scope('summaries'):
152             tf.summary.scalar('content loss', self.content_loss)
153             tf.summary.scalar('style loss', self.style_loss)
154             tf.summary.scalar('total loss', self.total_loss)
155             tf.summary.histogram('histogram content loss', self.content_loss)
156             tf.summary.histogram('histogram style loss', self.style_loss)
157             tf.summary.histogram('histogram total loss', self.total_loss)
158             self.summary_op = tf.summary.merge_all()
159 
160     def build(self):
161         self.create_input()
162         self.load_vgg()
163         self.losses()
164         self.optimize()
165         self.create_summary()
166 
167     def train(self, n_iters):
168         skip_step = 1
169         with tf.Session() as sess:
170             ## 1. initialize your variables
171             ## 2. create writer to write your graph
172             sess.run(tf.global_variables_initializer())
173             writer = tf.summary.FileWriter('./graphs/style_transfer', tf.get_default_graph())
174             sess.run(self.input_img.assign(self.initial_img))
175 
176             ## 3. create a saver object
177             ## 4. check if a checkpoint exists, restore the variables
178             saver = tf.train.Saver()
179             ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))
180             if ckpt and ckpt.model_checkpoint_path:
181                 saver.restore(sess, ckpt.model_checkpoint_path)
182 
183             initial_step = self.gstep.eval()
184             
185             start_time = time.time()
186             for index in range(initial_step, n_iters):
187                 if index >= 5 and index < 20:
188                     skip_step = 10
189                 elif index >= 20:
190                     skip_step = 20
191                 
192                 sess.run(self.opt)
193                 if (index + 1) % skip_step == 0:
194                     ## obtain generated image, loss, and summary                    
195                     gen_image, total_loss, summary = sess.run([self.input_img, self.total_loss, self.summary_op])
196                     
197                     # add back the mean pixels we subtracted before
198                     gen_image = gen_image + self.vgg.mean_pixels 
199                     writer.add_summary(summary, global_step=index)
200                     print('Step {}
   Sum: {:5.1f}'.format(index + 1, np.sum(gen_image)))
201                     print('   Loss: {:5.1f}'.format(total_loss))
202                     print('   Took: {} seconds'.format(time.time() - start_time))
203                     start_time = time.time()
204 
205                     filename = 'outputs/%d.png' % (index)
206                     utils.save_image(filename, gen_image)
207 
208                     if (index + 1) % 20 == 0:
209                         # save the variables into a checkpoint
210                         saver.save(sess, 'checkpoints/style_transfer', index)
211 
if __name__ == '__main__':
    # Create the output folders, build the graph for a 333x250
    # (width x height) image, then optimize up to step 300.
    setup()
    transfer = StyleTransfer('content/deadpool.jpg', 'styles/harlequin.jpg', 333, 250)
    transfer.build()
    transfer.train(300)

View Code