Eager Guide

1. Benefits

  • An intuitive interface—Structure your code naturally and use Python data structures. Quickly iterate on small models and small data.
  • Easier debugging—Call ops directly to inspect running models and test changes. Use standard Python debugging tools for immediate error reporting.
  • Natural control flow—Use Python control flow instead of graph control flow, simplifying the specification of dynamic models.

2. Setup and basic usage

from __future__ import absolute_import, division, print_function

import tensorflow as tf

tf.enable_eager_execution()

tf.executing_eagerly()  # => True

x = [[2.]]
m = tf.matmul(x, x)
print("hello, {}".format(m))

Two-way conversion with NumPy: TensorFlow ops accept NumPy arrays as inputs, and a Tensor's .numpy() method returns its value as a NumPy array, so data moves easily in both directions.
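A minimal sketch of the round trip (the array values are just for illustration):

import numpy as np

a = tf.constant([[1, 2], [3, 4]])
b = np.ones((2, 2), dtype=np.int32)

c = tf.matmul(a, b)  # a NumPy array is accepted directly by a TensorFlow op
print(c.numpy())     # .numpy() returns the result as a NumPy ndarray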

# The tf.contrib.eager module contains symbols available to both eager and
# graph execution environments; it is referenced as tfe throughout this guide.
tfe = tf.contrib.eager

3. Dynamic control flow

def fizzbuzz(max_num):
  counter = tf.constant(0)
  max_num = tf.convert_to_tensor(max_num)
  for num in range(1, max_num.numpy() + 1):
    num = tf.constant(num)
    if int(num % 3) == 0 and int(num % 5) == 0:
      print('FizzBuzz')
    elif int(num % 3) == 0:
      print('Fizz')
    elif int(num % 5) == 0:
      print('Buzz')
    else:
      print(num.numpy())
    counter += 1

fizzbuzz(15)

4. Build a model

class MySimpleLayer(tf.keras.layers.Layer):
  def __init__(self, output_units):
    super(MySimpleLayer, self).__init__()
    self.output_units = output_units

  def build(self, input_shape):
    # The build method gets called the first time your layer is used.
    # Creating variables on build() allows you to make their shape depend
    # on the input shape and hence removes the need for the user to specify
    # full shapes. It is possible to create variables during __init__() if
    # you already know their full shapes.
    self.kernel = self.add_variable(
        "kernel", [input_shape[-1], self.output_units])

  def call(self, input):
    # Override call() instead of __call__ so we can perform some bookkeeping.
    return tf.matmul(input, self.kernel)
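As a quick check, the custom layer can be called directly on a tensor in eager execution; the shapes below are purely illustrative:

layer = MySimpleLayer(output_units=5)
x = tf.random_normal([4, 3])  # batch of 4 examples with 3 features
y = layer(x)                  # build() runs on the first call and creates a [3, 5] kernel
print(y.shape)                # => (4, 5)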

model = tf.keras.Sequential([
  tf.keras.layers.Dense(10, input_shape=(784,)),  # must declare input shape
  tf.keras.layers.Dense(10)
])

class MNISTModel(tf.keras.Model):
  def __init__(self):
    super(MNISTModel, self).__init__()
    self.dense1 = tf.keras.layers.Dense(units=10)
    self.dense2 = tf.keras.layers.Dense(units=10)

  def call(self, input):
    """Run the model."""
    result = self.dense1(input)
    result = self.dense2(result)
    result = self.dense2(result)  # reuse variables from dense2 layer
    return result

model = MNISTModel()

5. Eager training

5.1 Computing gradients

w = tf.Variable([[1.0]])
with tf.GradientTape() as tape:
  loss = w * w

grad = tape.gradient(loss, w)
print(grad) # => tf.Tensor([[ 2.]], shape=(1, 1), dtype=float32)

5.2 Train a model

# Fetch and format the mnist data
(mnist_images, mnist_labels), _ = tf.keras.datasets.mnist.load_data()

dataset = tf.data.Dataset.from_tensor_slices(
  (tf.cast(mnist_images[..., tf.newaxis] / 255, tf.float32),
   tf.cast(mnist_labels, tf.int64)))
dataset = dataset.shuffle(1000).batch(32)

# Build the model
mnist_model = tf.keras.Sequential([
  tf.keras.layers.Conv2D(16, [3, 3], activation='relu'),
  tf.keras.layers.Conv2D(16, [3, 3], activation='relu'),
  tf.keras.layers.GlobalAveragePooling2D(),
  tf.keras.layers.Dense(10)
])

for images, labels in dataset.take(1):
  print("Logits: ", mnist_model(images[0:1]).numpy())

optimizer = tf.train.AdamOptimizer()

loss_history = []


for (batch, (images, labels)) in enumerate(dataset.take(400)):
  if batch % 10 == 0:
    print('.', end='')
  with tf.GradientTape() as tape:
    logits = mnist_model(images, training=True)
    loss_value = tf.losses.sparse_softmax_cross_entropy(labels, logits)

  loss_history.append(loss_value.numpy())
  grads = tape.gradient(loss_value, mnist_model.trainable_variables)
  optimizer.apply_gradients(zip(grads, mnist_model.trainable_variables),
                            global_step=tf.train.get_or_create_global_step())

import matplotlib.pyplot as plt

plt.plot(loss_history)
plt.xlabel('Batch #')
plt.ylabel('Loss [entropy]')

6. Variables and optimizers

class Model(tf.keras.Model):
  def __init__(self):
    super(Model, self).__init__()
    self.W = tf.Variable(5., name='weight')
    self.B = tf.Variable(10., name='bias')

  def call(self, inputs):
    return inputs * self.W + self.B

# A toy dataset of points around 3 * x + 2
NUM_EXAMPLES = 2000
training_inputs = tf.random_normal([NUM_EXAMPLES])
noise = tf.random_normal([NUM_EXAMPLES])
training_outputs = training_inputs * 3 + 2 + noise

# The loss function to be optimized
def loss(model, inputs, targets):
  error = model(inputs) - targets
  return tf.reduce_mean(tf.square(error))

def grad(model, inputs, targets):
  with tf.GradientTape() as tape:
    loss_value = loss(model, inputs, targets)
  return tape.gradient(loss_value, [model.W, model.B])

# Define:
# 1. A model.
# 2. Derivatives of a loss function with respect to model parameters.
# 3. A strategy for updating the variables based on the derivatives.
model = Model()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

print("Initial loss: {:.3f}".format(loss(model, training_inputs, training_outputs)))

# Training loop
for i in range(300):
  grads = grad(model, training_inputs, training_outputs)
  optimizer.apply_gradients(zip(grads, [model.W, model.B]),
                            global_step=tf.train.get_or_create_global_step())
  if i % 20 == 0:
    print("Loss at step {:03d}: {:.3f}".format(i, loss(model, training_inputs, training_outputs)))

print("Final loss: {:.3f}".format(loss(model, training_inputs, training_outputs)))
print("W = {}, B = {}".format(model.W.numpy(), model.B.numpy()))

7. Use objects for state during eager execution

Variables are objects

During eager execution, variables persist until the last reference to the object is removed; the variable is then deleted.

if tf.test.is_gpu_available():
  with tf.device("gpu:0"):
    v = tf.Variable(tf.random_normal([1000, 1000]))
    v = None  # v no longer takes up GPU memory

Object-based saving

x = tf.Variable(10.)
checkpoint = tf.train.Checkpoint(x=x)

x.assign(2.)  # Assign a new value to the variable and save.
checkpoint_path = './ckpt/'
checkpoint.save(checkpoint_path)

x.assign(11.) # Change the variable after saving.

# Restore values from the checkpoint
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_path))

print(x) # => 2.0

To save and load models, tf.train.Checkpoint stores the internal state of objects without requiring hidden variables. To record the state of a model, an optimizer, and a global step, pass them to a tf.train.Checkpoint:

import os
import tempfile

model = tf.keras.Sequential([
  tf.keras.layers.Conv2D(16, [3, 3], activation='relu'),
  tf.keras.layers.GlobalAveragePooling2D(),
  tf.keras.layers.Dense(10)
])
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
checkpoint_dir = tempfile.mkdtemp()
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
root = tf.train.Checkpoint(optimizer=optimizer,
                           model=model,
                           optimizer_step=tf.train.get_or_create_global_step())

root.save(checkpoint_prefix)
root.restore(tf.train.latest_checkpoint(checkpoint_dir))

Object-oriented metrics

m = tfe.metrics.Mean("loss")
m(0)
m(5)
m.result() # => 2.5
m([8, 9])
m.result() # => 5.5

8. Summaries and TensorBoard

During eager execution, use tf.contrib.summary to record summaries for TensorBoard; for example, tf.contrib.summary.scalar writes scalar summaries:

global_step = tf.train.get_or_create_global_step()

logdir = "./tb/"
writer = tf.contrib.summary.create_file_writer(logdir)
writer.set_as_default()

for _ in range(10):
  global_step.assign_add(1)
  # Must include a record_summaries method
  with tf.contrib.summary.record_summaries_every_n_global_steps(100):
    # your model code goes here
    tf.contrib.summary.scalar('global_step', global_step)

9. Advanced automatic differentiation topics

Dynamic models: tf.GradientTape can also be used in dynamic models. This backtracking line search example looks like normal NumPy code, yet it is differentiable despite the complex control flow:

def line_search_step(fn, init_x, rate=1.0):
  with tf.GradientTape() as tape:
    # Variables are automatically recorded, but manually watch a tensor
    tape.watch(init_x)
    value = fn(init_x)
  grad = tape.gradient(value, init_x)
  grad_norm = tf.reduce_sum(grad * grad)
  init_value = value
  while value > init_value - rate * grad_norm:
    x = init_x - rate * grad
    value = fn(x)
    rate /= 2.0
  return x, value

Additional functions to compute gradients: tfe.gradients_function returns a function that computes the derivatives of its input function with respect to its arguments.

def square(x):
  return tf.multiply(x, x)

grad = tfe.gradients_function(square)

square(3.).numpy()  # => 9.0

grad(3.)[0].numpy()  # => 6.0

# The second-order derivative of square:
gradgrad = tfe.gradients_function(lambda x: grad(x)[0])
gradgrad(3.)[0].numpy()  # => 2.0

# The third-order derivative is None:
gradgradgrad = tfe.gradients_function(lambda x: gradgrad(x)[0])
gradgradgrad(3.)  # => [None]

# With flow control:
def abs(x):
  return x if x > 0. else -x

grad = tfe.gradients_function(abs)
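Calling the returned gradient function gives the expected derivative on either side of zero:

grad(3.)[0].numpy()   # => 1.0
grad(-3.)[0].numpy()  # => -1.0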

Custom gradients are an easy way to override gradients: within the forward function, define the gradient with respect to the inputs, outputs, or intermediate results.

@tf.custom_gradient
def clip_gradient_by_norm(x, norm):
  y = tf.identity(x)
  def grad_fn(dresult):
    return [tf.clip_by_norm(dresult, norm), None]
  return y, grad_fn

def log1pexp(x):
  return tf.log(1 + tf.exp(x))
grad_log1pexp = tfe.gradients_function(log1pexp)

# The gradient computation works fine at x = 0.
grad_log1pexp(0.)[0].numpy()  # => 0.5

# However, x = 100 fails because of numerical instability.
grad_log1pexp(100.)[0].numpy()  # => nan
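One way to work around the instability, sketched here with the same @tf.custom_gradient decorator used above, is to give log1pexp an analytically simplified gradient that reuses the tf.exp(x) value from the forward pass:

@tf.custom_gradient
def log1pexp(x):
  e = tf.exp(x)
  def grad(dy):
    # d/dx log(1 + e^x) = e^x / (1 + e^x) = 1 - 1 / (1 + e^x)
    return dy * (1 - 1 / (1 + e))
  return tf.log(1 + e), grad

grad_log1pexp = tfe.gradients_function(log1pexp)

grad_log1pexp(0.)[0].numpy()    # => 0.5, as before
grad_log1pexp(100.)[0].numpy()  # => 1.0, no longer unstable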

10. Performance

import time

def measure(x, steps):
  # TensorFlow initializes a GPU the first time it's used, so exclude it from timing.
  tf.matmul(x, x)
  start = time.time()
  for i in range(steps):
    x = tf.matmul(x, x)
  # tf.matmul can return before completing the matrix multiplication
  # (e.g., can return after enqueueing the operation on a CUDA stream).
  # The x.numpy() call below will ensure that all enqueued operations
  # have completed (and will also copy the result to host memory,
  # so we're including a little more than just the matmul operation
  # time).
  _ = x.numpy()
  end = time.time()
  return end - start

shape = (1000, 1000)
steps = 200
print("Time to multiply a {} matrix by itself {} times:".format(shape, steps))

# Run on CPU:
with tf.device("/cpu:0"):
print("CPU: {} secs".format(measure(tf.random_normal(shape), steps)))

# Run on GPU, if available:
if tfe.num_gpus() > 0:
with tf.device("/gpu:0"):
print("GPU: {} secs".format(measure(tf.random_normal(shape), steps)))
else:
print("GPU: not found")

if tf.test.is_gpu_available():
  x = tf.random_normal([10, 10])

  x_gpu0 = x.gpu()
  x_cpu = x.cpu()

  _ = tf.matmul(x_cpu, x_cpu)    # Runs on CPU
  _ = tf.matmul(x_gpu0, x_gpu0)  # Runs on GPU:0

  if tfe.num_gpus() > 1:
    x_gpu1 = x.gpu(1)
    _ = tf.matmul(x_gpu1, x_gpu1)  # Runs on GPU:1

11. Work with graphs

With graph execution, the Python program first builds a graph representing the computation, then invokes Session.run to send the graph for execution on the C++-based runtime (see the sketch after this list). This provides:

  • Automatic differentiation using static autodiff.
  • Simple deployment to a platform-independent server.
  • Graph-based optimizations (common subexpression elimination, constant-folding, etc.).
  • Compilation and kernel fusion.
  • Automatic distribution and replication (placing nodes on the distributed system).
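For contrast with the eager examples above, a minimal sketch of this graph-plus-Session workflow is shown below; it assumes a fresh program in which tf.enable_eager_execution() has not been called, and the input value is illustrative:

# Assumes eager execution has NOT been enabled in this program.
g = tf.Graph()
with g.as_default():
  a = tf.placeholder(tf.float32, shape=(1, 1))
  b = tf.matmul(a, a)  # adds a node to the graph; nothing runs yet

with tf.Session(graph=g) as sess:
  print(sess.run(b, feed_dict={a: [[2.0]]}))  # => [[4.]]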

Use eager execution in a graph environment: selectively enable eager execution within a TensorFlow graph environment using tfe.py_func.

def my_py_func(x):
  x = tf.matmul(x, x)  # You can use tf ops
  print(x)  # but it's eager!
  return x

with tf.Session() as sess:
  x = tf.placeholder(dtype=tf.float32)
  # Call eager function in graph!
  pf = tfe.py_func(my_py_func, [x], tf.float32)

  sess.run(pf, feed_dict={x: [[2.0]]})  # [[4.0]]
