TensorFlow 基础API

自定义损失函数

def customize_mse(y_true, y_pred):
    """Mean squared error between predictions and targets.

    Args:
        y_true: Target values.
        y_pred: Predicted values; must be subtractable from ``y_true``.

    Returns:
        The mean, over all elements, of ``(y_pred - y_true) ** 2``.
    """
    squared_error = tf.square(y_pred - y_true)
    return tf.reduce_mean(squared_error)

# Plug the hand-written loss function into training in place of a built-in loss name.
model.compile(optimizer="adam", loss=customize_mse)

自定义层

# customized layer by a subclass
class CustomizedDenseLayer(keras.layers.Layer):
    """Fully connected layer computing ``activation(x @ kernel + bias)``.

    Args:
        units: Number of output features (size of the last output axis).
        activation: Activation identifier handed to
            ``keras.layers.Activation``. Defaults to ``None``.
        **kwargs: Forwarded unchanged to ``keras.layers.Layer``.
    """

    def __init__(self, units, activation=None, **kwargs):
        # Initialise the base Layer first so its attribute tracking is set up
        # before a sub-layer (the Activation) is assigned onto ``self``.
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.layers.Activation(activation)

    def build(self, input_shape):
        """Create the kernel and bias once the input feature size is known."""
        # The last axis is the feature axis; indexing with -1 (instead of 1)
        # gives the same result for rank-2 inputs and also handles inputs
        # that carry extra leading axes.
        self.kernel = self.add_weight(name="kernel",
                                      shape=[input_shape[-1], self.units],
                                      initializer='uniform',
                                      trainable=True)
        self.bias = self.add_weight(name="bias",
                                    shape=(self.units,),
                                    initializer="zeros",
                                    trainable=True)  # weight is updated during training

        super().build(input_shape)

    def call(self, x):
        """Apply the affine transform followed by the activation."""
        return self.activation(x @ self.kernel + self.bias)

# Stack two custom dense layers: a 30-unit SELU layer that declares the input
# shape, followed by a single-unit layer using the default activation.
model = keras.models.Sequential()
model.add(CustomizedDenseLayer(30, activation="selu",
                               input_shape=x_train.shape[1:]))
model.add(CustomizedDenseLayer(1))

# Custom activation wrapped as a layer.
# softplus(x) = log(1 + e^x), a smooth function similar to relu.
customized_softplus = keras.layers.Lambda(tf.nn.softplus)

tf.function的使用

Feature
1. 把python代码转化成图结构
2. 易于将模型导出成为GraphDef + checkpoint 或者SavedModel
3. 使得eager execution可以默认打开
4. 使得1.0的代码可以通过tf.function在版本2.0上继续运行

# python code
def scaled_elu(z, scale=1.0, alpha=1.0):
    """Scaled ELU activation.

    Element-wise: ``scale * z`` where ``z >= 0``, otherwise
    ``scale * alpha * tf.nn.elu(z)``.
    """
    negative_branch = alpha * tf.nn.elu(z)
    non_negative = tf.greater_equal(z, 0.0)
    return scale * tf.where(non_negative, z, negative_branch)

# Wrap the plain Python function with tf.function to obtain a graph-compiled callable.
scaled_elu_tf = tf.function(scaled_elu)

# IPython timing of the eager function versus the tf.function-wrapped one on a
# 1000x1000 random-normal input; the trailing comments record the timings observed.
%timeit scaled_elu(tf.random.normal((1000, 1000))) # 14.8 ms ± 156 µs per loop
%timeit scaled_elu_tf(tf.random.normal((1000, 1000))) # 12.1 ms ± 35.6 µs per loop

普通python代码转成tf的图结构，速度加快

除此之外，tf.function还可以在类型上做限制

@tf.function(
    input_signature=[tf.TensorSpec([None], tf.int32, name='x')],
)
def cube(z):
    """Return ``z`` raised to the third power, element-wise.

    The declared input signature restricts calls to a single int32 tensor
    with one axis of any length.
    """
    return z ** 3

# A float32 tensor does not match the int32 input_signature, so this call is
# rejected with "Python inputs incompatible with input_signature" (ValueError).
# The error is caught and printed so the valid call below still runs when the
# snippet is executed as one script.
# NOTE(review): newer TF releases reportedly raise TypeError here instead, so
# both exception types are caught - confirm against the TF version in use.
try:
    print(cube(tf.constant([1., 2., 3.])))
except (ValueError, TypeError) as err:
    print(err)
print(cube(tf.constant([1, 2, 3]))) # tf.Tensor([ 1  8 27], shape=(3,), dtype=int32)

自定义求导

def approximate_derivative(f, x, eps=1e-3):
    """Estimate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: Callable of one argument.
        x: Point at which to differentiate.
        eps: Half-width of the symmetric interval around ``x``.

    Returns:
        ``(f(x + eps) - f(x - eps)) / (2 * eps)``.
    """
    forward = f(x + eps)
    backward = f(x - eps)
    return (forward - backward) / (2. * eps)

def equation1(x):
    """Evaluate the quadratic ``3x^2 + 2x - 1`` at ``x``."""
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term - 1

# The exact derivative of 3x^2 + 2x - 1 is 6x + 2, i.e. 8 at x = 1; the
# numerical estimate agrees up to floating-point rounding.
print(approximate_derivative(equation1, 1.)) # 7.999999999999119

def equation2(x1, x2):
    """Evaluate ``(x1 + 5) * x2^2``."""
    shifted = x1 + 5
    squared = x2 ** 2
    return shifted * squared


def approximate_gradient(f, x1, x2, eps=1e-3):
    """Estimate both partial derivatives of a two-argument function.

    Args:
        f: Callable taking ``(x1, x2)``.
        x1: First coordinate of the evaluation point.
        x2: Second coordinate of the evaluation point.
        eps: Step passed on to ``approximate_derivative``.

    Returns:
        Tuple ``(df/dx1, df/dx2)`` evaluated at ``(x1, x2)``.
    """
    # Differentiate the function that was passed in (holding the other
    # argument fixed) rather than a hard-coded one, so any two-argument
    # callable can be used.
    dg_x1 = approximate_derivative(lambda x: f(x, x2), x1, eps)
    dg_x2 = approximate_derivative(lambda x: f(x1, x), x2, eps)
    return dg_x1, dg_x2

# Exact partials of (x1 + 5) * x2^2 at (2, 3): d/dx1 = x2^2 = 9 and
# d/dx2 = 2 * (x1 + 5) * x2 = 42; the estimates agree up to rounding.
print(approximate_gradient(equation2, 2., 3.)) # (8.999999999993236, 41.999999999994486)

这是在普通python代码上，实现一元和多元函数的求导。

下面是如何用tensorflow去完成相同的功能

x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)

# A persistent tape may be queried more than once, so each partial derivative
# can be requested with its own gradient() call.
with tf.GradientTape(persistent=True) as tape2:
    z2 = equation2(x1, x2)
dz2_x1 = tape2.gradient(z2, x1)
print(dz2_x1) # tf.Tensor(9.0, shape=(), dtype=float32)
dz2_x2 = tape2.gradient(z2, x2)
print(dz2_x2) # tf.Tensor(42.0, shape=(), dtype=float32)
# A persistent tape holds on to what it recorded until the reference is
# dropped, so release it explicitly once all gradients have been taken.
del tape2

# One non-persistent tape: both partial derivatives are requested in a single
# gradient() call by passing the sources as a list.
with tf.GradientTape() as tape3:
    z3 = equation2(x1, x2)

dz3_x1x2 = tape3.gradient(target=z3, sources=[x1, x2])
# [<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]

x5 = tf.Variable(5.)
# Several targets, one source: the result recorded below (13) equals the sum
# of the individual derivatives, d(3x)/dx + d(x^2)/dx = 3 + 2 * 5.
with tf.GradientTape() as tape5:
    z5 = 3 * x5
    z6 = x5 ** 2
dz5z6_x5 = tape5.gradient(target=[z5, z6], sources=x5) # tf.Tensor(13.0, shape=(), dtype=float32)

如何求二阶导

x6 = tf.Variable(2.0)
x7 = tf.Variable(3.0)

# Second-order derivatives via nested tapes: the inner tape yields the first
# derivatives, and because that gradient() call happens inside the outer
# tape's context, the outer tape can differentiate those results again.
with tf.GradientTape(persistent=True) as outter_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z7 = equation2(x6, x7)
    first_derivates = inner_tape.gradient(target=z7, sources=[x6, x7])

# One row per first derivative, each differentiated w.r.t. both variables.
second_derivate = [
    outter_tape.gradient(target=first_derivate, sources=[x6, x7])
    for first_derivate in first_derivates
]

print(second_derivate)
# Persistent tapes are released explicitly once they are no longer needed.
del inner_tape, outter_tape

梯度下降原理

learning_rate = 0.1
x = tf.Variable(0.0)

# Hand-written gradient descent on equation1: each of the 100 steps records
# the forward pass, takes dz/dx, and moves x against the gradient in place.
for _ in range(100):
    with tf.GradientTape() as tape:
        z = equation1(x)
    dz_dx = tape.gradient(target=z, sources=x)
    x.assign_sub(learning_rate * dz_dx)
    print(x)


## introduce optimizer
learning_rate = 0.1
x = tf.Variable(0.0)

# Pass the step size via `learning_rate`; the short `lr` alias is deprecated
# and is not accepted by every Keras release.
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)

# Same descent as a manual update, but the optimizer applies the step:
# apply_gradients takes (gradient, variable) pairs.
for _ in range(100):
    with tf.GradientTape() as tape:
        z = equation1(x)
    dz_dx = tape.gradient(z, x)
    optimizer.apply_gradients([(dz_dx, x)])
    print(x)