ATTENTION NETWORK Analysis

1. The TensorFlowTrainable class

import numpy as np
import tensorflow as tf


class TensorFlowTrainable(object):
    def __init__(self):
        # Every trainable variable created through the helpers below is
        # collected here so the model can hand them to an optimizer.
        self.parameters = []

    def get_weights(self, dim_in, dim_out, name, trainable=True):
        shape = (dim_out, dim_in)
        weightsInitializer = tf.constant_initializer(
            self.truncated_normal(shape=shape, stddev=0.01, mean=0.))
        weights = tf.get_variable(
            initializer=weightsInitializer, shape=shape, trainable=trainable, name=name)
        if trainable:
            self.parameters.append(weights)
        return weights

    def get_4Dweights(self, filter_height, filter_width, in_channels, out_channels, name, trainable=True):
        shape = (filter_height, filter_width, in_channels, out_channels)
        weightsInitializer = tf.constant_initializer(
            self.truncated_normal(shape=shape, stddev=0.01, mean=0.))
        weights = tf.get_variable(
            initializer=weightsInitializer, shape=shape, trainable=trainable, name=name)
        if trainable:
            self.parameters.append(weights)
        return weights

    def get_biases(self, dim_out, name, trainable=True):
        shape = (dim_out, 1)
        initialBiases = tf.constant_initializer(np.zeros(shape))
        biases = tf.get_variable(
            initializer=initialBiases, shape=shape, trainable=trainable, name=name)
        if trainable:
            self.parameters.append(biases)
        return biases

    @staticmethod
    def truncated_normal(shape, stddev, mean=0.):
        # Draw normal samples, then clamp any value that falls more than
        # two standard deviations from the mean back to that boundary.
        rand_init = np.random.normal(loc=mean, scale=stddev, size=shape)
        inf_mask = rand_init < (mean - 2 * stddev)
        rand_init = rand_init * \
            np.abs(1 - inf_mask) + inf_mask * (mean - 2 * stddev)
        sup_mask = rand_init > (mean + 2 * stddev)
        rand_init = rand_init * \
            np.abs(1 - sup_mask) + sup_mask * (mean + 2 * stddev)
        return rand_init

@staticmethod

A static method can be called on the class itself without instantiating it, though it can of course also be called on an instance.

So note that the self.truncated_normal() used by the methods above is not tf.truncated_normal(), TensorFlow's truncated-normal function, as I first assumed (I had wondered why it was self. rather than tf., since the names are identical).

This function takes shape and stddev (the shape and the standard deviation) and returns a truncated-normal array of the given shape. Strictly speaking, it clamps out-of-range samples to the two-standard-deviation boundary, whereas tf.truncated_normal discards and redraws them.

As for the other methods: get_weights returns truncated-normal weights of shape=(dim_out, dim_in); get_4Dweights returns truncated-normal weights of shape=(filter_height, filter_width, in_channels, out_channels); and get_biases returns a zero-initialized bias of shape=(dim_out, 1).
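To make these shapes concrete, here is a minimal usage sketch (assuming TensorFlow 1.x, where tf.get_variable and tf.Session are available; the dimensions and the scope name "demo" are made up for illustration):

import tensorflow as tf

model = TensorFlowTrainable()
with tf.variable_scope("demo"):
    w = model.get_weights(dim_in=4, dim_out=3, name="w")  # shape (3, 4)
    b = model.get_biases(dim_out=3, name="b")             # shape (3, 1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(w).shape)      # (3, 4)

print(len(model.parameters))      # 2: both w and b were registered as trainable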

2. The LSTMCell class

class LSTMCell(TensorFlowTrainable):
    def __init__(self, num_units, **kwargs):
        super(LSTMCell, self).__init__()
        self._num_units = num_units  # number of hidden units
        self.w_i = self.get_weights(dim_in=2 * self._num_units, dim_out=self._num_units, name="w_i")  # input gate weights
        self.w_f = self.get_weights(dim_in=2 * self._num_units, dim_out=self._num_units, name="w_f")  # forget gate weights
        self.w_o = self.get_weights(dim_in=2 * self._num_units, dim_out=self._num_units, name="w_o")  # output gate weights
        self.w_c = self.get_weights(dim_in=2 * self._num_units, dim_out=self._num_units, name="w_c")  # candidate (cell input) weights
        self.b_i = self.get_biases(dim_out=self._num_units, name="b_i")  # input gate bias
        self.b_f = self.get_biases(dim_out=self._num_units, name="b_f")  # forget gate bias
        self.b_o = self.get_biases(dim_out=self._num_units, name="b_o")  # output gate bias
        self.b_c = self.get_biases(dim_out=self._num_units, name="b_c")  # candidate (cell input) bias
        self.c = [self.get_biases(dim_out=self._num_units, name="c", trainable=False)]  # initial cell state (non-trainable)

    def initialize_something(self, input):
        # Build a (1, batch_size) vector of ones from the input: transpose,
        # take the first unstacked row, expand dims, then map every value to 1.
        self.batch_size_vector = 1 + 0 * tf.expand_dims(tf.unstack(tf.transpose(input, [1, 0]))[0], 0)
        # Broadcast the initial hidden state to shape (num_units, batch_size).
        self.h = [self.get_biases(dim_out=self._num_units, name="h", trainable=False) * self.batch_size_vector]

    def process(self, input, **kwargs):
        # Stack the transposed input on top of the previous hidden state.
        H = tf.concat([tf.transpose(input, perm=[1, 0]), self.h[-1]], 0)
        i = tf.sigmoid(x=tf.add(tf.matmul(self.w_i, H), self.b_i))  # input gate
        f = tf.sigmoid(x=tf.add(tf.matmul(self.w_f, H), self.b_f))  # forget gate
        o = tf.sigmoid(x=tf.add(tf.matmul(self.w_o, H), self.b_o))  # output gate
        c = f * self.c[-1] + i * tf.tanh(x=tf.add(tf.matmul(self.w_c, H), self.b_c))  # new cell state
        # Original code: h = o * tf.tanh(x=self.c[-1]) -- that applies tanh to the
        # previous cell state; the standard LSTM uses the new cell state c.
        h = o * tf.tanh(x=c)
        self.c.append(c)
        self.h.append(h)

    @property
    def features(self):
        return self.h[-1]  # the most recent hidden state
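For reference, process() implements the standard LSTM update, where $[x_t; h_{t-1}]$ denotes the vertical concatenation H of the transposed input and the previous hidden state (the last line reflects the fix noted in the code comment):

$$
\begin{aligned}
i_t &= \sigma(W_i [x_t; h_{t-1}] + b_i) \\
f_t &= \sigma(W_f [x_t; h_{t-1}] + b_f) \\
o_t &= \sigma(W_o [x_t; h_{t-1}] + b_o) \\
c_t &= f_t \odot c_{t-1} + i_t \odot \tanh(W_c [x_t; h_{t-1}] + b_c) \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
$$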

tf.transpose(input, perm=[dimension_1, dimension_2, ..., dimension_n]) permutes the axes of input; here perm=[1, 0] simply swaps dimensions 0 and 1 (see the shape check below).

tf.stack() is a tensor-joining function, while tf.unstack() is the inverse operation that splits a tensor along an axis.

stack joins two matrices along a new axis, which is what distinguishes it from tf.concat.

For example, joining two matrices of shape=(4, 3):

concat yields shape=(8, 3) with axis=0 and shape=(4, 6) with axis=1;

stack yields shape=(2, 4, 3) with axis=0, shape=(4, 2, 3) with axis=1, and shape=(4, 3, 2) with axis=2.
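A quick shape check of the above (a sketch relying on static shapes, which works in TF 1.x and 2.x alike):

import tensorflow as tf

a = tf.zeros((4, 3))
b = tf.zeros((4, 3))
print(tf.transpose(a, perm=[1, 0]).shape)  # (3, 4)
print(tf.concat([a, b], axis=0).shape)     # (8, 3)
print(tf.concat([a, b], axis=1).shape)     # (4, 6)
print(tf.stack([a, b], axis=0).shape)      # (2, 4, 3)
print(tf.stack([a, b], axis=1).shape)      # (4, 2, 3)
print(tf.stack([a, b], axis=2).shape)      # (4, 3, 2)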

Writing m for the batch size and n for num_units, the shapes inside process() are:

input.shape = (m, n)

H.shape = (2n, m)

i.shape = (n, m)

c.shape = (n, m)

h.shape = (n, m)
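These shapes can be verified by running one step of the cell (a minimal sketch; m=5 and n=8 are made-up sizes, and it assumes a fresh TF 1.x graph so the fixed variable names like "w_i" do not clash):

import tensorflow as tf

cell = LSTMCell(num_units=8)
x = tf.zeros((5, 8))          # input.shape = (m, n) = (5, 8)
cell.initialize_something(x)  # h[-1].shape = (n, m) = (8, 5)
cell.process(x)
print(cell.features.shape)    # (8, 5), i.e. (n, m)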

@property decorator

The decorator turns get/set methods into attribute access:
class Student(object):
    def __init__(self, name, score):
        self.name = name
        self.__score = score

    def get_score(self):
        return self.__score

    def set_score(self, score):
        if score < 0 or score > 100:
            raise ValueError('invalid score')
        self.__score = score

which becomes:

class Student(object):
    def __init__(self, name, score):
        self.name = name
        self.__score = score

    @property
    def score(self):
        return self.__score

    @score.setter
    def score(self, score):
        if score < 0 or score > 100:
            raise ValueError('invalid score')
        self.__score = score
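With the decorated version, reads and writes look like plain attribute access while still going through the validation logic:

s = Student('Bob', 59)
print(s.score)  # 59, dispatched to the @property getter
s.score = 80    # dispatched to the @score.setter, which range-checks
s.score = 101   # raises ValueError('invalid score')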

For more details, see the @property decorator documentation.

python super:

Reference: Python: 你不知道的 super ("Python: the super you don't know").

Summary

  • In fact, super has no substantive connection to the parent class.
  • super(cls, inst) returns the class that comes after cls in inst's MRO list.
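A small demo of that second point (hypothetical classes, purely to show the MRO lookup):

class A(object):
    def hello(self):
        print('A')

class B(A):
    def hello(self):
        print('B')
        super(B, self).hello()

class C(A):
    def hello(self):
        print('C')
        super(C, self).hello()

class D(B, C):
    pass

print([k.__name__ for k in D.__mro__])  # ['D', 'B', 'C', 'A', 'object']
D().hello()  # prints B, C, A: inside B, super(B, self) resolves to C (the next class in D's MRO), not to B's parent A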

Original article: https://www.cnblogs.com/valleyofwind/p/8630048.html