Learn Algorithms with Me: match-LSTM (following Teacher Tang's example)

In match-LSTM, the text encoding h_i is combined with an attention output computed from s_i, h_i and q_i, and the combination is fed back into another LSTM, so the whole model is trained end to end.
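For reference, the attention step that the code below implements can be written as follows (a sketch in the notation of the match-LSTM paper; h^s_j are the premise LSTM states, h^t_k the hypothesis states, h^m_k the match-LSTM states, and W^s, W^t, W^m, w_e correspond to the variables w_s, w_t, w_m, w_e in _match_attention):

e_{kj} = w_e^{\top} \tanh\left(W^{s} h^{s}_{j} + W^{t} h^{t}_{k} + W^{m} h^{m}_{k-1}\right)

a_{kj} = \operatorname{softmax}_{j}(e_{kj}), \qquad \alpha_{k} = \sum_{j} a_{kj}\, h^{s}_{j}

m_k = [\alpha_k ; h^{t}_{k}], \qquad h^{m}_{k} = \operatorname{LSTM}(m_k, h^{m}_{k-1})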

Reference blog posts:

https://blog.csdn.net/laddie132/article/details/79159895   # match-LSTM theory

https://blog.csdn.net/jdbc/article/details/80755576   # converting the SQuAD dataset to ids

https://blog.csdn.net/xbinworld/article/details/54607525   # attention mechanism models

https://blog.csdn.net/appleml/article/details/76607980   # Pointer Net (point-net) model
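Throughout the script, each sentence is a fixed-length row of word ids: 0 is padding and -1 marks an out-of-vocabulary word whose vector is rebuilt from its context window. For example (a made-up premise matching the demo data at the bottom):

premise = [3, -1, 2, 1, 0]   # ids 3, OOV, 2, 1, then one padding token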

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import tensorflow as tf
import numpy as np
import tensorflow.contrib as contrib

# from app.decorator import exe_time


class MatchLstm:
    # @exe_time
    def __init__(self, vocab_size, sentence_size, embedding_size,
                 word_embedding, initializer=tf.truncated_normal_initializer(stddev=0.1),
                 session=tf.Session(), num_class=3,
                 window_size=4, name='MatchLstm', initial_lr=0.001):
        # vocabulary size
        self._vocab_size = vocab_size
        # number of tokens per (padded) sentence
        self._sentence_size = sentence_size
        # embedding / hidden-layer size
        self._embedding_size = embedding_size
        # pre-trained word-embedding matrix
        self._we = word_embedding
        # weight initializer
        self._initializer = initializer
        # model name, used as a variable-scope prefix
        self._name = name
        # number of output classes
        self._num_class = num_class
        self._sess = session
        # context-window size used for out-of-vocabulary words
        self._window_size = window_size
        # initial learning rate
        self._initial_lr = initial_lr
        # build the input placeholders and embedding variables
        self._build_inputs_and_vars()
        # build the model graph
        self._inference()
        # set up the optimizer
        self._initial_optimizer()

    def _build_inputs_and_vars(self):
        # premise (passage) token ids
        self.premises = tf.placeholder(shape=[None, self._sentence_size], dtype=tf.int32,
                                       name='premises')
        # hypothesis (question) token ids
        self.hypotheses = tf.placeholder(shape=[None, self._sentence_size], dtype=tf.int32,
                                         name='hypotheses')
        # one-hot labels
        self.labels = tf.placeholder(shape=[None, self._num_class], dtype=tf.float32,
                                     name='labels')
        # infer the batch size from the input shape
        self._batch_size = tf.shape(self.premises)[0]
        # learning-rate variable (non-trainable)
        self.lr = tf.get_variable(shape=[], dtype=tf.float32, trainable=False,
                                  initializer=tf.constant_initializer(self._initial_lr), name='lr')
        # placeholder for feeding in a new learning rate
        self.new_lr = tf.placeholder(shape=[], dtype=tf.float32,
                                     name='new_lr')
        # op that assigns self.new_lr to self.lr
        self.lr_update_op = tf.assign(self.lr, self.new_lr)

        with tf.variable_scope(self._name):
            # embedding matrix used to look up word vectors,
            # frozen to the pre-trained values in self._we
            self._word_embedding = tf.get_variable(name='word_embedding',
                                                   shape=[self._vocab_size, self._embedding_size],
                                                   initializer=tf.constant_initializer(self._we),
                                                   trainable=False)
        # embed the premises; OOV tokens (id -1) get the average of their context window
        self._embed_pre = self._embed_inputs(self.premises, self._word_embedding)
        # embed the hypotheses in the same way
        self._embed_hyp = self._embed_inputs(self.hypotheses, self._word_embedding)

    def _inference(self):
        with tf.variable_scope('{}_lstm_s'.format(self._name)):
            # run an LSTM over the embedded premises
            lstm_s = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size, forget_bias=0.0)
            pre_length = self._length(self.premises)
            h_s, _ = tf.nn.dynamic_rnn(lstm_s, self._embed_pre, sequence_length=pre_length,
                                       dtype=tf.float32)
            self.h_s = h_s

        with tf.variable_scope('{}_lstm_t'.format(self._name)):
            # run an LSTM over the embedded hypotheses
            lstm_t = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size, forget_bias=0.0)
            hyp_length = self._length(self.hypotheses)
            h_t, _ = tf.nn.dynamic_rnn(lstm_t, self._embed_hyp, sequence_length=hyp_length,
                                       dtype=tf.float32)
            self.h_t = h_t
        # the match-LSTM cell
        self.lstm_m = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size,
                                                forget_bias=0.0)
        # TensorArray collecting the final match-LSTM state of each sample
        h_m_arr = tf.TensorArray(dtype=tf.float32, size=self._batch_size)

        i = tf.constant(0)
        # tf.while_loop: `cond` is the loop condition, `body` the loop body
        c = lambda x, y: tf.less(x, self._batch_size)
        b = lambda x, y: self._match_sent(x, y)
        res = tf.while_loop(cond=c, body=b, loop_vars=(i, h_m_arr))
        # stack the per-sample match-LSTM outputs into one tensor
        self.h_m_tensor = tf.squeeze(res[-1].stack(), axis=[1])
        # fully connected layer projecting onto the class logits
        with tf.variable_scope('{}_fully_connect'.format(self._name)):
            w_fc = tf.get_variable(shape=[self._embedding_size, self._num_class],
                                   initializer=self._initializer, name='w_fc')
            b_fc = tf.get_variable(shape=[self._num_class],
                                   initializer=self._initializer, name='b_fc')
            self.logits = tf.matmul(self.h_m_tensor, w_fc) + b_fc
        # softmax cross-entropy loss, one scalar per sample
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=self.labels,
                                                                logits=self.logits,
                                                                name='cross_entropy')
        # sum the per-sample losses over the batch
        cross_entropy_sum = tf.reduce_sum(cross_entropy, name='cross_entropy_sum')
        # divide by the batch size to get the mean loss
        self.loss_op = tf.div(cross_entropy_sum, tf.cast(self._batch_size, dtype=tf.float32))
        # predicted class: index of the largest logit per sample
        # (tf.argmax replaces the deprecated tf.arg_max)
        self.predict_op = tf.argmax(self.logits, axis=1)

    def _match_sent(self, i, h_m_arr):
        # process one premise/hypothesis pair
        h_s_i = self.h_s[i]
        h_t_i = self.h_t[i]
        # actual (unpadded) lengths of the premise and the hypothesis
        length_s_i = self._length(self.premises[i])
        length_t_i = self._length(self.hypotheses[i])

        state = self.lstm_m.zero_state(batch_size=1, dtype=tf.float32)

        k = tf.constant(0)
        c = lambda a, x, y, z, s: tf.less(a, length_t_i)
        b = lambda a, x, y, z, s: self._match_attention(a, x, y, z, s)
        res = tf.while_loop(cond=c, body=b, loop_vars=(k, h_s_i, h_t_i, length_s_i, state))
        # keep only the final hidden state of the match-LSTM
        final_state_h = res[-1].h
        # write it into the TensorArray h_m_arr
        h_m_arr = h_m_arr.write(i, final_state_h)

        i = tf.add(i, 1)
        return i, h_m_arr

    def _match_attention(self, k, h_s, h_t, length_s, state):
        # current hypothesis state and the (unpadded) premise states
        h_t_k = tf.reshape(h_t[k], [1, -1])
        h_s_j = tf.slice(h_s, begin=[0, 0], size=[length_s, self._embedding_size])

        with tf.variable_scope('{}_attention_w'.format(self._name)):
            w_s = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                  initializer=self._initializer, name='w_s')
            w_t = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                  initializer=self._initializer, name='w_t')
            w_m = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                  initializer=self._initializer, name='w_m')
            w_e = tf.get_variable(shape=[self._embedding_size, 1],
                                  initializer=self._initializer, name='w_e')

        last_m_h = state.h
        # attention pre-activation: combines the premise states with the
        # current hypothesis state and the previous match-LSTM state
        sum_h = tf.matmul(h_s_j, w_s) + tf.matmul(h_t_k, w_t) + tf.matmul(last_m_h, w_m)
        # tanh activation followed by a projection with w_e gives the scores e_kj
        e_kj = tf.matmul(tf.tanh(sum_h), w_e)
        # attention weights a_kj via softmax
        a_kj = tf.nn.softmax(e_kj)
        # attention-weighted sum of the premise states
        alpha_k = tf.matmul(a_kj, h_s_j, transpose_a=True)

        alpha_k.set_shape([1, self._embedding_size])
        # concatenate the context vector with h_t_k as the match-LSTM input
        m_k = tf.concat([alpha_k, h_t_k], axis=1)

        with tf.variable_scope('{}_lstm_m'.format(self._name)):
            # one step of the match-LSTM; `state` carries s_i
            _, new_state = self.lstm_m(inputs=m_k, state=state)

        k = tf.add(k, 1)
        return k, h_s, h_t, length_s, new_state

    def _embed_inputs(self, inputs, embeddings):
        ndim0_tensor_arr = tf.TensorArray(dtype=tf.float32, size=self._batch_size)
        i = tf.constant(0)
        # tf.less: keep looping while i < batch size
        c = lambda x, y, z, n: tf.less(x, self._batch_size)
        b = lambda x, y, z, n: self._embed_line(x, y, z, n)
        # iterate over the batch with tf.while_loop
        res = tf.while_loop(cond=c, body=b,
                            loop_vars=(i, inputs, embeddings, ndim0_tensor_arr))
        ndim0_tensor = res[-1].stack()
        ndim0_tensor = tf.reshape(ndim0_tensor, [-1, self._sentence_size, self._embedding_size])
        return ndim0_tensor

    def _embed_line(self, i, inputs, embeddings, ndim0_tensor_arr):
        ndim1_list = []
        # first pass: look up an embedding for every position in the sentence
        for j in range(self._sentence_size):
            # token id at position j
            word = inputs[i][j]
            unk_word = tf.constant(-1)
            # look up the word's vector; tf.squeeze drops the size-1 dimension
            f1 = lambda: tf.squeeze(tf.nn.embedding_lookup(params=embeddings, ids=word))
            # OOV tokens (id -1) get a zero vector for now
            f2 = lambda: tf.zeros(shape=[self._embedding_size])
            # if word != -1 take the looked-up vector, otherwise the zero vector
            res_tensor = tf.case([(tf.not_equal(word, unk_word), f1)], default=f2)
            # collect the per-position vectors
            ndim1_list.append(res_tensor)
        # second pass: id -1 marks an OOV word, whose vector is replaced by
        # the average of its context window
        for j in range(self._sentence_size):
            word = inputs[i][j]
            unk_word = tf.constant(-1)
            f1 = lambda: self._ave_vec(ndim1_list, j)
            f2 = lambda: ndim1_list[j]
            ndim1_list[j] = tf.case([(tf.not_equal(word, unk_word), f2)],
                                    default=f1)
        # tf.stack joins the per-position vectors into one tensor
        ndim1_tensor = tf.stack(ndim1_list)
        ndim0_tensor_arr = ndim0_tensor_arr.write(i, ndim1_tensor)
        i = tf.add(i, 1)
        return i, inputs, embeddings, ndim0_tensor_arr

    def _ave_vec(self, embed_list, cur_pos):
        """
        The vector of an out-of-vocabulary word is the average of the
        word vectors inside its context window.
        :param embed_list: per-position embedding tensors for the sentence
        :param cur_pos: position of the OOV word
        :return: averaged context vector
        """
        # window boundaries around the current position; self._window_size
        # is the number of words taken on each side
        left_pos = max(0, cur_pos - self._window_size)
        right_pos = min(cur_pos + self._window_size, self._sentence_size)
        # context vectors, excluding the current position itself
        e_list = embed_list[left_pos:cur_pos] + embed_list[cur_pos + 1:right_pos + 1]
        # stack the context vectors and average them to stand in for the OOV word
        e_tensor = tf.stack(e_list)
        ave_tensor = tf.reduce_mean(e_tensor, axis=0)
        return ave_tensor

    @staticmethod
    def _length(sequence):
        # non-zero ids count as real tokens, zeros as padding,
        # e.g. [4, 5, 1, 0, 0] -> length 3 (note that OOV id -1 also counts)
        mask = tf.sign(tf.abs(sequence))
        length = tf.reduce_sum(mask, axis=-1)
        return length

    def _initial_optimizer(self):
        with tf.variable_scope('{}_step'.format(self._name)):
            # global step counter, incremented once per training step
            self.global_step = tf.get_variable(shape=[],
                                               initializer=tf.constant_initializer(0),
                                               dtype=tf.int32,
                                               name='global_step')
        # Adam adapts per-parameter learning rates from running moment estimates
        self._optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.9, beta2=0.999)
        # minimize the loss
        self.train_op = self._optimizer.minimize(self.loss_op, global_step=self.global_step)


if __name__ == '__main__':
    with tf.Session() as sess:
        # random toy embedding matrix; row 0 is the all-zero padding vector
        embedding = np.random.randn(4, 6)
        embedding[0] = 0.0
        model = MatchLstm(vocab_size=7, sentence_size=5, embedding_size=6,
                          word_embedding=embedding, session=sess)
        model.batch_size = 1  # unused; the graph infers the batch size from the inputs
        sent1 = [[3, -1, 2, 1, 0],
                 [4, 5, 1, 0, 0],
                 [2, 1, 0, 0, 0]]

        sent2 = [[2, 1, 0, 0, 0],
                 [3, -1, 2, 1, 0],
                 [4, 5, 1, 0, 0]]

        labels = [[1, 0, 0],
                  [0, 1, 0],
                  [0, 0, 1]]

        sess.run(tf.global_variables_initializer())
        # training loop; premises and hypotheses are swapped every iteration
        for temp in range(300):
            loss, _, step = sess.run([model.loss_op, model.train_op, model.global_step],
                                     feed_dict={model.premises: sent1, model.hypotheses: sent2,
                                                model.labels: labels, model.lr: 0.001})
            print(step, loss)
            sent1, sent2 = sent2, sent1
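To make the OOV handling concrete, here is a minimal standalone sketch (plain numpy, independent of the model; the helper name window_average is made up for illustration) of the windowed average that _ave_vec computes:

import numpy as np

def window_average(embed_list, cur_pos, window_size):
    # same indexing as _ave_vec above: up to window_size neighbours on each
    # side of cur_pos, excluding cur_pos itself
    left = max(0, cur_pos - window_size)
    right = min(cur_pos + window_size, len(embed_list))
    context = embed_list[left:cur_pos] + embed_list[cur_pos + 1:right + 1]
    return np.mean(np.stack(context), axis=0)

vecs = [np.full(3, float(i)) for i in range(5)]        # toy embeddings 0..4
print(window_average(vecs, cur_pos=2, window_size=1))  # mean of vecs[1] and vecs[3] -> [2. 2. 2.]

Note also that the demo above feeds model.lr directly through feed_dict; the lr_update_op defined in _build_inputs_and_vars is an alternative way to change the learning rate between runs, e.g. sess.run(model.lr_update_op, feed_dict={model.new_lr: 0.0005}).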
Original article: https://www.cnblogs.com/my-love-is-python/p/10079876.html