tensorflow sequence_loss

sequence_loss is a very important function in NLP: RNN, LSTM, and attention models all rely on it. Consider the following code:

# coding: utf-8
import numpy as np
import tensorflow as tf
from tensorflow.contrib.seq2seq import sequence_loss

# logits: shape (batch_size=2, max_time=3, num_classes=4), all logits equal
logits_np = np.array([
    [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]],
    [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]]
])
# targets: shape (batch_size=2, max_time=3), every step labelled class 0
targets_np = np.array([
    [0, 0, 0],
    [0, 0, 0]
], dtype=np.int32)

logits = tf.convert_to_tensor(logits_np)
targets = tf.convert_to_tensor(targets_np)
cost = sequence_loss(logits=logits,
                     targets=targets,
                     # weight 1.0 for every (batch, time) step
                     weights=tf.ones_like(targets, dtype=tf.float64))
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    r = sess.run(cost)
    print(r)

First take the softmax of each [0.5, 0.5, 0.5, 0.5]: softmax([0.5, 0.5, 0.5, 0.5]) = (0.25, 0.25, 0.25, 0.25). Each of the 2 × 3 = 6 steps then contributes a cross-entropy of -ln(0.25), and averaging gives -ln(0.25) * 6 / 6 = 1.38629436112.
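As a sanity check, here is a small NumPy-only sketch (an addition, not part of the original post) that reproduces this number by hand:

# coding: utf-8
import numpy as np

logits = np.array([0.5, 0.5, 0.5, 0.5])
probs = np.exp(logits) / np.exp(logits).sum()  # softmax -> [0.25, 0.25, 0.25, 0.25]
print(-np.log(probs[0]))                       # -ln(0.25) = 1.3862943611...; every one of the
                                               # 6 steps has this loss and weight 1, so the mean is the same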
Here is another example:

# coding:utf-8
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division

from tensorflow.contrib.seq2seq import sequence_loss

import tensorflow as tf
import numpy as np

output_np = np.array(
    [
        [[0.6, 0.5, 0.3, 0.2], [0.9, 0.5, 0.3, 0.2], [1.0, 0.5, 0.3, 0.2]],
        [[0.2, 0.5, 0.3, 0.2], [0.3, 0.5, 0.3, 0.2], [0.4, 0.5, 0.3, 0.2]]
    ]
)
print(output_np.shape)
target_np = np.array([[0, 1, 2],
                      [3, 0, 1]],
                     dtype=np.int32)
print(target_np.shape)
output = tf.convert_to_tensor(output_np, np.float32)
target = tf.convert_to_tensor(target_np, np.int32)

cost = sequence_loss(output,
                     target,
                     tf.ones_like(target, dtype=np.float32))

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    cost_r = sess.run(cost)
    print(cost_r)

This code computes the same thing as the tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(...)) version below.

# coding:utf-8
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division

import tensorflow as tf
import numpy as np


def to_onehot(a):
    # convert a vector of class indices into a one-hot matrix
    max_index = np.max(a)
    b = np.zeros((a.shape[0], max_index + 1))
    b[np.arange(a.shape[0]), a] = 1
    return b

logits_ph = tf.placeholder(tf.float32, shape=(None, None))
labels_ph = tf.placeholder(tf.float32, shape=(None, None))
# the (2, 3, 4) logits from the previous example, flattened to (6, 4)
output_np = np.array([
    [0.6, 0.5, 0.3, 0.2],
    [0.9, 0.5, 0.3, 0.2],
    [1.0, 0.5, 0.3, 0.2],
    [0.2, 0.5, 0.3, 0.2],
    [0.3, 0.5, 0.3, 0.2],
    [0.4, 0.5, 0.3, 0.2]])


cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels_ph, logits=logits_ph))
target_np = np.array([0, 1, 2, 3, 0, 1])  # the (2, 3) targets flattened to (6,)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    cost_r = sess.run(cost, feed_dict={logits_ph: output_np, labels_ph: to_onehot(target_np)})
    print(cost_r)

softmax_cross_entropy_with_logits applies the softmax to each row, then takes the cross entropy against the one-hot label; reduce_mean then averages over all six rows.
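To make the equivalence concrete, the following NumPy-only sketch (an addition, not part of the original post) flattens the second example's logits to shape (6, 4), applies softmax row by row, picks the probability of each target class, and averages the negative logs; it should print the same value as both TensorFlow snippets above:

# coding:utf-8
import numpy as np

logits = np.array([
    [0.6, 0.5, 0.3, 0.2],
    [0.9, 0.5, 0.3, 0.2],
    [1.0, 0.5, 0.3, 0.2],
    [0.2, 0.5, 0.3, 0.2],
    [0.3, 0.5, 0.3, 0.2],
    [0.4, 0.5, 0.3, 0.2]])
targets = np.array([0, 1, 2, 3, 0, 1])

# softmax over the class dimension (max subtracted for numerical stability)
exp = np.exp(logits - logits.max(axis=1, keepdims=True))
probs = exp / exp.sum(axis=1, keepdims=True)

# cross entropy of the target class at each step, then the mean over the 6 steps
losses = -np.log(probs[np.arange(len(targets)), targets])
print(losses.mean())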

Original article: https://www.cnblogs.com/zhouyang209117/p/8338193.html