Neural Networks 5: Recurrent Neural Networks

▶ Recurrent Neural Networks

● Code, adapted from [https://zybuluo.com/hanbingtao/note/541458]. It mainly implements a single-layer recurrent neural network class, RecurrentLayer; the forward and BPTT formulas it follows are summarized below.
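
As a quick reference (a summary of the linked tutorial, not part of the original post), the computations RecurrentLayer performs can be written as follows, where x_t is the input at time t, s_t is the hidden state stored in stateTable, net_t = U x_t + W s_{t-1} is the pre-activation, and δ_t is the error term:

    % forward pass: next state from the current input and the previous state
    s_t = f\left( U x_t + W s_{t-1} \right)
    % BPTT: error term at time k from the error term at time k+1
    \delta_k^{T} = \delta_{k+1}^{T} \, W \, \mathrm{diag}\!\left[ f'(\mathrm{net}_k) \right]
    % gradient of the loss w.r.t. W: per-time-step gradients summed over time
    \nabla_W E = \sum_{k=1}^{t} \delta_k \, s_{k-1}^{T}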

from functools import reduce
import numpy as np

global_epsilon = 10e-4                                          # perturbation size used by the numerical gradient check

class ReluActivator(object):                                    # ReLU activation
    def forward(self, x):
        return max(0, x)

    def backward(self, x):
        return int(x > 0)

class IdentityActivator(object):                                # identity activation
    def forward(self, x):
        return x

    def backward(self, x):
        return 1

def myMap(array, op):                                           # apply op element-wise to a numpy array, in place
    for i in np.nditer(array, op_flags=['readwrite']):
        i[...] = op(i)
class RecurrentLayer(object):
    def __init__(self, sCol, dCol, activator, ita):
        self.sCol = sCol                                        # input size
        self.dCol = dCol                                        # output (state) size
        self.activator = activator
        self.ita = ita                                          # learning rate
        self.time = 0
        self.stateTable = []                                    # state table: one state vector per time step
        self.stateTable.append(np.zeros((dCol, 1)))
        self.U = np.random.uniform(-1e-4, 1e-4, (dCol, sCol))   # initialize U
        self.W = np.random.uniform(-1e-4, 1e-4, (dCol, dCol))   # initialize W

    def print(self):
        print("inputSize = %d, outputSize = %d, ita = %f" % (self.sCol, self.dCol, self.ita))
        print("stateTable =\n", self.stateTable)
        print("U =\n", self.U)
        print("W =\n", self.W)
    def forward(self, sArray):                                  # forward pass
        self.time += 1
        state = (np.dot(self.U, sArray) + np.dot(self.W, self.stateTable[-1]))
        myMap(state, self.activator.forward)
        self.stateTable.append(state)

    def backward(self, deltaArrayNextLayer, activator):
        self.bpDelta(deltaArrayNextLayer, activator)
        self.bpGrad()

    def update(self):
        self.W -= self.ita * self.grad

    def bpDelta(self, deltaArrayNextLayer, activator):
        self.deltaTable = []                                    # error terms (deltas) at each time step
        for i in range(self.time):
            self.deltaTable.append(np.zeros((self.dCol, 1)))
        self.deltaTable.append(deltaArrayNextLayer)
        for k in range(self.time - 1, 0, -1):                   # iterate backwards over time
            state = self.stateTable[k+1].copy()                 # compute the delta at time k from the delta at time k+1
            myMap(self.stateTable[k+1], activator.backward)     # note: stateTable[k+1] is overwritten in place with the activation derivatives
            self.deltaTable[k] = np.dot(np.dot(self.deltaTable[k+1].T, self.W), np.diag(state[:,0])).T

    def bpGrad(self):
        self.gradTable = []                                     # weight gradient at each time step
        for t in range(self.time + 1):
            self.gradTable.append(np.zeros((self.dCol, self.dCol)))
        for t in range(self.time, 0, -1):
            self.gradTable[t] = np.dot(self.deltaTable[t], self.stateTable[t-1].T)
        self.grad = reduce(lambda a, b: a + b, self.gradTable, self.gradTable[0]) # total gradient: sum over all time steps (gradTable[0] stays a zero matrix)

    def reset_state(self):
        self.time = 0                                           # reset the current time to t0
        self.stateTable = []                                    # state table: one state vector per time step
        self.stateTable.append(np.zeros((self.dCol, 1)))
def createTestData():
    sArrayTest = [np.array([[1], [2], [3]]), np.array([[2], [3], [4]])]
    dArrayTest = np.array([[1], [2]])
    return sArrayTest, dArrayTest

def test():
    recurrentLayer = RecurrentLayer(3, 2, ReluActivator(), 1e-3)
    sArrayTest, dArrayTest = createTestData()
    recurrentLayer.forward(sArrayTest[0])
    recurrentLayer.forward(sArrayTest[1])
    recurrentLayer.backward(dArrayTest, ReluActivator())
    recurrentLayer.print()

def gradCheck():                                                # gradient check
    rl = RecurrentLayer(3, 2, IdentityActivator(), 1e-3)
    sArrayTest, dArrayTest = createTestData()
    rl.forward(sArrayTest[0])
    rl.forward(sArrayTest[1])
    rl.backward(np.ones(rl.stateTable[-1].shape, dtype=np.float64), IdentityActivator())    # backward pass with an all-ones matrix as the delta from the next layer
    for i in range(rl.W.shape[0]):
        for j in range(rl.W.shape[1]):
            rl.W[i,j] += global_epsilon
            rl.reset_state()
            rl.forward(sArrayTest[0])
            rl.forward(sArrayTest[1])
            err1 = np.sum(rl.stateTable[-1])
            rl.W[i,j] -= 2*global_epsilon
            rl.reset_state()
            rl.forward(sArrayTest[0])
            rl.forward(sArrayTest[1])
            err2 = np.sum(rl.stateTable[-1])
            expect_grad = (err1 - err2) / (2 * global_epsilon)
            rl.W[i,j] += global_epsilon
            print('w(%d,%d): expected %f - actual %f' % (i, j, expect_grad, rl.grad[i,j]))

if __name__ == "__main__":
    test()
    gradCheck()
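
The listing above only runs a forward/backward pass; below is a minimal sketch of how one training step could look with this class (assumed usage, not from the original post — the delta fed into backward would normally come from a downstream output layer, so an all-ones placeholder is used here):

layer = RecurrentLayer(3, 2, ReluActivator(), 1e-3)
sequence, _ = createTestData()
layer.reset_state()                                     # start a new sequence at t0
for x in sequence:                                      # feed the whole sequence forward
    layer.forward(x)
deltaFromNextLayer = np.ones((2, 1))                    # placeholder delta; normally produced by an output layer
layer.backward(deltaFromNextLayer, ReluActivator())     # BPTT: compute the deltas and the gradient of W
layer.update()                                          # gradient-descent step on W (U is not updated in this implementation)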

● Output

inputSize = 3, outputSize = 2, ita = 0.001000
stateTable =
 [array([[0.],
       [0.]]), array([[8.47915158e-05],
       [0.00000000e+00]]), array([[1.],
       [0.]])]
U =
 [[ 8.40480937e-05 -6.87332018e-05  4.60699419e-05]
 [ 6.21850646e-06 -8.85631147e-05 -5.18937760e-05]]
W =
 [[ 5.08985344e-06 -2.33817326e-05]
 [ 8.48161836e-06  4.77010103e-05]]
w(0,0): expected -0.000272 - actual -0.000272
w(0,1): expected -0.000151 - actual -0.000151
w(1,0): expected -0.000272 - actual -0.000272
w(1,1): expected -0.000151 - actual -0.000151
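
The w(i,j) lines compare, for each entry of W, the gradient computed analytically by backward ("actual") with a central-difference estimate ("expected"). With the check's loss E taken as the sum of the entries of the final state, as in gradCheck, the estimate is

    \frac{\partial E}{\partial W_{ij}} \approx \frac{E(W_{ij}+\epsilon) - E(W_{ij}-\epsilon)}{2\epsilon},
    \qquad \epsilon = \texttt{global\_epsilon} = 10^{-3}
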
Original article: https://www.cnblogs.com/cuancuancuanhao/p/11581165.html