Neural Networks 4: Convolutional Neural Networks

▶ Convolutional Neural Networks

● Code, adapted from [https://www.zybuluo.com/hanbingtao/note/485480]. It mainly implements a convolution layer (ConvLayer) and a max-pooling layer (MaxPoolLayer).
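
For reference, calculateDSize in the code below computes the output size with the standard convolution arithmetic (integer division, exactly as in the code):

    dSize = (sSize - fSize + 2 * zeroPad) // stride + 1

For example, the 5x5 test input with a 3x3 filter, zero padding 1 and stride 2 gives (5 - 3 + 2) // 2 + 1 = 3, matching the 3x3 feature maps in the output at the end.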

import numpy as np

globalIta = 0.05                                                        # global learning rate
globalEpsilon = 1e-3                                                    # step used by the numerical gradient check

class ReluActivator(object):                                            # ReLU activation function
    def forward(self, x):
        return max(0, x)

    def backward(self, x):
        return int(x > 0)

class IdentityActivator(object):                                        # identity activation function
    def forward(self, x):
        return x

    def backward(self, x):
        return 1

def subArray(sArray, i, j, fRow, fCol, stride):                         # extract the patch that produces output element (i, j)
    startI = i * stride
    startJ = j * stride
    if sArray.ndim == 2:
        return sArray[startI : startI + fRow, startJ : startJ + fCol]
    return sArray[:, startI : startI + fRow, startJ : startJ + fCol]

def get_max_index(array):                                               # index of the maximum value of a 2D patch
    maxI = 0
    maxJ = 0
    max_value = array[0, 0]
    for i in range(array.shape[0]):
        for j in range(array.shape[1]):
            if array[i, j] > max_value:
                maxI, maxJ = i, j
                max_value = array[i, j]
    return maxI, maxJ

def conv(sArray, fArray, dArray, stride, fBias):                        # convolution (cross-correlation) of sArray with fArray, written into dArray
    dRow, dCol = dArray.shape
    fRow, fCol = fArray.shape[-2:]                                      # last two axes are row and col, for both 2D and 3D filters
    for i in range(dRow):
        for j in range(dCol):
            dArray[i][j] = np.sum(subArray(sArray, i, j, fRow, fCol, stride) * fArray) + fBias

def padding(sArray, zpRow, zpCol = -1):                                 # surround the array with zero padding
    if zpCol < 0:
        zpCol = zpRow

    if sArray.ndim == 2:
        sRow, sCol = sArray.shape
        dArray = np.zeros((sRow + 2 * zpRow, sCol + 2 * zpCol))
        dArray[zpRow : zpRow + sRow, zpCol : zpCol + sCol] = sArray
        return dArray

    if sArray.ndim == 3:
        sPage, sRow, sCol = sArray.shape
        dArray = np.zeros((sPage, sRow + 2 * zpRow, sCol + 2 * zpCol))
        dArray[:, zpRow : zpRow + sRow, zpCol : zpCol + sCol] = sArray
        return dArray
    return sArray

def myMap(array, op):                                                   # apply op to every element of the array in place
    for i in np.nditer(array, op_flags=['readwrite']):
        i[...] = op(i)

class Filter(object):                                                   # convolution filter with three dimensions (page = input channel)
    def __init__(self, row, col, page):
        self.page = page
        self.row = row
        self.col = col
        self.fArray = np.random.uniform(-1e-4, 1e-4, (self.page, self.row, self.col))
        self.fBias = 0
        self.fArrayGrad = np.zeros(self.fArray.shape)
        self.fBiasGrad = 0

    def __repr__(self):
        return 'filter fArray:\n%s\nbias:\n%s' % (repr(self.fArray), repr(self.fBias))

    def update(self, ita):                                              # gradient descent step on the filter weights
        self.fArray -= ita * self.fArrayGrad
        self.fBias -= ita * self.fBiasGrad

class ConvLayer(object):                                                # one convolution layer; the constructor fixes the sizes of the input image, the filters and the output image, and allocates the filters and the output data
    def __init__(self, sRow, sCol, nChannel, fRow, fCol, nFilter, zeroPad, stride = 1, activator = IdentityActivator(), ita = globalIta):
        self.sRow = sRow
        self.sCol = sCol
        self.nChannel = nChannel
        self.fRow = fRow
        self.fCol = fCol
        self.nFilter = nFilter
        self.zeroPad = zeroPad
        self.stride = stride
        self.activator = activator
        self.ita = ita
        self.filters = [ Filter(self.fRow, self.fCol, self.nChannel) for i in range(self.nFilter) ]
        self.dRow = ConvLayer.calculateDSize(self.sRow, fRow, self.zeroPad, self.stride)
        self.dCol = ConvLayer.calculateDSize(self.sCol, fCol, self.zeroPad, self.stride)
        self.dArray = np.zeros((self.nFilter, self.dRow, self.dCol))

    @staticmethod                                                       # size of the output feature map
    def calculateDSize(input_size, filter_size, zeroPad, stride):
        return (input_size - filter_size + 2 * zeroPad) // stride + 1

    def forward(self, sArray):                                          # forward convolution
        self.sArray = sArray
        self.sPadArray = padding(sArray, self.zeroPad)
        for f in range(self.nFilter):
            filter = self.filters[f]
            conv(self.sPadArray, filter.fArray, self.dArray[f], self.stride, filter.fBias)
        myMap(self.dArray, self.activator.forward)

    def backward(self, sArray, deltaArrayNextLayer, activator):         # compute the error terms and the filter gradients
        self.forward(sArray)
        self.bpDeltaAndGrad(deltaArrayNextLayer, activator)

    def update(self):                                                   # apply the accumulated gradients to every filter
        for filter in self.filters:
            filter.update(self.ita)

    def bpDeltaAndGrad(self, deltaArrayNextLayer, activator):           # compute the sensitivity map passed on to the previous layer
        exArray = self.expandDelta(deltaArrayNextLayer)                                  # expand the next layer's delta to the stride = 1 case
        exPage, exRow, exCol = exArray.shape
        exPadArray = padding(exArray, (self.sRow + self.fRow - 1 - exRow) // 2, (self.sCol + self.fCol - 1 - exCol) // 2)  # pad exArray; the border of the next layer's delta also receives residual, but it is not propagated further up

        self.deltaArray = np.zeros((self.nChannel, self.sRow, self.sCol))                # this layer's deltaArray
        for f in range(self.nFilter):                                                    # handle one filter at a time
            filter = self.filters[f]
            rotatedFArray = np.array(list(map(lambda i: np.rot90(i, 2), filter.fArray))) # rotate the filter 180 degrees, giving a convolution in the mathematical sense
            temp = np.zeros((self.nChannel, self.sRow, self.sCol))                       # with several filters the final delta is the sum over all filters, so accumulate
            for d in range(self.nChannel):
                conv(exPadArray[f], rotatedFArray[d], temp[d], 1, 0)                     # error term
                conv(self.sPadArray[d], exArray[f], filter.fArrayGrad[d], 1, 0)          # weight gradient
            self.deltaArray += temp
            filter.fBiasGrad = np.sum(exArray[f])                                        # bias gradient

        temp = np.array(self.sArray)                                                     # multiply the error terms by the derivative of the activation function
        myMap(temp, activator.backward)
        self.deltaArray *= temp

    def expandDelta(self, deltaArray):
        exRow = (self.sRow - self.fRow + 2 * self.zeroPad + 1)          # delta map size when the stride is restored to 1
        exCol = (self.sCol - self.fCol + 2 * self.zeroPad + 1)

        exArray = np.zeros((deltaArray.shape[0], exRow, exCol))         # build the expanded delta map
        for i in range(self.dRow):
            for j in range(self.dCol):
                exArray[:, i * self.stride, j * self.stride] = deltaArray[:, i, j]
        return exArray

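# A concrete example of what expandDelta does (illustrative, not from the
# original post): for a 4x4 input, a 2x2 window and stride 2, the 2x2 delta
# map expands into the 3x3 stride-1 map, keeping the original values at
# positions (i * stride, j * stride) and zeros everywhere else:
#   [[1, 2],       [[1, 0, 2],
#    [3, 4]]   ->   [0, 0, 0],
#                   [3, 0, 4]]
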
class MaxPoolLayer(object):                                             # max-pooling layer; the constructor fixes the sizes of the input image, the pooling window and the output image, and allocates the output data
    def __init__(self, sRow, sCol, nChannel, fRow, fCol, stride):
        self.sRow = sRow
        self.sCol = sCol
        self.nChannel = nChannel
        self.fRow = fRow
        self.fCol = fCol
        self.stride = stride
        self.dRow = (sRow - fRow) // self.stride + 1
        self.dCol = (sCol - fCol) // self.stride + 1
        self.dArray = np.zeros((self.nChannel, self.dRow, self.dCol))

    def forward(self, sArray):                                          # forward pooling: take the maximum of every patch
        for d in range(self.nChannel):
            for i in range(self.dRow):
                for j in range(self.dCol):
                    self.dArray[d, i, j] = subArray(sArray[d], i, j, self.fRow, self.fCol, self.stride).max()

    def backward(self, sArray, deltaArrayNextLayer):                    # backward pooling: route each delta to the argmax of its patch
        self.deltaArray = np.zeros(sArray.shape)
        for d in range(self.nChannel):
            for i in range(self.dRow):
                for j in range(self.dCol):
                    patch_array = subArray(sArray[d], i, j, self.fRow, self.fCol, self.stride)
                    nonZeroRow, nonZeroCol = get_max_index(patch_array)
                    self.deltaArray[d, i * self.stride + nonZeroRow, j * self.stride + nonZeroCol] = deltaArrayNextLayer[d, i, j]

def gradCheck():                                                        # numerical gradient check
    sArray, deltaNextLayer, convLayer = createTestDataConv()
    convLayer.forward(sArray)
    deltaArrayNextLayer = np.ones(convLayer.dArray.shape, dtype=np.float64)
    convLayer.backward(sArray, deltaArrayNextLayer, IdentityActivator())
    for d in range(convLayer.filters[0].fArrayGrad.shape[0]):
        for i in range(convLayer.filters[0].fArrayGrad.shape[1]):
            for j in range(convLayer.filters[0].fArrayGrad.shape[2]):
                convLayer.filters[0].fArray[d, i, j] += globalEpsilon
                convLayer.forward(sArray)
                err1 = np.sum(convLayer.dArray)
                convLayer.filters[0].fArray[d, i, j] -= 2 * globalEpsilon
                convLayer.forward(sArray)
                err2 = np.sum(convLayer.dArray)
                expect_grad = (err1 - err2) / (2 * globalEpsilon)       # central difference: dE/dw ~ (E(w + eps) - E(w - eps)) / (2 * eps)
                convLayer.filters[0].fArray[d, i, j] += globalEpsilon   # restore the original weight
                print('fArray(%d,%d,%d): expected - actual %f - %f' % (d, i, j, expect_grad, convLayer.filters[0].fArrayGrad[d, i, j]))

def createTestDataConv():                                               # build test data for the convolution layer
    a = np.array(
        [[[0,1,1,0,2],[2,2,2,2,1],[1,0,0,2,0],[0,1,1,0,0],[1,2,0,0,2]],
         [[1,0,2,2,0],[0,0,0,2,0],[1,2,1,2,1],[1,0,0,0,0],[1,2,1,1,1]],
         [[2,1,2,0,0],[1,0,0,1,0],[0,2,1,0,1],[0,1,2,2,2],[2,1,0,0,1]]]
        )
    b = np.array( [[[0,1,1],[2,2,2],[1,0,0]],[[1,0,2],[0,0,0],[1,2,1]]] )
    c = ConvLayer(5,5,3,3,3,2,1,2,IdentityActivator(),0.001)
    c.filters[0].fArray = np.array( [[[-1,1,0],[0,1,0],[0,1,1]],[[-1,-1,0],[0,0,0],[0,-1,0]],[[0,0,-1],[0,1,0],[1,-1,-1]]], dtype=np.float64 )
    c.filters[0].fBias = 1
    c.filters[1].fArray = np.array( [[[1,1,-1],[-1,-1,1],[0,-1,1]],[[0,1,0],[-1,0,-1],[-1,1,0]],[[-1,0,0],[-1,0,1],[-1,0,0]]], dtype=np.float64 )
    c.filters[1].fBias = 0
    return a, b, c

def createTestDataPool():                                               # build test data for the pooling layer
    a = np.array( [[[1,1,2,4],[5,6,7,8],[3,2,1,0],[1,2,3,4]],[[0,1,2,3],[4,5,6,7],[8,9,0,1],[3,4,5,6]]], dtype=np.float64 )
    b = np.array( [[[1,2],[2,4]],[[3,5],[8,2]]], dtype=np.float64 )
    c = MaxPoolLayer(4,4,2,2,2,2)
    return a, b, c

def testConv():
    print("\ntestConv")
    sArray, deltaNextLayer, convLayer = createTestDataConv()
    convLayer.forward(sArray)                                           # forward convolution
    print(convLayer.dArray)
    convLayer.backward(sArray, deltaNextLayer, IdentityActivator())     # backward pass to correct the filter weights
    convLayer.update()
    print(convLayer.filters[0])
    print(convLayer.filters[1])

def testPool():
    print("\ntestPool")
    sArray, deltaNextLayer, maxPoolLayer = createTestDataPool()
    maxPoolLayer.forward(sArray)                                        # forward pooling
    print('input array:\n%s\noutput array:\n%s' % (sArray, maxPoolLayer.dArray))
    maxPoolLayer.backward(sArray, deltaNextLayer)                       # backward pooling
    print('input array:\n%s\nsensitivity array:\n%s\ndelta array:\n%s' % (sArray, deltaNextLayer, maxPoolLayer.deltaArray))

if __name__ == '__main__':
    testConv()
    testPool()
    gradCheck()
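
A minimal usage sketch of chaining the two layers (the sizes, variable names and the ReLU choice below are illustrative, not from the original post): the convolution output dArray is fed directly into the pooling layer.

    img = np.random.uniform(0, 1, (3, 8, 8))                # random 3-channel 8x8 input
    convLayer = ConvLayer(8, 8, 3, 3, 3, 2, 1, 1, ReluActivator(), 0.01)
    convLayer.forward(img)                                  # convLayer.dArray has shape (2, 8, 8)
    poolLayer = MaxPoolLayer(8, 8, 2, 2, 2, 2)
    poolLayer.forward(convLayer.dArray)                     # poolLayer.dArray has shape (2, 4, 4)
    print(poolLayer.dArray.shape)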

● Output

[[[ 6.  7.  5.]
  [ 3. -1. -1.]
  [ 2. -1.  4.]]

 [[ 2. -5. -8.]
  [ 1. -4. -4.]
  [ 0. -5. -5.]]]
filter fArray:
array([[[-1.008,  0.99 , -0.009],
        [-0.005,  0.994, -0.006],
        [-0.006,  0.995,  0.996]],

       [[-1.004, -1.001, -0.004],
        [-0.01 , -0.009, -0.012],
        [-0.002, -1.002, -0.002]],

       [[-0.002, -0.002, -1.003],
        [-0.005,  0.992, -0.005],
        [ 0.993, -1.008, -1.007]]])
bias:
0.991
filter fArray:
array([[[ 9.980e-01,  9.980e-01, -1.001e+00],
        [-1.004e+00, -1.007e+00,  9.970e-01],
        [-4.000e-03, -1.004e+00,  9.980e-01]],

       [[ 0.000e+00,  9.990e-01,  0.000e+00],
        [-1.009e+00, -5.000e-03, -1.004e+00],
        [-1.004e+00,  1.000e+00,  0.000e+00]],

       [[-1.004e+00, -6.000e-03, -5.000e-03],
        [-1.002e+00, -5.000e-03,  9.980e-01],
        [-1.002e+00, -1.000e-03,  0.000e+00]]])
bias:
-0.007
input array:
[[[1. 1. 2. 4.]
  [5. 6. 7. 8.]
  [3. 2. 1. 0.]
  [1. 2. 3. 4.]]

 [[0. 1. 2. 3.]
  [4. 5. 6. 7.]
  [8. 9. 0. 1.]
  [3. 4. 5. 6.]]]
output array:
[[[6. 8.]
  [3. 4.]]

 [[5. 7.]
  [9. 6.]]]
input array:
[[[1. 1. 2. 4.]
  [5. 6. 7. 8.]
  [3. 2. 1. 0.]
  [1. 2. 3. 4.]]

 [[0. 1. 2. 3.]
  [4. 5. 6. 7.]
  [8. 9. 0. 1.]
  [3. 4. 5. 6.]]]
sensitivity array:
[[[1. 2.]
  [2. 4.]]

 [[3. 5.]
  [8. 2.]]]
delta array:
[[[0. 0. 0. 0.]
  [0. 1. 0. 2.]
  [2. 0. 0. 0.]
  [0. 0. 0. 4.]]

 [[0. 0. 0. 0.]
  [0. 3. 0. 5.]
  [0. 8. 0. 0.]
  [0. 0. 0. 2.]]]
fArray(0,0,0): expected - actual 5.000000 - 5.000000
fArray(0,0,1): expected - actual 6.000000 - 6.000000
fArray(0,0,2): expected - actual 5.000000 - 5.000000
fArray(0,1,0): expected - actual 5.000000 - 5.000000
fArray(0,1,1): expected - actual 7.000000 - 7.000000
fArray(0,1,2): expected - actual 5.000000 - 5.000000
fArray(0,2,0): expected - actual 5.000000 - 5.000000
fArray(0,2,1): expected - actual 6.000000 - 6.000000
fArray(0,2,2): expected - actual 5.000000 - 5.000000
fArray(1,0,0): expected - actual 2.000000 - 2.000000
fArray(1,0,1): expected - actual 1.000000 - 1.000000
fArray(1,0,2): expected - actual 2.000000 - 2.000000
fArray(1,1,0): expected - actual 9.000000 - 9.000000
fArray(1,1,1): expected - actual 9.000000 - 9.000000
fArray(1,1,2): expected - actual 9.000000 - 9.000000
fArray(1,2,0): expected - actual 2.000000 - 2.000000
fArray(1,2,1): expected - actual 1.000000 - 1.000000
fArray(1,2,2): expected - actual 2.000000 - 2.000000
fArray(2,0,0): expected - actual 4.000000 - 4.000000
fArray(2,0,1): expected - actual 5.000000 - 5.000000
fArray(2,0,2): expected - actual 4.000000 - 4.000000
fArray(2,1,0): expected - actual 4.000000 - 4.000000
fArray(2,1,1): expected - actual 9.000000 - 9.000000
fArray(2,1,2): expected - actual 4.000000 - 4.000000
fArray(2,2,0): expected - actual 4.000000 - 4.000000
fArray(2,2,1): expected - actual 5.000000 - 5.000000
fArray(2,2,2): expected - actual 4.000000 - 4.000000
Original article: https://www.cnblogs.com/cuancuancuanhao/p/11581092.html