Theano 学习三 conv2d

在之前的博文基于theano的深度卷积神经网络中使用了theano.tensor.nnet.conv.conv2d函数来计算神经网络的卷积。

计算过程如下图所示。

尽管下面的代码也能实现相应的功能，但是速度慢了很多。

def conv(self, a, v, full=0):  # valid:0  full:1
    """Slide kernel ``v`` over the 2-D array ``a`` and sum element-wise products.

    The kernel is applied exactly as given; it is not flipped here.

    Args:
        a: 2-D input array.
        v: 2-D kernel.
        full: falsy for "valid" output (only windows lying fully inside
            ``a``); truthy for "full" output, where ``a`` is first surrounded
            by a zero border of ``vh - 1`` rows and ``vw - 1`` columns.

    Returns:
        A nested list (list of rows) holding one sum per window position.
    """
    rows, cols = np.shape(a)
    kernel_rows, kernel_cols = np.shape(v)

    if full:
        # Embed the input in a zero canvas so every partial overlap is visited.
        row_pad = kernel_rows - 1
        col_pad = kernel_cols - 1
        padded = np.zeros((rows + 2 * kernel_rows - 2, cols + 2 * kernel_cols - 2))
        padded[row_pad:row_pad + rows, col_pad:col_pad + cols] = a
        a = padded
        rows, cols = np.shape(a)

    result = []
    for top in range(rows - kernel_rows + 1):
        out_row = []
        for left in range(cols - kernel_cols + 1):
            window = a[top:top + kernel_rows, left:left + kernel_cols]
            out_row.append(np.sum(np.multiply(window, v)))
        result.append(out_row)
    return result

下面简单使用一下theano.tensor.nnet.conv.conv2d。

输入层为2*1*4*4，卷积核为2*1*2*2，得到的结果是2*2*3*3。每个4*4的输入各与两个卷积核运算，每次输入1个，得到2*2*3*3的输出。

filter_shape=(2,1,2,2)、image_shape=(2,1,4,4)为可选参数，此处无论是否传入，得到的结果都一样。

当输入层第二个参数n不为1时，卷积核第二个参数需要等于n，每次输入n个，结果为分别卷积之和。

在进行上图所示卷积运算之前，卷积核先要旋转180度。

比如，对于结果第一行的 [ 0. 0.1 0.3 ] 计算方式是：

用旋转180度后的卷积核

[[ 0.3 , 0.2 ],
 [ 0.1 , 0   ]]

分别与第一个输入最上面两行中从左向右滑动的三个2*2窗口

[[ 0 , 0 ],    [[ 0 , 0 ],    [[ 0 , 0 ],
 [ 0 , 1 ]] 、  [ 1 , 3 ]] 和  [ 3 , 0 ]]

逐元素相乘后求和，依次得到 0、0.1 和 0.3。

import numpy as np
import theano
from theano.tensor.nnet import conv
import theano.tensor as T
# Symbolic 4-D float64 tensor that stands in for the image batch.
inputs = T.tensor4(name='input', dtype='float64')

# Kernel values 0.0, 0.1, ..., 0.7 laid out as a (2, 1, 2, 2) array.
a = np.reshape(np.array(range(8)) * 0.1, (2, 1, 2, 2))
W = theano.shared(np.asarray(a, dtype=inputs.dtype), name='W')

# filter_shape / image_shape simply restate the shapes of W and x below.
conv_out = conv.conv2d(
    inputs,
    W,
    filter_shape=(2, 1, 2, 2),
    image_shape=(2, 1, 4, 4),
)

# Compile the symbolic graph into a callable.
f = theano.function([inputs], conv_out)

# Two 4x4 inputs (array shape (2, 1, 4, 4)); each has a non-zero 2x2 centre.
x = np.asarray(
    [
        [[[0, 0, 0, 0], [0, 1, 3, 0], [0, 2, 2, 0], [0, 0, 0, 0]]],
        [[[0, 0, 0, 0], [0, 2, 1, 0], [0, 1, 1, 0], [0, 0, 0, 0]]],
    ],
    dtype='float64',
)
y = f(x)

print(y)  # 2,2,3,3
"""
[[[[ 0.   0.1  0.3]
   [ 0.2  1.1  1.1]
   [ 0.4  1.   0.6]]

  [[ 0.4  1.7  1.5]
   [ 1.4  4.3  3.1]
   [ 1.2  2.6  1.4]]]


 [[[ 0.   0.2  0.1]
   [ 0.4  0.9  0.4]
   [ 0.2  0.5  0.3]]

  [[ 0.8  1.4  0.5]
   [ 1.6  2.9  1.2]
   [ 0.6  1.3  0.7]]]]
"""

在源码中调用了scipy.signal.sigtools._convolve2d。

scipy.signal.signaltools.convolve2d与numpy.convolve都能实现卷积，下面是简单的示例。

convolve2d对单个输入与卷积核的运算，在valid模式下得到的结果和theano中一样。

对输入的每一行与卷积核的每一行分别调用convolve，将所得结果中除首尾两行外的其余各行依次两两相加，得到的结果与convolve2d（full模式）一样。

convolve计算比较简单，如 np.convolve([1 , 2 , 3 ], [ 2 ,3 ,4 ])得到[ 2 7 16 17 12]，等于[2,3,4]分别乘[1,2,3]并错位相加的结果。

import scipy.signal.signaltools as sg
import numpy as np

# 4x4 input and 2x2 kernel: the first image and first filter of the Theano
# example, so the results can be compared directly.
a = [[0, 0, 0, 0], [0, 1, 3, 0], [0, 2, 2, 0], [0, 0, 0, 0]]
b = [[0, 0.1], [0.2, 0.3]]

# Default mode is 'full': the output is (4 + 2 - 1) x (4 + 2 - 1) = 5 x 5.
e = sg.convolve2d(a, b)

# print() with one argument behaves the same on Python 2 and Python 3.
print(e)
# Recorded output:
# [[ 0.   0.   0.   0.   0. ]
#  [ 0.   0.   0.1  0.3  0. ]
#  [ 0.   0.2  1.1  1.1  0. ]
#  [ 0.   0.4  1.   0.6  0. ]
#  [ 0.   0.   0.   0.   0. ]]

# 1-D full convolution of every input row with every kernel row, ordered as
# (a[0], b[0]), (a[0], b[1]), (a[1], b[0]), ... -> 8 rows of length 5.
ne = [np.convolve(ai, bi) for ai in a for bi in b]
for ni in ne:
    print(ni)
# Recorded output:
# [ 0.  0.  0.  0.  0.]
# [ 0.  0.  0.  0.  0.]
# [ 0.   0.   0.1  0.3  0. ]
# [ 0.   0.2  0.9  0.9  0. ]
# [ 0.   0.   0.2  0.2  0. ]
# [ 0.   0.4  1.   0.6  0. ]
# [ 0.  0.  0.  0.  0.]
# [ 0.  0.  0.  0.  0.]

# 'valid' keeps only positions where the kernel lies fully inside the input.
print(sg.convolve2d(a, b, 'valid'))
# Recorded output:
# [[ 0.   0.1  0.3]
#  [ 0.2  1.1  1.1]
#  [ 0.4  1.   0.6]]

自己写一个简单的，速度差别很大。

import scipy.signal.signaltools as sg
import numpy as np
import timeit

# Benchmark inputs: each is a single-row 2-D list (1 x 16) so the same data can
# be passed to the 2-D routine as-is and to the 1-D routines via a[0] / b[0].
a = [[1, 2, 3, 4] * 4]
b = [[1, 2, 3, 4] * 4]

def conv1d(a,b):
    """Full 1-D discrete convolution of two sequences, in pure Python.

    Every product ``a[j] * b[k]`` is accumulated into output slot ``j + k``.

    Args:
        a: first sequence (must support ``len`` and iteration).
        b: second sequence (must support ``len`` and iteration).

    Returns:
        A list of length ``len(a) + len(b) - 1``.
    """
    out = [0] * (len(a) + len(b) - 1)
    for j, left in enumerate(a):
        for k, right in enumerate(b):
            out[j + k] += left * right
    return out

def npconv():
    """Timing target: numpy's 1-D convolution of the two module-level sample rows.

    The result is discarded; only the call cost matters.
    """
    first_row, second_row = a[0], b[0]
    np.convolve(first_row, second_row)

def sgconv():
    """Timing target: SciPy's 2-D convolution of the module-level samples a and b.

    The result is discarded; only the call cost matters.
    """
    image, kernel = a, b
    sg.convolve2d(image, kernel)

def newconv():
    """Timing target: the pure-Python conv1d on the two module-level sample rows.

    The result is discarded; only the call cost matters.
    """
    first_row, second_row = a[0], b[0]
    conv1d(first_row, second_row)

if __name__=='__main__':
    # Show that the three implementations agree on the same data.
    # print() with one argument behaves the same on Python 2 and Python 3.
    print(sg.convolve2d(a, b))
    print(np.convolve(a[0], b[0]))
    print(conv1d(a[0], b[0]))

    # Total time for 10000 calls of each implementation: SciPy, numpy, pure Python.
    print(timeit.timeit('sgconv()',setup='from __main__ import sgconv',number=10000))
    print(timeit.timeit('npconv()',setup='from __main__ import npconv',number=10000))
    print(timeit.timeit('newconv()',setup='from __main__ import newconv',number=10000))

# Recorded output of one run. The last three lines are the timings of
# sgconv, npconv and newconv, in that order.
# [[  1   4  10  20  27  32  36  40  53  60  62  60  79  88  88  80 103 108
#    94  60  77  80  68  40  51  52  42  20  25  24  16]]
# [  1   4  10  20  27  32  36  40  53  60  62  60  79  88  88  80 103 108
#   94  60  77  80  68  40  51  52  42  20  25  24  16]
# [1, 4, 10, 20, 27, 32, 36, 40, 53, 60, 62, 60, 79, 88, 88, 80, 103, 108, 94, 60, 77, 80, 68, 40, 51, 52, 42, 20, 25, 24, 16]
# 0.163667983502
# 0.0639453593833
# 0.62897722497

附scipy.signal.signaltools.convolve2d源码。

# Abridged listing: each "......" line marks code left out of this excerpt.
def convolve2d(in1, in2, mode='full', boundary='fill', fillvalue=0):
    ......
    # Delegate to the compiled helper. val and bval are not defined in the
    # visible lines -- presumably derived from mode and boundary in the
    # omitted code above; confirm against the full source.
    out = sigtools._convolve2d(in1, in2, 1, val, bval, fillvalue)
    ......
    return out

/* Abridged listing of the Python-callable wrapper: each "......" line marks
   code left out of this excerpt. */
static PyObject *sigtools_convolve2d(PyObject *NPY_UNUSED(dummy), PyObject *args) {
......
 /* Pass the raw data pointers, strides and dimensions of the arrays to the
    strided 2-D routine; its status code is stored in ret. */
 ret = pylab_convolve_2d (PyArray_DATA(ain1),      /* Input data Ns[0] x Ns[1] */
                     PyArray_STRIDES(ain1),   /* Input strides */
                     PyArray_DATA(aout),      /* Output data */
                     PyArray_STRIDES(aout),   /* Output strides */
                     PyArray_DATA(ain2),      /* coefficients in filter */
                     PyArray_STRIDES(ain2),   /* coefficients strides */ 
                     PyArray_DIMS(ain2),      /* Size of kernel Nwin[2] */
                 PyArray_DIMS(ain1),      /* Size of image Ns[0] x Ns[1] */
                     flag,                    /* convolution parameters */
                     PyArray_DATA(newfill));  /* fill value */
......
}

/*
 * 2-D convolution or correlation of an image with a kernel, working directly
 * on raw strided buffers.
 *
 * flag packs four fields, extracted with BOUNDARY_MASK, OUTSIZE_MASK,
 * FLIP_MASK and TYPE_MASK/TYPE_SHIFT:
 *   - boundary: PAD (use *fillvalue outside the image), REFLECT or CIRCULAR;
 *   - outsize:  FULL, SAME or VALID, which fixes the output dimensions;
 *   - flip:     non-zero walks the image backwards relative to the kernel
 *               (convolution); zero walks it forwards (correlation);
 *   - type:     index used to select the element size and the
 *               multiply-accumulate routine.
 *
 * Returns 0 on success, or a negative error code:
 *   -1 invalid output-size flag      -2 invalid boundary flag
 *   -3 out of memory                 -4 invalid type number
 *   -5 no multiply-accumulate routine available for the type
 */
int pylab_convolve_2d (char  *in,        /* Input data Ns[0] x Ns[1] */
               intp   *instr,     /* Input strides */
               char  *out,       /* Output data */
               intp   *outstr,    /* Output strides */
               char  *hvals,     /* coefficients in filter */
               intp   *hstr,      /* coefficients strides */
               intp   *Nwin,     /* Size of kernel Nwin[0] x Nwin[1] */
               intp   *Ns,        /* Size of image Ns[0] x Ns[1] */
               int   flag,       /* convolution parameters */
               char  *fillvalue) /* fill value */
{
  int bounds_pad_flag = 0;
  /* Sizes, indices and byte offsets are kept in intp: they are derived from
     intp dimensions and strides, and storing them in int would truncate
     them for large arrays. */
  intp m, n, j, k, ind0, ind1;
  intp Os[2];
  intp new_m, new_n, ind0_memory=0;
  int boundary, outsize, convolve, type_num, type_size;
  char ** indices;
  OneMultAddFunction *mult_and_add;

  boundary = flag & BOUNDARY_MASK;  /* flag can be fill, reflecting, circular */
  outsize = flag & OUTSIZE_MASK;
  convolve = flag & FLIP_MASK;
  type_num = (flag & TYPE_MASK) >> TYPE_SHIFT;

  /* Validate type_num BEFORE it is used as a table index; the lookup used to
     come first and could read outside the tables for an invalid type.
     NOTE(review): the upper bound is kept as "> MAXTYPES"; confirm against
     the table sizes whether it should be ">=". */
  if (type_num < 0 || type_num > MAXTYPES) return -4;  /* Invalid type */
  type_size = elsizes[type_num];

  mult_and_add = OneMultAdd[type_num];
  if (mult_and_add == NULL) return -5;  /* Not available for this type */

  if (outsize == FULL) {Os[0] = Ns[0]+Nwin[0]-1; Os[1] = Ns[1]+Nwin[1]-1;}
  else if (outsize == SAME) {Os[0] = Ns[0]; Os[1] = Ns[1];}
  else if (outsize == VALID) {Os[0] = Ns[0]-Nwin[0]+1; Os[1] = Ns[1]-Nwin[1]+1;}
  else return -1; /* Invalid output flag */

  if ((boundary != PAD) && (boundary != REFLECT) && (boundary != CIRCULAR))
    return -2; /* Invalid boundary flag */

  /* One pointer per kernel column: the image elements (or the fill value)
     that line up with the current kernel row. */
  indices = malloc(Nwin[1] * sizeof(indices[0]));
  if (indices == NULL) return -3; /* No memory */

  /* Speed this up by not doing any if statements in the for loop.  Need 3*3*2=18 different
     loops executed for different conditions */

  for (m=0; m < Os[0]; m++) {  /* loop over output rows */
    /* Reposition index into input image based on requested output size */
    if (outsize == FULL) new_m = convolve ? m : (m-Nwin[0]+1);
    else if (outsize == SAME) new_m = convolve ? (m+((Nwin[0]-1)>>1)) : (m-((Nwin[0]-1) >> 1));
    else new_m = convolve ? (m+Nwin[0]-1) : m; /* VALID */

    for (n=0; n < Os[1]; n++) {  /* loop over columns */
      char * sum = out+m*outstr[0]+n*outstr[1];
      memset(sum, 0, type_size); /* sum = 0.0; */

      if (outsize == FULL) new_n = convolve ? n : (n-Nwin[1]+1);
      else if (outsize == SAME) new_n = convolve ? (n+((Nwin[1]-1)>>1)) : (n-((Nwin[1]-1) >> 1));
      else new_n = convolve ? (n+Nwin[1]-1) : n;

      /* Sum over kernel, if index into image is out of bounds
         handle it according to boundary flag */
      for (j=0; j < Nwin[0]; j++) {
        ind0 = convolve ? (new_m-j): (new_m+j);
        bounds_pad_flag = 0;

        /* Resolve the image row index against the boundary mode. */
        if (ind0 < 0) {
          if (boundary == REFLECT) ind0 = -1-ind0;
          else if (boundary == CIRCULAR) ind0 = Ns[0] + ind0;
          else bounds_pad_flag = 1;
        }
        else if (ind0 >= Ns[0]) {
          if (boundary == REFLECT) ind0 = Ns[0]+Ns[0]-1-ind0;
          else if (boundary == CIRCULAR) ind0 = ind0 - Ns[0];
          else bounds_pad_flag = 1;
        }

        if (bounds_pad_flag) {
          /* The whole image row is outside: every tap reads the fill value. */
          for (k=0; k < Nwin[1]; k++) {
            indices[k] = fillvalue;
          }
        }
        else {
          ind0_memory = ind0*instr[0];  /* byte offset of the image row */

          for (k=0; k < Nwin[1]; k++) {
            /* Resolve the image column index against the boundary mode. */
            ind1 = convolve ? (new_n-k) : (new_n+k);
            if (ind1 < 0) {
              if (boundary == REFLECT) ind1 = -1-ind1;
              else if (boundary == CIRCULAR) ind1 = Ns[1] + ind1;
              else bounds_pad_flag = 1;
            }
            else if (ind1 >= Ns[1]) {
              if (boundary == REFLECT) ind1 = Ns[1]+Ns[1]-1-ind1;
              else if (boundary == CIRCULAR) ind1 = ind1 - Ns[1];
              else bounds_pad_flag = 1;
            }

            if (bounds_pad_flag) {
              indices[k] = fillvalue;
            }
            else {
              indices[k] = in+ind0_memory+ind1*instr[1];
            }
            bounds_pad_flag = 0;  /* the column flag applies to this tap only */
          }
        }

        /* Accumulate kernel row j times the gathered image elements into sum. */
        mult_and_add(sum, hvals+j*hstr[0], hstr[1], indices, Nwin[1]);
      }
    }
  }
  free(indices);
  return 0;
}