『科学计算』图像检测微型demo

这里是课上老师给出的一个示例程序，演示图像检测的过程。本来以为是传统的滑窗检测，但实际上引入了selectivesearch来选择候选窗，所以从思路上看应该属于RCNN的范畴，蛮有意思的。由于老师的注释写得蛮好的，我基本就不画蛇添足了，这里记录下来，为加深理解cs231n的课程做个铺垫，也算做个储备，实在不行还有开学不是么233

# coding: utf-8
#copyRight by heibanke 
#如需转载请注明出处
#<<用Python做深度学习1-数学基础>>
#http://study.163.com/course/courseMain.htm?courseId=1050010

import numpy as np
# 这里nnet是课程作业里实现的一个模块，参考资料里也会附上我的版本。大家也可以用自己做的版本。
from nnet.layers import FCLayer,Activation,SoftMaxCostLayer
from nnet.neuralnetwork import neuralnetwork
from nnet.helpers import one_hot
# MNIST数据不再上传了，相信大家学到这里，这个数据应该都有一份，复制到文件夹内即可
import load_MNIST

# 需要安装selectivesearch，pip install selectivesearch
import selectivesearch
from matplotlib import pyplot as plt
import matplotlib.patches as mpatches

# 需要安装opencv2
import cv2
%matplotlib inline

1. 用MNIST数据库训练分类器模型

这一步是我们之前课程里的重点，这里选用两层全连接神经网络模型进行训练。在MNIST测试集上的预测准确率能达到97%，大家可以根据自己喜好选择不同的模型试一下。

def get_model(n_hidden=128, weight_decay=0.0001, learning_rate=1.0,
              max_epochs=8, batch_size=128):
    """Train a two-layer fully connected classifier on MNIST and return it.

    The network is FC -> sigmoid -> FC -> softmax. The data comes from
    ``load_MNIST.get_data()`` and every image is flattened to a 784-element
    row before being fed to the network. The test-set error rate is printed
    once training has finished.

    Args:
        n_hidden: Number of units in the hidden fully connected layer.
        weight_decay: Weight-decay coefficient passed to both FC layers.
        learning_rate: Learning rate passed to ``nn.train``.
        max_epochs: Number of training epochs passed to ``nn.train``.
        batch_size: Mini-batch size passed to ``nn.train``.

    Returns:
        The trained ``neuralnetwork`` instance.
    """
    train_X, train_y, test_X, test_y = load_MNIST.get_data()
    # One output unit per distinct label found in the training set.
    n_classes = np.unique(train_y).size

    nn = neuralnetwork(
        layers=[
            FCLayer(n_out=n_hidden, weight_decay=weight_decay),
            Activation('sigmoid'),
            FCLayer(n_out=n_classes, weight_decay=weight_decay),
            Activation('softmax'),
        ],
        cost=SoftMaxCostLayer(),
    )

    # Flatten each 28x28 image into one row of 784 values.
    X = train_X.reshape(train_X.shape[0], 28 * 28)
    Y_one_hot = one_hot(train_y)
    nn._setup(X, Y_one_hot)

    # Train neural network
    # NOTE(review): _setup() receives one-hot targets while train() receives
    # the raw labels; presumably train() encodes them itself - confirm in nnet.
    print('Training neural network')
    nn.train(X, train_y, learning_rate=learning_rate,
             max_epochs=max_epochs, batch_size=batch_size)

    # Evaluate on the held-out test data (not the training data).
    error = nn.error(test_X.reshape(test_X.shape[0], 28 * 28), test_y)
    print('Test error rate: %.4f' % error)

    return nn

# Train the classifier once; the prediction step at the end of the notebook
# reuses this `nn` object.
nn=get_model()

2.读入待测图片，并在待测图片上用selective search算法获得物体窗口

待测图片是我自己在Photoshop上手写的数字，几个数字在一张图片上，不同大小，不同位置。

# Load the test image. cv2.imread does not raise when the file is missing or
# cannot be decoded - it returns None - so fail here with a clear message
# instead of letting selective_search crash on a None input.
img = cv2.imread("test1.jpg")
if img is None:
    raise IOError("could not read image file 'test1.jpg'")

# Propose candidate object windows with selective search.
img_lbl, regions = selectivesearch.selective_search(
    img, scale=500, sigma=0.9, min_size=20)

# Show the first region record and the number of proposed regions.
# (Single-argument print(...) behaves the same on Python 2 and Python 3.)
print(regions[0])
print(len(regions))

{'labels': [0.0], 'rect': (0, 0, 511, 511), 'size': 243048} 49

# Draw every proposed window on top of the image to get an intuitive feel
# for what selective search returned.
fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(img)

# Shared look for all boxes: unfilled, thin red outline.
box_style = dict(fill=False, edgecolor='red', linewidth=1)
for region in regions:
    left, top, width, height = region['rect']
    ax.add_patch(mpatches.Rectangle((left, top), width, height, **box_style))

plt.show()

3.定义规则来筛选窗口

candidates = []
for r in regions:
    # Skip rectangles that have already been accepted.
    if r['rect'] in candidates:
        continue
    # Skip regions that are too small or too large.
    if r['size'] < 200 or r['size'] > 20000:
        continue

    x, y, w, h = r['rect']
    # Skip windows that are far from square.
    # The original test was `w / h > 1.2 or h / w > 1.2`, run under Python 2
    # where `/` on ints floors the quotient, so it only fired when one side
    # was at least twice the other. That effective rule is spelled out here so
    # the result is identical on Python 2 and 3 (the hand-picked indices below
    # depend on it), and a zero-width/height rectangle is rejected instead of
    # raising ZeroDivisionError.
    if max(w, h) >= 2 * min(w, h):
        continue
    candidates.append((x, y, w, h))

print(len(candidates))
# The indices below were chosen by hand for this particular test image. A real
# implementation would not do this: extra windows survive the rules above and
# need a proper follow-up selection step.
candidates_re = [candidates[i] for i in [0, 4, 7, 9, 11]]

# %-formatting keeps the printed text identical on Python 2 and Python 3.
print(u"最终筛选后的窗口是: %s" % (candidates_re,))

# Show only the hand-selected windows on top of the image.
fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(img)
for left, top, width, height in candidates_re:
    outline = mpatches.Rectangle(
        (left, top), width, height,
        fill=False, edgecolor='red', linewidth=1,
    )
    ax.add_patch(outline)

plt.show()

最终筛选后的窗口是: [(47, 31, 65, 89), (335, 124, 84, 116), (127, 230, 65, 90), (343, 375, 41, 50), (183, 399, 73, 81)]

4.对窗口内图片进行处理，大小resize，转换灰度图，最终转换成为784的输入向量

# One row per selected window; each row is a flattened 28x28 grayscale crop.
img_sample = np.zeros((len(candidates_re), 784))

for i, (x, y, w, h) in enumerate(candidates_re):
    # Crop a square whose side is the longer edge of the window, anchored at
    # the window's top-left corner and enlarged by a 20% border on every side.
    largewh = max(w, h)
    bord_size = int(largewh * 0.2)
    # Clamp the start of the crop to the image: a negative start index would
    # wrap around in NumPy slicing and give an empty or wrong crop for windows
    # near the top/left edge. Stops past the bottom/right edge are already
    # truncated by slicing, so they need no clamp.
    top = max(y - bord_size, 0)
    left = max(x - bord_size, 0)
    img_cut = img[top:y + largewh + bord_size, left:x + largewh + bord_size, :]
    # Shrink to 28x28, then convert the BGR crop to a single gray channel.
    img_resize = cv2.resize(img_cut, (28, 28), interpolation=cv2.INTER_NEAREST)
    gray = cv2.cvtColor(img_resize, cv2.COLOR_BGR2GRAY)
    img_sample[i, :] = gray.ravel()
    
# Display the converted samples: lay the 28x28 tiles side by side in one
# 28 x (28 * n) strip. reshape -> (n, 28, 28), transpose -> (28, n, 28),
# reshape -> (28, 28 * n) places sample i in columns [28*i, 28*(i+1)), which is
# what the former `xrange` index loop did (xrange does not exist on Python 3).
n_samples = img_sample.shape[0]
img_s = (img_sample.reshape(n_samples, 28, 28)
         .transpose(1, 0, 2)
         .reshape(28, 28 * n_samples))

fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
ax.imshow(img_s, cmap='gray')
plt.show()

5.用训练好的模型对处理后的图片进行预测

# Scale the gray values by 1/255 before classification.
# NOTE(review): presumably this matches the scaling of the training data
# returned by load_MNIST - confirm.
label = nn.predict(img_sample / 255.0)
# %-formatting keeps the printed text identical on Python 2 and Python 3.
print(u"每个窗口的预测值为: %s" % (label,))

每个窗口的预测值为: [8 5 3 5 0]

[注]：有一个窗口检测失败了。