EasyPR源码剖析（9）：字符识别

在上一篇文章的介绍中，我们已经通过相应的字符分割方法，将车牌区域进行分割，得到7个分割字符图块，接下来要做的就是将字符图块放入训练好的神经网络模型，通过模型来预测每个图块所表示的具体字符。神经网络的介绍和训练过程我们将在下一节中具体介绍，本节主要介绍字符特征的提取，和如何通过训练好的神经网络模型来进行字符的识别。

字符识别主要是通过类CharsIdentify 来进行，对于中文字符和非中文字符，分别采取了不同的策略，训练得到的ANN模型也不一样，中文字符的识别主要使用 identifyChinese 来处理，非中文字符的识别主要采用 identify 来处理。另外，类CharsIdentify采用了单例模式，具体的初始化代码和构造函数如下：

 1   CharsIdentify* CharsIdentify::instance_ = nullptr;  // stays null until instance() is first called
 2 
 3   CharsIdentify* CharsIdentify::instance() {
       // Returns the shared CharsIdentify object, constructing it on the first call.
       // NOTE(review): nothing here synchronizes the null check with the
       // allocation, so concurrent first calls could each run `new`.
 4     if (!instance_) {
 5       instance_ = new CharsIdentify;
 6     }
 7     return instance_;
 8   }
 9 
10   CharsIdentify::CharsIdentify() {
       // Loads the two ANN models from kDefaultAnnPath and kChineseAnnPath,
       // then creates kv_ and has it load "etc/province_mapping".
       // NOTE(review): the results of the load calls are not checked here.
11     ann_ = ml::ANN_MLP::load<ml::ANN_MLP>(kDefaultAnnPath);
12     annChinese_ = ml::ANN_MLP::load<ml::ANN_MLP>(kChineseAnnPath);
13     kv_ = std::shared_ptr<Kv>(new Kv);
14     kv_->load("etc/province_mapping");
15   }
16 
17   void CharsIdentify::LoadModel(std::string path) {
       // Replaces ann_ with the model stored at `path`.
       // A path equal to kDefaultAnnPath is ignored and ann_ is left untouched.
       // NOTE(review): ann_ is dereferenced below without a null check —
       // confirm it can never be empty when this is called.
18     if (path != std::string(kDefaultAnnPath)) {
19 
20       if (!ann_->empty())
21         ann_->clear();  // drop the current network before loading the new one
22 
23       ann_ = ml::ANN_MLP::load<ml::ANN_MLP>(path);
24     }
25   }
26 
27   void CharsIdentify::LoadChineseModel(std::string path) {
       // Replaces annChinese_ with the model stored at `path`.
       // A path equal to kChineseAnnPath is ignored and annChinese_ is left untouched.
       // NOTE(review): annChinese_ is dereferenced below without a null check —
       // confirm it can never be empty when this is called.
28     if (path != std::string(kChineseAnnPath)) {
29 
30       if (!annChinese_->empty())
31         annChinese_->clear();  // drop the current network before loading the new one
32 
33       annChinese_ = ml::ANN_MLP::load<ml::ANN_MLP>(path);
34     }
35   }

View Code

这里的单例模式只考虑了单线程的情况；如果要在多线程环境下使用，需要加入双重检查锁定（double-checked locking）等同步措施。处理中文字符和非中文字符时，分别加载了不同的ANN模型文件；ANN模型通过 OpenCV 机器学习模块（ml）中自带的神经网络模型 ml::ANN_MLP 来实现。

字符特征获取

通过神经网络对字符图块进行判别，首先需要获取字符图块的特征，字符特征的获取，主要通过 charFeatures 函数来实现。具体的函数代码如下所示：

 1 Mat charFeatures(Mat in, int sizeData) {
     // Builds the feature row for one character image: a 1 x N CV_32F matrix
     // holding the per-column histogram, then the per-row histogram, then the
     // pixel values of the image resized to sizeData x sizeData.
 2   const int VERTICAL = 0;
 3   const int HORIZONTAL = 1;
 4 
 5   // Crop to the character's bounding box and re-center it (affects precision by about 5%).
 6   Rect _rect = GetCenterRect(in);
 7   Mat tmpIn = CutTheRect(in, _rect);
 8   //Mat tmpIn = in.clone();
 9 
10   // Low data feature
11   Mat lowData;
12   resize(tmpIn, lowData, Size(sizeData, sizeData));
13 
14   // Histogram features
15   Mat vhist = ProjectedHistogram(lowData, VERTICAL);
16   Mat hhist = ProjectedHistogram(lowData, HORIZONTAL);
17 
18   // Feature length: both histograms plus one value per pixel of lowData.
19   int numCols = vhist.cols + hhist.cols + lowData.cols * lowData.cols;
20 
21   Mat out = Mat::zeros(1, numCols, CV_32F);
22   // Assign values to the feature row; j is the running write position.
23 
24   int j = 0;
25   for (int i = 0; i < vhist.cols; i++) {
26     out.at<float>(j) = vhist.at<float>(i);
27     j++;
28   }
29   for (int i = 0; i < hhist.cols; i++) {
30     out.at<float>(j) = hhist.at<float>(i);
31     j++;
32   }
     // Mat::at takes (row, col), so x acts as the row index below even though it
     // is bounded by cols; lowData is square, so the indices stay in range.
33   for (int x = 0; x < lowData.cols; x++) {
34     for (int y = 0; y < lowData.rows; y++) {
35       out.at<float>(j) += (float)lowData.at <unsigned char>(x, y);
36       j++;
37     }
38   }
39 
40   //std::cout << out << std::endl;
41 
42   return out;
43 }

View Code

对于中文字符和英文字符，默认的图块大小是不一样的，中文字符默认是 20*20，非中文默认是10*10。

GetCenterRect 函数主要用于获取字符的边框，分别从上、下、左、右四个方向向内扫描，查找字符的边界位置；
CutTheRect 函数裁剪原图，即将字符移动到图像的中间位置，通过这一步的操作，可将字符识别的准确率提高5%左右；
ProjectedHistogram 函数用于获取投影直方图：统计每一行（或每一列）中像素值大于20的像素个数，并除以最大值，归一化到0-1区间范围内；

GetCenterRect 函数具体代码如下：

 1 Rect GetCenterRect(Mat &in) {
     // Returns the bounding rectangle of all pixels whose byte value exceeds 20,
     // found by scanning inward from the top, bottom, left and right edges.
     // If no pixel exceeds 20, the defaults below make the rect cover the whole image.
     // NOTE(review): in.data[i * step + j] reads one byte per pixel — assumes an
     // 8-bit single-channel image; confirm at the call sites.
 2   Rect _rect;
 3 
 4   int top = 0;
 5   int bottom = in.rows - 1;
 6 
 7   // find the center rect
 8 
     // First row (from the top) that contains a pixel above the threshold.
 9   for (int i = 0; i < in.rows; ++i) {
10     bool bFind = false;
11     for (int j = 0; j < in.cols; ++j) {
12       if (in.data[i * in.step[0] + j] > 20) {
13         top = i;
14         bFind = true;
15         break;
16       }
17     }
18     if (bFind) {
19       break;
20     }
21 
22   }
     // Last such row, scanning upward from the bottom.
23   for (int i = in.rows - 1;
24   i >= 0;
25   --i) {
26     bool bFind = false;
27     for (int j = 0; j < in.cols; ++j) {
28       if (in.data[i * in.step[0] + j] > 20) {
29         bottom = i;
30         bFind = true;
31         break;
32       }
33     }
34     if (bFind) {
35       break;
36     }
37 
38   }
39 
40 
41   int left = 0;
42   int right = in.cols - 1;
     // First column (from the left) that contains a pixel above the threshold.
43   for (int j = 0; j < in.cols; ++j) {
44     bool bFind = false;
45     for (int i = 0; i < in.rows; ++i) {
46       if (in.data[i * in.step[0] + j] > 20) {
47         left = j;
48         bFind = true;
49         break;
50       }
51     }
52     if (bFind) {
53       break;
54     }
55 
56   }
     // Last such column, scanning leftward from the right edge.
57   for (int j = in.cols - 1;
58   j >= 0;
59   --j) {
60     bool bFind = false;
61     for (int i = 0; i < in.rows; ++i) {
62       if (in.data[i * in.step[0] + j] > 20) {
63         right = j;
64         bFind = true;
65 
66         break;
67       }
68     }
69     if (bFind) {
70       break;
71     }
72   }
73 
     // Both edges are inclusive, hence the +1 on width and height.
74   _rect.x = left;
75   _rect.y = top;
76   _rect.width = right - left + 1;
77   _rect.height = bottom - top + 1;
78 
79   return _rect;
80 }

View Code

CutTheRect 函数具体代码如下：

 1 Mat CutTheRect(Mat &in, Rect &rect) {
     // Copies the `rect` region of `in` into the middle of a new, zero-filled
     // size x size CV_8UC1 image, where size is in.cols.
     // NOTE(review): if rect.height (or rect.width) is larger than in.cols, the
     // offsets below become negative and the copy would index outside dstMat —
     // presumably callers always pass square images; confirm.
 2   int size = in.cols;  // (rect.width>rect.height)?rect.rect.height;
 3   Mat dstMat(size, size, CV_8UC1);
 4   dstMat.setTo(Scalar(0, 0, 0));
 5 
     // Top-left corner at which the region is pasted so that it ends up centered.
 6   int x = (int) floor((float) (size - rect.width) / 2.0f);
 7   int y = (int) floor((float) (size - rect.height) / 2.0f);
 8 
     // Byte-by-byte copy through the raw data pointers, one byte per pixel.
 9   for (int i = 0; i < rect.height; ++i) {
10 
11     for (int j = 0; j < rect.width; ++j) {
12       dstMat.data[dstMat.step[0] * (i + y) + j + x] =
13           in.data[in.step[0] * (i + rect.y) + j + rect.x];
14     }
15   }
16 
17   //
18   return dstMat;
19 }

View Code

ProjectedHistogram 函数代码如下：

 1 float countOfBigValue(Mat &mat, int iValue) {
     // Counts how many bytes of `mat` are greater than iValue and returns the
     // count as a float. A matrix with more than one row is walked down its
     // first column; otherwise the first mat.cols bytes of the row are walked.
 2   float iCount = 0.0;
 3   if (mat.rows > 1) {
 4     for (int i = 0; i < mat.rows; ++i) {
         // step[0] is the byte distance between rows, so this reads the first byte of row i
 5       if (mat.data[i * mat.step[0]] > iValue) {
 6         iCount += 1.0;
 7       }
 8     }
 9     return iCount;
10 
11   } else {
12     for (int i = 0; i < mat.cols; ++i) {
13       if (mat.data[i] > iValue) {
14         iCount += 1.0;
15       }
16     }
17 
18     return iCount;
19   }
20 }
21 
22 Mat ProjectedHistogram(Mat img, int t) {
     // Returns a 1 x sz CV_32F histogram with one bin per row of img (t != 0) or
     // one bin per column (t == 0). Each bin holds the number of pixels in that
     // row/column greater than 20, divided by the largest bin.
23   int sz = (t) ? img.rows : img.cols;
24   Mat mhist = Mat::zeros(1, sz, CV_32F);
25 
26   for (int j = 0; j < sz; j++) {
27     Mat data = (t) ? img.row(j) : img.col(j);
28 
29     mhist.at<float>(j) = countOfBigValue(data, 20);
30   }
31 
32   // Normalize histogram
33   double min, max;
34   minMaxLoc(mhist, &min, &max);
35 
     // Skipped when every bin is zero, which avoids dividing by zero.
36   if (max > 0)
37     mhist.convertTo(mhist, -1, 1.0f / max, 0);   // scale into the 0-1 range
38 
39   return mhist;
40 }

View Code

通过上述代码可知，非中文字符和中文字符获得的字符特征个数是不同的，非中文字符features个数为 10+10+10*10=120，中文字符features个数为 20+20+20*20=440。

字符识别

通过上述函数获取字符特征之后，可以通过神经网络模型对车牌字符进行识别，具体的识别函数如下所示：

 1   int CharsIdentify::classify(cv::Mat f, float& maxVal, bool isChinses){
       // Runs ann_ on the feature row `f` and returns the index of the largest
       // output within the requested range: [0, kCharactersNumber) when
       // isChinses is false, [kCharactersNumber, kCharsTotalNumber) otherwise.
       // maxVal receives the value of that winning output.
       // NOTE(review): ann_ is used for both ranges; annChinese_ is not
       // consulted here — confirm that is intended.
 2     int result = -1;
 3 
 4     cv::Mat output(1, kCharsTotalNumber, CV_32FC1);
 5     ann_->predict(f, output);
 6 
 7     maxVal = -2.f;  // starting value for the running maximum
 8     if (!isChinses) {
 9       result = 0;
10       for (int j = 0; j < kCharactersNumber; j++) {
11         float val = output.at<float>(j);
12         // std::cout << "j:" << j << "val:" << val << std::endl;
13         if (val > maxVal) {
14           maxVal = val;
15           result = j;
16         }
17       }
18     }
19     else {
20       result = kCharactersNumber;
21       for (int j = kCharactersNumber; j < kCharsTotalNumber; j++) {
22         float val = output.at<float>(j);
23         //std::cout << "j:" << j << "val:" << val << std::endl;
24         if (val > maxVal) {
25           maxVal = val;
26           result = j;
27         }
28       }
29     }
30     //std::cout << "maxVal:" << maxVal << std::endl;
31     return result;
32   }

View Code

ann_为之前加载得到的神经网络模型，直接调用其 predict() 函数，即可得到输出矩阵 output，输出矩阵中最大值所在的下标即为识别出的车牌字符编号。编号的取值范围为0-64，共65个，各编号对应的字符如下所示：

static const char *kChars[] = {
  /* Digits: indices 0-9 (10 entries). */
  "0", "1", "2", "3", "4",
  "5", "6", "7", "8", "9",

  /* Latin letters: indices 10-33 (24 entries); "I" and "O" are left out. */
  "A", "B", "C", "D", "E", "F", "G", "H",
  "J", "K", "L", "M", "N",
  "P", "Q", "R", "S", "T", "U",
  "V", "W", "X", "Y", "Z",

  /* "zh_"-prefixed labels: indices 34-64 (31 entries). */
  "zh_cuan",  "zh_e",     "zh_gan",   "zh_gan1",
  "zh_gui",   "zh_gui1",  "zh_hei",   "zh_hu",
  "zh_ji",    "zh_jin",   "zh_jing",  "zh_jl",
  "zh_liao",  "zh_lu",    "zh_meng",  "zh_min",
  "zh_ning",  "zh_qing",  "zh_qiong", "zh_shan",
  "zh_su",    "zh_sx",    "zh_wan",   "zh_xiang",
  "zh_xin",   "zh_yu",    "zh_yu1",   "zh_yue",
  "zh_yun",   "zh_zang",  "zh_zhe"
};

其中26个英文字母中，因为I 和 O容易和数字的 1和0 混淆，因此被去除了，后面31个中文字符分别对应中国的31个行政区域（港澳台暂不考虑）。将识别的各个字符整体输出，就得到了最终的结果。