OpenCV DNN Learning Log: Overall Architecture and Usage Examples

1. Key Points

    (1) Two defining traits of OpenCV DNN

        (1) Focus: it provides inference only, with no model training, which keeps the runtime lightweight.

        (2) Generality: it abstracts the many open-source frameworks, backend optimization libraries, and hardware platforms behind a single unified API, saving a great deal of learning effort.

    (2) Backend optimization libraries supported as of OpenCV 4.5:

                   cv::dnn::DNN_BACKEND_DEFAULT = 0

                   cv::dnn::DNN_BACKEND_HALIDE

                   cv::dnn::DNN_BACKEND_INFERENCE_ENGINE

                   cv::dnn::DNN_BACKEND_OPENCV

                   cv::dnn::DNN_BACKEND_VKCOM

                   cv::dnn::DNN_BACKEND_CUDA

    The preferred backend is selected with Net::setPreferableBackend.

    (3) Hardware targets supported as of OpenCV 4.5:

                   cv::dnn::DNN_TARGET_CPU = 0

                   cv::dnn::DNN_TARGET_OPENCL

                   cv::dnn::DNN_TARGET_OPENCL_FP16

                   cv::dnn::DNN_TARGET_MYRIAD

                   cv::dnn::DNN_TARGET_VULKAN

                   cv::dnn::DNN_TARGET_FPGA

                   cv::dnn::DNN_TARGET_CUDA

                   cv::dnn::DNN_TARGET_CUDA_FP16

    The preferred target is selected with Net::setPreferableTarget.
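    A minimal selection sketch follows (the model files here are hypothetical placeholders): getAvailableBackends() reports which <backend, target> pairs the current build actually supports, and the setPreferable* calls are hints that OpenCV may fall back from if a combination is unavailable.

#include <opencv2/opencv.hpp>
#include <cstdio>

int main()
{
    //hypothetical files; any model loadable by readNet behaves the same
    cv::dnn::Net net = cv::dnn::readNet("yolov4.weights", "yolov4.cfg");
    //enumerate the <backend, target> pairs compiled into this build
    for (const auto& bt : cv::dnn::getAvailableBackends())
        std::printf("backend=%d target=%d\n", (int)bt.first, (int)bt.second);
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); //built-in backend, always present
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    return 0;
}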

2. API Architecture

    The APIs provided by OpenCV DNN fall into three categories: common interfaces, application models, and network layers.

    (1) Common interfaces, divided into data reading and data processing (a minimal usage sketch follows the listing):

1. Data reading
    (1)readNet(modelPath[, configPath[, framework=torch/onnx/caffe/darknet/tensorflow/dldt]])=net
    (2)readNet(framework, modelBuf[, configBuf])=net
    (3)readNetFromTorch(modelPath, isBin, eval)=net
    (4)readNetFromONNX(modelPath/modelBuf)=net
    (5)readNetFromONNX(modelPtr, modelLen)=net
    (6)readNetFromCaffe(configPath/configBuf, modelPath/modelBuf)=net
    (7)readNetFromCaffe(configPtr, configLen, modelPtr, modelLen)=net
    (8)readNetFromDarknet(configPath/configBuf, modelPath/modelBuf)=net
    (9)readNetFromDarknet(configPtr, configLen, modelPtr, modelLen)=net
    (10)readNetFromTensorflow(configPath/configBuf, modelPath/modelBuf)=net
    (11)readNetFromTensorflow(configPtr, configLen, modelPtr, modelLen)=net
    (12)readNetFromModelOptimizer(configPath/configBuf, modelPath/modelBuf)=net
    (13)readNetFromModelOptimizer(configPtr, configLen, modelPtr, modelLen)=net
    (14)readTorchBlob(filePath, isBin)=blob
    (15)readTensorFromONNX(filePath)=blob
    (16)writeTextGraph(binModelPath, txtModelPath)=void
    (17)shrinkCaffeModel(srcModelPath, dstModelPath, layersTypes)=void
2. Data processing
    (1)blobFromImage(ima[, blob], scale, size, mean, swapRB, crop, depth)=blob/void
    (2)blobFromImages(imas[, blob], scale, size, mean, swapRB, crop, depth)=blobs/void
    (3)imagesFromBlob(blob, imas)=void
    (4)getAvailableTargets(bkd)=tars
    (5)getAvailableBackends()=<bkd,tar>
    (6)NMSBoxes(bboxes, scores, scoreThr, nmsThr, indices, eta, topK)=void
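    Below is a minimal sketch of the reading/processing interfaces, assuming the YOLOv4 files from section 4 sit next to the executable: readNet infers the framework from the file extensions, blobFromImage builds the NCHW input tensor, and NMSBoxes prunes overlapping boxes.

#include <opencv2/opencv.hpp>

int main()
{
    //assumed files; see the download links in section 4
    cv::dnn::Net net = cv::dnn::readNet("yolov4.weights", "yolov4.cfg");
    cv::Mat img = cv::imread("dog416.png");
    //scale=1/255, size=608x608, mean=0, swapRB=true, crop=false: YOLOv4 preprocessing
    cv::Mat blob = cv::dnn::blobFromImage(img, 1 / 255.0, cv::Size(608, 608),
                                          cv::Scalar(), true, false);
    net.setInput(blob);

    //NMSBoxes keeps the indices of high-score, low-overlap boxes
    std::vector<cv::Rect> boxes = { {10, 10, 50, 50}, {12, 12, 50, 50} };
    std::vector<float> scores = { 0.9f, 0.6f };
    std::vector<int> kept;
    cv::dnn::NMSBoxes(boxes, scores, 0.4f, 0.45f, kept); //kept == {0}
    return 0;
}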

    (2) Application models: Net, Model and its subclasses, and supporting types (a forward-pass sketch follows the listing):

1.Net
    (1) Parameter setters
        setPreferableTarget(tarId)=void
        setPreferableBackend(bkdId)=void
        setParam(layerId, nParam, blob)=void
        setInput(blob, name, scale, mean)=void
        setInputsNames(names)=void
        setInputShape(name, shape)=void
        setHalideScheduler(scheduler)=void
        readFromModelOptimizer(…)=net: static and same as the above.
    (2) Parameter getters
        getPerfProfile(times)=int
        getFLOPS(shape/shapes)=int
        getFLOPS(id, shape/shapes)=int
        getMemoryConsumption(shape/shapes, bytesWeight, bytesBlob)=void
        getMemoryConsumption(shape/shapes, bytesWeights, bytesBlobs)=void
        getMemoryConsumption(layerId, shape/shapes, bytesWeight, bytesBlob)=void

        getParam(layerId, nParam)=blob
        getLayerInputs(layerId)=ptrs
        getLayerShapes(shape/shapes, id, inShapes, outShapes)=void
        getLayersShapes(shape/shapes, id, inShapes, outShapes)=void

        getLayer(layerId)=ptr
        getLayerId(name)=int
        getLayerNames()=strs
        getLayersCount(type)=int
        getLayerTypes(types)=void
        getUnconnectedOutLayers()=ints
        getUnconnectedOutLayersNames()=strs
    (3) Actions
        empty()=bl
        enableFusion(bl)=void
        connect(outName, inName)=void
        connect(outId, outNum, inId, inNum)=void
        forward([name])=blob
        forward([name])=asyncArr
        forward(blobs, name/names)=void
        addLayer(name, type, layerParams)=int
        addLayerToPrev(name, type, layerParams)=int
        dump()=str
        dumpToFile(path)=void
2.Model&ModelExt
    (1) Own members
        model(modelPath, configPath)
        predict(frame, outs)=void
        setInputScale(scale)=model
        setInputSize(size)=model
        setInputMean(mean)=model
        setInputSwapRB(bl)=model
        setInputCrop(bl)=model
        setInputParams(scale, size, mean, swapRB, crop)=void
    (2) ClassificationModel members: classify(frame[, id, conf])=pair/void
    (3) DetectionModel members: detect(frame, ids, confs, bboxes, confThr, nmsThr)=void
    (4) KeypointsModel members: estimate(frame, thr)=pts
    (5) SegmentationModel members: segment(frame, mask)=void
3.Dict&LayerParams&DictValue&BackendNode&BackendWrapper
    (1) Dict members
        begin()/end()=map<str,dictVal>
        erase(key)=void
        set(key, val)=val
        get(key[, defVal])=dictVal/val
        has(key)=bl
        ptr(key)=dictVal
        out<<self
    (2) LayerParams members
        string type
        string name
        vector<Mat> blobs
    (3) DictValue members
        dictVal(bl/int/uint/int64/char*/string/double)
        size()=int
        get(idx=-1)=val
        getIntValue(idx=-1)=int
        getRealValue(idx=-1)=dbl
        getStringValue(idx=-1)=str
        isInt()/isReal()/isString()=bl
        out<<self
        arrayInt(bgn, size)/arrayReal(bgn, size)/arrayString(bgn, size)=dictVal: static
    (4) BackendNode members: backendId
    (5) BackendWrapper members: backendId; targetId; copyToHost()=void; setHostDirty()=void
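    As a minimal sketch of driving Net directly (same hypothetical YOLOv4 files as above): set the input blob, forward to every unconnected output layer, and read the inference time back from getPerfProfile. YOLO networks have several output heads, so the vector<Mat> overload of forward is the one to use.

#include <opencv2/opencv.hpp>
#include <cstdio>

int main()
{
    cv::dnn::Net net = cv::dnn::readNet("yolov4.weights", "yolov4.cfg"); //hypothetical files
    cv::Mat img = cv::imread("dog416.png");
    cv::Mat blob = cv::dnn::blobFromImage(img, 1 / 255.0, cv::Size(608, 608),
                                          cv::Scalar(), true, false);
    net.setInput(blob);
    std::vector<cv::Mat> outs;
    net.forward(outs, net.getUnconnectedOutLayersNames()); //one Mat per YOLO head
    //each row of outs[i] holds [cx, cy, w, h, objectness, class scores...]
    std::vector<double> ticks;
    double ms = net.getPerfProfile(ticks) / (cv::getTickFrequency() / 1000.0);
    std::printf("inference: %.1f ms over %zu layers\n", ms, ticks.size());
    return 0;
}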

    (3) Network layers: see the official OpenCV documentation.

3. Usage Essentials

    (1) Net: provides the core functionality, namely inference and network management. Its core function is Net::forward, which implements the whole inference pass; in practice it is rarely forward itself that falls short but rather the network model, in which case the network-management functions are used to modify the model.

    (2) Model: concretizes Net. It turns inference into prediction and adds input preprocessing. Its core function is Model::predict, which wraps prediction pre-processing, Net::forward, and prediction post-processing; when Model::predict cannot meet a requirement, override it.

    (3) ClassificationModel: concretizes Model, turning prediction into classification. Its core function is ClassificationModel::classify, which wraps classification pre-processing, Model::predict, and classification post-processing; override it when it falls short (see the sketch after this list).

    (4) DetectionModel: concretizes Model, turning prediction into detection. Its core function is DetectionModel::detect, which wraps detection pre-processing, Model::predict, and detection post-processing; override it when it falls short.

    (5) KeypointsModel: analogous.

    (6) SegmentationModel: analogous.
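    For the Model family the wrapper collapses the whole pipeline into two calls, as in this minimal sketch (the classifier file and preprocessing values are hypothetical): setInputParams replaces the manual blobFromImage call, and classify wraps predict plus an argmax over the output scores.

#include <opencv2/opencv.hpp>
#include <cstdio>

int main()
{
    cv::dnn::ClassificationModel model("classifier.onnx"); //hypothetical classifier
    model.setInputParams(1 / 255.0, cv::Size(224, 224), cv::Scalar(), true, false);
    cv::Mat frame = cv::imread("dog416.png");
    int classId = -1; float conf = 0.f;
    model.classify(frame, classId, conf); //predict + argmax
    std::printf("class %d with confidence %.3f\n", classId, conf);
    return 0;
}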

4. Usage Example

    (1) A detection example is provided, encapsulated in the class AboutDNN, with the following features:

        (1) Auto-generated configuration file: on first run, or whenever the configuration file is missing, workDir/data/dnnInput.yml is generated automatically; the defaults target the YOLOv4 model.

        (2) Rich inference configuration: the backend optimization library, hardware target, and input preprocessing parameters can all be set in the configuration file.

        (3) Image and video input: the input path in the configuration file may be a directory of images or a video file.

    (2) To test the YOLOv4 model, download the following files:

        (1) MS COCO configuration file: https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov4.cfg (latest link; the network is confirmed unmodified)

        (2) MS COCO trained weights: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights

        (3) MS COCO class names (shared by YOLOv3/YOLOv4): https://github.com/opencv/opencv/blob/master/samples/data/dnn/object_detection_classes_yolov3.txt

        (4) Test image: https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/dog416.png

        (5) Test video: https://github.com/opencv/opencv/blob/master/samples/data/vtest.avi

 

    The full code follows. It depends on C++14, OpenCV 4.x, and spdlog, and is encapsulated in the class AboutDNN.

#include <opencv2/opencv.hpp>
#include <opencv2/core/utils/filesystem.hpp>
#include <spdlog/spdlog.h>
#include <fstream>
#include <cstdio>
#include <cstring>
using namespace std;
using namespace cv;

class AboutDNN
{
public:
    struct DNNInput
    {
        //For YoloV3: scale&size=(0.0039216,416,416)   mean=(0,0,0)   swapRB&doCrop=(1,0)
        //For YoloV4: scale&size=(0.0039216,608,608)   mean=(0,0,0)   swapRB&doCrop=(1,0)
        //For person-vehicle-bike-detection-crossroad-1016: scale&size=(1.0,512,512)   mean=(0,0,0)   swapRB&doCrop=(0,0)
        //For vehicle-license-plate-detection-barrier-0106: scale&size=(1.0,300,300)   mean=(0,0,0)   swapRB&doCrop=(0,0)
        double inScale = 0.0039216;
        Size inSize = Size(608, 608);
        Scalar inMean = Scalar(0, 0, 0, 0);
        bool swapRB = true;
        bool doCrop = false;
        int dnnBackend = dnn::DNN_BACKEND_INFERENCE_ENGINE;
        int dnnTarget = dnn::DNN_TARGET_CPU;
        char inPath[512] = "./data/imas";
        char cfgPath[512] = "./data/yolov4.cfg";
        char modelPath[512] = "./data/yolov4.weights";
        char classPath[512] = "./data/object_detection_classes_yolov3.txt";
        bool write(FileStorage& fs)
        {
            fs.writeComment("1.TensorParams");
            fs << "inScale" << inScale;
            fs << "inSize" << inSize;
            fs << "inMean" << inMean;
            fs << "swapRB" << swapRB;
            fs << "doCrop" << doCrop;
            fs << "dnnBackend" << dnnBackend;
            fs << "dnnTarget" << dnnTarget;
            fs.writeComment("2.ModelParams");
            fs << "inPath" << inPath; fs.writeComment("directory or video", true);
            fs << "cfgPath" << cfgPath;
            fs << "modelPath" << modelPath;
            fs << "classPath" << classPath;
            //the enum values are written out for reference when editing the file
            fs.writeComment("3.DnnBackend");
            fs << "DNN_BACKEND_DEFAULT" << (int)dnn::DNN_BACKEND_DEFAULT;
            fs << "DNN_BACKEND_HALIDE" << (int)dnn::DNN_BACKEND_HALIDE;
            fs << "DNN_BACKEND_INFERENCE_ENGINE" << (int)dnn::DNN_BACKEND_INFERENCE_ENGINE;
            fs << "DNN_BACKEND_OPENCV" << (int)dnn::DNN_BACKEND_OPENCV;
            fs << "DNN_BACKEND_VKCOM" << (int)dnn::DNN_BACKEND_VKCOM;
            fs << "DNN_BACKEND_CUDA" << (int)dnn::DNN_BACKEND_CUDA;
            fs.writeComment("4.DnnTarget");
            fs << "DNN_TARGET_CPU" << (int)dnn::DNN_TARGET_CPU;
            fs << "DNN_TARGET_OPENCL" << (int)dnn::DNN_TARGET_OPENCL;
            fs << "DNN_TARGET_OPENCL_FP16" << (int)dnn::DNN_TARGET_OPENCL_FP16;
            fs << "DNN_TARGET_MYRIAD" << (int)dnn::DNN_TARGET_MYRIAD;
            fs << "DNN_TARGET_VULKAN" << (int)dnn::DNN_TARGET_VULKAN;
            fs << "DNN_TARGET_FPGA" << (int)dnn::DNN_TARGET_FPGA;
            fs << "DNN_TARGET_CUDA" << (int)dnn::DNN_TARGET_CUDA;
            fs << "DNN_TARGET_CUDA_FP16" << (int)dnn::DNN_TARGET_CUDA_FP16;
            return true;
        }
        bool read(FileStorage& fs)
        {
            fs["inScale"] >> inScale;
            fs["inSize"] >> inSize;
            fs["inMean"] >> inMean;
            fs["swapRB"] >> swapRB;
            fs["doCrop"] >> doCrop;
            fs["dnnBackend"] >> dnnBackend;
            fs["dnnTarget"] >> dnnTarget;
            strcpy(inPath, fs["inPath"].string().c_str());
            strcpy(cfgPath, fs["cfgPath"].string().c_str());
            strcpy(modelPath, fs["modelPath"].string().c_str());
            strcpy(classPath, fs["classPath"].string().c_str());
            return true;
        }
        string print(string savePath = "")
        {
            string str;
            str += fmt::format("inScale: {}\n", inScale);
            str += fmt::format("inSize: [{},{}]\n", inSize.width, inSize.height);
            str += fmt::format("inMean: [{},{},{},{}]\n", inMean[0], inMean[1], inMean[2], inMean[3]);
            str += fmt::format("swapRB: {}\n", swapRB);
            str += fmt::format("doCrop: {}\n", doCrop);
            str += fmt::format("dnnBackend: {}\n", dnnBackend);
            str += fmt::format("dnnTarget: {}\n", dnnTarget);
            str += fmt::format("inPath: {}\n", inPath);
            str += fmt::format("cfgPath: {}\n", cfgPath);
            str += fmt::format("modelPath: {}\n", modelPath);
            str += fmt::format("classPath: {}\n", classPath);
            //"%s" keeps str from being parsed as a printf format string
            if (savePath.empty() == false) { FILE* out = fopen(savePath.c_str(), "w"); fprintf(out, "%s", str.c_str()); fclose(out); }
            return str;
        }
        static DNNInput GetOne(string fsPath, bool doPrint = true)
        {
            DNNInput dnnIn;
            if (utils::fs::exists(fsPath) == false)
            {
                utils::fs::createDirectories(utils::fs::getParent(fsPath));
                FileStorage fs(fsPath, FileStorage::WRITE);
                dnnIn.write(fs);
                fs.release();
                //memset(&dnnIn, 0, sizeof(dnnIn));
                spdlog::critical("Not found: {}", fsPath);
                spdlog::info("Created file: {}", fsPath);
                spdlog::info("Modify default values and relaunch");
            }
            else
            {
                FileStorage fs(fsPath, FileStorage::READ);
                dnnIn.read(fs);
                fs.release();
            }
            //pass the text as an argument so it is not re-parsed as a format string
            if (doPrint) spdlog::info("{}", dnnIn.print());
            return dnnIn;
        }
    };

public:
    static void testDetection(int argc, char** argv)
    {
        //ILSVRC: ImageNet Large-Scale Visual Recognition Challenge from 2012 to 2017
        //ClassificationSynset(1000): 2010!=2011!=2012==2013==2014==2015==2016==2017
        //ImageDetectionSynset(200): 2013==2014==2015==2016==2017
        //VideoDetectionSynset(30): 2015==2016==2017
        //OIC: Open Images Challenge from 2018 on

        //1.Load the configuration and the class names
        string dnnInputPath = "./data/dnnInput.yml";
        DNNInput dnnIn = DNNInput::GetOne(dnnInputPath, true);
        if (dnnIn.inSize.height == 0) return;
        vector<string> classLabs;
        ifstream classFile(dnnIn.classPath);
        for (string line; std::getline(classFile, line);) classLabs.push_back(line);

        //2.Build the detection model
        dnn::DetectionModel model(dnnIn.modelPath, dnnIn.cfgPath);
        model.setInputScale(dnnIn.inScale);
        model.setInputSize(dnnIn.inSize);
        model.setInputMean(dnnIn.inMean);
        model.setInputSwapRB(dnnIn.swapRB);
        model.setInputCrop(dnnIn.doCrop);
        model.setPreferableBackend(dnnIn.dnnBackend);
        model.setPreferableTarget(dnnIn.dnnTarget);

        //3.Detect and display
        string winname = __FUNCTION__; cv::namedWindow(winname, 0);
        int confThr = 40; cv::createTrackbar("confThr", winname, &confThr, 256);
        int nmsThr = 45; cv::createTrackbar("nmsThr", winname, &nmsThr, 256);
        auto LamDetectFrame = [&confThr, &nmsThr, &classLabs, &model, winname](Mat frame)->int
        {
            vector<int> ids;
            vector<float> confs;
            vector<Rect> boxes;
            model.detect(frame, ids, confs, boxes, confThr * 0.01f, nmsThr * 0.01f);

            for (size_t k = 0; k < ids.size(); ++k)
            {
                cv::rectangle(frame, boxes[k].tl(), boxes[k].br(), Scalar(255, 255, 255), 4);
                //guard against a missing or short class file
                string lab = ids[k] < (int)classLabs.size() ? classLabs[ids[k]] : string("unknown");
                string str = fmt::format("{}({}): {:.3f}", lab, ids[k], confs[k]);
                cv::putText(frame, str, boxes[k].tl() + Point(5, 5), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 1);
            }
            vector<double> ticks;
            cv::putText(frame, fmt::format("inference timecost: {:.0f}", model.getPerfProfile(ticks) / (getTickFrequency() / 1000)), Point(0, 20), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 1);
            cv::imshow(winname, frame);
            return cv::waitKey(30);
        };
        if (utils::fs::isDirectory(dnnIn.inPath))
        {
            vector<string> filePaths;
            utils::fs::glob(dnnIn.inPath, "*", filePaths);
            if (filePaths.empty()) { spdlog::critical("No files in: {}", dnnIn.inPath); return; }
            if (filePaths.size() > 1) stable_sort(filePaths.begin(), filePaths.end(), less<string>());
            if (filePaths.size() > 1) stable_sort(filePaths.begin(), filePaths.end(), [](string str1, string str2)->bool {return str1.length() < str2.length(); });
            long long n = (long long)filePaths.size();
            for (long long fileId = 0; ;) //press 1/2 to step backward/forward, q to quit
            {
                Mat frame = imread(filePaths[((fileId % n) + n) % n]); //wrap in both directions
                int c = LamDetectFrame(frame);
                if (c == 'q') break;
                else if (c == '1') --fileId;
                else if (c == '2') ++fileId;
            }
        }
        else
        {
            Mat frame;
            cv::VideoCapture cap(dnnIn.inPath);
            while (cap.read(frame)) if (LamDetectFrame(frame) == 'q') break;
        }
        cv::destroyWindow(winname);
    }
};

int main(int argc, char** argv) { AboutDNN::testDetection(argc, argv); return 0; }
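Run notes for the example above: the confThr and nmsThr trackbars set the detection-confidence and NMS thresholds in percent; when inPath is a directory, pressing '1'/'2' steps backward/forward through the images and 'q' quits; when inPath is a video, frames play until the stream ends or 'q' is pressed.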
Original article: https://www.cnblogs.com/dzyBK/p/14556630.html