CoCalc -- run-all.cpp

GitHub Repository: hackassin/learnopencv
Path: blob/master/FaceDetectionComparison/run-all.cpp
3118 views
1
#include <iostream>
2
#include <string>
3
#include <vector>
4
#include <stdlib.h>
5

6
#include <opencv2/core.hpp>
7
#include <opencv2/core/version.hpp>
8
#include <opencv2/imgproc.hpp>
9
#include <opencv2/highgui.hpp>
10
#include <opencv2/objdetect.hpp>
11
#include <opencv2/videoio.hpp>
12

13
#if(CV_MAJOR_VERSION >= 3)
14
# include <opencv2/dnn.hpp>
15

16
using namespace cv::dnn;
17
#endif
18

19
#if(CV_MAJOR_VERSION < 3)
20
# include <dlib/opencv.h>
21
# include <dlib/image_processing.h>
22
# include <dlib/dnn.h>
23
# include <dlib/data_io.h>
24
# include <dlib/image_processing/frontal_face_detector.h>
25

26
using namespace dlib;
27
#endif
28

29
#include <boost/algorithm/string.hpp>
30

31
using namespace cv;
32
using namespace std;
33

34
/** Global variables */
35
String faceCascadePath;
36
CascadeClassifier faceCascade;
37

38
/**
 * Detect faces with an OpenCV Haar cascade and outline them on the frame.
 *
 * The frame is resized to inWidth x inHeight, converted to grayscale and fed
 * to detectMultiScale. Every detection is scaled back to the size of the
 * original frame and drawn on it as a green rectangle.
 *
 * @param faceCascade      Cascade classifier used for detection (taken by value).
 * @param frameOpenCVHaar  Frame to search (converted with COLOR_BGR2GRAY);
 *                         rectangles are drawn directly onto it.
 * @param inHeight         Height of the resized image used for detection.
 * @param inWidth          Width of the resized image; 0 derives it from
 *                         inHeight and the frame's aspect ratio.
 */
void detectFaceOpenCVHaar(CascadeClassifier faceCascade, Mat &frameOpenCVHaar, int inHeight=300, int inWidth=0)
{
    const int frameHeight = frameOpenCVHaar.rows;
    const int frameWidth = frameOpenCVHaar.cols;

    // No explicit width requested: keep the aspect ratio of the input frame.
    if (inWidth == 0)
    {
        inWidth = static_cast<int>((frameWidth / static_cast<float>(frameHeight)) * inHeight);
    }

    // Factors that map coordinates in the resized image back to the frame.
    const float scaleHeight = frameHeight / static_cast<float>(inHeight);
    const float scaleWidth = frameWidth / static_cast<float>(inWidth);

    Mat frameOpenCVHaarSmall;
    resize(frameOpenCVHaar, frameOpenCVHaarSmall, Size(inWidth, inHeight));

    Mat frameGray;
    cvtColor(frameOpenCVHaarSmall, frameGray, COLOR_BGR2GRAY);

    std::vector<Rect> faces;
    faceCascade.detectMultiScale(frameGray, faces);

    // Line thickness grows with the frame height.
    const int thickness = static_cast<int>(frameHeight/150.0);

    for (const Rect &face : faces)
    {
        const int x1 = static_cast<int>(face.x * scaleWidth);
        const int y1 = static_cast<int>(face.y * scaleHeight);
        const int x2 = static_cast<int>((face.x + face.width) * scaleWidth);
        const int y2 = static_cast<int>((face.y + face.height) * scaleHeight);
        cv::rectangle(frameOpenCVHaar, Point(x1, y1), Point(x2, y2), Scalar(0,255,0), thickness, 4);
    }
}
64

65
#if(CV_MAJOR_VERSION >= 3)
66
const size_t inWidth = 300;
67
const size_t inHeight = 300;
68
const double inScaleFactor = 1.0;
69
const float confidenceThreshold = 0.7;
70
const cv::Scalar meanVal(104.0, 177.0, 123.0);
71

72

73
const std::string caffeConfigFile = "models/deploy.prototxt";
74
const std::string caffeWeightFile = "models/res10_300x300_ssd_iter_140000_fp16.caffemodel";
75

76
const std::string tensorflowConfigFile = "models/opencv_face_detector.pbtxt";
77
const std::string tensorflowWeightFile = "models/opencv_face_detector_uint8.pb";
78

79
/**
 * Detect faces with the OpenCV DNN module and outline them on the frame.
 *
 * The frame is turned into an input blob, pushed through the network, and the
 * "detection_out" output is read row by row. Rows whose confidence (column 2)
 * exceeds confidenceThreshold are drawn as green rectangles; columns 3..6 are
 * multiplied by the frame width/height to obtain pixel coordinates.
 *
 * @param net             Network to run (taken by value).
 * @param frameOpenCVDNN  Frame to search; rectangles are drawn directly onto it.
 * @param framework       "caffe" builds the blob with the fifth blobFromImage
 *                        argument false; any other value passes true.
 */
void detectFaceOpenCVDNN(Net net, Mat &frameOpenCVDNN, string framework="caffe")
{
    const int frameHeight = frameOpenCVDNN.rows;
    const int frameWidth = frameOpenCVDNN.cols;

    // The two frameworks differ only in this flag (swapRB in the OpenCV docs).
    const bool swapChannels = !(framework == "caffe");
    cv::Mat inputBlob = cv::dnn::blobFromImage(frameOpenCVDNN, inScaleFactor, cv::Size(inWidth, inHeight), meanVal, swapChannels, false);

    net.setInput(inputBlob, "data");
    cv::Mat detection = net.forward("detection_out");

    // View the last two dimensions of the output as a 2-D float matrix,
    // one row per candidate detection.
    cv::Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

    const int thickness = static_cast<int>(frameHeight/150.0);

    for (int row = 0; row < detectionMat.rows; ++row)
    {
        const float confidence = detectionMat.at<float>(row, 2);
        if (!(confidence > confidenceThreshold))
            continue;

        const int x1 = static_cast<int>(detectionMat.at<float>(row, 3) * frameWidth);
        const int y1 = static_cast<int>(detectionMat.at<float>(row, 4) * frameHeight);
        const int x2 = static_cast<int>(detectionMat.at<float>(row, 5) * frameWidth);
        const int y2 = static_cast<int>(detectionMat.at<float>(row, 6) * frameHeight);

        cv::rectangle(frameOpenCVDNN, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), thickness, 4);
    }
}
109
#endif
110

111
#if(CV_MAJOR_VERSION < 3)
112
/**
 * Detect faces with dlib's HoG frontal face detector and outline them.
 *
 * The frame is resized to inWidth x inHeight, wrapped as a dlib image and
 * passed to the detector. Each returned rectangle is scaled back to the size
 * of the original frame and drawn on it in green.
 *
 * @param hogFaceDetector  dlib frontal face detector (taken by value).
 * @param frameDlibHog     Frame to search (wrapped as bgr_pixel data);
 *                         rectangles are drawn directly onto it.
 * @param inHeight         Height of the resized image used for detection.
 * @param inWidth          Width of the resized image; 0 derives it from
 *                         inHeight and the frame's aspect ratio.
 */
void detectFaceDlibHog(frontal_face_detector hogFaceDetector, Mat &frameDlibHog, int inHeight=300, int inWidth=0)
{
    const int frameHeight = frameDlibHog.rows;
    const int frameWidth = frameDlibHog.cols;

    // No explicit width requested: keep the aspect ratio of the input frame.
    if (inWidth == 0)
    {
        inWidth = static_cast<int>((frameWidth / static_cast<float>(frameHeight)) * inHeight);
    }

    // Factors that map coordinates in the resized image back to the frame.
    const float scaleHeight = frameHeight / static_cast<float>(inHeight);
    const float scaleWidth = frameWidth / static_cast<float>(inWidth);

    Mat frameDlibHogSmall;
    resize(frameDlibHog, frameDlibHogSmall, Size(inWidth, inHeight));

    // Wrap the OpenCV image so dlib can read it.
    cv_image<bgr_pixel> dlibIm(frameDlibHogSmall);

    // Run the detector on the resized image.
    std::vector<dlib::rectangle> faceRects = hogFaceDetector(dlibIm);

    const int thickness = static_cast<int>(frameHeight/150.0);

    for (const dlib::rectangle &faceRect : faceRects)
    {
        const int x1 = static_cast<int>(faceRect.left() * scaleWidth);
        const int y1 = static_cast<int>(faceRect.top() * scaleHeight);
        const int x2 = static_cast<int>(faceRect.right() * scaleWidth);
        const int y2 = static_cast<int>(faceRect.bottom() * scaleHeight);
        cv::rectangle(frameDlibHog, Point(x1, y1), Point(x2, y2), Scalar(0,255,0), thickness, 4);
    }
}
141

142
// Network Definition
143
/////////////////////////////////////////////////////////////////////////////////////////////////////
144
template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
145
template <long num_filters, typename SUBNET> using con5  = con<num_filters,5,5,1,1,SUBNET>;
146

147
template <typename SUBNET> using downsampler  = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>;
148
template <typename SUBNET> using rcon5  = relu<affine<con5<45,SUBNET>>>;
149

150
using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
151
/////////////////////////////////////////////////////////////////////////////////////////////////////
152

153
/**
 * Detect faces with dlib's MMOD network and outline them on the frame.
 *
 * The frame is resized to inWidth x inHeight, copied into a dlib rgb_pixel
 * matrix and run through the network. Each returned rectangle is scaled back
 * to the size of the original frame and drawn on it in green.
 *
 * NOTE(review): mmodFaceDetector is taken by value, so the network object is
 * copied on every call; consider passing it by reference if that copy turns
 * out to be costly.
 *
 * @param mmodFaceDetector  dlib network of type net_type (taken by value).
 * @param frameDlibMmod     Frame to search (wrapped as bgr_pixel data);
 *                          rectangles are drawn directly onto it.
 * @param inHeight          Height of the resized image used for detection.
 * @param inWidth           Width of the resized image; 0 derives it from
 *                          inHeight and the frame's aspect ratio.
 */
void detectFaceDlibMMOD(net_type mmodFaceDetector, Mat &frameDlibMmod, int inHeight=300, int inWidth=0)
{
    const int frameHeight = frameDlibMmod.rows;
    const int frameWidth = frameDlibMmod.cols;

    // No explicit width requested: keep the aspect ratio of the input frame.
    if (inWidth == 0)
    {
        inWidth = static_cast<int>((frameWidth / static_cast<float>(frameHeight)) * inHeight);
    }

    // Factors that map coordinates in the resized image back to the frame.
    const float scaleHeight = frameHeight / static_cast<float>(inHeight);
    const float scaleWidth = frameWidth / static_cast<float>(inWidth);

    Mat frameDlibMmodSmall;
    resize(frameDlibMmod, frameDlibMmodSmall, Size(inWidth, inHeight));

    // Wrap the OpenCV image for dlib, then copy it into an rgb_pixel matrix.
    cv_image<bgr_pixel> dlibIm(frameDlibMmodSmall);
    matrix<rgb_pixel> dlibMatrix;
    assign_image(dlibMatrix, dlibIm);

    // Run the network on the resized image.
    std::vector<dlib::mmod_rect> faceRects = mmodFaceDetector(dlibMatrix);

    const int thickness = static_cast<int>(frameHeight/150.0);

    for (const dlib::mmod_rect &faceRect : faceRects)
    {
        const int x1 = static_cast<int>(faceRect.rect.left() * scaleWidth);
        const int y1 = static_cast<int>(faceRect.rect.top() * scaleHeight);
        const int x2 = static_cast<int>(faceRect.rect.right() * scaleWidth);
        const int y2 = static_cast<int>(faceRect.rect.bottom() * scaleHeight);
        cv::rectangle(frameDlibMmod, Point(x1, y1), Point(x2, y2), Scalar(0,255,0), thickness, 4);
    }
}
184
#endif
185

186
int main( int argc, const char** argv )
187
{
188
    faceCascadePath = "models/haarcascade_frontalface_default.xml";
189

190
    if(!faceCascade.load(faceCascadePath))
191
    {
192
        printf("--(!)Error loading face cascade\n");
193
        return -1;
194
    }
195

196
#if(CV_MAJOR_VERSION < 3)
197
    frontal_face_detector hogFaceDetector = get_frontal_face_detector();
198
    String mmodModelPath = "models/mmod_human_face_detector.dat";
199
    net_type mmodFaceDetector;
200
    deserialize(mmodModelPath) >> mmodFaceDetector;
201
#endif
202

203
    string videoFileName;
204
    string device;
205
    string framework;
206
    // Take arguments from command line
207
    if (argc == 4)
208
    {
209
        videoFileName = argv[1];
210
        device = argv[2];
211
        framework = argv[3];
212
    }
213
    else if (argc == 3)
214
    {
215
        videoFileName = argv[1];
216
        device = argv[2];
217
        framework = "caffe";
218
    }
219
    else if (argc == 2)
220
    {
221
        videoFileName = argv[1];
222
        device = "cpu";
223
        framework = "caffe";
224
    }
225
    else
226
    {
227
        videoFileName = "";
228
        device = "gpu";
229
        framework = "caffe";
230
    }
231

232
    boost::to_upper(device);
233
    cout << "OpenCV DNN Configuration:" << endl;
234
    cout << "Device - "<< device << endl;
235
    if (framework == "caffe")
236
        cout << "Framework - Caffe" << endl;
237
    else
238
        cout << "Framework - TensorFlow" << endl;
239
    if (videoFileName == "")
240
        cout << "No video found, using camera stream" << endl;
241
    else
242
        cout << "Video file - " << videoFileName << endl;
243

244
    Net net;
245

246
    if (framework == "caffe")
247
        net = cv::dnn::readNetFromCaffe(caffeConfigFile, caffeWeightFile);
248
    else
249
        net = cv::dnn::readNetFromTensorflow(tensorflowWeightFile, tensorflowConfigFile);
250

251
#if (CV_MAJOR_VERSION >= 4)
252
    if (device == "CPU")
253
    {
254
        net.setPreferableBackend(DNN_TARGET_CPU);
255
        cout << "Device - "<< device << endl;
256
    }
257
    else
258
    {
259
        net.setPreferableBackend(DNN_BACKEND_CUDA);
260
        net.setPreferableTarget(DNN_TARGET_CUDA);
261
        cout << "Device - "<< device << endl;
262
    }
263
#elif(CV_MAJOR_VERSION == 3)
264
    // OpenCV 3.4.x does not support GPU backend
265
    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
266
    device = "cpu";
267
    cout << "Device - "<< device << endl;
268
#endif
269

270
    cv::VideoCapture source;
271
    if (videoFileName != "")
272
        source.open(videoFileName);
273
    else
274
        source.open(0, CAP_V4L);
275

276
    Mat frame;
277

278
    int frame_count = 0;
279
    double tt_opencvHaar = 0;
280
    double tt_opencvDNN = 0;
281
    double tt_dlibHog = 0;
282
    double tt_dlibMmod = 0;
283

284
    namedWindow("Face Detection Comparison", WINDOW_NORMAL);
285
    double t = 0;
286
    while (true)
287
    {
288
        source >> frame;
289
        if(frame.empty())
290
            break;
291

292
        frame_count++;
293

294
        t = cv::getTickCount();
295
        Mat frameOpenCVHaar = frame.clone();
296
        detectFaceOpenCVHaar ( faceCascade, frameOpenCVHaar );
297
        tt_opencvHaar += ((double)cv::getTickCount() - t)/cv::getTickFrequency();
298
        double fpsOpencvHaar = frame_count/tt_opencvHaar;
299
        putText(frameOpenCVHaar, format("OpenCV HAAR; FPS = %.2f",fpsOpencvHaar), Point(10, 50), FONT_HERSHEY_SIMPLEX, 1.3, Scalar(0, 0, 255), 4);
300

301
        Mat frameOpenCVDNN = frame.clone();
302
#if(CV_MAJOR_VERSION >= 3)
303
        t = cv::getTickCount();
304
        detectFaceOpenCVDNN (net, frameOpenCVDNN, framework);
305
        tt_opencvDNN += ((double)cv::getTickCount() - t)/cv::getTickFrequency();
306
        double fpsOpencvDNN = frame_count/tt_opencvDNN;
307
        putText(frameOpenCVDNN, format("OpenCV DNN %s FPS = %.2f", device.c_str(), fpsOpencvDNN), Point(10, 50), FONT_HERSHEY_SIMPLEX, 1.3, Scalar(0, 0, 255), 4);
308
#else
309
        putText(frameOpenCVDNN, "OpenCV DNN NOT SUPPORTED", Point(10, 50), FONT_HERSHEY_SIMPLEX, 1.3, Scalar(0, 0, 255), 4);
310
#endif
311

312
#if(CV_MAJOR_VERSION < 3)
313
        t = cv::getTickCount();
314
        Mat frameDlibHog = frame.clone();
315
        detectFaceDlibHog ( hogFaceDetector, frameDlibHog );
316
        tt_dlibHog += ((double)cv::getTickCount() - t)/cv::getTickFrequency();
317
        double fpsDlibHog = frame_count/tt_dlibHog;
318
        putText(frameDlibHog, format("DLIB HoG; FPS = %.2f",fpsDlibHog), Point(10, 50), FONT_HERSHEY_SIMPLEX, 1.3, Scalar(0, 0, 255), 4);
319

320
        t = cv::getTickCount();
321
        Mat frameDlibMmod = frame.clone();
322
        detectFaceDlibMMOD ( mmodFaceDetector, frameDlibMmod );
323
        tt_dlibMmod += ((double)cv::getTickCount() - t)/cv::getTickFrequency();
324
        double fpsDlibMmod = frame_count/tt_dlibMmod;
325
        putText(frameDlibMmod, format("DLIB MMOD; FPS = %.2f",fpsDlibMmod), Point(10, 50), FONT_HERSHEY_SIMPLEX, 1.3, Scalar(0, 0, 255), 4);
326
#endif
327

328
        Mat top, bottom, combined;
329
        hconcat(frameOpenCVHaar, frameOpenCVDNN, top);
330
#if(CV_MAJOR_VERSION < 3)
331
        hconcat(frameDlibHog, frameDlibMmod, bottom);
332
        vconcat(top, bottom, combined);
333
#else
334
        combined = top;
335
#endif
336

337
        cv::resize(combined, combined, Size(), .5, .5);
338
        imshow("Face Detection Comparison", combined);
339

340
        int k = waitKey(5);
341
        if(k == 27)
342
        {
343
          destroyAllWindows();
344
          break;
345
        }
346

347
        if(frame_count == 1)
348
        {
349
            tt_opencvHaar = 0;
350
            tt_opencvDNN = 0;
351
            tt_dlibHog = 0;
352
            tt_dlibMmod = 0;
353
        }
354
    }
355
}
356

357
Product

Resources

Company