// Path: blob/master/FaceDetectionComparison/run-all.cpp
// (web viewer page header: 3118 views)
#include <iostream>1#include <string>2#include <vector>3#include <stdlib.h>45#include <opencv2/core.hpp>6#include <opencv2/core/version.hpp>7#include <opencv2/imgproc.hpp>8#include <opencv2/highgui.hpp>9#include <opencv2/objdetect.hpp>10#include <opencv2/videoio.hpp>1112#if(CV_MAJOR_VERSION >= 3)13# include <opencv2/dnn.hpp>1415using namespace cv::dnn;16#endif1718#if(CV_MAJOR_VERSION < 3)19# include <dlib/opencv.h>20# include <dlib/image_processing.h>21# include <dlib/dnn.h>22# include <dlib/data_io.h>23# include <dlib/image_processing/frontal_face_detector.h>2425using namespace dlib;26#endif2728#include <boost/algorithm/string.hpp>2930using namespace cv;31using namespace std;3233/** Global variables */34String faceCascadePath;35CascadeClassifier faceCascade;3637void detectFaceOpenCVHaar(CascadeClassifier faceCascade, Mat &frameOpenCVHaar, int inHeight=300, int inWidth=0)38{39int frameHeight = frameOpenCVHaar.rows;40int frameWidth = frameOpenCVHaar.cols;41if (!inWidth)42inWidth = (int)((frameWidth / (float)frameHeight) * inHeight);4344float scaleHeight = frameHeight / (float)inHeight;45float scaleWidth = frameWidth / (float)inWidth;4647Mat frameOpenCVHaarSmall, frameGray;48resize(frameOpenCVHaar, frameOpenCVHaarSmall, Size(inWidth, inHeight));49cvtColor(frameOpenCVHaarSmall, frameGray, COLOR_BGR2GRAY);5051std::vector<Rect> faces;52faceCascade.detectMultiScale(frameGray, faces);5354for ( size_t i = 0; i < faces.size(); i++ )55{56int x1 = (int)(faces[i].x * scaleWidth);57int y1 = (int)(faces[i].y * scaleHeight);58int x2 = (int)((faces[i].x + faces[i].width) * scaleWidth);59int y2 = (int)((faces[i].y + faces[i].height) * scaleHeight);60cv::rectangle(frameOpenCVHaar, Point(x1, y1), Point(x2, y2), Scalar(0,255,0), (int)(frameHeight/150.0), 4);61}62}6364#if(CV_MAJOR_VERSION >= 3)65const size_t inWidth = 300;66const size_t inHeight = 300;67const double inScaleFactor = 1.0;68const float confidenceThreshold = 0.7;69const cv::Scalar meanVal(104.0, 177.0, 123.0);707172const 
std::string caffeConfigFile = "models/deploy.prototxt";73const std::string caffeWeightFile = "models/res10_300x300_ssd_iter_140000_fp16.caffemodel";7475const std::string tensorflowConfigFile = "models/opencv_face_detector.pbtxt";76const std::string tensorflowWeightFile = "models/opencv_face_detector_uint8.pb";7778void detectFaceOpenCVDNN(Net net, Mat &frameOpenCVDNN, string framework="caffe")79{80int frameHeight = frameOpenCVDNN.rows;81int frameWidth = frameOpenCVDNN.cols;82cv::Mat inputBlob;83if (framework == "caffe")84inputBlob = cv::dnn::blobFromImage(frameOpenCVDNN, inScaleFactor, cv::Size(inWidth, inHeight), meanVal, false, false);85else86inputBlob = cv::dnn::blobFromImage(frameOpenCVDNN, inScaleFactor, cv::Size(inWidth, inHeight), meanVal, true, false);8788net.setInput(inputBlob, "data");89cv::Mat detection = net.forward("detection_out");9091cv::Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());9293for(int i = 0; i < detectionMat.rows; i++)94{95float confidence = detectionMat.at<float>(i, 2);9697if(confidence > confidenceThreshold)98{99int x1 = static_cast<int>(detectionMat.at<float>(i, 3) * frameWidth);100int y1 = static_cast<int>(detectionMat.at<float>(i, 4) * frameHeight);101int x2 = static_cast<int>(detectionMat.at<float>(i, 5) * frameWidth);102int y2 = static_cast<int>(detectionMat.at<float>(i, 6) * frameHeight);103104cv::rectangle(frameOpenCVDNN, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0),(int)(frameHeight/150.0), 4);105}106}107}108#endif109110#if(CV_MAJOR_VERSION < 3)111void detectFaceDlibHog(frontal_face_detector hogFaceDetector, Mat &frameDlibHog, int inHeight=300, int inWidth=0)112{113114int frameHeight = frameDlibHog.rows;115int frameWidth = frameDlibHog.cols;116if (!inWidth)117inWidth = (int)((frameWidth / (float)frameHeight) * inHeight);118119float scaleHeight = frameHeight / (float)inHeight;120float scaleWidth = frameWidth / (float)inWidth;121122Mat frameDlibHogSmall;123resize(frameDlibHog, 
frameDlibHogSmall, Size(inWidth, inHeight));124125// Convert OpenCV image format to Dlib's image format126cv_image<bgr_pixel> dlibIm(frameDlibHogSmall);127128// Detect faces in the image129std::vector<dlib::rectangle> faceRects = hogFaceDetector(dlibIm);130131for ( size_t i = 0; i < faceRects.size(); i++ )132{133int x1 = (int)(faceRects[i].left() * scaleWidth);134int y1 = (int)(faceRects[i].top() * scaleHeight);135int x2 = (int)(faceRects[i].right() * scaleWidth);136int y2 = (int)(faceRects[i].bottom() * scaleHeight);137cv::rectangle(frameDlibHog, Point(x1, y1), Point(x2, y2), Scalar(0,255,0), (int)(frameHeight/150.0), 4);138}139}140141// Network Definition142/////////////////////////////////////////////////////////////////////////////////////////////////////143template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;144template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>;145146template <typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>;147template <typename SUBNET> using rcon5 = relu<affine<con5<45,SUBNET>>>;148149using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;150/////////////////////////////////////////////////////////////////////////////////////////////////////151152void detectFaceDlibMMOD(net_type mmodFaceDetector, Mat &frameDlibMmod, int inHeight=300, int inWidth=0)153{154155int frameHeight = frameDlibMmod.rows;156int frameWidth = frameDlibMmod.cols;157if (!inWidth)158inWidth = (int)((frameWidth / (float)frameHeight) * inHeight);159160float scaleHeight = frameHeight / (float)inHeight;161float scaleWidth = frameWidth / (float)inWidth;162163Mat frameDlibMmodSmall;164resize(frameDlibMmod, frameDlibMmodSmall, Size(inWidth, inHeight));165166// Convert OpenCV image format to Dlib's image format167cv_image<bgr_pixel> 
dlibIm(frameDlibMmodSmall);168matrix<rgb_pixel> dlibMatrix;169assign_image(dlibMatrix, dlibIm);170171// Detect faces in the image172std::vector<dlib::mmod_rect> faceRects = mmodFaceDetector(dlibMatrix);173174for ( size_t i = 0; i < faceRects.size(); i++ )175{176int x1 = (int)(faceRects[i].rect.left() * scaleWidth);177int y1 = (int)(faceRects[i].rect.top() * scaleHeight);178int x2 = (int)(faceRects[i].rect.right() * scaleWidth);179int y2 = (int)(faceRects[i].rect.bottom() * scaleHeight);180cv::rectangle(frameDlibMmod, Point(x1, y1), Point(x2, y2), Scalar(0,255,0), (int)(frameHeight/150.0), 4);181}182}183#endif184185int main( int argc, const char** argv )186{187faceCascadePath = "models/haarcascade_frontalface_default.xml";188189if(!faceCascade.load(faceCascadePath))190{191printf("--(!)Error loading face cascade\n");192return -1;193}194195#if(CV_MAJOR_VERSION < 3)196frontal_face_detector hogFaceDetector = get_frontal_face_detector();197String mmodModelPath = "models/mmod_human_face_detector.dat";198net_type mmodFaceDetector;199deserialize(mmodModelPath) >> mmodFaceDetector;200#endif201202string videoFileName;203string device;204string framework;205// Take arguments from command line206if (argc == 4)207{208videoFileName = argv[1];209device = argv[2];210framework = argv[3];211}212else if (argc == 3)213{214videoFileName = argv[1];215device = argv[2];216framework = "caffe";217}218else if (argc == 2)219{220videoFileName = argv[1];221device = "cpu";222framework = "caffe";223}224else225{226videoFileName = "";227device = "gpu";228framework = "caffe";229}230231boost::to_upper(device);232cout << "OpenCV DNN Configuration:" << endl;233cout << "Device - "<< device << endl;234if (framework == "caffe")235cout << "Framework - Caffe" << endl;236else237cout << "Framework - TensorFlow" << endl;238if (videoFileName == "")239cout << "No video found, using camera stream" << endl;240else241cout << "Video file - " << videoFileName << endl;242243Net net;244245if (framework == "caffe")246net 
= cv::dnn::readNetFromCaffe(caffeConfigFile, caffeWeightFile);247else248net = cv::dnn::readNetFromTensorflow(tensorflowWeightFile, tensorflowConfigFile);249250#if (CV_MAJOR_VERSION >= 4)251if (device == "CPU")252{253net.setPreferableBackend(DNN_TARGET_CPU);254cout << "Device - "<< device << endl;255}256else257{258net.setPreferableBackend(DNN_BACKEND_CUDA);259net.setPreferableTarget(DNN_TARGET_CUDA);260cout << "Device - "<< device << endl;261}262#elif(CV_MAJOR_VERSION == 3)263// OpenCV 3.4.x does not support GPU backend264net.setPreferableBackend(DNN_BACKEND_DEFAULT);265device = "cpu";266cout << "Device - "<< device << endl;267#endif268269cv::VideoCapture source;270if (videoFileName != "")271source.open(videoFileName);272else273source.open(0, CAP_V4L);274275Mat frame;276277int frame_count = 0;278double tt_opencvHaar = 0;279double tt_opencvDNN = 0;280double tt_dlibHog = 0;281double tt_dlibMmod = 0;282283namedWindow("Face Detection Comparison", WINDOW_NORMAL);284double t = 0;285while (true)286{287source >> frame;288if(frame.empty())289break;290291frame_count++;292293t = cv::getTickCount();294Mat frameOpenCVHaar = frame.clone();295detectFaceOpenCVHaar ( faceCascade, frameOpenCVHaar );296tt_opencvHaar += ((double)cv::getTickCount() - t)/cv::getTickFrequency();297double fpsOpencvHaar = frame_count/tt_opencvHaar;298putText(frameOpenCVHaar, format("OpenCV HAAR; FPS = %.2f",fpsOpencvHaar), Point(10, 50), FONT_HERSHEY_SIMPLEX, 1.3, Scalar(0, 0, 255), 4);299300Mat frameOpenCVDNN = frame.clone();301#if(CV_MAJOR_VERSION >= 3)302t = cv::getTickCount();303detectFaceOpenCVDNN (net, frameOpenCVDNN, framework);304tt_opencvDNN += ((double)cv::getTickCount() - t)/cv::getTickFrequency();305double fpsOpencvDNN = frame_count/tt_opencvDNN;306putText(frameOpenCVDNN, format("OpenCV DNN %s FPS = %.2f", device.c_str(), fpsOpencvDNN), Point(10, 50), FONT_HERSHEY_SIMPLEX, 1.3, Scalar(0, 0, 255), 4);307#else308putText(frameOpenCVDNN, "OpenCV DNN NOT SUPPORTED", Point(10, 50), 
FONT_HERSHEY_SIMPLEX, 1.3, Scalar(0, 0, 255), 4);309#endif310311#if(CV_MAJOR_VERSION < 3)312t = cv::getTickCount();313Mat frameDlibHog = frame.clone();314detectFaceDlibHog ( hogFaceDetector, frameDlibHog );315tt_dlibHog += ((double)cv::getTickCount() - t)/cv::getTickFrequency();316double fpsDlibHog = frame_count/tt_dlibHog;317putText(frameDlibHog, format("DLIB HoG; FPS = %.2f",fpsDlibHog), Point(10, 50), FONT_HERSHEY_SIMPLEX, 1.3, Scalar(0, 0, 255), 4);318319t = cv::getTickCount();320Mat frameDlibMmod = frame.clone();321detectFaceDlibMMOD ( mmodFaceDetector, frameDlibMmod );322tt_dlibMmod += ((double)cv::getTickCount() - t)/cv::getTickFrequency();323double fpsDlibMmod = frame_count/tt_dlibMmod;324putText(frameDlibMmod, format("DLIB MMOD; FPS = %.2f",fpsDlibMmod), Point(10, 50), FONT_HERSHEY_SIMPLEX, 1.3, Scalar(0, 0, 255), 4);325#endif326327Mat top, bottom, combined;328hconcat(frameOpenCVHaar, frameOpenCVDNN, top);329#if(CV_MAJOR_VERSION < 3)330hconcat(frameDlibHog, frameDlibMmod, bottom);331vconcat(top, bottom, combined);332#else333combined = top;334#endif335336cv::resize(combined, combined, Size(), .5, .5);337imshow("Face Detection Comparison", combined);338339int k = waitKey(5);340if(k == 27)341{342destroyAllWindows();343break;344}345346if(frame_count == 1)347{348tt_opencvHaar = 0;349tt_opencvDNN = 0;350tt_dlibHog = 0;351tt_dlibMmod = 0;352}353}354}355356357