# Path: blob/master/FaceDetectionComparison/run-all.py
# 3118 views
"""Run four face detectors side by side on a video and compare their speed.

Each frame is passed through OpenCV Haar, OpenCV DNN, dlib HoG and dlib MMOD.
The four annotated results are tiled into a 2x2 mosaic that is shown on screen
and written to an MJPG ``.avi`` file under ``output-dnn-videos``.
"""
import argparse
import os
import time

import cv2
import dlib
import numpy as np

# Model files
# OpenCV HAAR
faceCascade = cv2.CascadeClassifier("models/haarcascade_frontalface_default.xml")

# DLIB HOG
hogFaceDetector = dlib.get_frontal_face_detector()

# DLIB MMOD
dnnFaceDetector = dlib.cnn_face_detection_model_v1(
    "models/mmod_human_face_detector.dat",
)


def _prepareSmallFrame(frame, inHeight, inWidth):
    """Resize *frame* for detection and return it with the scale-back factors.

    Returns ``(smallFrame, scaleWidth, scaleHeight)`` where the two scale
    factors map coordinates in the resized frame back to the original frame.
    A falsy *inWidth* means "derive the width from *inHeight*, keeping the
    aspect ratio".
    """
    frameHeight, frameWidth = frame.shape[:2]
    if not inWidth:
        # Clamp to 1 so an extremely narrow frame cannot yield a zero width
        # (which would break both cv2.resize and the division below).
        inWidth = max(1, int((frameWidth / frameHeight) * inHeight))
    smallFrame = cv2.resize(frame, (inWidth, inHeight))
    return smallFrame, frameWidth / inWidth, frameHeight / inHeight


def _scaleBox(left, top, right, bottom, scaleWidth, scaleHeight):
    """Map a box from resized-frame coordinates to ``[x1, y1, x2, y2]`` ints."""
    return [
        int(left * scaleWidth),
        int(top * scaleHeight),
        int(right * scaleWidth),
        int(bottom * scaleHeight),
    ]


def _drawBoxes(image, bboxes, lineType):
    """Draw every ``[x1, y1, x2, y2]`` box on *image* in green, in place."""
    # Line thickness grows with the frame height so boxes stay visible on
    # large frames.
    thickness = int(round(image.shape[0] / 150))
    for x1, y1, x2, y2 in bboxes:
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), thickness, lineType)


def detectFaceOpenCVHaar(faceCascade, frame, inHeight=300, inWidth=0):
    """Detect faces with an OpenCV Haar cascade.

    The frame is downscaled to *inHeight* rows (and *inWidth* columns, or an
    aspect-preserving width when *inWidth* is 0), converted from BGR to gray,
    and passed to ``faceCascade.detectMultiScale``.

    Returns ``(annotatedFrame, bboxes)``: a copy of *frame* with the boxes
    drawn, and the list of ``[x1, y1, x2, y2]`` boxes in original-frame
    coordinates. *frame* itself is not modified.
    """
    annotated = frame.copy()
    small, scaleWidth, scaleHeight = _prepareSmallFrame(annotated, inHeight, inWidth)
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)

    bboxes = [
        _scaleBox(x, y, x + w, y + h, scaleWidth, scaleHeight)
        for (x, y, w, h) in faceCascade.detectMultiScale(gray)
    ]
    _drawBoxes(annotated, bboxes, 4)
    return annotated, bboxes


def detectFaceOpenCVDnn(net, frame, conf_threshold=0.7):
    """Detect faces with an OpenCV DNN network.

    The frame is turned into a 300x300 blob (mean ``[104, 117, 123]``, no
    channel swap, no crop) and run through *net*. Detections whose confidence
    exceeds *conf_threshold* are kept; their coordinates are multiplied by the
    frame width/height to obtain pixel positions.

    Returns ``(annotatedFrame, bboxes)`` with boxes as ``[x1, y1, x2, y2]``.
    *frame* itself is not modified.
    """
    annotated = frame.copy()
    frameHeight, frameWidth = annotated.shape[:2]
    blob = cv2.dnn.blobFromImage(
        annotated, 1.0, (300, 300), [104, 117, 123], False, False,
    )

    net.setInput(blob)
    detections = net.forward()

    bboxes = []
    for i in range(detections.shape[2]):
        # Per detection: index 2 is the confidence, 3..6 are x1, y1, x2, y2
        # expressed as fractions of the frame size.
        if detections[0, 0, i, 2] > conf_threshold:
            bboxes.append(
                [
                    int(detections[0, 0, i, 3] * frameWidth),
                    int(detections[0, 0, i, 4] * frameHeight),
                    int(detections[0, 0, i, 5] * frameWidth),
                    int(detections[0, 0, i, 6] * frameHeight),
                ]
            )
    _drawBoxes(annotated, bboxes, 8)
    return annotated, bboxes


def detectFaceDlibHog(detector, frame, inHeight=300, inWidth=0):
    """Detect faces with dlib's HoG frontal face detector.

    The frame is downscaled like in ``detectFaceOpenCVHaar``, converted from
    BGR to RGB, and passed to ``detector(image, 0)`` (the second argument is
    dlib's upsampling count; 0 means the image is not upsampled).

    Returns ``(annotatedFrame, bboxes)`` with boxes as ``[x1, y1, x2, y2]`` in
    original-frame coordinates. *frame* itself is not modified.
    """
    annotated = frame.copy()
    small, scaleWidth, scaleHeight = _prepareSmallFrame(annotated, inHeight, inWidth)
    rgb = cv2.cvtColor(small, cv2.COLOR_BGR2RGB)

    bboxes = [
        _scaleBox(
            faceRect.left(),
            faceRect.top(),
            faceRect.right(),
            faceRect.bottom(),
            scaleWidth,
            scaleHeight,
        )
        for faceRect in detector(rgb, 0)
    ]
    _drawBoxes(annotated, bboxes, 4)
    return annotated, bboxes


def detectFaceDlibMMOD(detector, frame, inHeight=300, inWidth=0):
    """Detect faces with dlib's CNN (MMOD) face detector.

    Same pipeline as ``detectFaceDlibHog``; the only difference is that each
    detection exposes its rectangle through a ``.rect`` attribute.

    Returns ``(annotatedFrame, bboxes)`` with boxes as ``[x1, y1, x2, y2]`` in
    original-frame coordinates. *frame* itself is not modified.
    """
    annotated = frame.copy()
    small, scaleWidth, scaleHeight = _prepareSmallFrame(annotated, inHeight, inWidth)
    rgb = cv2.cvtColor(small, cv2.COLOR_BGR2RGB)

    bboxes = [
        _scaleBox(
            faceRect.rect.left(),
            faceRect.rect.top(),
            faceRect.rect.right(),
            faceRect.rect.bottom(),
            scaleWidth,
            scaleHeight,
        )
        for faceRect in detector(rgb, 0)
    ]
    _drawBoxes(annotated, bboxes, 4)
    return annotated, bboxes


def _loadDnnFaceNet(net_type, device):
    """Load the OpenCV DNN face detector and select its backend/target."""
    # OpenCV DNN supports 2 networks.
    # 1. FP16 version of the original Caffe implementation ( 5.4 MB )
    # 2. 8 bit Quantized version using TensorFlow ( 2.7 MB )
    if net_type == "caffe":
        modelFile = "models/res10_300x300_ssd_iter_140000_fp16.caffemodel"
        configFile = "models/deploy.prototxt"
        net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
    else:
        modelFile = "models/opencv_face_detector_uint8.pb"
        configFile = "models/opencv_face_detector.pbtxt"
        net = cv2.dnn.readNetFromTensorflow(modelFile, configFile)

    if device == "cpu":
        # The backend and the target are separate enums: pass a BACKEND
        # constant to setPreferableBackend and select the CPU as the target.
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
    else:
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
    return net


def main():
    """Parse the command line and run the four-detector comparison loop."""
    parser = argparse.ArgumentParser(description="Face detection")
    parser.add_argument("--video", type=str, help="Path to video file")
    parser.add_argument(
        "--device",
        type=str,
        default="gpu",
        choices=["cpu", "gpu"],
        help="Device to use",
    )
    parser.add_argument(
        "--net_type",
        type=str,
        default="caffe",
        choices=["caffe", "tf"],
        help="Type of network to run",
    )
    args = parser.parse_args()

    source = args.video
    device = args.device
    net = _loadDnnFaceNet(args.net_type, device)

    # Overlay label prefix and detector call for each mosaic tile, in the
    # order top-left, top-right, bottom-left, bottom-right.
    pipelines = [
        (
            "OpenCV Haar; FPS : ",
            lambda image: detectFaceOpenCVHaar(faceCascade, image),
        ),
        (
            "OpenCV DNN {} FPS : ".format(device.upper()),
            lambda image: detectFaceOpenCVDnn(net, image),
        ),
        (
            "DLIB HoG; FPS : ",
            lambda image: detectFaceDlibHog(hogFaceDetector, image),
        ),
        (
            "DLIB MMOD; FPS : ",
            lambda image: detectFaceDlibMMOD(dnnFaceDetector, image),
        ),
    ]

    if source:
        cap = cv2.VideoCapture(source)
        outputFile = os.path.splitext(os.path.basename(source))[0] + ".avi"
    else:
        cap = cv2.VideoCapture(0, cv2.CAP_V4L)
        outputFile = "grabbed_from_camera.avi"

    vid_writer = None
    try:
        hasFrame, frame = cap.read()
        if not hasFrame:
            raise SystemExit(
                "Could not read a frame from {}".format(source or "the camera")
            )

        outputFolder = "output-dnn-videos"
        os.makedirs(outputFolder, exist_ok=True)

        # The frames written below are 2x2 mosaics, so the writer must be
        # opened at twice the source width and height.
        vid_writer = cv2.VideoWriter(
            os.path.join(outputFolder, outputFile),
            cv2.VideoWriter_fourcc("M", "J", "P", "G"),
            25,
            (2 * frame.shape[1], 2 * frame.shape[0]),
        )

        frame_count = 0
        timedFrames = 0
        elapsed = [0.0] * len(pipelines)

        while hasFrame:
            frame_count += 1
            timedFrames += 1

            tiles = []
            for index, (labelPrefix, detect) in enumerate(pipelines):
                start = time.perf_counter()
                annotated, _ = detect(frame)
                elapsed[index] += time.perf_counter() - start

                total = elapsed[index]
                fps = timedFrames / total if total > 0 else 0.0
                cv2.putText(
                    annotated,
                    labelPrefix + "{:.2f}".format(fps),
                    (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1.3,
                    (0, 0, 255),
                    3,
                    cv2.LINE_AA,
                )
                tiles.append(annotated)

            top = np.hstack(tiles[:2])
            bottom = np.hstack(tiles[2:])
            combined = np.vstack([top, bottom])
            cv2.imshow("Face Detection Comparison", combined)

            if frame_count == 1:
                # Treat the first frame as warm-up: drop its timings AND its
                # frame from the average so the FPS is not overstated.
                elapsed = [0.0] * len(pipelines)
                timedFrames = 0

            vid_writer.write(combined)

            # Esc stops the run.
            if cv2.waitKey(5) == 27:
                break

            hasFrame, frame = cap.read()
    finally:
        cap.release()
        if vid_writer is not None:
            vid_writer.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()