CoCalc -- run-all.py

GitHub Repository: hackassin/learnopencv
Path: blob/master/FaceDetectionComparison/run-all.py
3118 views
1
import argparse
2
import os
3
import time
4

5
import cv2
6
import dlib
7
import numpy as np
8

9
# Model files
# All detectors below are constructed once, at import time, from paths that
# are relative to the current working directory.

# OpenCV HAAR
_haarCascadePath = "models/haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(_haarCascadePath)
# CascadeClassifier does not raise when the XML cannot be read; it yields an
# empty classifier that only fails later inside detectMultiScale. Fail here
# instead, with a message that names the missing file.
if faceCascade.empty():
    raise IOError("Could not load Haar cascade from {}".format(_haarCascadePath))

# DLIB HOG
# Built-in detector; needs no model file.
hogFaceDetector = dlib.get_frontal_face_detector()

# DLIB MMOD
dnnFaceDetector = dlib.cnn_face_detection_model_v1(
    "models/mmod_human_face_detector.dat",
)
20

21

22
def detectFaceOpenCVHaar(faceCascade, frame, inHeight=300, inWidth=0):
    """Detect faces with a Haar cascade and draw them on a copy of *frame*.

    Detection runs on a grayscale copy resized to ``inWidth`` x ``inHeight``;
    the resulting boxes are scaled back to the size of *frame*.

    Args:
        faceCascade: Object whose ``detectMultiScale(gray)`` yields
            ``(x, y, w, h)`` rectangles.
        frame: Input image; converted with ``COLOR_BGR2GRAY``, so it is
            expected to be a BGR image.
        inHeight: Height of the downscaled detection image.
        inWidth: Width of the downscaled detection image. A falsy value
            (the default ``0``) derives it from the frame's aspect ratio.

    Returns:
        ``(annotated, bboxes)`` where ``annotated`` is a copy of *frame* with
        green rectangles drawn and ``bboxes`` is a list of
        ``[x1, y1, x2, y2]`` integer boxes in *frame* coordinates.
    """
    annotated = frame.copy()
    srcHeight, srcWidth = annotated.shape[0], annotated.shape[1]
    if not inWidth:
        # Preserve the aspect ratio of the source frame.
        inWidth = int((srcWidth / srcHeight) * inHeight)

    # Factors that map detection-image coordinates back onto the full frame.
    yScale = srcHeight / inHeight
    xScale = srcWidth / inWidth

    gray = cv2.cvtColor(
        cv2.resize(annotated, (inWidth, inHeight)), cv2.COLOR_BGR2GRAY
    )

    # Outline thickness grows with the frame height.
    thickness = int(round(srcHeight / 150))

    bboxes = []
    for (x, y, w, h) in faceCascade.detectMultiScale(gray):
        box = [
            int(x * xScale),
            int(y * yScale),
            int((x + w) * xScale),
            int((y + h) * yScale),
        ]
        bboxes.append(box)
        cv2.rectangle(
            annotated,
            (box[0], box[1]),
            (box[2], box[3]),
            (0, 255, 0),
            thickness,
            4,  # lineType
        )
    return annotated, bboxes
58

59

60
def detectFaceOpenCVDnn(net, frame, conf_threshold=0.7):
    """Detect faces with an OpenCV DNN network and draw them on a copy of *frame*.

    Args:
        net: Network object providing ``setInput(blob)`` and ``forward()``.
            The output is indexed as ``[0, 0, i, k]`` with ``k == 2`` used as
            the confidence and ``k == 3..6`` as ``x1, y1, x2, y2`` expressed
            as fractions of the frame width/height.
        frame: Input image fed to ``cv2.dnn.blobFromImage``.
        conf_threshold: Detections are kept only when their confidence is
            strictly greater than this value.

    Returns:
        ``(annotated, bboxes)`` where ``annotated`` is a copy of *frame* with
        green rectangles drawn and ``bboxes`` is a list of
        ``[x1, y1, x2, y2]`` integer boxes in *frame* coordinates.
    """
    annotated = frame.copy()
    srcHeight, srcWidth = annotated.shape[0], annotated.shape[1]

    # 300x300 input, scale 1.0, per-channel mean subtraction,
    # no R/B swap and no cropping.
    net.setInput(
        cv2.dnn.blobFromImage(
            annotated, 1.0, (300, 300), [104, 117, 123], False, False,
        )
    )
    detections = net.forward()

    # Outline thickness grows with the frame height.
    thickness = int(round(srcHeight / 150))

    bboxes = []
    for detection in detections[0, 0]:
        confidence = detection[2]
        if not confidence > conf_threshold:
            continue
        # Convert fractional coordinates to pixels of the original frame.
        x1 = int(detection[3] * srcWidth)
        y1 = int(detection[4] * srcHeight)
        x2 = int(detection[5] * srcWidth)
        y2 = int(detection[6] * srcHeight)
        bboxes.append([x1, y1, x2, y2])
        cv2.rectangle(
            annotated,
            (x1, y1),
            (x2, y2),
            (0, 255, 0),
            thickness,
            8,  # lineType
        )
    return annotated, bboxes
88

89

90
def detectFaceDlibHog(detector, frame, inHeight=300, inWidth=0):
    """Detect faces with a dlib HOG detector and draw them on a copy of *frame*.

    Detection runs on an RGB copy resized to ``inWidth`` x ``inHeight``; the
    resulting boxes are scaled back to the size of *frame*.

    Args:
        detector: Callable invoked as ``detector(image, 0)`` that yields
            rectangles exposing ``left()``, ``top()``, ``right()`` and
            ``bottom()``. The second argument is presumably dlib's upsample
            count -- confirm against the dlib documentation.
        frame: Input image; converted with ``COLOR_BGR2RGB``, so it is
            expected to be a BGR image.
        inHeight: Height of the downscaled detection image.
        inWidth: Width of the downscaled detection image. A falsy value
            (the default ``0``) derives it from the frame's aspect ratio.

    Returns:
        ``(annotated, bboxes)`` where ``annotated`` is a copy of *frame* with
        green rectangles drawn and ``bboxes`` is a list of
        ``[x1, y1, x2, y2]`` integer boxes in *frame* coordinates.
    """
    annotated = frame.copy()
    srcHeight, srcWidth = annotated.shape[0], annotated.shape[1]
    if not inWidth:
        # Preserve the aspect ratio of the source frame.
        inWidth = int((srcWidth / srcHeight) * inHeight)

    # Factors that map detection-image coordinates back onto the full frame.
    yScale = srcHeight / inHeight
    xScale = srcWidth / inWidth

    smallRgb = cv2.cvtColor(
        cv2.resize(annotated, (inWidth, inHeight)), cv2.COLOR_BGR2RGB
    )

    # Outline thickness grows with the frame height.
    thickness = int(round(srcHeight / 150))

    bboxes = []
    for rect in detector(smallRgb, 0):
        box = [
            int(rect.left() * xScale),
            int(rect.top() * yScale),
            int(rect.right() * xScale),
            int(rect.bottom() * yScale),
        ]
        bboxes.append(box)
        cv2.rectangle(
            annotated,
            (box[0], box[1]),
            (box[2], box[3]),
            (0, 255, 0),
            thickness,
            4,  # lineType
        )
    return annotated, bboxes
123

124

125
def detectFaceDlibMMOD(detector, frame, inHeight=300, inWidth=0):
    """Detect faces with a dlib MMOD (CNN) detector and draw them on a copy of *frame*.

    Detection runs on an RGB copy resized to ``inWidth`` x ``inHeight``; the
    resulting boxes are scaled back to the size of *frame*.

    Args:
        detector: Callable invoked as ``detector(image, 0)`` that yields
            detections whose ``.rect`` exposes ``left()``, ``top()``,
            ``right()`` and ``bottom()``. The second argument is presumably
            dlib's upsample count -- confirm against the dlib documentation.
        frame: Input image; converted with ``COLOR_BGR2RGB``, so it is
            expected to be a BGR image.
        inHeight: Height of the downscaled detection image.
        inWidth: Width of the downscaled detection image. A falsy value
            (the default ``0``) derives it from the frame's aspect ratio.

    Returns:
        ``(annotated, bboxes)`` where ``annotated`` is a copy of *frame* with
        green rectangles drawn and ``bboxes`` is a list of
        ``[x1, y1, x2, y2]`` integer boxes in *frame* coordinates.
    """
    annotated = frame.copy()
    srcHeight, srcWidth = annotated.shape[0], annotated.shape[1]
    if not inWidth:
        # Preserve the aspect ratio of the source frame.
        inWidth = int((srcWidth / srcHeight) * inHeight)

    # Factors that map detection-image coordinates back onto the full frame.
    yScale = srcHeight / inHeight
    xScale = srcWidth / inWidth

    smallRgb = cv2.cvtColor(
        cv2.resize(annotated, (inWidth, inHeight)), cv2.COLOR_BGR2RGB
    )

    # Outline thickness grows with the frame height.
    thickness = int(round(srcHeight / 150))

    bboxes = []
    for detection in detector(smallRgb, 0):
        # Unlike the HOG detector, each result wraps its rectangle in `.rect`.
        box = [
            int(detection.rect.left() * xScale),
            int(detection.rect.top() * yScale),
            int(detection.rect.right() * xScale),
            int(detection.rect.bottom() * yScale),
        ]
        bboxes.append(box)
        cv2.rectangle(
            annotated,
            (box[0], box[1]),
            (box[2], box[3]),
            (0, 255, 0),
            thickness,
            4,  # lineType
        )
    return annotated, bboxes
158

159

160
if __name__ == "__main__":
    # Run all four face detectors on every frame of a video (or the default
    # camera), show them side by side in a 2x2 mosaic with a running FPS
    # label per detector, and record the mosaic to an MJPG .avi file.

    parser = argparse.ArgumentParser(description="Face detection")
    parser.add_argument("--video", type=str, help="Path to video file")
    parser.add_argument(
        "--device",
        type=str,
        default="gpu",
        choices=["cpu", "gpu"],
        help="Device to use",
    )
    parser.add_argument(
        "--net_type",
        type=str,
        default="caffe",
        choices=["caffe", "tf"],
        help="Type of network to run",
    )
    args = parser.parse_args()

    net_type = args.net_type
    source = args.video
    device = args.device

    # OpenCV DNN supports 2 networks.
    # 1. FP16 version of the original Caffe implementation ( 5.4 MB )
    # 2. 8 bit Quantized version using TensorFlow ( 2.7 MB )

    if net_type == "caffe":
        modelFile = "models/res10_300x300_ssd_iter_140000_fp16.caffemodel"
        configFile = "models/deploy.prototxt"
        net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
    else:
        modelFile = "models/opencv_face_detector_uint8.pb"
        configFile = "models/opencv_face_detector.pbtxt"
        net = cv2.dnn.readNetFromTensorflow(modelFile, configFile)

    if device == "cpu":
        # Backend and target are separate settings. The previous code passed
        # the *target* constant to setPreferableBackend and never set the
        # target at all.
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
    else:
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    if source:
        cap = cv2.VideoCapture(source)
    else:
        cap = cv2.VideoCapture(0, cv2.CAP_V4L)

    # The first frame determines the output size; it is also processed below
    # rather than being thrown away.
    hasFrame, frame = cap.read()
    if not hasFrame:
        cap.release()
        raise SystemExit(
            "Unable to read a frame from {}".format(source or "the camera")
        )

    outputFolder = "output-dnn-videos"
    if source:
        # splitext copes with extensions of any length (".mpeg", ".mkv", ...).
        outputFile = os.path.splitext(os.path.basename(source))[0] + ".avi"
    else:
        outputFile = "grabbed_from_camera.avi"

    os.makedirs(outputFolder, exist_ok=True)

    # The recorded image is a 2x2 mosaic of annotated frames, so the writer
    # must be opened at twice the width and twice the height of one frame.
    vid_writer = cv2.VideoWriter(
        os.path.join(outputFolder, outputFile),
        cv2.VideoWriter_fourcc("M", "J", "P", "G"),
        25,
        (2 * frame.shape[1], 2 * frame.shape[0]),
    )

    # (label prefix, detector call) in mosaic order:
    # top-left, top-right, bottom-left, bottom-right.
    detectors = [
        (
            "OpenCV Haar; FPS : ",
            lambda img: detectFaceOpenCVHaar(faceCascade, img),
        ),
        (
            "OpenCV DNN {} FPS : ".format(device.upper()),
            lambda img: detectFaceOpenCVDnn(net, img),
        ),
        (
            "DLIB HoG; FPS : ",
            lambda img: detectFaceDlibHog(hogFaceDetector, img),
        ),
        (
            "DLIB MMOD; FPS : ",
            lambda img: detectFaceDlibMMOD(dnnFaceDetector, img),
        ),
    ]

    frame_count = 0
    # Accumulated detection time per detector, and the number of frames that
    # time covers (kept separately from frame_count so the warm-up frame can
    # be excluded from both sides of the FPS ratio).
    elapsed = [0.0] * len(detectors)
    timed_frames = 0

    try:
        while hasFrame:
            frame_count += 1
            timed_frames += 1

            outputs = []
            for idx, (prefix, detect) in enumerate(detectors):
                start = time.perf_counter()
                annotated, _ = detect(frame)
                elapsed[idx] += time.perf_counter() - start

                # Guard against a zero interval on coarse clocks.
                fps = timed_frames / elapsed[idx] if elapsed[idx] > 0 else 0.0

                label = prefix + "{:.2f}".format(fps)
                cv2.putText(
                    annotated,
                    label,
                    (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1.3,
                    (0, 0, 255),
                    3,
                    cv2.LINE_AA,
                )
                outputs.append(annotated)

            top = np.hstack(outputs[:2])
            bottom = np.hstack(outputs[2:])
            combined = np.vstack([top, bottom])
            cv2.imshow("Face Detection Comparison", combined)

            if frame_count == 1:
                # The first frame includes one-off initialisation cost;
                # drop it from the running averages.
                elapsed = [0.0] * len(detectors)
                timed_frames = 0

            vid_writer.write(combined)

            # ESC stops the run.
            if cv2.waitKey(5) == 27:
                break

            hasFrame, frame = cap.read()
    finally:
        # Release the capture and writer even if a detector raises, so the
        # output file is finalised and the camera is freed.
        cap.release()
        vid_writer.release()
        cv2.destroyAllWindows()
326

327
Product

Resources

Company