GitHub Repository: hackassin/learnopencv
Path: blob/master/FaceMaskOverlay/overlay_with_mask.py
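Demo script: detect faces with the OpenCV DNN Caffe face detector, predict facial landmarks with an HRNet-based model, and warp a transparent mask PNG onto each detected face using a homography.
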
import argparse
import csv
import os
import pprint
from collections import OrderedDict

import cv2
import numpy as np
import torch

import lib.models as models
from lib.config import (
    config,
    update_config,
)
from lib.core.evaluation import decode_preds
from lib.utils import utils
from lib.utils.transforms import crop


def parse_args():

    parser = argparse.ArgumentParser(description="Face Mask Overlay")

    parser.add_argument(
        "--cfg", help="experiment configuration filename", required=True, type=str,
    )
    parser.add_argument(
        "--landmark_model",
        help="path to model for landmarks extraction",
        required=True,
        type=str,
    )
    parser.add_argument(
        "--detector_model",
        help="path to detector model",
        type=str,
        default="detection/face_detector.prototxt",
    )
    parser.add_argument(
        "--detector_weights",
        help="path to detector weights",
        type=str,
        default="detection/face_detector.caffemodel",
    )
    parser.add_argument(
        "--mask_image", help="path to a .png file with a mask", required=True, type=str,
    )
    parser.add_argument("--device", default="cpu", help="device to run inference on")

    args = parser.parse_args()
    update_config(config, args)
    return args

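
# Example invocation (a sketch; the config, checkpoint and mask paths below are
# placeholders, substitute the files from your own setup):
#
#   python overlay_with_mask.py \
#       --cfg <path/to/hrnet_config>.yaml \
#       --landmark_model <path/to/landmark_checkpoint>.pth \
#       --mask_image <path/to/mask>.png
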
def main():

    # parse script arguments
    args = parse_args()
    device = torch.device(args.device)

    # initialize logger
    logger, final_output_dir, tb_log_dir = utils.create_logger(config, args.cfg, "demo")

    # log arguments and config values
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # init landmark model
    model = models.get_face_alignment_net(config)

    # get input size from the config
    input_size = config.MODEL.IMAGE_SIZE

    # load the pre-trained landmark weights
    state_dict = torch.load(args.landmark_model, map_location=device)

    # remove the `module.` prefix from the pre-trained weights
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        name = key[7:]
        new_state_dict[name] = value

    # load weights without the prefix
    model.load_state_dict(new_state_dict)
    # run model on device
    model = model.to(device)

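    # note: the "module." prefix stripped above is added by torch.nn.DataParallel
    # when a model is trained on multiple GPUs; key[7:] assumes every key in the
    # checkpoint carries that prefix
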
    # init mean and std values for the landmark model's input
    mean = config.MODEL.MEAN
    mean = np.array(mean, dtype=np.float32)
    std = config.MODEL.STD
    std = np.array(std, dtype=np.float32)

    # define the prototxt and caffemodel paths
    detector_model = args.detector_model
    detector_weights = args.detector_weights

    # load the face detector
    detector = cv2.dnn.readNetFromCaffe(detector_model, detector_weights)
    capture = cv2.VideoCapture(0)

    frame_num = 0
    while True:
        # capture frame-by-frame
        success, frame = capture.read()

        # break if there is no frame
        if not success:
            break

        frame_num += 1
        print("frame_num: ", frame_num)
        landmarks_img = frame.copy()
        result = frame.copy()
        result = result.astype(np.float32) / 255.0

        # get the frame's height and width
        height, width = frame.shape[:2]  # 640x480

        # resize and subtract BGR mean values, since Caffe uses BGR images for input
        blob = cv2.dnn.blobFromImage(
            frame, scalefactor=1.0, size=(300, 300), mean=(104.0, 177.0, 123.0),
        )
        # pass the blob through the network to detect faces
        detector.setInput(blob)
        # detector output format:
        # [image_id, class, confidence, left, top, right, bottom]
        face_detections = detector.forward()

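        # face_detections is a 4-D blob of shape (1, 1, N, 7): one row per candidate
        # detection, with box coordinates normalized to [0, 1], which is why they are
        # scaled by the frame width and height below
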
        # loop over the detections
        for i in range(0, face_detections.shape[2]):
            # extract confidence
            confidence = face_detections[0, 0, i, 2]

            # filter detections by confidence greater than the minimum threshold
            if confidence > 0.5:
                # get coordinates of the bounding box
                box = face_detections[0, 0, i, 3:7] * np.array(
                    [width, height, width, height],
                )
                (x1, y1, x2, y2) = box.astype("int")

                # show original image
                cv2.imshow("original image", frame)

                # crop to detection and resize
                resized = crop(
                    frame,
                    torch.Tensor([x1 + (x2 - x1) / 2, y1 + (y2 - y1) / 2]),
                    1.5,
                    tuple(input_size),
                )

                # convert from BGR to RGB since HRNet expects RGB format
                resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
                img = resized.astype(np.float32) / 255.0
                # normalize landmark net input
                normalized_img = (img - mean) / std

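                # note: crop() above cuts out a region around the detected face center
                # and resizes it to the model input size; (img - mean) / std then
                # standardizes each channel with the statistics the landmark model was
                # trained with (config.MODEL.MEAN / config.MODEL.STD)
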
                # predict face landmarks
                model = model.eval()
                with torch.no_grad():
                    input = torch.Tensor(normalized_img.transpose([2, 0, 1]))
                    input = input.to(device)
                    output = model(input.unsqueeze(0))
                    score_map = output.data.cpu()
                    preds = decode_preds(
                        score_map,
                        [torch.Tensor([x1 + (x2 - x1) / 2, y1 + (y2 - y1) / 2])],
                        [1.5],
                        score_map.shape[2:4],
                    )

                    preds = preds.squeeze(0)
                    landmarks = preds.data.cpu().detach().numpy()
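                # decode_preds maps the peak of each predicted heatmap back to
                # coordinates in the original frame, using the same face center and
                # 1.5 scale that were used to crop the network input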
                # draw landmarks
                for k, landmark in enumerate(landmarks, 1):
                    landmarks_img = cv2.circle(
                        landmarks_img,
                        center=(int(landmark[0]), int(landmark[1])),
                        radius=3,
                        color=(0, 0, 255),
                        thickness=-1,
                    )
                    # draw landmarks' labels
                    landmarks_img = cv2.putText(
                        img=landmarks_img,
                        text=str(k),
                        org=(int(landmark[0]) + 5, int(landmark[1]) + 5),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.5,
                        color=(0, 0, 255),
                    )

                # show the predicted landmarks and their labels
                cv2.imshow("image with landmarks", landmarks_img)

                # use landmarks 2-16 and 30 (matching the 1-based labels drawn above)
                # as destination points; the landmarks array itself is 0-indexed
                dst_pts = np.array(
                    [
                        landmarks[1],
                        landmarks[2],
                        landmarks[3],
                        landmarks[4],
                        landmarks[5],
                        landmarks[6],
                        landmarks[7],
                        landmarks[8],
                        landmarks[9],
                        landmarks[10],
                        landmarks[11],
                        landmarks[12],
                        landmarks[13],
                        landmarks[14],
                        landmarks[15],
                        landmarks[29],
                    ],
                    dtype="float32",
                )

                # load mask annotations from the csv file to use as source points
                mask_annotation = os.path.splitext(os.path.basename(args.mask_image))[0]
                mask_annotation = os.path.join(
                    os.path.dirname(args.mask_image), mask_annotation + ".csv",
                )

                with open(mask_annotation) as csv_file:
                    csv_reader = csv.reader(csv_file, delimiter=",")
                    src_pts = []
                    for i, row in enumerate(csv_reader):
                        # skip the header or empty lines if present
                        try:
                            src_pts.append(np.array([float(row[1]), float(row[2])]))
                        except ValueError:
                            continue
                src_pts = np.array(src_pts, dtype="float32")

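                # the annotation csv is expected to sit next to the mask image and share
                # its basename; each row should contain a point with x in the second
                # column and y in the third (rows that do not parse are skipped above)
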
                # overlay the mask only if all landmarks have positive coordinates:
                if (landmarks > 0).all():
                    # load mask image
                    mask_img = cv2.imread(args.mask_image, cv2.IMREAD_UNCHANGED)
                    mask_img = mask_img.astype(np.float32)
                    mask_img = mask_img / 255.0

                    # get the perspective transformation matrix
                    M, _ = cv2.findHomography(src_pts, dst_pts)

                    # warp the mask image onto the frame
                    transformed_mask = cv2.warpPerspective(
                        mask_img,
                        M,
                        (result.shape[1], result.shape[0]),
                        None,
                        cv2.INTER_LINEAR,
                        cv2.BORDER_CONSTANT,
                    )

                    # mask overlay
                    alpha_mask = transformed_mask[:, :, 3]
                    alpha_image = 1.0 - alpha_mask

                    for c in range(0, 3):
                        result[:, :, c] = (
                            alpha_mask * transformed_mask[:, :, c]
                            + alpha_image * result[:, :, c]
                        )

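                    # per-pixel alpha blend: result = alpha * mask + (1 - alpha) * frame,
                    # where alpha is the warped mask's (normalized) alpha channel
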
        # display the resulting frame
        cv2.imshow("image with mask overlay", result)

        # exit when the Esc key is pressed
        k = cv2.waitKey(1)
        if k == 27:
            break

    # when everything is done, release the capture
    capture.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()