GitHub Repository: hackassin/learnopencv
Path: blob/master/FaceMaskOverlay/overlay_with_mask.py
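Demo script: detect faces with the OpenCV DNN Caffe face detector, predict facial landmarks with an HRNet-based model, and warp a transparent mask PNG onto each detected face using a homography.
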
import argparse
import csv
import os
import pprint
from collections import OrderedDict

import cv2
import numpy as np
import torch

import lib.models as models
from lib.config import (
    config,
    update_config,
)
from lib.core.evaluation import decode_preds
from lib.utils import utils
from lib.utils.transforms import crop


def parse_args():

    parser = argparse.ArgumentParser(description="Face Mask Overlay")

    parser.add_argument(
        "--cfg", help="experiment configuration filename", required=True, type=str,
    )
    parser.add_argument(
        "--landmark_model",
        help="path to model for landmarks extraction",
        required=True,
        type=str,
    )
    parser.add_argument(
        "--detector_model",
        help="path to detector model",
        type=str,
        default="detection/face_detector.prototxt",
    )
    parser.add_argument(
        "--detector_weights",
        help="path to detector weights",
        type=str,
        default="detection/face_detector.caffemodel",
    )
    parser.add_argument(
        "--mask_image", help="path to a .png file with a mask", required=True, type=str,
    )
    parser.add_argument("--device", default="cpu", help="device to run inference on")

    args = parser.parse_args()
    update_config(config, args)
    return args

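
# Example invocation (a sketch; the config, checkpoint and mask paths below are
# placeholders, substitute the files from your own setup):
#
#   python overlay_with_mask.py \
#       --cfg <path/to/hrnet_config>.yaml \
#       --landmark_model <path/to/landmark_checkpoint>.pth \
#       --mask_image <path/to/mask>.png
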
def main():

    # parse script arguments
    args = parse_args()
    device = torch.device(args.device)

    # initialize logger
    logger, final_output_dir, tb_log_dir = utils.create_logger(config, args.cfg, "demo")

    # log arguments and config values
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # init landmark model
    model = models.get_face_alignment_net(config)

    # get input size from the config
    input_size = config.MODEL.IMAGE_SIZE

    # load the pre-trained landmark weights
    state_dict = torch.load(args.landmark_model, map_location=device)

    # remove the `module.` prefix from the pre-trained weights
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        name = key[7:]
        new_state_dict[name] = value

    # load weights without the prefix
    model.load_state_dict(new_state_dict)
    # run model on device
    model = model.to(device)

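    # note: the "module." prefix stripped above is added by torch.nn.DataParallel
    # when a model is trained on multiple GPUs; key[7:] assumes every key in the
    # checkpoint carries that prefix
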
    # init mean and std values for the landmark model's input
    mean = config.MODEL.MEAN
    mean = np.array(mean, dtype=np.float32)
    std = config.MODEL.STD
    std = np.array(std, dtype=np.float32)

    # define the prototxt and caffemodel paths
    detector_model = args.detector_model
    detector_weights = args.detector_weights

    # load the face detector
    detector = cv2.dnn.readNetFromCaffe(detector_model, detector_weights)
    capture = cv2.VideoCapture(0)

    frame_num = 0
    while True:
        # capture frame-by-frame
        success, frame = capture.read()

        # break if there is no frame
        if not success:
            break

        frame_num += 1
        print("frame_num: ", frame_num)
        landmarks_img = frame.copy()
        result = frame.copy()
        result = result.astype(np.float32) / 255.0

        # get the frame's height and width
        height, width = frame.shape[:2]  # 640x480

        # resize and subtract BGR mean values, since Caffe uses BGR images for input
        blob = cv2.dnn.blobFromImage(
            frame, scalefactor=1.0, size=(300, 300), mean=(104.0, 177.0, 123.0),
        )
        # pass the blob through the network to detect faces
        detector.setInput(blob)
        # detector output format:
        # [image_id, class, confidence, left, top, right, bottom]
        face_detections = detector.forward()

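        # face_detections is a 4-D blob of shape (1, 1, N, 7): one row per candidate
        # detection, with box coordinates normalized to [0, 1], which is why they are
        # scaled by the frame width and height below
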
        # loop over the detections
        for i in range(0, face_detections.shape[2]):
            # extract confidence
            confidence = face_detections[0, 0, i, 2]

            # filter detections by confidence greater than the minimum threshold
            if confidence > 0.5:
                # get coordinates of the bounding box
                box = face_detections[0, 0, i, 3:7] * np.array(
                    [width, height, width, height],
                )
                (x1, y1, x2, y2) = box.astype("int")

                # show original image
                cv2.imshow("original image", frame)

                # crop to detection and resize
                resized = crop(
                    frame,
                    torch.Tensor([x1 + (x2 - x1) / 2, y1 + (y2 - y1) / 2]),
                    1.5,
                    tuple(input_size),
                )

                # convert from BGR to RGB since HRNet expects RGB format
                resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
                img = resized.astype(np.float32) / 255.0
                # normalize landmark net input
                normalized_img = (img - mean) / std

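                # note: crop() above cuts out a region around the detected face center
                # and resizes it to the model input size; (img - mean) / std then
                # standardizes each channel with the statistics the landmark model was
                # trained with (config.MODEL.MEAN / config.MODEL.STD)
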
                # predict face landmarks
                model = model.eval()
                with torch.no_grad():
                    input = torch.Tensor(normalized_img.transpose([2, 0, 1]))
                    input = input.to(device)
                    output = model(input.unsqueeze(0))
                    score_map = output.data.cpu()
                    preds = decode_preds(
                        score_map,
                        [torch.Tensor([x1 + (x2 - x1) / 2, y1 + (y2 - y1) / 2])],
                        [1.5],
                        score_map.shape[2:4],
                    )

                    preds = preds.squeeze(0)
                    landmarks = preds.data.cpu().detach().numpy()
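                # decode_preds maps the peak of each predicted heatmap back to
                # coordinates in the original frame, using the same face center and
                # 1.5 scale that were used to crop the network input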
                # draw landmarks
                for k, landmark in enumerate(landmarks, 1):
                    landmarks_img = cv2.circle(
                        landmarks_img,
                        center=(int(landmark[0]), int(landmark[1])),
                        radius=3,
                        color=(0, 0, 255),
                        thickness=-1,
                    )
                    # draw landmarks' labels
                    landmarks_img = cv2.putText(
                        img=landmarks_img,
                        text=str(k),
                        org=(int(landmark[0]) + 5, int(landmark[1]) + 5),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.5,
                        color=(0, 0, 255),
                    )

                # show the predicted landmarks and their labels
                cv2.imshow("image with landmarks", landmarks_img)

                # use landmarks 2-16 and 30 (matching the 1-based labels drawn above)
                # as destination points; the landmarks array itself is 0-indexed
                dst_pts = np.array(
                    [
                        landmarks[1],
                        landmarks[2],
                        landmarks[3],
                        landmarks[4],
                        landmarks[5],
                        landmarks[6],
                        landmarks[7],
                        landmarks[8],
                        landmarks[9],
                        landmarks[10],
                        landmarks[11],
                        landmarks[12],
                        landmarks[13],
                        landmarks[14],
                        landmarks[15],
                        landmarks[29],
                    ],
                    dtype="float32",
                )

                # load mask annotations from the csv file to use as source points
                mask_annotation = os.path.splitext(os.path.basename(args.mask_image))[0]
                mask_annotation = os.path.join(
                    os.path.dirname(args.mask_image), mask_annotation + ".csv",
                )

                with open(mask_annotation) as csv_file:
                    csv_reader = csv.reader(csv_file, delimiter=",")
                    src_pts = []
                    for i, row in enumerate(csv_reader):
                        # skip the header or empty lines if present
                        try:
                            src_pts.append(np.array([float(row[1]), float(row[2])]))
                        except ValueError:
                            continue
                src_pts = np.array(src_pts, dtype="float32")

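                # the annotation csv is expected to sit next to the mask image and share
                # its basename; each row should contain a point with x in the second
                # column and y in the third (rows that do not parse are skipped above)
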
                # overlay the mask only if all landmarks have positive coordinates:
                if (landmarks > 0).all():
                    # load mask image
                    mask_img = cv2.imread(args.mask_image, cv2.IMREAD_UNCHANGED)
                    mask_img = mask_img.astype(np.float32)
                    mask_img = mask_img / 255.0

                    # get the perspective transformation matrix
                    M, _ = cv2.findHomography(src_pts, dst_pts)

                    # warp the mask image onto the frame
                    transformed_mask = cv2.warpPerspective(
                        mask_img,
                        M,
                        (result.shape[1], result.shape[0]),
                        None,
                        cv2.INTER_LINEAR,
                        cv2.BORDER_CONSTANT,
                    )

                    # mask overlay
                    alpha_mask = transformed_mask[:, :, 3]
                    alpha_image = 1.0 - alpha_mask

                    for c in range(0, 3):
                        result[:, :, c] = (
                            alpha_mask * transformed_mask[:, :, c]
                            + alpha_image * result[:, :, c]
                        )

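                    # per-pixel alpha blend: result = alpha * mask + (1 - alpha) * frame,
                    # where alpha is the warped mask's (normalized) alpha channel
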
        # display the resulting frame
        cv2.imshow("image with mask overlay", result)

        # exit when the Esc key is pressed
        k = cv2.waitKey(1)
        if k == 27:
            break

    # when everything is done, release the capture
    capture.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()