Path: blob/master/FaceMaskOverlay/overlay_with_mask.py
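"""Overlay a transparent .png mask onto faces detected in a webcam stream.

Pipeline: an OpenCV DNN (Caffe SSD) face detector finds face boxes, an HRNet
model predicts facial landmarks for each detected face (the indices used below
assume the 68-point annotation scheme), and cv2.findHomography warps the mask
image onto the jawline landmarks before alpha-blending it into the frame. The
mask image must be accompanied by a .csv file of the same base name holding
the annotated source points.

Example invocation (file names are illustrative; substitute your own config,
checkpoint, and annotated mask image):
    python overlay_with_mask.py --cfg experiments/hrnet_config.yaml \
        --landmark_model hrnet_landmarks.pth --mask_image masks/mask.png
"""
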
import argparse
import csv
import os
import pprint
from collections import OrderedDict

import cv2
import numpy as np
import torch

import lib.models as models
from lib.config import (
    config,
    update_config,
)
from lib.core.evaluation import decode_preds
from lib.utils import utils
from lib.utils.transforms import crop


def parse_args():

    parser = argparse.ArgumentParser(description="Face Mask Overlay")

    parser.add_argument(
        "--cfg", help="experiment configuration filename", required=True, type=str,
    )
    parser.add_argument(
        "--landmark_model",
        help="path to model for landmarks extraction",
        required=True,
        type=str,
    )
    parser.add_argument(
        "--detector_model",
        help="path to detector model",
        type=str,
        default="detection/face_detector.prototxt",
    )
    parser.add_argument(
        "--detector_weights",
        help="path to detector weights",
        type=str,
        default="detection/face_detector.caffemodel",
    )
    parser.add_argument(
        "--mask_image", help="path to a .png file with a mask", required=True, type=str,
    )
    parser.add_argument("--device", default="cpu", help="device to run inference on")

    args = parser.parse_args()
    update_config(config, args)
    return args


def main():

    # parse script arguments
    args = parse_args()
    device = torch.device(args.device)

    # initialize logger
    logger, final_output_dir, tb_log_dir = utils.create_logger(config, args.cfg, "demo")

    # log arguments and config values
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # init landmark model
    model = models.get_face_alignment_net(config)

    # get input size from the config
    input_size = config.MODEL.IMAGE_SIZE

    # load model weights
    state_dict = torch.load(args.landmark_model, map_location=device)

    # remove the `module.` prefix that DataParallel adds to the pre-trained weights
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        name = key[7:]
        new_state_dict[name] = value

    # load weights without the prefix
    model.load_state_dict(new_state_dict)
    # run model on device in evaluation mode
    model = model.to(device)
    model.eval()

    # init mean and std values for the landmark model's input
    mean = np.array(config.MODEL.MEAN, dtype=np.float32)
    std = np.array(config.MODEL.STD, dtype=np.float32)

    # define prototxt and caffemodel paths
    detector_model = args.detector_model
    detector_weights = args.detector_weights

    # load the face detector
    detector = cv2.dnn.readNetFromCaffe(detector_model, detector_weights)
    capture = cv2.VideoCapture(0)

    frame_num = 0
    while True:
        # capture frame-by-frame
        success, frame = capture.read()

        # break if no frame
        if not success:
            break

        frame_num += 1
        print("frame_num: ", frame_num)
        landmarks_img = frame.copy()
        result = frame.copy()
        result = result.astype(np.float32) / 255.0

        # get frame's height and width
        height, width = frame.shape[:2]

        # resize and subtract BGR mean values, since Caffe uses BGR images for input
        blob = cv2.dnn.blobFromImage(
            frame, scalefactor=1.0, size=(300, 300), mean=(104.0, 177.0, 123.0),
        )
        # pass the blob through the network to detect faces
        detector.setInput(blob)
        # detector output format:
        # [image_id, class, confidence, left, top, right, bottom]
        face_detections = detector.forward()
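        # Note: the detector returns a tensor of shape [1, 1, N, 7]; each row holds
        # (image_id, class, confidence, x1, y1, x2, y2) with the box corners
        # normalized to [0, 1], which is why they are scaled by the frame size below.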
        # loop over the detections
        for i in range(0, face_detections.shape[2]):
            # extract confidence
            confidence = face_detections[0, 0, i, 2]

            # filter detections by confidence greater than the minimum threshold
            if confidence > 0.5:
                # get coordinates of the bounding box
                box = face_detections[0, 0, i, 3:7] * np.array(
                    [width, height, width, height],
                )
                (x1, y1, x2, y2) = box.astype("int")

                # show original image
                cv2.imshow("original image", frame)

                # crop to the detection and resize
                center = torch.Tensor([x1 + (x2 - x1) / 2, y1 + (y2 - y1) / 2])
                resized = crop(
                    frame,
                    center,
                    1.5,
                    tuple(input_size),
                )

                # convert from BGR to RGB since HRNet expects RGB input
                resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
                img = resized.astype(np.float32) / 255.0
                # normalize the landmark net input
                normalized_img = (img - mean) / std

                # predict face landmarks
                with torch.no_grad():
                    model_input = torch.Tensor(normalized_img.transpose([2, 0, 1]))
                    model_input = model_input.to(device)
                    output = model(model_input.unsqueeze(0))
                    score_map = output.data.cpu()
                    preds = decode_preds(
                        score_map,
                        [center],
                        [1.5],
                        score_map.shape[2:4],
                    )

                    preds = preds.squeeze(0)
                    landmarks = preds.cpu().numpy()

                # draw landmarks
                for k, landmark in enumerate(landmarks, 1):
                    landmarks_img = cv2.circle(
                        landmarks_img,
                        center=(int(landmark[0]), int(landmark[1])),
                        radius=3,
                        color=(0, 0, 255),
                        thickness=-1,
                    )
                    # draw landmarks' labels
                    landmarks_img = cv2.putText(
                        img=landmarks_img,
                        text=str(k),
                        org=(int(landmark[0]) + 5, int(landmark[1]) + 5),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.5,
                        color=(0, 0, 255),
                    )

                # show results by drawing predicted landmarks and their labels
                cv2.imshow("image with landmarks", landmarks_img)

                # take landmarks 2-16 and 30 (1-based numbering; the array itself
                # is 0-based) as destination points: the jawline plus a nose point
                dst_pts = np.vstack((landmarks[1:16], landmarks[29])).astype("float32")

                # load mask annotations from a csv file with the source points
                mask_annotation = os.path.splitext(os.path.basename(args.mask_image))[0]
                mask_annotation = os.path.join(
                    os.path.dirname(args.mask_image), mask_annotation + ".csv",
                )

                with open(mask_annotation) as csv_file:
                    csv_reader = csv.reader(csv_file, delimiter=",")
                    src_pts = []
                    for row in csv_reader:
                        # skip the header or empty lines if present
                        try:
                            src_pts.append(np.array([float(row[1]), float(row[2])]))
                        except ValueError:
                            continue
                    src_pts = np.array(src_pts, dtype="float32")

                # overlay the mask only if all landmarks have positive coordinates
                if (landmarks > 0).all():
                    # load the mask image with its alpha channel
                    mask_img = cv2.imread(args.mask_image, cv2.IMREAD_UNCHANGED)
                    mask_img = mask_img.astype(np.float32) / 255.0

                    # get the perspective transformation matrix that maps the
                    # annotated mask points onto the detected face landmarks
                    M, _ = cv2.findHomography(src_pts, dst_pts)

                    # warp the mask image into the frame's coordinate space
                    transformed_mask = cv2.warpPerspective(
                        mask_img,
                        M,
                        (result.shape[1], result.shape[0]),
                        None,
                        cv2.INTER_LINEAR,
                        cv2.BORDER_CONSTANT,
                    )
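                    # Per-pixel alpha compositing: the PNG's alpha channel acts
                    # as the blending weight, result = alpha * mask + (1 - alpha)
                    # * frame, so the mask is opaque where alpha is 1 and
                    # invisible where it is 0.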
                    # mask overlay
                    alpha_mask = transformed_mask[:, :, 3]
                    alpha_image = 1.0 - alpha_mask

                    for c in range(0, 3):
                        result[:, :, c] = (
                            alpha_mask * transformed_mask[:, :, c]
                            + alpha_image * result[:, :, c]
                        )

        # display the resulting frame
        cv2.imshow("image with mask overlay", result)

        # wait for the escape key to exit
        k = cv2.waitKey(1)
        if k == 27:
            break

    # when everything is done, release the capture
    capture.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()