-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
executable file
·139 lines (110 loc) · 5.1 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from utils import *
import numpy as np
from utils.metrics import *
import argparse
import onnxruntime as ort
from unidecode import unidecode
import cv2
from icecream import ic
from utils.audio import *
import os
from pygame import mixer
import time
def get_args():
    """Parse command-line options for the face-recognition check-in app.

    Returns:
        argparse.Namespace with audio_dir, url, enlarge, save_embeddings.
    """
    parser = argparse.ArgumentParser(description='Face Recognition')
    parser.add_argument('-a', '--audio-dir', type=str, default='audios',
                        help='Directory of audio files played on recognition')
    parser.add_argument('-u', '--url', type=str, default='',
                        help='Video stream URL/path; empty means default webcam')
    parser.add_argument('-e', '--enlarge', type=int, default=20,
                        help='Pixels added to every side of each detected face box')
    # fixed help text: original read "Directory to to load face embeddings"
    parser.add_argument('-s', '--save-embeddings', type=str, default='embedding_data',
                        help='Directory to load face embeddings from')
    args = parser.parse_args()
    return args
def preprocess(face):
    """Convert a face crop into the recognition model's input tensor.

    Resizes to 112x112, scales pixel values from [0, 255] to [-1, 1],
    and reorders HWC -> CHW with a leading batch axis.

    Returns:
        np.ndarray of shape (1, 3, 112, 112), dtype float32.
    """
    resized = cv2.resize(face, (112, 112)).astype(np.float32)
    normalized = (resized / 255.0 - 0.5) / 0.5
    chw = np.transpose(normalized, (2, 0, 1))
    return chw[np.newaxis]
class Detector:
    """Detect faces in a video stream, identify them against stored embeddings,
    and announce recognized people through audio playback.

    Detection uses OpenCV's res10 SSD Caffe model; recognition uses an
    ArcFace-style ONNX model producing embeddings compared with
    `most_similarity` against the pickled known-face database.
    """

    def __init__(self, enlarge, audios, save_embeddings, threshold=0.5, use_cuda=False):
        """
        Args:
            enlarge: pixels added to every side of each detected face box.
            audios: directory of audio files played on recognition.
            save_embeddings: directory holding embed_faces.pkl / labels.pkl.
            threshold: SSD detection-confidence cutoff.
            use_cuda: run the OpenCV detector on the CUDA backend.
        """
        self.emb_file = osp.join(save_embeddings, 'embed_faces.pkl')
        self.name_file = osp.join(save_embeddings, 'labels.pkl')
        self.known_face_embs = np.squeeze(load_pickle(self.emb_file), axis=1)
        self.known_names = load_pickle(self.name_file)
        self.thr = threshold
        self.enlarge = enlarge
        # CPUExecutionProvider fallback so construction succeeds on CPU-only
        # machines instead of failing when CUDA is unavailable.
        self.ort_sess = ort.InferenceSession(
            'checkpoints/webface_r50.onnx',
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        self.audio_dir = audios
        # load the SSD face-detection model
        self.faceModel = cv2.dnn.readNetFromCaffe(
            'checkpoints/res10_300x300_ssd_iter_140000.prototxt',
            caffeModel='checkpoints/res10_300x300_ssd_iter_140000.caffemodel')
        if use_cuda:
            self.faceModel.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            self.faceModel.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    def processFrame(self):
        """Run the SSD detector on self.img and return enlarged face boxes.

        Returns:
            list of [xmin, ymin, xmax, ymax] int boxes, mins clamped to >= 0.
        """
        bboxes = []
        # NOTE(review): mean (104.0, 107.0, 123.0) looks like a typo of the
        # (104.0, 177.0, 123.0) commonly used with this Caffe model — confirm
        # against the embedding-generation code before changing.
        blob = cv2.dnn.blobFromImage(self.img, 1.0, (300, 300),
                                     (104.0, 107.0, 123.0), swapRB=False, crop=False)
        self.faceModel.setInput(blob)
        predictions = self.faceModel.forward()
        for i in range(predictions.shape[2]):
            if predictions[0, 0, i, 2] > self.thr:
                # scale normalized coords back to pixel space
                bbox = predictions[0, 0, i, 3:7] * np.array(
                    [self.width, self.height, self.width, self.height])
                xmin, ymin, xmax, ymax = bbox.astype('int')
                # clamp mins to 0: a negative index would wrap the numpy slice
                # and produce a garbage (or empty) crop
                bboxes.append([max(0, xmin - self.enlarge),
                               max(0, ymin - self.enlarge),
                               xmax + self.enlarge,
                               ymax + self.enlarge])
        return bboxes

    def checkInVideo(self, mode):
        """Run the detect/recognize/announce loop on a video source.

        Args:
            mode: anything cv2.VideoCapture accepts (0 for webcam, path, URL).
        """
        pre_name = None
        cap = cv2.VideoCapture(mode)
        if not cap.isOpened():
            print('Failed to open video')
            return
        face = None
        while cap.isOpened():
            ret, self.img = cap.read()
            # original crashed here on stream end: self.img is None when
            # ret is False — bail out cleanly instead
            if not ret or self.img is None:
                break
            self.height, self.width = self.img.shape[:2]
            bboxes = self.processFrame()
            for bbox in bboxes:
                face = self.img[bbox[1]:bbox[3], bbox[0]:bbox[2]]
                # skip degenerate crops (zero height OR zero width)
                if face.shape[0] == 0 or face.shape[1] == 0:
                    break
                face_pr = preprocess(face)
                # embed the crop with the recognition model
                input_name = self.ort_sess.get_inputs()[0].name
                emb = self.ort_sess.run([], {input_name: face_pr})[0]
                max_sim, name = most_similarity(self.known_face_embs, emb, self.known_names)
                if max_sim < 0.25:
                    name = 'Người lạ'
                # only announce when the recognized person changes
                if pre_name != name:
                    play(self.audio_dir, name)
                pre_name = name
                name = unidecode(name)
                cv2.putText(self.img, name + f' {max_sim:.2f}',
                            (bbox[0], bbox[1] - 20), cv2.FONT_HERSHEY_PLAIN,
                            1, (255, 0, 255), 2)
                cv2.rectangle(self.img, bbox[:2], bbox[2:4], (255, 0, 255), 2)
            if face is not None and face.shape[0] != 0:
                cv2.imshow('Face', face)
            cv2.imshow('Check In Camera', self.img)
            # original 'q' only broke an inner loop and never quit; this exits
            if (cv2.waitKey(1) & 0xFF) == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    # Parse CLI options, make sure the audio directory exists, then start
    # the check-in loop on the requested source (webcam when no URL given).
    cfg = get_args()
    source = cfg.url if cfg.url else 0
    if not osp.exists(cfg.audio_dir):
        os.mkdir(cfg.audio_dir)
    detector = Detector(cfg.enlarge, cfg.audio_dir, cfg.save_embeddings)
    detector.checkInVideo(source)