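"""YOLOv5 object detection with TensorFlow 2.

Runs a YOLOv5 SavedModel on a folder of images, a single image or video
file, or the webcam; prints the detected classes and, when visualization
is enabled, draws bounding boxes and saves the results to ./detections/.

Example:
    python detect.py --source ./data/images/ --prob 0.5
"""
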
import argparse
import os
import time
from pathlib import Path

import cv2
import numpy as np
import tensorflow as tf
from pymediainfo import MediaInfo
from skimage.io import imread_collection

from utils.general import download_and_unzip, run_inference

default_model = './data/yolov5s_saved_model/'
window_name = 'Yolov5 object detection'
detection_folder = './detections/'

cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

def run_model(model, image_BGR, prob, thres):
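    """Preprocess a frame, run the YOLOv5 SavedModel on it, and return the
    filtered detections: indices, bounding boxes, confidences and class ids."""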
    # Preprocess: resize to 640x640, scale pixels to [0, 1], swap channel order.
    blob = cv2.dnn.blobFromImage(image_BGR, 1 / 255.0, (640, 640), swapRB=True, crop=False)
    # blobFromImage returns NCHW; the TensorFlow SavedModel expects NHWC.
    blob = np.transpose(blob, (0, 2, 3, 1))
    pred = model(blob)
    h, w = image_BGR.shape[:2]
    return run_inference(pred, prob, thres, h, w)

def print_results(results, class_numbers, labels, calctime):
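    """Print each detected object's label together with the inference time."""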
    if len(results) > 0:
        # time.time() differences are in seconds; report milliseconds.
        print(f'\nDETECTION DONE (in {calctime * 1000:.2f} ms):\n----------------\n')
        for counter, i in enumerate(results.flatten(), start=1):
            print(f'Object {counter}: {labels[int(class_numbers[i])]}')
    else:
        print('No objects detected.')

def draw_bounding_boxes(frame, results, bounding_boxes, confidences, class_numbers, labels, colours):
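    """Draw a coloured box and a 'label: confidence' caption for every detection."""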
    if len(results) > 0:
        for i in results.flatten():
            colour_box = colours[class_numbers[i]].tolist()
            # Draw the rectangle.
            xmin, ymin = bounding_boxes[i][0], bounding_boxes[i][1]
            xmax, ymax = bounding_boxes[i][2], bounding_boxes[i][3]
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), colour_box, 2)
            # Draw the label and confidence just above the box.
            text_box_current = '{}: {:.4f}'.format(labels[int(class_numbers[i])], confidences[i])
            cv2.putText(frame, text_box_current, (xmin, ymin - 5), cv2.FONT_HERSHEY_COMPLEX, 0.7, colour_box, 2)

def run_image(image, to_save_path, model, labels, prob, thres, visualize, colours):
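    """Detect objects on a single RGB image; optionally draw, display and save the result."""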
    t1 = time.time()
    results, bounding_boxes, confidences, class_numbers = run_model(model, image, prob, thres)
    t2 = time.time() - t1
    print_results(results, class_numbers, labels, t2)
    if visualize:
        # main() supplies images in RGB order; convert back to BGR for
        # OpenCV display and saving.
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        draw_bounding_boxes(image_bgr, results, bounding_boxes, confidences, class_numbers, labels, colours)
        h, w = image_bgr.shape[:2]
        cv2.resizeWindow(window_name, w, h)  # resizeWindow expects (width, height)
        cv2.imshow(window_name, image_bgr)
        cv2.imwrite(to_save_path, image_bgr)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

def run_video(cap, camera, model, labels, prob, thres, visualize, colours):
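    """Detect objects frame by frame on a video capture until the stream
    ends or 'q' is pressed. `camera` marks a live webcam stream."""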
    # Check if the video stream opened successfully.
    if not cap.isOpened():
        print('Error opening video stream or file')
        return
    while cap.isOpened():
        # Capture frame-by-frame; stop when no more frames can be read.
        ret, frame = cap.read()
        if not ret:
            break
        # Downscale webcam frames to speed up inference.
        if camera:
            frame = cv2.resize(frame, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
        # Apply the model on the frame.
        t1 = time.time()
        results, bounding_boxes, confidences, class_numbers = run_model(model, frame, prob, thres)
        t2 = time.time() - t1
        print_results(results, class_numbers, labels, t2)
        if visualize:
            draw_bounding_boxes(frame, results, bounding_boxes, confidences, class_numbers, labels, colours)
        # Display the resulting frame with the measured FPS.
        t3 = time.time() - t1
        fps = f'FPS: {1.0 / t3:.2f}'
        cv2.putText(frame, fps, (10, 25), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 0, 0), 2)
        cv2.imshow(window_name, frame)
        # Press Q on the keyboard to exit.
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
    # When everything is done, release the capture object and close all windows.
    cap.release()
    cv2.destroyAllWindows()

def main(
        saved_model=None,
        labels='./data/coco.names',
        source='./data/images/',
        prob=0.7,
        thres=0.3,
        visualize=True,
):
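    """Load the model and labels, then run detection on a folder of images,
    a single image or video file, or the webcam, depending on `source`."""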
    # Load model: fall back to the bundled YOLOv5s COCO model if none is given.
    if saved_model is None:
        if os.path.isdir(default_model):
            print('No saved model is given. Using the default Yolov5s COCO model.')
        else:
            print('No saved model is given. Downloading the Yolov5s COCO model.')
            download_and_unzip('https://github.com/Jeremy-vdw/Yolov5-tf2/releases/download/models/yolov5s_saved_model.zip', './data/')
        saved_model = default_model
    model = tf.saved_model.load(saved_model)
    # Load labels and assign a random colour to each class.
    with open(labels) as f:
        labels = [line.strip() for line in f]
    colours = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')
    # Make the detections folder.
    if not os.path.isdir(detection_folder):
        os.mkdir(detection_folder)
    if os.path.isdir(source):
        # A folder of images is used.
        pngs = source + '/*.png'
        jpgs = source + '/*.jpg'
        image_list = imread_collection([pngs, jpgs])  # skimage loads images as RGB
        for i, image in enumerate(image_list):
            run_image(image, detection_folder + Path(image_list.files[i]).name, model, labels,
                      prob, thres, visualize, colours)
    elif os.path.isfile(source):
        # A single file is used; MediaInfo tells images and videos apart.
        file_info = MediaInfo.parse(source)
        for track in file_info.tracks:
            if track.track_type == 'Image':
                # cv2.imread returns BGR; convert to the RGB order run_image expects.
                image = cv2.imread(source)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                run_image(image, detection_folder + Path(source).name, model, labels,
                          prob, thres, visualize, colours)
            elif track.track_type == 'Video':
                cap = cv2.VideoCapture(source)
                run_video(cap, False, model, labels, prob, thres, visualize, colours)
    elif source == '0':
        # The webcam is used.
        cap = cv2.VideoCapture(0)
        run_video(cap, True, model, labels, prob, thres, visualize, colours)

def parse_opt():
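    """Parse the command-line options for detect.py."""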
    parser = argparse.ArgumentParser()
    parser.add_argument('--saved_model', type=str, help='location where the saved model is stored.')
    parser.add_argument('--labels', type=str, default='./data/coco.names', help='location where dataset.names is stored.')
    parser.add_argument('--source', type=str, default='0', help='location where images or a video are stored. Use 0 for the webcam.')
    parser.add_argument('--prob', type=float, default=0.7, help='minimum probability to eliminate weak predictions.')
    parser.add_argument('--thres', type=float, default=0.3, help='threshold for filtering weak bounding boxes with NMS.')
    # type=bool would turn any non-empty string (even 'False') into True;
    # BooleanOptionalAction (Python 3.9+) gives --visualize / --no-visualize instead.
    parser.add_argument('--visualize', default=True, action=argparse.BooleanOptionalAction, help='draw bounding boxes or not.')
    opt = parser.parse_args()
    return opt

if __name__ == "__main__":
    opt = parse_opt()
    main(**vars(opt))