-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathYolov5_ObjectDetector.py
374 lines (278 loc) · 14 KB
/
Yolov5_ObjectDetector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
# -*- coding: utf-8 -*-
"""Untitled9.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1tEvpRdofXczFtES7g-NI9bea5yTa2ofI
"""
import os
import time
from collections import Counter
import cv2
import numpy as np
# YOLOv5 PyTorch HUB Inference (DetectionModels only)
import torch
import ultralytics
class Yolov5_ObjectDetector:
    """
    Object detection wrapper around a YOLOv5 model loaded from PyTorch Hub.

    Attributes:
        model_name (str): Name of the YOLOv5 model to be used.
        labels (list): Class labels, indexed by the model's class ids.
        colors (list): One color tuple per class, used when drawing boxes/labels.

    Methods:
        print_results(results):
            Print the detected classes and their counts.
        run_img_detection(img, verbose=True):
            Run detection on an image array; returns an (N, 6) detection array.
        run_img_detection_from_path(img_path, verbose=True):
            Run detection on an image file; returns an (N, 6) detection array.
        print_detections_on_image(detections, img):
            Draw bounding boxes, centroids and labels on the image.
        save_img_detection(img, save_dir, file_name):
            Save an annotated image under save_dir/<model_name>/.
        run_multiple_imgs_detection_from_path(images_path, save_dir):
            Run detection on every image in a folder and save the results.
        run_video_detection_from_path(video_path, save_dir, output_FPS=15, output_format='.avi', verbose=False):
            Run detection on a video file and save the annotated video.
    """

    def __init__(self, model_name, labels, colors,
                 conf=0.25, iou=0.45,
                 classes=None, max_det=1000):
        """
        Initialize the detector by loading the given YOLOv5 model from PyTorch Hub.

        Parameters:
        - model_name: str
            Name of a model available in the 'ultralytics/yolov5' repository.
        - labels: List[str]
            Class labels in the order of the model's output class indices.
        - colors: List[Tuple[int, int, int]]
            One color per class, used to draw bounding boxes and labels.
        - conf: float, optional (default=0.25)
            Confidence threshold; detections scoring below it are discarded.
        - iou: float, optional (default=0.45)
            IoU threshold used by non-maximum suppression.
        - classes: List[int], optional (default=None)
            Class indices to keep. None keeps all classes.
        - max_det: int, optional (default=1000)
            Maximum number of detections returned per image.
        """
        self.model = torch.hub.load('ultralytics/yolov5', model_name,
                                    force_reload=True, verbose=False)
        self.colors = colors
        self.model_name = model_name
        self.labels = labels
        # Inference-time settings are attributes of the hub model itself.
        self.model.conf = conf
        self.model.iou = iou
        self.model.classes = classes
        self.model.max_det = max_det

    def print_results(self, results):
        """
        Print the model name and the per-class counts for the given detections.

        Parameters:
        - results: array-like of shape (N, 6)
            Detection rows in the format [xmin, ymin, xmax, ymax, confidence, class_id].

        Returns:
        - None
        """
        # Map each detection's class id (column 5) to its label name.
        labels_list = [self.labels[int(r[5])] for r in results]
        # Count the occurrences of each label.
        counter_dict = Counter(labels_list)
        print(f"model : {self.model_name}")
        print("Detected objects : ", end='')
        for key, count in counter_dict.items():
            print(f"{key} : {count} ", end=' ')
        print()

    def run_img_detection(self, img: np.ndarray, verbose=True):
        """
        Run object detection on an image array.

        Parameters:
        - img: np.ndarray
            3-dimensional image array with 3 color channels.
        - verbose: bool, optional (default=True)
            If True, print the image shape and a detection summary.

        Returns:
        - results: np.ndarray of shape (N, 6)
            Rows in the format [xmin, ymin, xmax, ymax, confidence, class_id].

        Raises:
        - ValueError: if img is not a 3-dimensional array with 3 channels.
        """
        if img.ndim != 3 or img.shape[2] != 3:
            raise ValueError("input img should be a 3-dimensional numpy array with 3 colors")
        results = self.model(img)
        # xyxy[0] holds the detections of the first (only) image in the batch.
        results = np.array(results.xyxy[0])
        if verbose:
            print("----------------------------")
            print(f"image shape : {img.shape}")
            self.print_results(results)
        return results

    def run_img_detection_from_path(self, img_path, verbose=True):
        """
        Run object detection on an image file.

        Parameters:
        - img_path: str
            Path to the input image file.
        - verbose: bool, optional (default=True)
            If True, print information about the detected objects.

        Returns:
        - results: np.ndarray of shape (N, 6)
            Detection rows [xmin, ymin, xmax, ymax, confidence, class_id].

        Raises:
        - ValueError: if the path does not exist or the file cannot be read as an image.
        """
        if not os.path.exists(img_path):
            raise ValueError(f"File path {img_path} does not exist.")
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None (without raising) for unreadable or
            # non-image files; fail loudly instead of crashing later.
            raise ValueError(f"File {img_path} could not be read as an image.")
        img_name = os.path.basename(img_path)
        if verbose:
            print("\n----------------------------")
            print(f"{img_name} :")
        results = self.run_img_detection(img, verbose)
        return results

    def print_detections_on_image(self, detections: np.ndarray, img: np.ndarray):
        """
        Draw bounding boxes, centroids and class labels on the image.

        Parameters:
        - detections: np.ndarray of shape (num_detections, 6)
            Rows in the format [xmin, ymin, xmax, ymax, confidence, class_id].
        - img: np.ndarray
            Input image as a 3-dimensional array.

        Returns:
        - im: np.ndarray
            The image with boxes, centroids and labels drawn on it. When
            `detections` is empty, the input image is returned unchanged.

        Raises:
        - ValueError: if img is not a 3-dimensional array.
        """
        if img.ndim != 3:
            raise ValueError("input img should be a 3-dimensional numpy array")
        # Bounding-box thickness scales with the image size.
        bbx_thickness = (img.shape[0] + img.shape[1]) // 500
        # Start from the input image: the original code left `im` unbound
        # (UnboundLocalError) when there were no detections.
        im = img
        for r in detections:
            # Extract the confidence score before truncating to ints.
            score = r[4] * 100
            r = r.astype(int)
            class_id = r[5]
            # Font scale grows with the relative size of the detected object.
            fontScale = (((r[2] - r[0]) / img.shape[0]) + ((r[3] - r[1]) / img.shape[1])) / 2 * 1.5
            # Label text above the box, then the box itself, then its centroid.
            im = cv2.putText(im, f"{self.labels[class_id]} {score:,.2f}%",
                             (r[0], r[1] - 5), cv2.FONT_HERSHEY_COMPLEX,
                             fontScale, self.colors[class_id], 1, cv2.LINE_AA)
            im = cv2.rectangle(im, (r[0], r[1]), (r[2], r[3]), self.colors[class_id], bbx_thickness)
            center_coordinates = ((r[2] + r[0]) // 2, (r[3] + r[1]) // 2)
            im = cv2.circle(im, center_coordinates, 2, (0, 0, 255), -1)
        return im

    def save_img_detection(self, img, save_dir, file_name):
        """
        Save an annotated image as save_dir/<model_name>/<file_name>.

        Parameters:
        - img: np.ndarray
            The image with detections drawn on it.
        - save_dir: str
            Root directory where results are stored.
        - file_name: str
            Name of the resulting image file.

        Returns:
        - None

        Raises:
        - ValueError: if save_dir does not exist.
        """
        if not os.path.exists(save_dir):
            raise ValueError(f"File path {save_dir} does not exist.")
        # Save resulting images in a per-model sub-folder.
        save_file = os.path.join(save_dir, self.model_name, file_name)
        # cv2.imwrite fails silently when the target folder is missing,
        # so make sure the per-model sub-folder exists first.
        os.makedirs(os.path.dirname(save_file), exist_ok=True)
        print(f"Saving Detection Results of {file_name} to {save_file}")
        cv2.imwrite(save_file, img)

    def run_multiple_imgs_detection_from_path(self, images_path, save_dir):
        """
        Run object detection on every image in a folder and save the annotated
        results under save_dir/<model_name>/.

        Args:
            images_path (str): Path to the folder containing the images.
            save_dir (str): Root folder where the results will be saved.

        Raises:
            ValueError: If images_path does not exist.

        Returns:
            None
        """
        # The docstring always promised this check; enforce it.
        if not os.path.exists(images_path):
            raise ValueError(f"File path {images_path} does not exist.")
        for img_name in os.listdir(images_path):
            img_path = os.path.join(images_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # Skip sub-directories and non-image files instead of crashing.
                print(f"Skipping unreadable entry : {img_path}")
                continue
            results = self.run_img_detection_from_path(img_path)
            img = self.print_detections_on_image(results, img)
            self.save_img_detection(img, save_dir, img_name)

    def run_video_detection_from_path(self, video_path, save_dir, output_FPS=15,
                                      output_format='.avi', verbose=False):
        """
        Run object detection on each frame of a video and save the annotated
        video in save_dir, encoded as MJPG.

        Args:
            video_path (str): Path to the input video file.
            save_dir (str): Directory where the resulting video file is saved.
            output_FPS (int, optional): Frames per second of the output video.
                Default is 15.
            output_format (str, optional): File extension of the output video.
                Default is '.avi'.
            verbose (bool, optional): Whether to print additional information.
                Default is False.

        Raises:
            ValueError: If video_path does not exist.
        """
        if not os.path.exists(video_path):
            raise ValueError(f"File path {video_path} does not exist.")
        # Open the input video file.
        cap = cv2.VideoCapture(video_path)
        # Check the stream BEFORE querying its properties or creating the
        # writer (the original checked only after both).
        if not cap.isOpened():
            print("Error opening video stream or file")
            cap.release()
            return
        print("\n----------------------------")
        vid_name = os.path.basename(video_path)
        print(vid_name, end=' : ')
        # Frame dimensions of the input video.
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print((width, height))
        print(video_path)
        print(self.model_name)
        # Output file keeps the input name with the requested extension.
        save_file = os.path.join(save_dir, os.path.splitext(vid_name)[0] + output_format)
        print('saving to :' + save_file)
        out = cv2.VideoWriter(save_file,
                              cv2.VideoWriter_fourcc(*"MJPG"),
                              output_FPS, (width, height))
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                # Stop at end of stream or on a read error.
                if not ret:
                    print("Error reading frame")
                    break
                # Run detection on the frame and measure the effective FPS.
                beg = time.time()
                results = self.run_img_detection(frame, verbose=False)
                fps = 1 / (time.time() - beg)
                # Overlay the measured FPS in the top-left corner.
                frame = cv2.putText(frame, f"FPS : {fps:,.2f}",
                                    (5, 15), cv2.FONT_HERSHEY_COMPLEX,
                                    0.5, (0, 0, 255), 1, cv2.LINE_AA)
                # Overlay the detections, then append the frame to the output.
                frame = self.print_detections_on_image(results, frame)
                out.write(frame)
                # 'q' quits early (only meaningful when a HighGUI window is open).
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release both handles even if detection raises mid-video.
            cap.release()
            out.release()