-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathYolov5_ObjectDetector.py
374 lines (278 loc) · 14 KB
/
Yolov5_ObjectDetector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
# -*- coding: utf-8 -*-
"""Untitled9.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1tEvpRdofXczFtES7g-NI9bea5yTa2ofI
"""
import os
import time
from collections import Counter
import cv2
import numpy as np
# YOLOv5 PyTorch HUB Inference (DetectionModels only)
import torch
import ultralytics
class Yolov5_ObjectDetector:
    """
    Object detection wrapper around a YOLOv5 model loaded from PyTorch Hub.

    Attributes:
        model_name (str): Name of the YOLOv5 model to be used.
        labels (list): Class labels, indexed by the model's class ids.
        colors (list): One color tuple per class, used when drawing boxes/labels.

    Methods:
        print_results(results):
            Print the detected classes and their counts.
        run_img_detection(img, verbose=True):
            Run detection on an image array; returns an (N, 6) detection array.
        run_img_detection_from_path(img_path, verbose=True):
            Run detection on an image file; returns an (N, 6) detection array.
        print_detections_on_image(detections, img):
            Draw bounding boxes, centroids and labels on the image.
        save_img_detection(img, save_dir, file_name):
            Save an annotated image under save_dir/<model_name>/.
        run_multiple_imgs_detection_from_path(images_path, save_dir):
            Run detection on every image in a folder and save the results.
        run_video_detection_from_path(video_path, save_dir, output_FPS=15, output_format='.avi', verbose=False):
            Run detection on a video file and save the annotated video.
    """

    def __init__(self, model_name, labels, colors,
                 conf=0.25, iou=0.45,
                 classes=None, max_det=1000):
        """
        Initialize the detector by loading the given YOLOv5 model from PyTorch Hub.

        Parameters:
        - model_name: str
            Name of a model available in the 'ultralytics/yolov5' repository.
        - labels: List[str]
            Class labels in the order of the model's output class indices.
        - colors: List[Tuple[int, int, int]]
            One color per class, used to draw bounding boxes and labels.
        - conf: float, optional (default=0.25)
            Confidence threshold; detections scoring below it are discarded.
        - iou: float, optional (default=0.45)
            IoU threshold used by non-maximum suppression.
        - classes: List[int], optional (default=None)
            Class indices to keep. None keeps all classes.
        - max_det: int, optional (default=1000)
            Maximum number of detections returned per image.
        """
        self.model = torch.hub.load('ultralytics/yolov5', model_name,
                                    force_reload=True, verbose=False)
        self.colors = colors
        self.model_name = model_name
        self.labels = labels
        # Inference-time settings are attributes of the hub model itself.
        self.model.conf = conf
        self.model.iou = iou
        self.model.classes = classes
        self.model.max_det = max_det

    def print_results(self, results):
        """
        Print the model name and the per-class counts for the given detections.

        Parameters:
        - results: array-like of shape (N, 6)
            Detection rows in the format [xmin, ymin, xmax, ymax, confidence, class_id].

        Returns:
        - None
        """
        # Map each detection's class id (column 5) to its label name.
        labels_list = [self.labels[int(r[5])] for r in results]
        # Count the occurrences of each label.
        counter_dict = Counter(labels_list)
        print(f"model : {self.model_name}")
        print("Detected objects : ", end='')
        for key, count in counter_dict.items():
            print(f"{key} : {count} ", end=' ')
        print()

    def run_img_detection(self, img: np.ndarray, verbose=True):
        """
        Run object detection on an image array.

        Parameters:
        - img: np.ndarray
            3-dimensional image array with 3 color channels.
        - verbose: bool, optional (default=True)
            If True, print the image shape and a detection summary.

        Returns:
        - results: np.ndarray of shape (N, 6)
            Rows in the format [xmin, ymin, xmax, ymax, confidence, class_id].

        Raises:
        - ValueError: if img is not a 3-dimensional array with 3 channels.
        """
        if img.ndim != 3 or img.shape[2] != 3:
            raise ValueError("input img should be a 3-dimensional numpy array with 3 colors")
        results = self.model(img)
        # xyxy[0] holds the detections of the first (only) image in the batch.
        results = np.array(results.xyxy[0])
        if verbose:
            print("----------------------------")
            print(f"image shape : {img.shape}")
            self.print_results(results)
        return results

    def run_img_detection_from_path(self, img_path, verbose=True):
        """
        Run object detection on an image file.

        Parameters:
        - img_path: str
            Path to the input image file.
        - verbose: bool, optional (default=True)
            If True, print information about the detected objects.

        Returns:
        - results: np.ndarray of shape (N, 6)
            Detection rows [xmin, ymin, xmax, ymax, confidence, class_id].

        Raises:
        - ValueError: if the path does not exist or the file cannot be read as an image.
        """
        if not os.path.exists(img_path):
            raise ValueError(f"File path {img_path} does not exist.")
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None (without raising) for unreadable or
            # non-image files; fail loudly instead of crashing later.
            raise ValueError(f"File {img_path} could not be read as an image.")
        img_name = os.path.basename(img_path)
        if verbose:
            print("\n----------------------------")
            print(f"{img_name} :")
        results = self.run_img_detection(img, verbose)
        return results

    def print_detections_on_image(self, detections: np.ndarray, img: np.ndarray):
        """
        Draw bounding boxes, centroids and class labels on the image.

        Parameters:
        - detections: np.ndarray of shape (num_detections, 6)
            Rows in the format [xmin, ymin, xmax, ymax, confidence, class_id].
        - img: np.ndarray
            Input image as a 3-dimensional array.

        Returns:
        - im: np.ndarray
            The image with boxes, centroids and labels drawn on it. When
            `detections` is empty, the input image is returned unchanged.

        Raises:
        - ValueError: if img is not a 3-dimensional array.
        """
        if img.ndim != 3:
            raise ValueError("input img should be a 3-dimensional numpy array")
        # Bounding-box thickness scales with the image size.
        bbx_thickness = (img.shape[0] + img.shape[1]) // 500
        # Start from the input image: the original code left `im` unbound
        # (UnboundLocalError) when there were no detections.
        im = img
        for r in detections:
            # Extract the confidence score before truncating to ints.
            score = r[4] * 100
            r = r.astype(int)
            class_id = r[5]
            # Font scale grows with the relative size of the detected object.
            fontScale = (((r[2] - r[0]) / img.shape[0]) + ((r[3] - r[1]) / img.shape[1])) / 2 * 1.5
            # Label text above the box, then the box itself, then its centroid.
            im = cv2.putText(im, f"{self.labels[class_id]} {score:,.2f}%",
                             (r[0], r[1] - 5), cv2.FONT_HERSHEY_COMPLEX,
                             fontScale, self.colors[class_id], 1, cv2.LINE_AA)
            im = cv2.rectangle(im, (r[0], r[1]), (r[2], r[3]), self.colors[class_id], bbx_thickness)
            center_coordinates = ((r[2] + r[0]) // 2, (r[3] + r[1]) // 2)
            im = cv2.circle(im, center_coordinates, 2, (0, 0, 255), -1)
        return im

    def save_img_detection(self, img, save_dir, file_name):
        """
        Save an annotated image as save_dir/<model_name>/<file_name>.

        Parameters:
        - img: np.ndarray
            The image with detections drawn on it.
        - save_dir: str
            Root directory where results are stored.
        - file_name: str
            Name of the resulting image file.

        Returns:
        - None

        Raises:
        - ValueError: if save_dir does not exist.
        """
        if not os.path.exists(save_dir):
            raise ValueError(f"File path {save_dir} does not exist.")
        # Save resulting images in a per-model sub-folder.
        save_file = os.path.join(save_dir, self.model_name, file_name)
        # cv2.imwrite fails silently when the target folder is missing,
        # so make sure the per-model sub-folder exists first.
        os.makedirs(os.path.dirname(save_file), exist_ok=True)
        print(f"Saving Detection Results of {file_name} to {save_file}")
        cv2.imwrite(save_file, img)

    def run_multiple_imgs_detection_from_path(self, images_path, save_dir):
        """
        Run object detection on every image in a folder and save the annotated
        results under save_dir/<model_name>/.

        Args:
            images_path (str): Path to the folder containing the images.
            save_dir (str): Root folder where the results will be saved.

        Raises:
            ValueError: If images_path does not exist.

        Returns:
            None
        """
        # The docstring always promised this check; enforce it.
        if not os.path.exists(images_path):
            raise ValueError(f"File path {images_path} does not exist.")
        for img_name in os.listdir(images_path):
            img_path = os.path.join(images_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # Skip sub-directories and non-image files instead of crashing.
                print(f"Skipping unreadable entry : {img_path}")
                continue
            results = self.run_img_detection_from_path(img_path)
            img = self.print_detections_on_image(results, img)
            self.save_img_detection(img, save_dir, img_name)

    def run_video_detection_from_path(self, video_path, save_dir, output_FPS=15,
                                      output_format='.avi', verbose=False):
        """
        Run object detection on each frame of a video and save the annotated
        video in save_dir, encoded as MJPG.

        Args:
            video_path (str): Path to the input video file.
            save_dir (str): Directory where the resulting video file is saved.
            output_FPS (int, optional): Frames per second of the output video.
                Default is 15.
            output_format (str, optional): File extension of the output video.
                Default is '.avi'.
            verbose (bool, optional): Whether to print additional information.
                Default is False.

        Raises:
            ValueError: If video_path does not exist.
        """
        if not os.path.exists(video_path):
            raise ValueError(f"File path {video_path} does not exist.")
        # Open the input video file.
        cap = cv2.VideoCapture(video_path)
        # Check the stream BEFORE querying its properties or creating the
        # writer (the original checked only after both).
        if not cap.isOpened():
            print("Error opening video stream or file")
            cap.release()
            return
        print("\n----------------------------")
        vid_name = os.path.basename(video_path)
        print(vid_name, end=' : ')
        # Frame dimensions of the input video.
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print((width, height))
        print(video_path)
        print(self.model_name)
        # Output file keeps the input name with the requested extension.
        save_file = os.path.join(save_dir, os.path.splitext(vid_name)[0] + output_format)
        print('saving to :' + save_file)
        out = cv2.VideoWriter(save_file,
                              cv2.VideoWriter_fourcc(*"MJPG"),
                              output_FPS, (width, height))
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                # Stop at end of stream or on a read error.
                if not ret:
                    print("Error reading frame")
                    break
                # Run detection on the frame and measure the effective FPS.
                beg = time.time()
                results = self.run_img_detection(frame, verbose=False)
                fps = 1 / (time.time() - beg)
                # Overlay the measured FPS in the top-left corner.
                frame = cv2.putText(frame, f"FPS : {fps:,.2f}",
                                    (5, 15), cv2.FONT_HERSHEY_COMPLEX,
                                    0.5, (0, 0, 255), 1, cv2.LINE_AA)
                # Overlay the detections, then append the frame to the output.
                frame = self.print_detections_on_image(results, frame)
                out.write(frame)
                # 'q' quits early (only meaningful when a HighGUI window is open).
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release both handles even if detection raises mid-video.
            cap.release()
            out.release()