From d0a3324111c56976baec4a5e5507ad05193fa4a1 Mon Sep 17 00:00:00 2001
From: TomDarmon <36815861+TomDarmon@users.noreply.github.com>
Date: Thu, 30 Nov 2023 14:10:27 +0100
Subject: [PATCH] feat: create example folder (#27)

Co-authored-by: TomDarmon
Co-authored-by: tristanpepinartefact
---
 .gitignore                                   |  10 +-
 README.md                                    |  10 +
 bin/download_sample_sequences.sh             |  22 --
 docs/index.md                                |  20 +-
 lib/.gitkeep => examples/norfair/README.md   |   0
 examples/norfair/lib/.gitkeep                |   0
 {lib => examples/norfair/lib}/bbox/utils.py  |   0
 .../norfair/lib}/norfair_helper/utils.py     |   3 +-
 .../norfair/lib}/norfair_helper/video.py     |   3 +-
 {lib => examples/norfair/lib}/sequence.py    |   0
 examples/norfair/norfair_starter_kit.ipynb   | 284 ++++++++++++++
 examples/norfair/requirements.txt            |   1 +
 examples/trackreid/data/.gitkeep             |   0
 examples/trackreid/frames/.gitkeep           |   0
 examples/trackreid/requirements.txt          |   5 +
 examples/trackreid/starter_kit_reid.ipynb    | 356 ++++++++++++++++++
 examples/trackreid/utils.py                  |  70 ++++
 examples/trackreid/videos/.gitkeep           |   0
 18 files changed, 753 insertions(+), 31 deletions(-)
 delete mode 100644 bin/download_sample_sequences.sh
 rename lib/.gitkeep => examples/norfair/README.md (100%)
 create mode 100644 examples/norfair/lib/.gitkeep
 rename {lib => examples/norfair/lib}/bbox/utils.py (100%)
 rename {lib => examples/norfair/lib}/norfair_helper/utils.py (99%)
 rename {lib => examples/norfair/lib}/norfair_helper/video.py (99%)
 rename {lib => examples/norfair/lib}/sequence.py (100%)
 create mode 100644 examples/norfair/norfair_starter_kit.ipynb
 create mode 100644 examples/norfair/requirements.txt
 create mode 100644 examples/trackreid/data/.gitkeep
 create mode 100644 examples/trackreid/frames/.gitkeep
 create mode 100644 examples/trackreid/requirements.txt
 create mode 100644 examples/trackreid/starter_kit_reid.ipynb
 create mode 100644 examples/trackreid/utils.py
 create mode 100644 examples/trackreid/videos/.gitkeep

diff --git a/.gitignore b/.gitignore
index fe7c188..d99842d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -138,10 +138,12 @@ secrets/*
 
 .DS_Store
 
-# Data ignore everythin data/detections and data/frames
-data/detections/*
-data/frames/*
+# Ignore everything in data/ and large files
+data/
 *.mp4
-*.txt
+*.pt
 
 # poetry
 poetry.lock
+
+
+*.png
\ No newline at end of file
diff --git a/README.md b/README.md
index f65be64..37a190e 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,16 @@ This Git repository is dedicated to the development of a Python library aimed at
 - [Documentation](#documentation)
 - [Repository Structure](#repository-structure)
 
+## trackreid + bytetrack VS bytetrack
+
+<!-- side-by-side demo media: bytetrack alone vs. bytetrack + trackreid -->
+
+
 ## Installation
 
 First, install poetry:
diff --git a/bin/download_sample_sequences.sh b/bin/download_sample_sequences.sh
deleted file mode 100644
index b7e5b14..0000000
--- a/bin/download_sample_sequences.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-
-N_SEQUENCES=5
-
-sequences_detections=$(gsutil ls gs://data-track-reid/detections | head -$N_SEQUENCES)
-sequences_frames=$(gsutil ls gs://data-track-reid/frames | head -$N_SEQUENCES)
-
-# remove first sequence which is the bucket name
-sequences_detections=$(echo "$sequences_detections" | tail -n +2)
-sequences_frames=$(echo "$sequences_frames" | tail -n +2)
-
-mkdir -p data/detections
-mkdir -p data/frames
-
-# download the sequences to data/detections and data/frames
-for sequence in $sequences_detections; do
-    gsutil -m cp -r $sequence data/detections
-done
-
-for sequence in $sequences_frames; do
-    gsutil -m cp -r $sequence data/frames
-done
diff --git a/docs/index.md b/docs/index.md
index df25491..88989ef 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,3 +1,21 @@
 # Welcome to the documentation
 
-For more information, make sure to check the [Material for MkDocs documentation](https://squidfunk.github.io/mkdocs-material/getting-started/)
+This repository aims to implement a modular library for correcting tracking results. By tracking, we mean:
+
+- On a sequence of images, an initial detection algorithm (e.g., YOLO, Fast R-CNN) is applied upstream.
+- A tracking algorithm (e.g., Bytetrack, Strongsort) is then applied to the detections, with the aim of assigning a unique ID to each distinct object and maintaining that ID throughout the image sequence.
+
+Overall, state-of-the-art (SOTA) tracking algorithms perform well on objects moving at constant speed whose detections change little (bounding boxes of relatively constant shape), which does not match many real use cases. In practice, we end up with many ID switches and far too many unique IDs compared to the number of distinct objects. We therefore propose a library that re-matches IDs on top of an existing tracking result, reassigning object IDs to ensure uniqueness.
+
+Here is an example of the track-reid library used to correct juggling-ball tracking results on a short video; a minimal usage sketch follows the demos below.
+
+<!-- demo videos -->
+
+*Bytetrack x yolov8l, 42 tracked objects*
+
+*Bytetrack x yolov8l + track-reid, 4 tracked objects*
+
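+At a high level, the library is a post-processing step on top of any tracker. Below is a minimal sketch of that loop; the `ReidProcessor` import, arguments, and `update` calls match the starter kit in `examples/trackreid`, while `frames`, `detector`, and `tracker` are placeholders for your own detection and tracking setup:
+
+```python
+from trackreid.reid_processor import ReidProcessor
+
+reid_processor = ReidProcessor(
+    filter_confidence_threshold=0.15,
+    filter_time_threshold=1,
+    cost_function_threshold=5000,  # max cost to rematch 2 objects
+    max_attempt_to_match=1,
+    max_frames_to_rematch=200,
+)
+
+for frame_id, frame in enumerate(frames):
+    detections = detector(frame)                             # placeholder: your detection model
+    tracked_objects = tracker.update(detections, frame_id)   # placeholder: e.g. bytetrack
+    corrected = reid_processor.update(tracked_objects, frame_id)  # re-matched IDs
+```
+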
+
+For more insight on how to get started, please refer to [this guide for users](quickstart_user.md), or [this guide for developers](quickstart_dev.md).
diff --git a/lib/.gitkeep b/examples/norfair/README.md
similarity index 100%
rename from lib/.gitkeep
rename to examples/norfair/README.md
diff --git a/examples/norfair/lib/.gitkeep b/examples/norfair/lib/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/lib/bbox/utils.py b/examples/norfair/lib/bbox/utils.py
similarity index 100%
rename from lib/bbox/utils.py
rename to examples/norfair/lib/bbox/utils.py
diff --git a/lib/norfair_helper/utils.py b/examples/norfair/lib/norfair_helper/utils.py
similarity index 99%
rename from lib/norfair_helper/utils.py
rename to examples/norfair/lib/norfair_helper/utils.py
index aff22c8..7e95f5a 100644
--- a/lib/norfair_helper/utils.py
+++ b/examples/norfair/lib/norfair_helper/utils.py
@@ -2,9 +2,8 @@
 import cv2
 import numpy as np
 
-from norfair import Detection, get_cutout
-
 from lib.bbox.utils import rescale_bbox, xy_center_to_xyxy
+from norfair import Detection, get_cutout
 
 
 def yolo_to_norfair_detection(
diff --git a/lib/norfair_helper/video.py b/examples/norfair/lib/norfair_helper/video.py
similarity index 99%
rename from lib/norfair_helper/video.py
rename to examples/norfair/lib/norfair_helper/video.py
index d84e09d..f99d5d8 100644
--- a/lib/norfair_helper/video.py
+++ b/examples/norfair/lib/norfair_helper/video.py
@@ -1,9 +1,8 @@
 import cv2
 import numpy as np
 
-from norfair import Tracker, draw_boxes
-
 from lib.norfair_helper.utils import compute_embeddings, yolo_to_norfair_detection
 from lib.sequence import Sequence
+from norfair import Tracker, draw_boxes
 
 
 def generate_tracking_video(
diff --git a/lib/sequence.py b/examples/norfair/lib/sequence.py
similarity index 100%
rename from lib/sequence.py
rename to examples/norfair/lib/sequence.py
diff --git a/examples/norfair/norfair_starter_kit.ipynb b/examples/norfair/norfair_starter_kit.ipynb
new file mode 100644
index 0000000..c6e743f
--- /dev/null
+++ b/examples/norfair/norfair_starter_kit.ipynb
@@ -0,0 +1,284 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# WIP NOT WORKING"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Value proposition of norfair"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Norfair is a customizable, lightweight Python library for real-time multi-object tracking.\n",
+    "Using Norfair, you can add tracking capabilities to any detector with just a few lines of code.\n",
+    "\n",
+    "This means you don't need a SOTA tracker: you can use a basic tracker with a Kalman filter and add any custom logic you want.\n",
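+    "\n",
+    "As a sketch, the core loop looks like this (assuming an iterable of `frames` and a hypothetical `detector` wrapper returning norfair `Detection` objects; the cells below build the real pipeline from files, and `Tracker` is imported further down):\n",
+    "\n",
+    "```python\n",
+    "tracker = Tracker(distance_function=\"mean_euclidean\", distance_threshold=40)\n",
+    "for frame in frames:\n",
+    "    detections = detector(frame)  # hypothetical detector wrapper\n",
+    "    tracked_objects = tracker.update(detections=detections)\n",
+    "```"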
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Imports and setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys; sys.path.append('.')\n",
+    "import os\n",
+    "\n",
+    "import cv2\n",
+    "from norfair import Tracker, OptimizedKalmanFilterFactory\n",
+    "\n",
+    "from lib.sequence import Sequence\n",
+    "from lib.norfair_helper.video import generate_tracking_video\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you want to test this code on your own detections and frames, you can use the following code, provided you structure the data as follows:\n",
+    "\n",
+    "```\n",
+    "data/\n",
+    "    ├── detections/\n",
+    "    │   └── sequence_1/\n",
+    "    │       └── detections_1.txt\n",
+    "    └── frames/\n",
+    "        └── sequence_1/\n",
+    "            └── frame_1.jpg\n",
+    "```\n",
+    "\n",
+    "Each detections .txt file contains one row per detection, with coordinates scaled between 0 and 1:\n",
+    "\n",
+    "```\n",
+    "class_id x_center y_center width height confidence\n",
+    "```\n",
+    "\n",
+    "If this is not the case, you'll need to adapt this code to your data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DATA_PATH = \"../data\"\n",
+    "DETECTION_PATH = f\"{DATA_PATH}/detections\"\n",
+    "FRAME_PATH = f\"{DATA_PATH}/frames\"\n",
+    "VIDEO_OUTPUT_PATH = \"private\"\n",
+    "\n",
+    "SEQUENCES = os.listdir(FRAME_PATH)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_sequence_frames(sequence):\n",
+    "    frames = os.listdir(f\"{FRAME_PATH}/{sequence}\")\n",
+    "    frames = [os.path.join(f\"{FRAME_PATH}/{sequence}\", frame) for frame in frames]\n",
+    "    frames.sort()\n",
+    "    return frames\n",
+    "\n",
+    "def get_sequence_detections(sequence):\n",
+    "    detections = os.listdir(f\"{DETECTION_PATH}/{sequence}\")\n",
+    "    detections = [os.path.join(f\"{DETECTION_PATH}/{sequence}\", detection) for detection in detections]\n",
+    "    detections.sort()\n",
+    "    return detections\n",
+    "\n",
+    "frame_path = get_sequence_frames(SEQUENCES[3])\n",
+    "test_sequence = Sequence(frame_path)\n",
+    "test_sequence"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_sequence"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Basic Usage of Norfair"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Tracker\n",
+    "\n",
+    "Norfair's `Tracker` is the customizable object that tracks detections.\n",
+    "Norfair expects a distance function that serves as the metric for matching tracked objects with new detections. You can create your own distance metric or use one of the built-in ones, such as Euclidean distance or IoU.\n",
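+    "\n",
+    "For illustration, a custom distance function can be as simple as the following sketch, using norfair's standard `detection.points` and `tracked_object.estimate` interface:\n",
+    "\n",
+    "```python\n",
+    "import numpy as np\n",
+    "\n",
+    "def euclidean_distance(detection, tracked_object):\n",
+    "    # Distance between the detected points and the tracker's current estimate\n",
+    "    return np.linalg.norm(detection.points - tracked_object.estimate)\n",
+    "```"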
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize a tracker with the distance function\n",
+    "basic_tracker = Tracker(\n",
+    "    distance_function=\"mean_euclidean\",\n",
+    "    distance_threshold=40,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Basic tracking"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "video_path = generate_tracking_video(\n",
+    "    sequence=test_sequence,\n",
+    "    tracker=basic_tracker,\n",
+    "    frame_size=(2560, 1440),\n",
+    "    output_path=os.path.join(VIDEO_OUTPUT_PATH, \"basic_tracking.mp4\"),\n",
+    "    add_embedding=False,\n",
+    ")\n",
+    "video_path"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Advanced tracking"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def always_match(new_object, unmatched_object):\n",
+    "    return 0  # ALWAYS MATCH\n",
+    "\n",
+    "\n",
+    "def embedding_distance(matched_not_init_trackers, unmatched_trackers):\n",
+    "    snd_embedding = unmatched_trackers.last_detection.embedding\n",
+    "\n",
+    "    # Find the last non-empty embedding if the current one is None\n",
+    "    if snd_embedding is None:\n",
+    "        snd_embedding = next((detection.embedding for detection in reversed(unmatched_trackers.past_detections) if detection.embedding is not None), None)\n",
+    "\n",
+    "    if snd_embedding is None:\n",
+    "        return 1  # No match if no embedding is found\n",
+    "\n",
+    "    # Iterate over past detections and calculate distance\n",
+    "    for detection_fst in matched_not_init_trackers.past_detections:\n",
+    "        if detection_fst.embedding is not None:\n",
+    "            distance = 1 - cv2.compareHist(snd_embedding, detection_fst.embedding, cv2.HISTCMP_CORREL)\n",
+    "            # If the embeddings are even slightly similar, return that distance\n",
+    "            if distance < 0.9:\n",
+    "                return distance\n",
+    "\n",
+    "    return 1  # No match if no similar embedding is found between the two"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "advanced_tracker = Tracker(\n",
+    "    distance_function=\"sqeuclidean\",\n",
+    "    filter_factory=OptimizedKalmanFilterFactory(R=5, Q=0.05),\n",
+    "    distance_threshold=350,  # Higher values mean objects further apart can still be matched\n",
+    "    initialization_delay=12,  # Wait 12 frames before an object starts to be tracked\n",
+    "    hit_counter_max=15,  # Inertia: higher values mean an object takes longer to enter the reid phase\n",
+    "    reid_distance_function=embedding_distance,  # distance function used to re-identify objects\n",
+    "    reid_distance_threshold=0.9,  # If the distance is below this threshold, the object is matched\n",
+    "    reid_hit_counter_max=200,  # Higher values mean an object stays in the reid phase longer\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "video_path = generate_tracking_video(\n",
+    "    sequence=test_sequence,\n",
+    "    tracker=advanced_tracker,\n",
+    "    frame_size=(2560, 1440),\n",
+    "    output_path=os.path.join(VIDEO_OUTPUT_PATH, \"advance_tracking.mp4\"),\n",
+    "    add_embedding=True,\n",
+    ")\n",
+    "video_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "advanced_tracker.total_object_count"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
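+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The advanced tracker above compares color histograms via `cv2.compareHist`. The embeddings themselves come from `compute_embeddings` in `lib.norfair_helper.utils`; as a rough sketch (an assumption for illustration, not the library's exact implementation), a compatible embedding could be a normalized color histogram of each detection crop:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def histogram_embedding(crop):\n",
+    "    # Sketch of a color-histogram embedding compatible with the\n",
+    "    # cv2.compareHist(..., cv2.HISTCMP_CORREL) call in embedding_distance.\n",
+    "    hist = cv2.calcHist([crop], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])\n",
+    "    return cv2.normalize(hist, hist).flatten()"
+   ]
+  }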
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "track-reid",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/norfair/requirements.txt b/examples/norfair/requirements.txt
new file mode 100644
index 0000000..986d702
--- /dev/null
+++ b/examples/norfair/requirements.txt
@@ -0,0 +1 @@
+norfair
diff --git a/examples/trackreid/data/.gitkeep b/examples/trackreid/data/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/examples/trackreid/frames/.gitkeep b/examples/trackreid/frames/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/examples/trackreid/requirements.txt b/examples/trackreid/requirements.txt
new file mode 100644
index 0000000..09a94a3
--- /dev/null
+++ b/examples/trackreid/requirements.txt
@@ -0,0 +1,5 @@
+git+https://github.com/artefactory-fr/bytetrack.git@main
+git+https://github.com/artefactory-fr/track-reid.git@main
+opencv-python==4.8.1.78
+ultralytics==8.0.216
+matplotlib==3.8.2
diff --git a/examples/trackreid/starter_kit_reid.ipynb b/examples/trackreid/starter_kit_reid.ipynb
new file mode 100644
index 0000000..06f3613
--- /dev/null
+++ b/examples/trackreid/starter_kit_reid.ipynb
@@ -0,0 +1,356 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "import os\n",
+    "import glob\n",
+    "import matplotlib.pyplot as plt\n",
+    "import cv2\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "from ultralytics import YOLO"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Let's try to detect juggling balls"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Download the demo .mp4 video"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download the video\n",
+    "VIDEO_PATH = 'data/jungling_guy.mp4'\n",
+    "!if [ ! -f $VIDEO_PATH ]; then mkdir -p data && wget https://storage.googleapis.com/track-reid/data/jungling_guy.mp4 -O $VIDEO_PATH; fi"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.display import Video\n",
+    "Video(VIDEO_PATH, width=800)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Video to frames"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can run this only once:\n",
+    "# Transform this VIDEO_PATH into a list of frames in this folder under frames/\n",
+    "!mkdir -p frames && ffmpeg -i $VIDEO_PATH -vf fps=12 frames/%d.png -hide_banner -loglevel panic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "available_frames = glob.glob(\"frames/*.png\")\n",
+    "available_frames = sorted(available_frames, key=lambda x: int(x.split(\"/\")[-1].split(\".\")[0]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Detection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "\n",
+    "MODEL_WEIGHTS = \"yolov8m.pt\"\n",
+    "\n",
+    "model = YOLO(MODEL_WEIGHTS)\n",
+    "results = model(available_frames[0])[0]\n",
+    "\n",
+    "plt.imshow(cv2.cvtColor(results.plot(), cv2.COLOR_BGR2RGB))\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### We will track only the sports ball class, so we filter detections to it\n",
+    "SPORTS_BALL_CLASS_ID = 32"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bytetracker import BYTETracker\n",
+    "from bytetracker.basetrack import BaseTrack\n",
+    "from utils import draw_all_bbox_on_image, yolo_results_to_bytetrack_format, scale_bbox_as_xyxy\n",
+    "\n",
+    "from trackreid.reid_processor import ReidProcessor"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Tracking without trackreid"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tracker = BYTETracker(track_thresh=0.15, track_buffer=3, match_thresh=0.85, frame_rate=12)\n",
+    "BaseTrack._count = 0\n",
+    "model = YOLO(MODEL_WEIGHTS, task=\"detect\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_tracked_objects = []\n",
+    "for frame_id, image_filename in enumerate(available_frames):\n",
+    "    img = cv2.imread(image_filename)\n",
+    "    detections = model.predict(img, classes=[SPORTS_BALL_CLASS_ID], conf=0.15, verbose=False)[0]\n",
+    "    detections_bytetrack_format = yolo_results_to_bytetrack_format(detections)\n",
+    "    tracked_objects = tracker.update(detections_bytetrack_format, frame_id)\n",
+    "    if len(tracked_objects) > 0:\n",
+    "        tracked_objects = np.insert(tracked_objects, 0, frame_id, axis=1)\n",
+    "        all_tracked_objects.append(tracked_objects)"
+   ]
+  },
\"y2\", \"track_id\", \"class\", \"confidence\"])\n", + "df_tracked[[\"x1\", \"y1\", \"x2\", \"y2\"]] = df_tracked[[\"x1\", \"y1\", \"x2\", \"y2\"]].apply(\n", + " lambda x: scale_bbox_as_xyxy(x[0:4], detections.orig_shape), axis=1, result_type=\"expand\"\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Show tracking video" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fourcc = cv2.VideoWriter_fourcc(*'H264') \n", + "OUTPUT_WITH_BBOX_NO_REID = \"videos/video_bbox_no_reid.mp4\"\n", + "out = cv2.VideoWriter(OUTPUT_WITH_BBOX_NO_REID, fourcc, 12, (1280, 720))\n", + "for frame_id, image_filename in enumerate(available_frames):\n", + " image = cv2.imread(image_filename)\n", + " if frame_id in df_tracked.frame_id.astype('int').values:\n", + " df_current_frame = df_tracked[df_tracked.frame_id == frame_id][[\"x1\", \"y1\", \"x2\", \"y2\", \"track_id\", \"class\", \"confidence\"]].to_numpy()\n", + " image = draw_all_bbox_on_image(image, df_current_frame)\n", + " out.write(image)\n", + "out.release()\n", + "print(\"Video with bounding box without reid is saved at:\", OUTPUT_WITH_BBOX_NO_REID)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Number of detected objects: \", len(df_tracked.track_id.unique()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tracking with trackreid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tracker = BYTETracker(track_thresh= 0.15, track_buffer = 3, match_thresh = 0.85, frame_rate= 12)\n", + "BaseTrack._count = 0\n", + "model = YOLO(MODEL_WEIGHTS, task=\"detect\")\n", + "\n", + "reid_processor = ReidProcessor(filter_confidence_threshold=0.15,\n", + " filter_time_threshold=1,\n", + " cost_function_threshold=5000, # max cost to rematch 2 objects\n", + " max_attempt_to_match=1,\n", + " max_frames_to_rematch=200,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "all_tracked_objects = []\n", + "for frame_id, image_filename in enumerate(available_frames):\n", + " detections = model.predict(image_filename, classes=[SPORTS_BALL_CLASS_ID], conf=0.15, verbose=False)[0]\n", + " detections_bytetrack_format = yolo_results_to_bytetrack_format(detections)\n", + " tracked_objects = tracker.update(detections_bytetrack_format, frame_id)\n", + " corrected_tracked_objects = reid_processor.update(tracked_objects, frame_id)\n", + " if len(corrected_tracked_objects) > 0:\n", + " all_tracked_objects.append(corrected_tracked_objects)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_tracked = pd.DataFrame(np.concatenate(all_tracked_objects), columns=[\"frame_id\", \"track_id\", \"class\", \"x1\", \"y1\", \"x2\", \"y2\", \"confidence\", \"mean_confidence\", \"orginal_track_id\"])\n", + "df_tracked[[\"x1\", \"y1\", \"x2\", \"y2\"]] = df_tracked[[\"x1\", \"y1\", \"x2\", \"y2\"]].apply(\n", + " lambda x: scale_bbox_as_xyxy(x[0:4], detections.orig_shape), axis=1, result_type=\"expand\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fourcc = cv2.VideoWriter_fourcc(*'H264') \n", + "OUTPUT_WITH_BBOX_REID = \"videos/video_bbox_with_reid.mp4\"\n", + "out = 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fourcc = cv2.VideoWriter_fourcc(*'H264')\n",
+    "OUTPUT_WITH_BBOX_REID = \"videos/video_bbox_with_reid.mp4\"\n",
+    "out = cv2.VideoWriter(OUTPUT_WITH_BBOX_REID, fourcc, 12, (1280, 720))\n",
+    "for frame_id, image_filename in enumerate(available_frames):\n",
+    "    image = cv2.imread(image_filename)\n",
+    "    if frame_id in df_tracked.frame_id.astype('int').values:\n",
+    "        df_current_frame = df_tracked[df_tracked.frame_id == frame_id][[\"x1\", \"y1\", \"x2\", \"y2\", \"track_id\", \"class\", \"confidence\"]].to_numpy()\n",
+    "        image = draw_all_bbox_on_image(image, df_current_frame)\n",
+    "    out.write(image)\n",
+    "out.release()\n",
+    "print(\"Video with bounding box with reid is saved at:\", OUTPUT_WITH_BBOX_REID)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### You can access information about the tracked objects from the ReidProcessor object"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Number of detected objects: \", len(reid_processor.seen_objects))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "reid_processor.seen_objects"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "reid_processor.nb_corrections"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "track-reid",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "a7fd834062a85a1fb9d4482d7456bec56e0ff99e4dd054f5e10ff6e3cdc923c6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/trackreid/utils.py b/examples/trackreid/utils.py
new file mode 100644
index 0000000..0d45b6b
--- /dev/null
+++ b/examples/trackreid/utils.py
@@ -0,0 +1,70 @@
+import cv2
+import numpy as np
+
+
+def draw_all_bbox_on_image(image, tracking_objects: np.ndarray):
+    """
+    Draw each detection on the image, labeled with its track id and confidence.
+
+    `tracking_objects` is an array of detections with track id, class id and confidence:
+    [
+        [x, y, x, y, track_id, class_id, conf],
+        [x, y, x, y, track_id, class_id, conf],
+        ...
+    ]
+    """
+    for detection in tracking_objects:
+        x1, y1, x2, y2, track_id, _, conf = detection
+        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
+        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
+        cv2.putText(
+            image,
+            f"{int(track_id)} ({conf:.2f})",
+            (x1, y1 - 10),
+            0,
+            1,
+            (0, 255, 0),
+            2,
+        )
+    return image
+
+
+def yolo_results_to_bytetrack_format(detections):
+    """Transforms YOLO detections into the bytetrack format.
+
+    Args:
+        detections: A list of YOLO detections.
+
+    Returns:
+        A list of bytetrack detections.
+    """
+    boxes = detections.numpy().boxes.xyxyn
+    scores = detections.numpy().boxes.conf
+    classes = detections.numpy().boxes.cls
+    return np.stack(
+        [
+            boxes[:, 0],
+            boxes[:, 1],
+            boxes[:, 2],
+            boxes[:, 3],
+            scores,
+            classes,
+        ],
+        axis=1,
+    )
+
+
+def scale_bbox_as_xyxy(bbox: np.ndarray, target_img_size: tuple):
+    """Scales a bounding box to a target image size.
+
+    Args:
+        bbox: A bounding box in the format [x, y, x, y], scaled between 0 and 1.
+        target_img_size: The target image size as a tuple (h, w).
+
+    Returns:
+        The scaled bounding box.
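+
+    Example (values are illustrative, for a 720x1280 frame):
+        >>> scale_bbox_as_xyxy(np.array([0.1, 0.2, 0.3, 0.4]), (720, 1280))
+        array([128., 144., 384., 288.])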
+    """
+    x1, y1, x2, y2 = bbox
+    h, w = target_img_size
+    scaled_bbox = np.array([x1 * w, y1 * h, x2 * w, y2 * h])
+    return scaled_bbox
diff --git a/examples/trackreid/videos/.gitkeep b/examples/trackreid/videos/.gitkeep
new file mode 100644
index 0000000..e69de29