From 5b5128a429ea600f9086b721e22af2a531361e79 Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Thu, 5 Sep 2024 20:02:21 +0600
Subject: [PATCH 01/22] fix data type bug

---
 app/main.py | 59 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 19 deletions(-)

diff --git a/app/main.py b/app/main.py
index 6803b02..18e67ef 100644
--- a/app/main.py
+++ b/app/main.py
@@ -58,7 +58,7 @@ def continue_ui(self):
         self.config.sahi_model_type = st.sidebar.selectbox("Model Architecture:", ["yolov8", "yolov9", "yolov10"])
         self.config.sahi_device = st.sidebar.selectbox("Device:", ["cpu"])
         self.config.sahi_slice_size = st.sidebar.slider("SAHI slice size:", 128, 512, (256, 256))
-        self.config.sahi_overlap_ratio = st.sidebar.slider("SAHI overlap ratio:", 0.1, 0.5, 0.2)
+        self.config.sahi_overlap_ratio = st.sidebar.slider("SAHI overlap ratio:", 0.1, 0.5, (0.2, 0.2))
         self.sahi_config = {
             'model_type': self.config.sahi_model_type,
             'slice_size': self.config.sahi_slice_size,
@@ -107,29 +107,50 @@ def process_cloud_storage_video(self):
         # Proceed to run the extraction process
         self.run_extraction(video_path, unique_filename)

+    # def run_extraction(self, video_path, unique_filename):
+    #     class_config_path = os.path.join(self.config.object_class_directory, self.class_config_selection)
+    #     specific_output_dir = os.path.join(self.config.output_directory, unique_filename)
+    #     os.makedirs(specific_output_dir, exist_ok=True)
+    #     output_format_instance = self.format_options[self.format_selection](specific_output_dir)
+    #     try:
+    #         extractor = VideoFrameExtractor(self.config, video_path, self.frame_rate, specific_output_dir,
+    #                                         self.model_selection, class_config_path, output_format_instance,
+    #                                         self.transformations, self.sahi_config)
+    #         extractor.extract_frames(self.model_confidence)
+    #         if self.format_selection == "CVAT":
+    #             output_format_instance.zip_and_cleanup()
+    #         if self.storage_option == 'Object Storage':
+    #             self.upload_outputs(specific_output_dir)
+    #
+    #         # Clean up: Remove the temporary video file after processing
+    #         if os.path.exists(video_path):
+    #             os.remove(video_path)
+    #             print(f"Deleted temporary video file: {video_path}")
+    #
+    #         st.success('Extraction Completed!')
+    #     except Exception as e:
+    #         st.error(f"An error occurred during frame extraction: {str(e)}")
     def run_extraction(self, video_path, unique_filename):
         class_config_path = os.path.join(self.config.object_class_directory, self.class_config_selection)
         specific_output_dir = os.path.join(self.config.output_directory, unique_filename)
         os.makedirs(specific_output_dir, exist_ok=True)
         output_format_instance = self.format_options[self.format_selection](specific_output_dir)
-        try:
-            extractor = VideoFrameExtractor(self.config, video_path, self.frame_rate, specific_output_dir,
-                                            self.model_selection, class_config_path, output_format_instance,
-                                            self.transformations, self.sahi_config)
-            extractor.extract_frames(self.model_confidence)
-            if self.format_selection == "CVAT":
-                output_format_instance.zip_and_cleanup()
-            if self.storage_option == 'Object Storage':
-                self.upload_outputs(specific_output_dir)
-
-            # Clean up: Remove the temporary video file after processing
-            if os.path.exists(video_path):
-                os.remove(video_path)
-                print(f"Deleted temporary video file: {video_path}")
-
-            st.success('Extraction Completed!')
-        except Exception as e:
-            st.error(f"An error occurred during frame extraction: {str(e)}")
+
+        extractor = VideoFrameExtractor(self.config, video_path, self.frame_rate, specific_output_dir,
+                                        self.model_selection, class_config_path, output_format_instance,
+                                        self.transformations, self.sahi_config)
+        extractor.extract_frames(self.model_confidence)
+        if self.format_selection == "CVAT":
+            output_format_instance.zip_and_cleanup()
+        if self.storage_option == 'Object Storage':
+            self.upload_outputs(specific_output_dir)
+
+        # Clean up: Remove the temporary video file after processing
+        if os.path.exists(video_path):
+            os.remove(video_path)
+            print(f"Deleted temporary video file: {video_path}")
+
+        st.success('Extraction Completed!')

     def upload_outputs(self, directory):
         """

From 51c47c74857410423398b96ba01a31a7052d9ec7 Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Fri, 6 Sep 2024 12:26:29 +0600
Subject: [PATCH 02/22] turn off log object storage connection

---
 utils/storage_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/storage_manager.py b/utils/storage_manager.py
index dd4d34b..2d7d54d 100644
--- a/utils/storage_manager.py
+++ b/utils/storage_manager.py
@@ -21,7 +21,7 @@ def __init__(self, config):
                 aws_access_key_id=self.config.s3_access_key,
                 aws_secret_access_key=self.config.s3_secret_key
             )
-            print(f"Connected to S3 endpoint: {self.config.s3_endpoint_url} (Region: {self.config.s3_region_name})")
+            # print(f"Connected to S3 endpoint: {self.config.s3_endpoint_url} (Region: {self.config.s3_region_name})")
         except Exception as e:
             raise RuntimeError(f"Error initializing S3 client: {str(e)}")

From 3a9cce83092228415243f881588e98971f7624b7 Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Fri, 6 Sep 2024 12:27:07 +0600
Subject: [PATCH 03/22] disable output log

---
 app/extractor.py | 47 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/app/extractor.py b/app/extractor.py
index 99d69f9..825fde2 100644
--- a/app/extractor.py
+++ b/app/extractor.py
@@ -25,9 +25,11 @@ def __init__(self, config, video_path, frame_rate, output_dir, model_path, class
         self.supported_classes = self.load_classes(self.class_config_path)
         self.image_processor = ImageProcessor(output_size=self.transformations.get('size', (640, 640)))

-        self.sahi_utils = SahiUtils(os.path.join('models', model_path), **sahi_config) if sahi_config else None
-        self.output_format.sahi_enabled = bool(sahi_config)
-        self.output_format.sahi_utils = self.sahi_utils
+        # Only initialize SahiUtils if SAHI is enabled
+        if sahi_config:
+            self.sahi_utils = SahiUtils(os.path.join('models', model_path), **sahi_config)
+        else:
+            self.sahi_utils = None

         # Debugging output to ensure path handling
         if not os.path.exists(self.video_path):
@@ -46,9 +48,6 @@ def load_classes(self, config_path):
         return [cls['name'] for cls in class_data['classes']]

     def extract_frames(self, model_confidence):
-        """
-        Extract and process frames from the video, and save them using the specified output format.
-        """
         cap = cv2.VideoCapture(self.video_path)
         if not cap.isOpened():
             raise ValueError(f"Failed to open video stream for {self.video_path}")
@@ -66,24 +65,46 @@ def extract_frames(self, model_confidence):
                 transformed_images = self.apply_transformations(frame)
                 for key, transformed_image in transformed_images.items():
-                    if transformed_image.ndim == 2:  # Check if the image is grayscale
-                        # Convert back to RGB format for consistency
-                        transformed_image = cv2.cvtColor(transformed_image,
-                                                         cv2.COLOR_GRAY2BGR)
+                    if transformed_image.ndim == 2:  # Grayscale to RGB for consistency
+                        transformed_image = cv2.cvtColor(transformed_image, cv2.COLOR_GRAY2BGR)
                     frame_filename = f"{self._get_video_basename()}_image{frame_count}_{key}.jpg"
                     frame_path = os.path.join(self.output_dir, 'images', frame_filename)
-                    # Save images locally or to configured storage
                     cv2.imwrite(frame_path, transformed_image)
-                    results = self.yolo_model.predict(transformed_image, conf=model_confidence)
-                    self.output_format.save_annotations(transformed_image, frame_path, frame_filename, results,
+                    if self.sahi_utils:
+                        results = self.sahi_utils.perform_sliced_inference(transformed_image)
+                    else:
+                        results = self.yolo_model.predict(transformed_image, conf=model_confidence, verbose=False)
+
+                    formatted_results = self.format_results_for_annotation(results, self.sahi_utils is not None)
+                    # print(formatted_results)
+                    self.output_format.save_annotations(transformed_image, frame_path, frame_filename,
+                                                        formatted_results,
                                                         self.supported_classes)
                     frame_count += 1

         cap.release()

+    def format_results_for_annotation(self, results, sahi_enabled):
+        if sahi_enabled:
+            formatted_results = []
+            for object_prediction in results.object_prediction_list:
+                bbox = object_prediction.bbox
+                category = object_prediction.category
+                formatted_result = {
+                    'class_id': int(category.id),
+                    'xmin': bbox.minx,
+                    'ymin': bbox.miny,
+                    'xmax': bbox.maxx,
+                    'ymax': bbox.maxy
+                }
+                formatted_results.append(formatted_result)
+            return formatted_results
+        else:
+            return results
+
     def apply_transformations(self, frame):
         """
         Apply selected transformations to the frame and return a dictionary of transformed images.
From cb72a5fc5666185393d37b2b68e7b2e27190f9cc Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Fri, 6 Sep 2024 20:06:36 +0600
Subject: [PATCH 04/22] fix sahi normalization

---
 app/extractor.py | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/app/extractor.py b/app/extractor.py
index 825fde2..7246fb0 100644
--- a/app/extractor.py
+++ b/app/extractor.py
@@ -77,34 +77,14 @@ def extract_frames(self, model_confidence):
             else:
                 results = self.yolo_model.predict(transformed_image, conf=model_confidence, verbose=False)

-            formatted_results = self.format_results_for_annotation(results, self.sahi_utils is not None)
-            # print(formatted_results)
             self.output_format.save_annotations(transformed_image, frame_path, frame_filename,
-                                                formatted_results,
+                                                results,
                                                 self.supported_classes)
             frame_count += 1

         cap.release()

-    def format_results_for_annotation(self, results, sahi_enabled):
-        if sahi_enabled:
-            formatted_results = []
-            for object_prediction in results.object_prediction_list:
-                bbox = object_prediction.bbox
-                category = object_prediction.category
-                formatted_result = {
-                    'class_id': int(category.id),
-                    'xmin': bbox.minx,
-                    'ymin': bbox.miny,
-                    'xmax': bbox.maxx,
-                    'ymax': bbox.maxy
-                }
-                formatted_results.append(formatted_result)
-            return formatted_results
-        else:
-            return results
-
     def apply_transformations(self, frame):
         """
         Apply selected transformations to the frame and return a dictionary of transformed images.

From a4422d10b7cef93b14fbcbd53cb0702c8a9ef6cf Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Fri, 6 Sep 2024 23:42:51 +0600
Subject: [PATCH 05/22] converter numpy

---
 utils/sahi_utils.py | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/utils/sahi_utils.py b/utils/sahi_utils.py
index 67b2285..290b537 100644
--- a/utils/sahi_utils.py
+++ b/utils/sahi_utils.py
@@ -1,5 +1,6 @@
 from sahi.predict import get_sliced_prediction
 from sahi import AutoDetectionModel
+import numpy as np


 class SahiUtils:
@@ -20,13 +21,40 @@ def load_model(self, model_path):
         return detection_model

     def perform_sliced_inference(self, image):
-        # Perform sliced inference using the loaded model and SAHI
         results = get_sliced_prediction(
             image,
             self.model,  # this should be a sahi model
             slice_height=self.slice_size[0],
             slice_width=self.slice_size[1],
             overlap_height_ratio=self.overlap_ratio[0],
-            overlap_width_ratio=self.overlap_ratio[1]
+            overlap_width_ratio=self.overlap_ratio[1],
+            verbose=False
         )
-        return results
+        return self.format_predictions(results, image)
+
+    def format_predictions(self, prediction_result, image):
+        formatted_results = {"boxes": [], "names": {}, "orig_img": image, "orig_shape": image.shape, "path": "",
+                             "probs": None, "save_dir": None, "speed": None}
+        class_ids = set()
+        for prediction in prediction_result.object_prediction_list:
+            class_id = prediction.category.id
+            class_ids.add(class_id)
+            formatted_results["names"][class_id] = prediction.category.name
+            bbox_xyxy = [prediction.bbox.minx, prediction.bbox.miny, prediction.bbox.maxx, prediction.bbox.maxy]
+            formatted_results["boxes"].append({
+                "class_id": class_id,
+                "bbox": np.array(bbox_xyxy),
+                "score": prediction.score.value
+            })
+
+        formatted_results["boxes"] = self.convert_boxes(formatted_results["boxes"])
+        return formatted_results
+
+    def convert_boxes(self, boxes):
+        # Convert to ultralytics.engine.results.Boxes format or similar
+        # Ensure correct shape and concatenation of score and class_id
+        boxes_array = [np.concatenate([box["bbox"], [box["score"], box["class_id"]]]) for box in boxes]
+        if boxes_array:  # Check if list is not empty
+            return np.stack(boxes_array)  # Properly stack arrays to maintain structure
+        else:
+            return np.array([])  # Return an empty numpy array if no boxes

From b0e2e83907e4e0a34fa00e67120073c204527ffc Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Fri, 6 Sep 2024 23:42:57 +0600
Subject: [PATCH 06/22] debug num

---
 app/extractor.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app/extractor.py b/app/extractor.py
index 7246fb0..a028e33 100644
--- a/app/extractor.py
+++ b/app/extractor.py
@@ -77,6 +77,8 @@ def extract_frames(self, model_confidence):
             else:
                 results = self.yolo_model.predict(transformed_image, conf=model_confidence, verbose=False)

+            print(results)
+
             self.output_format.save_annotations(transformed_image, frame_path, frame_filename,
                                                 results,
                                                 self.supported_classes)

From fc4bc3dba7e511b02d8eadea3598e5f06022cc4e Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Sat, 7 Sep 2024 01:19:46 +0600
Subject: [PATCH 07/22] hide double time slice

---
 formats/base_format.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/formats/base_format.py b/formats/base_format.py
index 4c83463..0ffc36a 100644
--- a/formats/base_format.py
+++ b/formats/base_format.py
@@ -49,11 +49,11 @@ def save_annotations(self, frame, frame_path: str, frame_filename: str, results:
         Raises:
             NotImplementedError: If `_save_annotations` is not implemented in the subclass.
         """
-        if self.sahi_enabled and self.sahi_utils:
-            if hasattr(self.sahi_utils, 'perform_sliced_inference'):
-                results = self.sahi_utils.perform_sliced_inference(frame)
-            else:
-                raise AttributeError("sahi_utils object does not have 'perform_sliced_inference' method.")
+        # if self.sahi_enabled and self.sahi_utils:
+        #     if hasattr(self.sahi_utils, 'perform_sliced_inference'):
+        #         results = self.sahi_utils.perform_sliced_inference(frame)
+        #     else:
+        #         raise AttributeError("sahi_utils object does not have 'perform_sliced_inference' method.")
         self._save_annotations(frame, frame_path, frame_filename, results, supported_classes)

     def _save_annotations(self, frame, frame_path: str, frame_filename: str, results: list, supported_classes: list):

From 4afe88fcb1298f7fc23e323621416af5f08c7c2a Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Sat, 7 Sep 2024 01:19:55 +0600
Subject: [PATCH 08/22] hide debug statment

---
 app/extractor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/extractor.py b/app/extractor.py
index a028e33..b6d1551 100644
--- a/app/extractor.py
+++ b/app/extractor.py
@@ -77,7 +77,7 @@ def extract_frames(self, model_confidence):
             else:
                 results = self.yolo_model.predict(transformed_image, conf=model_confidence, verbose=False)

-            print(results)
+            # print(results)

             self.output_format.save_annotations(transformed_image, frame_path, frame_filename,
                                                 results,

From 282f10d050204901e1a3c008d80e8ebd0a604034 Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Sat, 7 Sep 2024 13:35:55 +0600
Subject: [PATCH 09/22] sahi format fixed

---
 utils/sahi_utils.py | 45 ++++++++++++++++++---------------------------
 1 file changed, 18 insertions(+), 27 deletions(-)

diff --git a/utils/sahi_utils.py b/utils/sahi_utils.py
index 290b537..d05f6e1 100644
--- a/utils/sahi_utils.py
+++ b/utils/sahi_utils.py
@@ -1,17 +1,19 @@
 from sahi.predict import get_sliced_prediction
+from sahi.utils.cv import read_image_as_pil
 from sahi import AutoDetectionModel
 import numpy as np


 class SahiUtils:
     def __init__(self, model_path, model_type='yolov8', device='cpu', slice_size=(256, 256), overlap_ratio=(0.2, 0.2)):
-        self.device = device  # CPU or 'cuda:0'
+        self.device = device  # Can be 'cpu' or 'cuda:0' for GPU
         self.model_type = model_type
         self.model = self.load_model(model_path)
         self.slice_size = slice_size
         self.overlap_ratio = overlap_ratio

     def load_model(self, model_path):
+        """Loads a detection model based on the specified type and path."""
         detection_model = AutoDetectionModel.from_pretrained(
             model_type=self.model_type,
             model_path=model_path,
@@ -21,40 +23,29 @@ def load_model(self, model_path):
         return detection_model

     def perform_sliced_inference(self, image):
+        """Performs object detection on an image using sliced prediction."""
+        pil_image = read_image_as_pil(image)
         results = get_sliced_prediction(
-            image,
-            self.model,  # this should be a sahi model
+            pil_image,
+            detection_model=self.model,
             slice_height=self.slice_size[0],
             slice_width=self.slice_size[1],
             overlap_height_ratio=self.overlap_ratio[0],
             overlap_width_ratio=self.overlap_ratio[1],
             verbose=False
         )
-        return self.format_predictions(results, image)
+        return self.format_predictions(results)

-    def format_predictions(self, prediction_result, image):
-        formatted_results = {"boxes": [], "names": {}, "orig_img": image, "orig_shape": image.shape, "path": "",
-                             "probs": None, "save_dir": None, "speed": None}
-        class_ids = set()
+    def format_predictions(self, prediction_result):
+        """Formats the predictions into a compatible format with YOLO output."""
+        formatted_results = {'boxes': []}
         for prediction in prediction_result.object_prediction_list:
-            class_id = prediction.category.id
-            class_ids.add(class_id)
-            formatted_results["names"][class_id] = prediction.category.name
-            bbox_xyxy = [prediction.bbox.minx, prediction.bbox.miny, prediction.bbox.maxx, prediction.bbox.maxy]
-            formatted_results["boxes"].append({
-                "class_id": class_id,
-                "bbox": np.array(bbox_xyxy),
-                "score": prediction.score.value
-            })
+            box = prediction.bbox.to_voc_bbox()
+            formatted_result = {
+                'cls': [prediction.category.id],  # list wrapping for compatibility
+                'conf': [prediction.score.value],  # list wrapping for compatibility
+                'xyxy': [np.array([box[0], box[1], box[2], box[3]])],  # VOC format to numpy array
+            }
+            formatted_results['boxes'].append(formatted_result)

-        formatted_results["boxes"] = self.convert_boxes(formatted_results["boxes"])
         return formatted_results
-
-    def convert_boxes(self, boxes):
-        # Convert to ultralytics.engine.results.Boxes format or similar
-        # Ensure correct shape and concatenation of score and class_id
-        boxes_array = [np.concatenate([box["bbox"], [box["score"], box["class_id"]]]) for box in boxes]
-        if boxes_array:  # Check if list is not empty
-            return np.stack(boxes_array)  # Properly stack arrays to maintain structure
-        else:
-            return np.array([])  # Return an empty numpy array if no boxes

From 310061241ec70028a737f9516347f01eaf884 Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Sat, 7 Sep 2024 13:36:02 +0600
Subject: [PATCH 10/22] add process img

---
 formats/base_format.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/formats/base_format.py b/formats/base_format.py
index 0ffc36a..0401cd5 100644
--- a/formats/base_format.py
+++ b/formats/base_format.py
@@ -35,6 +35,22 @@ def ensure_directories(self):
         """
         raise NotImplementedError("Subclasses should implement this method.")

+    def process_results(self, frame, results, img_dimensions):
+        """Generate formatted strings from detection results."""
+        annotations = []
+        img_height, img_width = img_dimensions
+        for result in results:
+            if hasattr(result, 'boxes') and result.boxes is not None:
+                for box in result.boxes:
+                    class_id = int(box.cls[0])
+                    xmin, ymin, xmax, ymax = box.xyxy[0]
+                    x_center = ((xmin + xmax) / 2) / img_width
+                    y_center = ((ymin + ymax) / 2) / img_height
+                    width = (xmax - xmin) / img_width
+                    height = (ymax - ymin) / img_height
+                    annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
+        return annotations
+
     def save_annotations(self, frame, frame_path: str, frame_filename: str, results: list, supported_classes: list):
         """
         Saves the annotations for a given frame. If SAHI is enabled, performs sliced inference before saving.
@@ -49,11 +65,6 @@ def save_annotations(self, frame, frame_path: str, frame_filename: str, results:
         Raises:
             NotImplementedError: If `_save_annotations` is not implemented in the subclass.
         """
-        # if self.sahi_enabled and self.sahi_utils:
-        #     if hasattr(self.sahi_utils, 'perform_sliced_inference'):
-        #         results = self.sahi_utils.perform_sliced_inference(frame)
-        #     else:
-        #         raise AttributeError("sahi_utils object does not have 'perform_sliced_inference' method.")
         self._save_annotations(frame, frame_path, frame_filename, results, supported_classes)

     def _save_annotations(self, frame, frame_path: str, frame_filename: str, results: list, supported_classes: list):
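
As a quick sanity check of the normalization that process_results() performs above — pixel xyxy corners to normalized YOLO center/size — here is a standalone sketch; the box coordinates and image size are made-up numbers:

    def to_yolo(xmin, ymin, xmax, ymax, img_w, img_h):
        # Center point and box size, each normalized to [0, 1] by the image dimensions.
        x_center = ((xmin + xmax) / 2) / img_w
        y_center = ((ymin + ymax) / 2) / img_h
        width = (xmax - xmin) / img_w
        height = (ymax - ymin) / img_h
        return f"{x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"

    # A 100x200-pixel box near the top-left of a 640x640 frame:
    print(to_yolo(100, 100, 200, 300, 640, 640))  # -> 0.234375 0.312500 0.156250 0.312500
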
- """ + img_dimensions = frame.shape[:2] + annotations = self.process_results(frame, results, img_dimensions) annotation_filename = frame_filename.replace('.jpg', '.txt') annotation_path = os.path.join(self.label_dir, annotation_filename) - img_height, img_width = frame.shape[:2] - with open(annotation_path, 'w') as f: - for result in results: - if hasattr(result, 'boxes') and result.boxes is not None: - for box in result.boxes: - class_id = int(box.cls[0]) - if supported_classes[class_id] in supported_classes: - confidence = box.conf[0] - xmin, ymin, xmax, ymax = box.xyxy[0] - x_center = ((xmin + xmax) / 2) / img_width - y_center = ((ymin + ymax) / 2) / img_height - width = (xmax - xmin) / img_width - height = (ymax - ymin) / img_height - f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n") - - # Generate metadata file if needed + for annotation in annotations: + f.write(annotation + "\n") self.create_data_yaml(supported_classes) def create_data_yaml(self, supported_classes): From 3ba9d06008795cfa1dd0f047cdec8ede2f70a52d Mon Sep 17 00:00:00 2001 From: Shamsuddin Ahmed Date: Sat, 7 Sep 2024 13:42:21 +0600 Subject: [PATCH 12/22] fixed cvat process with base --- formats/cvat_format.py | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/formats/cvat_format.py b/formats/cvat_format.py index bec4404..4ff4a33 100644 --- a/formats/cvat_format.py +++ b/formats/cvat_format.py @@ -18,30 +18,16 @@ def __init__(self, output_dir): os.makedirs(self.image_dir, exist_ok=True) def save_annotations(self, frame, frame_path, frame_filename, results, supported_classes): - """ - Saves annotations and images in CVAT-compatible format directly in obj_train_data. - """ + img_dimensions = frame.shape[:2] + annotations = self.process_results(frame, results, img_dimensions) frame_filename_png = frame_filename.replace('.jpg', '.png') image_path = os.path.join(self.image_dir, frame_filename_png) - cv2.imwrite(image_path, frame) # Save the frame image - + cv2.imwrite(image_path, frame) annotation_filename = frame_filename_png.replace('.png', '.txt') annotation_path = os.path.join(self.image_dir, annotation_filename) - with open(annotation_path, 'w') as file: - for result in results: - if hasattr(result, 'boxes') and result.boxes is not None: - for box in result.boxes: - if box.xyxy.dim() == 2 and box.xyxy.shape[0] == 1: - class_id = int(box.cls[0]) - xmin, ymin, xmax, ymax = box.xyxy[0].tolist() - x_center = ((xmin + xmax) / 2) / frame.shape[1] - y_center = ((ymin + ymax) / 2) / frame.shape[0] - width = (xmax - xmin) / frame.shape[1] - height = (ymax - ymin) / frame.shape[0] - file.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n") - - # After saving all annotations, update metadata files + for annotation in annotations: + file.write(annotation + "\n") self.create_metadata_files(supported_classes) def create_metadata_files(self, supported_classes): From 7ab9df197b0316404b48ab9892c8eebf91933ee6 Mon Sep 17 00:00:00 2001 From: Shamsuddin Ahmed Date: Sat, 7 Sep 2024 13:59:28 +0600 Subject: [PATCH 13/22] add sahi flag to child --- formats/cvat_format.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/formats/cvat_format.py b/formats/cvat_format.py index 4ff4a33..2d94ce6 100644 --- a/formats/cvat_format.py +++ b/formats/cvat_format.py @@ -11,8 +11,8 @@ class CVATFormat(BaseFormat): output_dir (str): Base directory for all output. 
""" - def __init__(self, output_dir): - super().__init__(output_dir) + def __init__(self, output_dir, sahi_enabled): + super().__init__(output_dir, sahi_enabled) self.data_dir = os.path.join(output_dir, 'data') self.image_dir = os.path.join(self.data_dir, 'obj_train_data') os.makedirs(self.image_dir, exist_ok=True) From 1562494eafdaa5d06796b9768e1366d7dfae23f8 Mon Sep 17 00:00:00 2001 From: Shamsuddin Ahmed Date: Sat, 7 Sep 2024 13:59:33 +0600 Subject: [PATCH 14/22] add sahi flag to child roboflow --- formats/roboflow_format.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/formats/roboflow_format.py b/formats/roboflow_format.py index 7cbb6da..b95bffc 100644 --- a/formats/roboflow_format.py +++ b/formats/roboflow_format.py @@ -4,8 +4,8 @@ class RoboflowFormat(BaseFormat): - def __init__(self, output_dir): - super().__init__(output_dir) + def __init__(self, output_dir, sahi_enabled): + super().__init__(output_dir, sahi_enabled) self.image_dir = os.path.join(output_dir, 'images') self.label_dir = os.path.join(output_dir, 'labels') os.makedirs(self.image_dir, exist_ok=True) From ebf299c7e3f7d286405aad0e901d5c06aa5dfafc Mon Sep 17 00:00:00 2001 From: Shamsuddin Ahmed Date: Sat, 7 Sep 2024 13:59:42 +0600 Subject: [PATCH 15/22] pass sahi flag to format --- app/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/main.py b/app/main.py index 18e67ef..9349902 100644 --- a/app/main.py +++ b/app/main.py @@ -134,7 +134,8 @@ def run_extraction(self, video_path, unique_filename): class_config_path = os.path.join(self.config.object_class_directory, self.class_config_selection) specific_output_dir = os.path.join(self.config.output_directory, unique_filename) os.makedirs(specific_output_dir, exist_ok=True) - output_format_instance = self.format_options[self.format_selection](specific_output_dir) + output_format_instance = self.format_options[self.format_selection](output_dir=specific_output_dir, + sahi_enabled=self.sahi_enabled) extractor = VideoFrameExtractor(self.config, video_path, self.frame_rate, specific_output_dir, self.model_selection, class_config_path, output_format_instance, From 1567a4b295a31b2c58528ca4e360472b5a156743 Mon Sep 17 00:00:00 2001 From: Shamsuddin Ahmed Date: Sat, 7 Sep 2024 14:02:31 +0600 Subject: [PATCH 16/22] change sahi dist --- formats/base_format.py | 50 +++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/formats/base_format.py b/formats/base_format.py index 0401cd5..35b20a8 100644 --- a/formats/base_format.py +++ b/formats/base_format.py @@ -39,35 +39,31 @@ def process_results(self, frame, results, img_dimensions): """Generate formatted strings from detection results.""" annotations = [] img_height, img_width = img_dimensions - for result in results: - if hasattr(result, 'boxes') and result.boxes is not None: - for box in result.boxes: - class_id = int(box.cls[0]) - xmin, ymin, xmax, ymax = box.xyxy[0] - x_center = ((xmin + xmax) / 2) / img_width - y_center = ((ymin + ymax) / 2) / img_height - width = (xmax - xmin) / img_width - height = (ymax - ymin) / img_height - annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}") - return annotations + if self.sahi_enabled: + for box in results['boxes']: + class_id = int(box['cls'][0]) + xmin, ymin, xmax, ymax = box['xyxy'][0] + x_center = ((xmin + xmax) / 2) / img_width + y_center = ((ymin + ymax) / 2) / img_height + width = (xmax - xmin) / img_width + height = (ymax - ymin) / img_height + 
print(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}") + annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}") + return annotations + else: + for result in results: + if hasattr(result, 'boxes') and result.boxes is not None: + for box in result.boxes: + class_id = int(box.cls[0]) + xmin, ymin, xmax, ymax = box.xyxy[0] + x_center = ((xmin + xmax) / 2) / img_width + y_center = ((ymin + ymax) / 2) / img_height + width = (xmax - xmin) / img_width + height = (ymax - ymin) / img_height + annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}") + return annotations def save_annotations(self, frame, frame_path: str, frame_filename: str, results: list, supported_classes: list): - """ - Saves the annotations for a given frame. If SAHI is enabled, performs sliced inference before saving. - - Args: - frame (ndarray): The image frame for which annotations are being saved. - frame_path (str): The path where the frame is located. - frame_filename (str): The name of the frame file. - results (list): A list of results from the detection model or sliced inference. - supported_classes (list): List of supported class labels for the annotations. - - Raises: - NotImplementedError: If `_save_annotations` is not implemented in the subclass. - """ - self._save_annotations(frame, frame_path, frame_filename, results, supported_classes) - - def _save_annotations(self, frame, frame_path: str, frame_filename: str, results: list, supported_classes: list): """ Abstract method for saving annotations. To be implemented by subclasses to define the logic for saving the annotations. From 3fac727b7dc78750baac2ec4b85c2d31463f23ec Mon Sep 17 00:00:00 2001 From: Shamsuddin Ahmed Date: Sat, 7 Sep 2024 14:14:51 +0600 Subject: [PATCH 17/22] fix error handle --- app/main.py | 57 +++++++++++++++++------------------------------------ 1 file changed, 18 insertions(+), 39 deletions(-) diff --git a/app/main.py b/app/main.py index 9349902..820e7a6 100644 --- a/app/main.py +++ b/app/main.py @@ -107,51 +107,30 @@ def process_cloud_storage_video(self): # Proceed to run the extraction process self.run_extraction(video_path, unique_filename) - # def run_extraction(self, video_path, unique_filename): - # class_config_path = os.path.join(self.config.object_class_directory, self.class_config_selection) - # specific_output_dir = os.path.join(self.config.output_directory, unique_filename) - # os.makedirs(specific_output_dir, exist_ok=True) - # output_format_instance = self.format_options[self.format_selection](specific_output_dir) - # try: - # extractor = VideoFrameExtractor(self.config, video_path, self.frame_rate, specific_output_dir, - # self.model_selection, class_config_path, output_format_instance, - # self.transformations, self.sahi_config) - # extractor.extract_frames(self.model_confidence) - # if self.format_selection == "CVAT": - # output_format_instance.zip_and_cleanup() - # if self.storage_option == 'Object Storage': - # self.upload_outputs(specific_output_dir) - # - # # Clean up: Remove the temporary video file after processing - # if os.path.exists(video_path): - # os.remove(video_path) - # print(f"Deleted temporary video file: {video_path}") - # - # st.success('Extraction Completed!') - # except Exception as e: - # st.error(f"An error occurred during frame extraction: {str(e)}") def run_extraction(self, video_path, unique_filename): class_config_path = os.path.join(self.config.object_class_directory, self.class_config_selection) specific_output_dir 
= os.path.join(self.config.output_directory, unique_filename) os.makedirs(specific_output_dir, exist_ok=True) output_format_instance = self.format_options[self.format_selection](output_dir=specific_output_dir, sahi_enabled=self.sahi_enabled) - - extractor = VideoFrameExtractor(self.config, video_path, self.frame_rate, specific_output_dir, - self.model_selection, class_config_path, output_format_instance, - self.transformations, self.sahi_config) - extractor.extract_frames(self.model_confidence) - if self.format_selection == "CVAT": - output_format_instance.zip_and_cleanup() - if self.storage_option == 'Object Storage': - self.upload_outputs(specific_output_dir) - - # Clean up: Remove the temporary video file after processing - if os.path.exists(video_path): - os.remove(video_path) - print(f"Deleted temporary video file: {video_path}") - - st.success('Extraction Completed!') + try: + extractor = VideoFrameExtractor(self.config, video_path, self.frame_rate, specific_output_dir, + self.model_selection, class_config_path, output_format_instance, + self.transformations, self.sahi_config) + extractor.extract_frames(self.model_confidence) + if self.format_selection == "CVAT": + output_format_instance.zip_and_cleanup() + if self.storage_option == 'Object Storage': + self.upload_outputs(specific_output_dir) + + # Clean up: Remove the temporary video file after processing + if os.path.exists(video_path): + os.remove(video_path) + print(f"Deleted temporary video file: {video_path}") + + st.success('Extraction Completed!') + except Exception as e: + st.error(f"An error occurred during frame extraction: {str(e)}") def upload_outputs(self, directory): """ From 863f5d331334219b05f52e4549dee5d10188e96b Mon Sep 17 00:00:00 2001 From: Shamsuddin Ahmed Date: Mon, 9 Sep 2024 02:48:18 +0600 Subject: [PATCH 18/22] fix save annotation --- formats/base_format.py | 55 +++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/formats/base_format.py b/formats/base_format.py index 35b20a8..16bf9c2 100644 --- a/formats/base_format.py +++ b/formats/base_format.py @@ -1,13 +1,14 @@ -from typing import Optional +import os +from typing import Optional, List, Dict class BaseFormat: """ - Base class for handling annotation formats. This class provides basic functionalities - like saving annotations and ensuring directory structure, which can be extended by subclasses. + Base class for handling annotation formats. Provides foundational functionalities + like saving annotations and ensuring directory structure, designed for extension by subclasses. Attributes: - output_dir (str): The directory where the output will be stored. + output_dir (str): Directory where output will be stored. sahi_enabled (bool): Flag to enable or disable SAHI (Sliced Inference). sahi_utils (Optional[object]): SAHI utility object for performing sliced inference. """ @@ -27,7 +28,7 @@ def __init__(self, output_dir: str, sahi_enabled: bool = False, sahi_utils: Opti def ensure_directories(self): """ - Ensures that the necessary directories for saving annotations exist. + Ensures that necessary directories for saving annotations exist. Must be implemented by subclasses. 
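
One observation on the error handling restored here: st.error() reports the failure, but the temporary video is only removed on the success path. A minimal sketch of a cleanup-on-error variant using try/finally — the function and variable names are illustrative, not part of this codebase:

    import os

    def run_with_cleanup(video_path, extract):
        try:
            extract(video_path)
        finally:
            # Runs whether extraction succeeded or raised, so the temp file never leaks.
            if os.path.exists(video_path):
                os.remove(video_path)
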
From 863f5d331334219b05f52e4549dee5d10188e96b Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Mon, 9 Sep 2024 02:48:18 +0600
Subject: [PATCH 18/22] fix save annotation

---
 formats/base_format.py | 55 +++++++++++++++++++++++++++++++-----------
 1 file changed, 41 insertions(+), 14 deletions(-)

diff --git a/formats/base_format.py b/formats/base_format.py
index 35b20a8..16bf9c2 100644
--- a/formats/base_format.py
+++ b/formats/base_format.py
@@ -1,13 +1,14 @@
-from typing import Optional
+import os
+from typing import Optional, List, Dict


 class BaseFormat:
     """
-    Base class for handling annotation formats. This class provides basic functionalities
-    like saving annotations and ensuring directory structure, which can be extended by subclasses.
+    Base class for handling annotation formats. Provides foundational functionalities
+    like saving annotations and ensuring directory structure, designed for extension by subclasses.

     Attributes:
-        output_dir (str): The directory where the output will be stored.
+        output_dir (str): Directory where output will be stored.
         sahi_enabled (bool): Flag to enable or disable SAHI (Sliced Inference).
         sahi_utils (Optional[object]): SAHI utility object for performing sliced inference.
     """
@@ -27,7 +28,7 @@ def __init__(self, output_dir: str, sahi_enabled: bool = False, sahi_utils: Opti

     def ensure_directories(self):
         """
-        Ensures that the necessary directories for saving annotations exist.
+        Ensures that necessary directories for saving annotations exist.
         Must be implemented by subclasses.

         Raises:
@@ -35,21 +36,31 @@ def ensure_directories(self):
         """
         raise NotImplementedError("Subclasses should implement this method.")

-    def process_results(self, frame, results, img_dimensions):
-        """Generate formatted strings from detection results."""
+    def process_results(self, frame, results: Dict, img_dimensions) -> List[str]:
+        """
+        Generate formatted strings from detection results suitable for annotations.
+
+        Args:
+            frame: The image frame being processed.
+            results: Detection results containing bounding boxes and class IDs.
+            img_dimensions: Dimensions of the image for normalizing coordinates.
+
+        Returns:
+            List of annotation strings formatted according to specific requirements.
+        """
         annotations = []
         img_height, img_width = img_dimensions
+
+        # Check if SAHI is enabled to adapt processing of results accordingly
         if self.sahi_enabled:
-            for box in results['boxes']:
+            for box in results['boxes']:  # Assuming SAHI results are formatted similarly
                 class_id = int(box['cls'][0])
                 xmin, ymin, xmax, ymax = box['xyxy'][0]
                 x_center = ((xmin + xmax) / 2) / img_width
                 y_center = ((ymin + ymax) / 2) / img_height
                 width = (xmax - xmin) / img_width
                 height = (ymax - ymin) / img_height
-                print(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
                 annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
         else:
             for result in results:
                 if hasattr(result, 'boxes') and result.boxes is not None:
@@ -61,9 +72,25 @@ def process_results(self, frame, results, img_dimensions):
                         width = (xmax - xmin) / img_width
                         height = (ymax - ymin) / img_height
                         annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
-        return annotations

-    def save_annotations(self, frame, frame_path: str, frame_filename: str, results: list, supported_classes: list):
+        return annotations
+
+    def write_annotations(self, frame_filename: str, annotations: List[str]):
+        """
+        Writes annotations to a file based on the frame filename.
+
+        Args:
+            frame_filename (str): The filename of the frame to which annotations relate.
+            annotations (List[str]): Annotations to be written to the file.
+        """
+        annotation_filename = frame_filename.replace('.jpg', '.txt')
+        annotation_path = os.path.join(self.output_dir, 'labels', annotation_filename)
+        with open(annotation_path, 'w') as file:
+            for annotation in annotations:
+                file.write(annotation + "\n")
+
+    def save_annotations(self, frame, frame_path: str, frame_filename: str, results: Dict,
+                         supported_classes: List[str]):
         """
         Abstract method for saving annotations. To be implemented by subclasses to define
         the logic for saving the annotations.
@@ -72,8 +99,8 @@ def save_annotations(self, frame, frame_path: str, frame_filename: str, results:
             frame (ndarray): The image frame for which annotations are being saved.
             frame_path (str): The path where the frame is located.
             frame_filename (str): The name of the frame file.
-            results (list): A list of results from the detection model or sliced inference.
-            supported_classes (list): List of supported class labels for the annotations.
+            results (Dict): A dictionary of results from the detection model or sliced inference.
+            supported_classes (List[str]): List of supported class labels for the annotations.

         Raises:
             NotImplementedError: If the method is not implemented in the subclass.
         """

From 3f3616b264d82b0e5c1c3135544620673b1cfece Mon Sep 17 00:00:00 2001
From: Shamsuddin Ahmed
Date: Mon, 9 Sep 2024 02:48:43 +0600
Subject: [PATCH 19/22] scale roboflow format

---
 formats/roboflow_format.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/formats/roboflow_format.py b/formats/roboflow_format.py
index b95bffc..8325f3d 100644
--- a/formats/roboflow_format.py
+++ b/formats/roboflow_format.py
@@ -14,11 +14,7 @@ def __init__(self, output_dir, sahi_enabled):
     def save_annotations(self, frame, frame_path, frame_filename, results, supported_classes):
         img_dimensions = frame.shape[:2]
         annotations = self.process_results(frame, results, img_dimensions)
-        annotation_filename = frame_filename.replace('.jpg', '.txt')
-        annotation_path = os.path.join(self.label_dir, annotation_filename)
-        with open(annotation_path, 'w') as f:
-            for annotation in annotations:
-                f.write(annotation + "\n")
+        self.write_annotations(frame_filename, annotations)
         self.create_data_yaml(supported_classes)

     def create_data_yaml(self, supported_classes):
- """ - annotation_filename = frame_filename.replace('.jpg', '.txt') - annotation_path = os.path.join(self.output_dir, 'labels', annotation_filename) - with open(annotation_path, 'w') as file: - for annotation in annotations: - file.write(annotation + "\n") - def save_annotations(self, frame, frame_path: str, frame_filename: str, results: Dict, supported_classes: List[str]): """ From c78ecd7dd00e2b124240f153cac105386a030fcc Mon Sep 17 00:00:00 2001 From: Shamsuddin Ahmed Date: Mon, 9 Sep 2024 03:21:53 +0600 Subject: [PATCH 21/22] fix write annotation cvat --- formats/cvat_format.py | 105 +++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 47 deletions(-) diff --git a/formats/cvat_format.py b/formats/cvat_format.py index 2d94ce6..ac05932 100644 --- a/formats/cvat_format.py +++ b/formats/cvat_format.py @@ -1,83 +1,94 @@ import os import cv2 import zipfile +from typing import List from formats.base_format import BaseFormat class CVATFormat(BaseFormat): """ - Class to handle the CVAT format for image annotations. - Attributes: - output_dir (str): Base directory for all output. + Handles the CVAT format for image annotations. This class manages the creation of necessary directories, + the writing of annotations into CVAT-compatible text files, and the organization of image data. """ - def __init__(self, output_dir, sahi_enabled): + def __init__(self, output_dir: str, sahi_enabled: bool = False): super().__init__(output_dir, sahi_enabled) self.data_dir = os.path.join(output_dir, 'data') self.image_dir = os.path.join(self.data_dir, 'obj_train_data') os.makedirs(self.image_dir, exist_ok=True) - def save_annotations(self, frame, frame_path, frame_filename, results, supported_classes): + def save_annotations(self, frame, frame_path: str, frame_filename: str, results, supported_classes: List[str]): + """ + Saves annotations and frames in a format compatible with CVAT. + """ img_dimensions = frame.shape[:2] annotations = self.process_results(frame, results, img_dimensions) frame_filename_png = frame_filename.replace('.jpg', '.png') image_path = os.path.join(self.image_dir, frame_filename_png) cv2.imwrite(image_path, frame) - annotation_filename = frame_filename_png.replace('.png', '.txt') - annotation_path = os.path.join(self.image_dir, annotation_filename) - with open(annotation_path, 'w') as file: - for annotation in annotations: - file.write(annotation + "\n") + self.write_annotations(frame_filename_png, annotations) self.create_metadata_files(supported_classes) - def create_metadata_files(self, supported_classes): + def write_annotations(self, frame_filename: str, annotations: List[str]): + """ + Writes annotations to a text file associated with each frame image. + """ + annotation_filename = frame_filename.replace('.png', '.txt') + annotation_path = os.path.join(self.image_dir, annotation_filename) + try: + with open(annotation_path, 'w') as file: + for annotation in annotations: + file.write(annotation + "\n") + except IOError as e: + print(f"Error writing annotation file {annotation_path}: {str(e)}") + + def create_metadata_files(self, supported_classes: List[str]): """ - Creates necessary metadata files for CVAT training setup. + Creates necessary metadata files for a CVAT training setup, including class names and training configurations. 
""" obj_names_path = os.path.join(self.data_dir, 'obj.names') obj_data_path = os.path.join(self.data_dir, 'obj.data') train_txt_path = os.path.join(self.data_dir, 'train.txt') - # Create obj.names file - with open(obj_names_path, 'w') as f: - for cls in supported_classes: - f.write(f"{cls}\n") + try: + with open(obj_names_path, 'w') as f: + for cls in supported_classes: + f.write(f"{cls}\n") - # Create obj.data file - with open(obj_data_path, 'w') as f: - f.write("classes = {}\n".format(len(supported_classes))) - f.write("train = data/train.txt\n") - f.write("names = data/obj.names\n") - f.write("backup = backup/\n") + with open(obj_data_path, 'w') as f: + f.write("classes = {}\n".format(len(supported_classes))) + f.write("train = data/train.txt\n") + f.write("names = data/obj.names\n") + f.write("backup = backup/\n") - # Create train.txt file listing all image files - with open(train_txt_path, 'w') as f: - for image_file in os.listdir(self.image_dir): - if image_file.endswith('.png'): - f.write(f"data/obj_train_data/{image_file}\n") - - def ensure_directories(self): - """Ensures all directories are created and ready for use.""" - super().ensure_directories() # Ensures base directories are created + with open(train_txt_path, 'w') as f: + for image_file in os.listdir(self.image_dir): + if image_file.endswith('.png'): + f.write(f"data/obj_train_data/{image_file}\n") + except IOError as e: + print(f"Error writing metadata files: {str(e)}") def zip_and_cleanup(self): - # Create a zip file and add all the data in the data directory to it. + """ + Zips the processed data for transfer or storage and cleans up the directory structure. + """ zip_path = os.path.join(self.output_dir, 'cvat_data.zip') - with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + try: + with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + for root, dirs, files in os.walk(self.data_dir): + for file in files: + file_path = os.path.join(root, file) + zipf.write(file_path, os.path.relpath(file_path, self.data_dir)) + for dir in dirs: + dir_path = os.path.join(root, dir) + zipf.write(dir_path, os.path.relpath(dir_path, self.data_dir)) + + # Cleanup for root, dirs, files in os.walk(self.data_dir, topdown=False): for file in files: - file_path = os.path.join(root, file) - zipf.write(file_path, os.path.relpath(file_path, self.data_dir)) + os.remove(os.path.join(root, file)) for dir in dirs: - dir_path = os.path.join(root, dir) - zipf.write(dir_path, os.path.relpath(dir_path, self.data_dir)) - - # Clean up the directory by removing all files first, then empty directories. - for root, dirs, files in os.walk(self.data_dir, topdown=False): - for file in files: - os.remove(os.path.join(root, file)) - for dir in dirs: - os.rmdir(os.path.join(root, dir)) - - # Finally, remove the base data directory now that it should be empty. 
- os.rmdir(self.data_dir) + os.rmdir(os.path.join(root, dir)) + os.rmdir(self.data_dir) + except Exception as e: + print(f"Error during zip or cleanup: {str(e)}") From 068dc5aef7242ddc31025f737cfde85518d6ea2e Mon Sep 17 00:00:00 2001 From: Shamsuddin Ahmed Date: Mon, 9 Sep 2024 03:22:00 +0600 Subject: [PATCH 22/22] fix write annotation roboflow --- formats/roboflow_format.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/formats/roboflow_format.py b/formats/roboflow_format.py index 8325f3d..c853b15 100644 --- a/formats/roboflow_format.py +++ b/formats/roboflow_format.py @@ -1,6 +1,7 @@ -from formats.base_format import BaseFormat import os import yaml +from typing import List +from formats.base_format import BaseFormat class RoboflowFormat(BaseFormat): @@ -11,6 +12,20 @@ def __init__(self, output_dir, sahi_enabled): os.makedirs(self.image_dir, exist_ok=True) os.makedirs(self.label_dir, exist_ok=True) + def write_annotations(self, frame_filename: str, annotations: List[str]): + """ + Writes annotations to a file based on the frame filename. + + Args: + frame_filename (str): The filename of the frame to which annotations relate. + annotations (List[str]): Annotations to be written to the file. + """ + annotation_filename = frame_filename.replace('.jpg', '.txt') + annotation_path = os.path.join(self.output_dir, 'labels', annotation_filename) + with open(annotation_path, 'w') as file: + for annotation in annotations: + file.write(annotation + "\n") + def save_annotations(self, frame, frame_path, frame_filename, results, supported_classes): img_dimensions = frame.shape[:2] annotations = self.process_results(frame, results, img_dimensions)
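
Taken together, the series leaves two result shapes flowing into BaseFormat.process_results(): plain ultralytics Results objects, and the SAHI dict built by SahiUtils.format_predictions() in patch 09. A small end-to-end sketch of the SAHI path, with invented box values, showing how one such dict becomes a normalized YOLO label line:

    import numpy as np

    # Shape produced by SahiUtils.format_predictions() after patch 09:
    results = {'boxes': [{'cls': [3], 'conf': [0.91],
                          'xyxy': [np.array([100.0, 100.0, 200.0, 300.0])]}]}

    img_height, img_width = 640, 640
    for box in results['boxes']:
        class_id = int(box['cls'][0])
        xmin, ymin, xmax, ymax = box['xyxy'][0]
        x_center = ((xmin + xmax) / 2) / img_width
        y_center = ((ymin + ymax) / 2) / img_height
        print(f"{class_id} {x_center:.6f} {y_center:.6f} "
              f"{(xmax - xmin) / img_width:.6f} {(ymax - ymin) / img_height:.6f}")
    # -> 3 0.234375 0.312500 0.156250 0.312500
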