From fc81b785a9f088b630adf4c0f0f49d0ddce4fa44 Mon Sep 17 00:00:00 2001
From: Igor Davidyuk <igor.davidyuk@intel.com>
Date: Wed, 24 Jan 2024 09:39:15 +0100
Subject: [PATCH 1/9] refactor run_throughput_benchmark: results are written to
 a file after the benchmark completes

Signed-off-by: Igor Davidyuk <igor.davidyuk@intel.com>
---
 geti_sdk/benchmarking/benchmarker.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/geti_sdk/benchmarking/benchmarker.py b/geti_sdk/benchmarking/benchmarker.py
index ee038cef..abdedfe2 100644
--- a/geti_sdk/benchmarking/benchmarker.py
+++ b/geti_sdk/benchmarking/benchmarker.py
@@ -595,11 +595,9 @@ def run_throughput_benchmark(
             f"Benchmarking inference rate for synchronous inference on {frames} frames "
             f"with {repeats} repeats"
         )
-        with logging_redirect_tqdm(tqdm_class=tqdm), open(
-            results_file, "w", newline=""
-        ) as csvfile:
+        with logging_redirect_tqdm(tqdm_class=tqdm):
             results: List[Dict[str, str]] = []
-            for index, deployment_folder in enumerate(
+            for deployment_index, deployment_folder in enumerate(
                 tqdm(self._deployment_folders, desc="Benchmarking")
             ):
                 success = True
@@ -667,7 +665,7 @@ def run_throughput_benchmark(
 
                 # Update result list
                 result_row: Dict[str, str] = {}
-                result_row["name"] = f"Deployment {index}"
+                result_row["name"] = f"Deployment {deployment_index}"
                 result_row["project_name"] = self.project.name
                 result_row["target_device"] = target_device
                 result_row["task 1"] = self.project.get_trainable_tasks()[0].title
@@ -684,11 +682,12 @@ def run_throughput_benchmark(
                 result_row.update(get_system_info(device=target_device))
                 results.append(result_row)
 
-                # Write results to file
-                if index == 0:
-                    fieldnames = list(result_row.keys())
-                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-                    writer.writeheader()
+        # Write results to file
+        with open(results_file, "w", newline="") as csvfile:
+            fieldnames = list(results[0].keys())
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+            writer.writeheader()
+            for result_row in results:
                 writer.writerow(result_row)
 
         return results

From 00ed2885a5240078bdacdc762b97e6b1d2988c70 Mon Sep 17 00:00:00 2001
From: Igor Davidyuk <igor.davidyuk@intel.com>
Date: Mon, 29 Jan 2024 09:18:05 +0100
Subject: [PATCH 2/9] comparison functionality

Signed-off-by: Igor Davidyuk <igor.davidyuk@intel.com>
---
 geti_sdk/benchmarking/benchmarker.py    | 334 +++++++++++++++++++++++-
 notebooks/011_benchmarking_models.ipynb |  40 ++-
 2 files changed, 361 insertions(+), 13 deletions(-)

diff --git a/geti_sdk/benchmarking/benchmarker.py b/geti_sdk/benchmarking/benchmarker.py
index abdedfe2..0cedb27f 100644
--- a/geti_sdk/benchmarking/benchmarker.py
+++ b/geti_sdk/benchmarking/benchmarker.py
@@ -16,8 +16,9 @@
 import logging
 import os
 import time
-from typing import Dict, List, Optional, Sequence, Union
+from typing import Any, Dict, List, Optional, Sequence, Union
 
+import cv2
 import numpy as np
 from tqdm.auto import tqdm
 from tqdm.contrib.logging import logging_redirect_tqdm
@@ -33,6 +34,8 @@
 )
 from geti_sdk.deployment import Deployment
 from geti_sdk.rest_clients import ImageClient, ModelClient, TrainingClient, VideoClient
+from geti_sdk.rest_clients.prediction_client import PredictionClient
+from geti_sdk.utils.plot_helpers import show_image_with_annotation_scene
 
 from .utils import get_system_info, load_benchmark_media, suppress_log_output
 
@@ -687,7 +690,330 @@ def run_throughput_benchmark(
             fieldnames = list(results[0].keys())
             writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
             writer.writeheader()
-            for result_row in results:
-                writer.writerow(result_row)
-
+            writer.writerows(results)
         return results
+
+    def _predict_using_active_model(
+        self,
+        numpy_image: np.ndarray,
+    ) -> dict[str, Any]:
+        """
+        Predict on platform using the active model.
+
+        :param numpy_image: Numpy array containing the image to be predicted on.
+        :return: Dictionary containing the prediction results.
+        """
+        # upload the image
+        image_client = ImageClient(
+            session=self.geti.session,
+            workspace_id=self.geti.workspace_id,
+            project=self.project,
+        )
+        sc_image = image_client.upload_image(numpy_image)
+        # Load the pixel data to visualize the image later on
+        sc_image.get_data(self.geti.session)
+
+        prediction_client = PredictionClient(
+            session=self.geti.session,
+            workspace_id=self.geti.workspace_id,
+            project=self.project,
+        )
+        platform_prediction = prediction_client.get_image_prediction(sc_image)
+        # load active models info
+        active_models = self.model_client.get_all_active_models()
+        result: dict[str, Any] = {}
+        result["prediction"] = platform_prediction
+        result["run_name"] = "online prediction"
+        result["model_1"] = active_models[0].name + " " + active_models[0].precision[0]
+        result["model_1_score"] = active_models[0].performance.score
+        if not self._is_single_task:
+            result["model_2"] = (
+                active_models[1].name + " " + active_models[1].precision[0]
+            )
+            result["model_2_score"] = active_models[1].performance.score
+        return result
+
+    def _pad_image_and_put_caption(
+        self,
+        image: np.ndarray,
+        run_name: int,
+        model_1: str,
+        model_1_score: str,
+        model_2: Optional[str] = None,
+        model_2_score: Optional[str] = None,
+        fps: Optional[int] = None,
+    ) -> np.ndarray:
+        """
+        Pad the image with white and put the caption on it.
+
+        :param image: Numpy array containing the image to be padded.
+        :param run_name: Experiment description.
+        :param model_1: Name of the model 1.
+        :param model_1_score: Score of the model 1.
+        :param model_2: Name of the model 2.
+        :param model_2_score: Score of the model 2.
+        :param fps: FPS of the inference.
+        :return: Padded image with caption.
+        """
+        # Calculate text and image padding size
+        text_scale = round(image.shape[1] / 1280, 1)
+        thickness = int(text_scale / 1.5)
+        (_, label_height), baseline = cv2.getTextSize(
+            "Test caption", cv2.FONT_HERSHEY_SIMPLEX, text_scale, thickness
+        )
+        universal_padding = 2
+        bottom_padding = label_height + baseline
+        # Prepare image captions
+        caption_lines = [
+            run_name + ("" if fps is None else f" @{fps} fps"),
+            f"Model 1: {model_1}, score {model_1_score:.2f}",
+        ]
+        if model_2 and model_2_score:
+            caption_lines.append(f"Model 2: {model_2}, score {model_2_score:.2f}")
+        # Pad the image and put captions on it
+        padded_image = cv2.copyMakeBorder(
+            image,
+            top=universal_padding,
+            bottom=universal_padding + bottom_padding * len(caption_lines),
+            left=universal_padding,
+            right=universal_padding,
+            borderType=cv2.BORDER_CONSTANT,
+            value=(255, 255, 255),
+        )
+        # Put text
+        for line_number, text_line in enumerate(caption_lines):
+            cv2.putText(
+                padded_image,
+                text_line,
+                (0, image.shape[0] + bottom_padding * (line_number + 1)),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                text_scale,
+                (0, 0, 0),
+                thickness,
+            )
+        return padded_image
+
+    def _concat_prediction_results(self, results: List[List[np.ndarray]]) -> np.ndarray:
+        """
+        Merge the prediction images to one.
+
+        :param results: List of lists of numpy arrays containing the results of the
+            predictions.
+        :return: Numpy array containing the concatenated results.
+        """
+        # Gather information about images on the grid
+        row_pixel_lengths = []
+        for index, row in enumerate(results):
+            integral_row_length = sum([image.shape[1] for image in row])
+            row_pixel_lengths.append(integral_row_length)
+            image_heights = [image.shape[0] for image in row]
+            if len(set(image_heights)) > 1:
+                raise ValueError(f"Row {index} has images with different heights!")
+        # Concatenate images
+        max_row_length = max(row_pixel_lengths)
+        concatenated_rows = []
+        for row in results:
+            merged_row = cv2.hconcat(row)
+            if merged_row.shape[1] < max_row_length:
+                # Add empty image to the end of the row
+                merged_row = cv2.hconcat(
+                    [
+                        merged_row,
+                        np.zeros(
+                            (
+                                merged_row.shape[0],
+                                max_row_length - merged_row.shape[1],
+                                merged_row.shape[2],
+                            ),
+                            dtype=np.uint8,
+                        ),
+                    ]
+                )
+            concatenated_rows.append(merged_row)
+        return cv2.vconcat(concatenated_rows)
+
+    def compare_predictions(
+        self,
+        working_directory: os.PathLike = ".",
+        results_filename: str = "comparison",
+        target_device: str = "CPU",
+        image: Optional[Union[np.ndarray, str, os.PathLike]] = None,
+        include_online_prediction_for_active_model: bool = True,  # the name is not finalized
+        throughput_benchmark_results: Optional[
+            Union[List[Dict[str, str]], os.PathLike]
+        ] = None,
+    ) -> np.ndarray:
+        """
+        TODO blank image if not success
+
+        Perform visual comparison of predictions from different deployments.
+
+        :param working_directory: Directory in which the deployments that should be
+            benchmarked are stored. All output will be saved to this directory.
+        :param results_filename: Name of the file to which the results will be saved.
+            File extension should not be included, the results will always be saved as
+            a `.jpg` file. Defaults to `comparison.jpg`. The results file will be created
+            within the `working_directory`
+        :param target_device: Device to run the inference models on, for example "CPU"
+            or "GPU". Defaults to "CPU".
+        :param image: Image to use for comparison. If no image is passed, the first
+            image in the `images` list will be used.
+        :param include_online_prediction_for_active_model: Flag to include prediction
+            from the active model on the platform side.
+        :param throughput_benchmark_results: Results from a throughput benchmark run. If
+            this is passed, the captions for the images will contain the benchmark results.
+        :return: Image containing visual comparison in form of a NumPy array.
+        """
+        if len(self._deployment_folders) == 0:
+            raise ValueError(
+                "Benchmarker does not contain any deployments to benchmark yet! Please "
+                "prepare the deployments first using either the "
+                "`Benchmarker.prepare_benchmark()` or "
+                "`Benchmarker.initialize_from_folder()` methods."
+            )
+        logging.info("Starting collecting predictions for visual comparison.")
+
+        logging.info(
+            f"The Benchmarker will run for {len(self._deployment_folders)} deployments"
+        )
+
+        logging.info("Loading benchmark media")
+        if isinstance(image, np.ndarray):
+            pass
+        elif image is None:
+            image = load_benchmark_media(
+                session=self.geti.session,
+                images=self.images,
+                video=self.video,
+                frames=1,
+            )[0]
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        elif isinstance(image, (str, os.PathLike)):
+            image = cv2.cvtColor(cv2.imread(image), cv2.COLOR_BGR2RGB)
+        else:
+            raise TypeError(f"Invalid image type: {type(image)}.")
+
+        results_file = os.path.join(working_directory, f"{results_filename}.jpg")
+        logging.info(f"Saving visual comparison to `{results_file}`")
+
+        # Check the benchmark results
+        if isinstance(throughput_benchmark_results, (os.PathLike, str)):
+            with open(throughput_benchmark_results, "r") as results_file:
+                throughput_benchmark_results = list(csv.DictReader(results_file))
+
+        # Perform inference
+        with logging_redirect_tqdm(tqdm_class=tqdm):
+            results: List[List[np.ndarray]] = []
+            model_name_to_row: dict[str, int] = {}
+            for deployment_index, deployment_folder in enumerate(
+                tqdm(self._deployment_folders, desc="Collecting predictions")
+            ):
+                success = True
+                deployment = Deployment.from_folder(deployment_folder)
+                try:
+                    with suppress_log_output():
+                        deployment.load_inference_models(device=target_device)
+                except Exception as e:
+                    success = False
+                    logging.info(
+                        f"Failed to load inference models for deployment at path: "
+                        f"`{deployment_folder}`, with error: {e}. Marking benchmark "
+                        f"run for the deployment as failed"
+                    )
+
+                if success:
+                    try:
+                        prediction = deployment.infer(image)
+                    except Exception as e:
+                        success = False
+                        logging.info(
+                            f"Failed to run inference on the image. Marking "
+                            f"benchmark run for deployment `{deployment_folder}` as "
+                            f"failed. Inference failed with error: `{e}`"
+                        )
+                if success:
+                    image_with_prediction = show_image_with_annotation_scene(
+                        image, prediction, show_results=False
+                    )
+                    image_with_prediction = cv2.cvtColor(
+                        image_with_prediction, cv2.COLOR_BGR2RGB
+                    )
+                else:
+                    # Replace the image with an empty one in case of no prediction
+                    image_with_prediction = np.zeros_like(image)
+                    image_with_prediction = cv2.putText(
+                        image_with_prediction,
+                        "Failed to run inference on the image",
+                        (10, 50),
+                        cv2.FONT_HERSHEY_SIMPLEX,
+                        1,
+                        (255, 0, 0),
+                        2,
+                    )
+                # Save model scores
+                model_scores = []
+                for om in deployment.models:
+                    if isinstance(om.performance, Performance):
+                        score = om.performance.score
+                    elif isinstance(om.performance, dict):
+                        score = om.performance.get("score", -1)
+                    else:
+                        score = -1
+                    model_scores.append(score)
+
+                model_info = {
+                    "run_name": f"Deployment {deployment_index}",
+                    "model_1": deployment.models[0].name,
+                    "model_1_score": model_scores[0],
+                    "fps": None
+                    if throughput_benchmark_results is None
+                    else throughput_benchmark_results[deployment_index]["fps"],
+                }
+                if not self._is_single_task:
+                    model_info.update(
+                        {
+                            "model_2": deployment.models[1].name,
+                            "model_2_score": model_scores[1],
+                        }
+                    )
+                # Pad the image and put captions on it
+                image_with_prediction = self._pad_image_and_put_caption(
+                    image=image_with_prediction, **model_info
+                )
+
+                # Image is ready, now we add it to the results array
+                # Determine result's position on the grid
+                model_1_name = deployment.models[0].name.split(" ")[0]
+                if model_1_name not in model_name_to_row:
+                    model_name_to_row[model_1_name] = len(results)
+                    results.append([])
+                row_n = model_name_to_row[model_1_name]
+                results[row_n].append(image_with_prediction)
+
+        if include_online_prediction_for_active_model:
+            logging.info("Predicting on platform using the active model")
+            online_prediction_result = self._predict_using_active_model(image)
+            image_with_prediction = show_image_with_annotation_scene(
+                image, online_prediction_result["prediction"], show_results=False
+            )
+            image_with_prediction = cv2.cvtColor(
+                image_with_prediction, cv2.COLOR_BGR2RGB
+            )
+
+            del online_prediction_result["prediction"]
+            image_with_prediction = self._pad_image_and_put_caption(
+                image=image_with_prediction, **online_prediction_result
+            )
+            # Add online prediction to a separate row
+            results.append(
+                [
+                    image_with_prediction,
+                ]
+            )
+
+        # self.project.name
+        # self.project.get_trainable_tasks()[0].title
+        # get_system_info(device=target_device)
+        # deployment_folder
+
+        return self._concat_prediction_results(results=results)
diff --git a/notebooks/011_benchmarking_models.ipynb b/notebooks/011_benchmarking_models.ipynb
index 3626b1d3..697a7fff 100644
--- a/notebooks/011_benchmarking_models.ipynb
+++ b/notebooks/011_benchmarking_models.ipynb
@@ -218,30 +218,52 @@
     "\n",
     "In addition, the table contains some details about the system, indicating the operating system, some info regarding the target device and the python, geti-sdk and openvino versions. This is useful when comparing benchmark results across different hardware setups.\n",
     "\n",
-    "## Conclusion\n",
-    "Ideally, the table below should help to select which model to pick for deployment in production use. The optimal model has a sufficiently high `model 1 score`, while still reaching the desired `fps`."
+    "### Visual predictions comparison\n",
+    "Although the model scores give insight into the model performance statistically, comparing the models' predictions visually is useful. The `Benchmarker` exposes the `compare_predictions` method, which compares the saved deployment prediction results by inferring them on a provided image."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ff3e62e5-0b86-4c7d-a16c-49c2ebe1d969",
+   "id": "ad8f1f09-2722-41f5-b338-e83e1cf6c11b",
    "metadata": {},
    "outputs": [],
    "source": [
-    "import pandas as pd\n",
+    "from IPython.display import display\n",
+    "from PIL import Image\n",
     "\n",
-    "df = pd.DataFrame(results)\n",
-    "df"
+    "from geti_sdk.demos import EXAMPLE_IMAGE_PATH\n",
+    "\n",
+    "prediction_comparison = benchmarker.compare_predictions(\n",
+    "    working_directory=benchmark_folder,\n",
+    "    image=EXAMPLE_IMAGE_PATH,\n",
+    "    throughput_benchmark_results=results,\n",
+    "    include_online_prediction_for_active_model=True,\n",
+    ")\n",
+    "display(Image.fromarray(prediction_comparison))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d8144f2a",
+   "metadata": {},
+   "source": [
+    "## Conclusion\n",
+    "Ideally, the table below should help to select which model to pick for deployment in production use. The optimal model has a sufficiently high `model 1 score`, while still reaching the desired `fps`."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ad8f1f09-2722-41f5-b338-e83e1cf6c11b",
+   "id": "ff3e62e5-0b86-4c7d-a16c-49c2ebe1d969",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "df = pd.DataFrame(results)\n",
+    "df"
+   ]
   }
  ],
  "metadata": {
@@ -260,7 +282,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.17"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,

From 4011488d80891c422044eb541a03ddb82651b717 Mon Sep 17 00:00:00 2001
From: Igor Davidyuk <igor.davidyuk@intel.com>
Date: Mon, 29 Jan 2024 10:18:12 +0100
Subject: [PATCH 3/9] add header to comparison image

Signed-off-by: Igor Davidyuk <igor.davidyuk@intel.com>
---
 geti_sdk/benchmarking/benchmarker.py | 78 +++++++++++++++++++++++-----
 1 file changed, 64 insertions(+), 14 deletions(-)

diff --git a/geti_sdk/benchmarking/benchmarker.py b/geti_sdk/benchmarking/benchmarker.py
index 0cedb27f..0df9ab1b 100644
--- a/geti_sdk/benchmarking/benchmarker.py
+++ b/geti_sdk/benchmarking/benchmarker.py
@@ -723,7 +723,7 @@ def _predict_using_active_model(
         active_models = self.model_client.get_all_active_models()
         result: dict[str, Any] = {}
         result["prediction"] = platform_prediction
-        result["run_name"] = "online prediction"
+        result["run_name"] = "On-Platform Prediction"
         result["model_1"] = active_models[0].name + " " + active_models[0].precision[0]
         result["model_1_score"] = active_models[0].performance.score
         if not self._is_single_task:
@@ -762,7 +762,7 @@ def _pad_image_and_put_caption(
             "Test caption", cv2.FONT_HERSHEY_SIMPLEX, text_scale, thickness
         )
         universal_padding = 2
-        bottom_padding = label_height + baseline
+        bottom_padding_pre_line = label_height + baseline
         # Prepare image captions
         caption_lines = [
             run_name + ("" if fps is None else f" @{fps} fps"),
@@ -774,7 +774,7 @@ def _pad_image_and_put_caption(
         padded_image = cv2.copyMakeBorder(
             image,
             top=universal_padding,
-            bottom=universal_padding + bottom_padding * len(caption_lines),
+            bottom=universal_padding + bottom_padding_pre_line * len(caption_lines),
             left=universal_padding,
             right=universal_padding,
             borderType=cv2.BORDER_CONSTANT,
@@ -785,7 +785,7 @@ def _pad_image_and_put_caption(
             cv2.putText(
                 padded_image,
                 text_line,
-                (0, image.shape[0] + bottom_padding * (line_number + 1)),
+                (0, image.shape[0] + bottom_padding_pre_line * (line_number + 1)),
                 cv2.FONT_HERSHEY_SIMPLEX,
                 text_scale,
                 (0, 0, 0),
@@ -819,19 +819,74 @@ def _concat_prediction_results(self, results: List[List[np.ndarray]]) -> np.ndar
                 merged_row = cv2.hconcat(
                     [
                         merged_row,
-                        np.zeros(
+                        np.ones(
                             (
                                 merged_row.shape[0],
                                 max_row_length - merged_row.shape[1],
                                 merged_row.shape[2],
                             ),
                             dtype=np.uint8,
-                        ),
+                        )
+                        * 255,
                     ]
                 )
             concatenated_rows.append(merged_row)
         return cv2.vconcat(concatenated_rows)
 
+    def _add_header_to_comparison(
+        self, comparison_image: np.ndarray, target_device: str
+    ) -> np.ndarray:
+        """
+        Add a header to the comparison image.
+
+        :param comparison_image: Comparison image to add the header to.
+        :return: Comparison image with header.
+        """
+        # Calculate text and image padding size
+        text_scale = round(comparison_image.shape[1] / 1280, 1)
+        thickness = int(text_scale / 1.4)
+        (_, label_height), baseline = cv2.getTextSize(
+            "Test caption", cv2.FONT_HERSHEY_SIMPLEX, text_scale, thickness
+        )
+        top_padding_per_line = label_height + baseline
+        # Prepare image captions
+        device_info = get_system_info(device=target_device)["device_info"]
+        caption_lines = [
+            "Inference results comparison",
+            f"Project: {self.project.name}",
+            (
+                f"Task: {self.project.get_trainable_tasks()[0].title}"
+                + (
+                    ""
+                    if self._is_single_task
+                    else f" -> {self.project.get_trainable_tasks()[1].title}"
+                )
+            ),
+            f"Device info: {device_info}",
+        ]
+        # Pad the image
+        padded_image = cv2.copyMakeBorder(
+            comparison_image,
+            top=2 * baseline + top_padding_per_line * len(caption_lines),
+            bottom=0,
+            left=0,
+            right=0,
+            borderType=cv2.BORDER_CONSTANT,
+            value=(255, 255, 255),
+        )
+        # Put text
+        for line_number, text_line in enumerate(caption_lines):
+            cv2.putText(
+                padded_image,
+                text_line,
+                (10, top_padding_per_line * (line_number + 1)),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                text_scale,
+                (0, 0, 0),
+                thickness,
+            )
+        return padded_image
+
     def compare_predictions(
         self,
         working_directory: os.PathLike = ".",
@@ -991,7 +1046,7 @@ def compare_predictions(
                 results[row_n].append(image_with_prediction)
 
         if include_online_prediction_for_active_model:
-            logging.info("Predicting on platform using the active model")
+            logging.info("Predicting on the platform using the active model")
             online_prediction_result = self._predict_using_active_model(image)
             image_with_prediction = show_image_with_annotation_scene(
                 image, online_prediction_result["prediction"], show_results=False
@@ -1010,10 +1065,5 @@ def compare_predictions(
                     image_with_prediction,
                 ]
             )
-
-        # self.project.name
-        # self.project.get_trainable_tasks()[0].title
-        # get_system_info(device=target_device)
-        # deployment_folder
-
-        return self._concat_prediction_results(results=results)
+        image_grid = self._concat_prediction_results(results=results)
+        return self._add_header_to_comparison(image_grid, target_device=target_device)

From 7e235abe17521055851d492073ae416877b06099 Mon Sep 17 00:00:00 2001
From: Igor Davidyuk <igor.davidyuk@intel.com>
Date: Mon, 29 Jan 2024 12:27:39 +0100
Subject: [PATCH 4/9] move visualization helper functions to plot_helpers

Signed-off-by: Igor Davidyuk <igor.davidyuk@intel.com>
---
 geti_sdk/benchmarking/benchmarker.py | 130 +++++----------------------
 geti_sdk/utils/plot_helpers.py       | 109 +++++++++++++++++++++-
 2 files changed, 126 insertions(+), 113 deletions(-)

diff --git a/geti_sdk/benchmarking/benchmarker.py b/geti_sdk/benchmarking/benchmarker.py
index 0df9ab1b..3e4e510d 100644
--- a/geti_sdk/benchmarking/benchmarker.py
+++ b/geti_sdk/benchmarking/benchmarker.py
@@ -35,7 +35,11 @@
 from geti_sdk.deployment import Deployment
 from geti_sdk.rest_clients import ImageClient, ModelClient, TrainingClient, VideoClient
 from geti_sdk.rest_clients.prediction_client import PredictionClient
-from geti_sdk.utils.plot_helpers import show_image_with_annotation_scene
+from geti_sdk.utils.plot_helpers import (
+    concat_prediction_results,
+    pad_image_and_put_caption,
+    show_image_with_annotation_scene,
+)
 
 from .utils import get_system_info, load_benchmark_media, suppress_log_output
 
@@ -733,106 +737,6 @@ def _predict_using_active_model(
             result["model_2_score"] = active_models[1].performance.score
         return result
 
-    def _pad_image_and_put_caption(
-        self,
-        image: np.ndarray,
-        run_name: int,
-        model_1: str,
-        model_1_score: str,
-        model_2: Optional[str] = None,
-        model_2_score: Optional[str] = None,
-        fps: Optional[int] = None,
-    ) -> np.ndarray:
-        """
-        Pad the image with white and put the caption on it.
-
-        :param image: Numpy array containing the image to be padded.
-        :param run_name: Experiment description.
-        :param model_1: Name of the model 1.
-        :param model_1_score: Score of the model 1.
-        :param model_2: Name of the model 2.
-        :param model_2_score: Score of the model 2.
-        :param fps: FPS of the inference.
-        :return: Padded image with caption.
-        """
-        # Calculate text and image padding size
-        text_scale = round(image.shape[1] / 1280, 1)
-        thickness = int(text_scale / 1.5)
-        (_, label_height), baseline = cv2.getTextSize(
-            "Test caption", cv2.FONT_HERSHEY_SIMPLEX, text_scale, thickness
-        )
-        universal_padding = 2
-        bottom_padding_pre_line = label_height + baseline
-        # Prepare image captions
-        caption_lines = [
-            run_name + ("" if fps is None else f" @{fps} fps"),
-            f"Model 1: {model_1}, score {model_1_score:.2f}",
-        ]
-        if model_2 and model_2_score:
-            caption_lines.append(f"Model 2: {model_2}, score {model_2_score:.2f}")
-        # Pad the image and put captions on it
-        padded_image = cv2.copyMakeBorder(
-            image,
-            top=universal_padding,
-            bottom=universal_padding + bottom_padding_pre_line * len(caption_lines),
-            left=universal_padding,
-            right=universal_padding,
-            borderType=cv2.BORDER_CONSTANT,
-            value=(255, 255, 255),
-        )
-        # Put text
-        for line_number, text_line in enumerate(caption_lines):
-            cv2.putText(
-                padded_image,
-                text_line,
-                (0, image.shape[0] + bottom_padding_pre_line * (line_number + 1)),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                text_scale,
-                (0, 0, 0),
-                thickness,
-            )
-        return padded_image
-
-    def _concat_prediction_results(self, results: List[List[np.ndarray]]) -> np.ndarray:
-        """
-        Merge the prediction images to one.
-
-        :param results: List of lists of numpy arrays containing the results of the
-            predictions.
-        :return: Numpy array containing the concatenated results.
-        """
-        # Gather information about images on the grid
-        row_pixel_lengths = []
-        for index, row in enumerate(results):
-            integral_row_length = sum([image.shape[1] for image in row])
-            row_pixel_lengths.append(integral_row_length)
-            image_heights = [image.shape[0] for image in row]
-            if len(set(image_heights)) > 1:
-                raise ValueError(f"Row {index} has images with different heights!")
-        # Concatenate images
-        max_row_length = max(row_pixel_lengths)
-        concatenated_rows = []
-        for row in results:
-            merged_row = cv2.hconcat(row)
-            if merged_row.shape[1] < max_row_length:
-                # Add empty image to the end of the row
-                merged_row = cv2.hconcat(
-                    [
-                        merged_row,
-                        np.ones(
-                            (
-                                merged_row.shape[0],
-                                max_row_length - merged_row.shape[1],
-                                merged_row.shape[2],
-                            ),
-                            dtype=np.uint8,
-                        )
-                        * 255,
-                    ]
-                )
-            concatenated_rows.append(merged_row)
-        return cv2.vconcat(concatenated_rows)
-
     def _add_header_to_comparison(
         self, comparison_image: np.ndarray, target_device: str
     ) -> np.ndarray:
@@ -890,7 +794,7 @@ def _add_header_to_comparison(
     def compare_predictions(
         self,
         working_directory: os.PathLike = ".",
-        results_filename: str = "comparison",
+        saved_image_name: str = "comparison",
         target_device: str = "CPU",
         image: Optional[Union[np.ndarray, str, os.PathLike]] = None,
         include_online_prediction_for_active_model: bool = True,  # the name is not finalized
@@ -905,7 +809,7 @@ def compare_predictions(
 
         :param working_directory: Directory in which the deployments that should be
             benchmarked are stored. All output will be saved to this directory.
-        :param results_filename: Name of the file to which the results will be saved.
+        :param saved_image_name: Name of the file to which the results will be saved.
             File extension should not be included, the results will always be saved as
             a `.jpg` file. Defaults to `comparison.jpg`. The results file will be created
             within the `working_directory`
@@ -948,8 +852,8 @@ def compare_predictions(
         else:
             raise TypeError(f"Invalid image type: {type(image)}.")
 
-        results_file = os.path.join(working_directory, f"{results_filename}.jpg")
-        logging.info(f"Saving visual comparison to `{results_file}`")
+        saved_image_path = os.path.join(working_directory, f"{saved_image_name}.jpg")
+        logging.info(f"Saving visual comparison to `{saved_image_path}`")
 
         # Check the benchmark results
         if isinstance(throughput_benchmark_results, (os.PathLike, str)):
@@ -1032,7 +936,7 @@ def compare_predictions(
                         }
                     )
                 # Pad the image and put captions on it
-                image_with_prediction = self._pad_image_and_put_caption(
+                image_with_prediction = pad_image_and_put_caption(
                     image=image_with_prediction, **model_info
                 )
 
@@ -1056,7 +960,7 @@ def compare_predictions(
             )
 
             del online_prediction_result["prediction"]
-            image_with_prediction = self._pad_image_and_put_caption(
+            image_with_prediction = pad_image_and_put_caption(
                 image=image_with_prediction, **online_prediction_result
             )
             # Add online prediction to a separate row
@@ -1065,5 +969,13 @@ def compare_predictions(
                     image_with_prediction,
                 ]
             )
-        image_grid = self._concat_prediction_results(results=results)
-        return self._add_header_to_comparison(image_grid, target_device=target_device)
+        image_grid = concat_prediction_results(results=results)
+        image_with_header = self._add_header_to_comparison(
+            image_grid, target_device=target_device
+        )
+
+        # Save image to file
+        cv2.imwrite(
+            saved_image_path, cv2.cvtColor(image_with_header, cv2.COLOR_RGB2BGR)
+        )
+        return image_with_header
diff --git a/geti_sdk/utils/plot_helpers.py b/geti_sdk/utils/plot_helpers.py
index 83856a07..282cb691 100644
--- a/geti_sdk/utils/plot_helpers.py
+++ b/geti_sdk/utils/plot_helpers.py
@@ -53,9 +53,9 @@ def show_image_with_annotation_scene(
         This parameter accepts either `rgb` or `bgr` as input values, and defaults to
         `rgb`.
     """
-    if type(annotation_scene) == AnnotationScene:
+    if type(annotation_scene) is AnnotationScene:
         plot_type = "Annotation"
-    elif type(annotation_scene) == Prediction:
+    elif type(annotation_scene) is Prediction:
         plot_type = "Prediction"
     else:
         raise ValueError(
@@ -143,9 +143,9 @@ def show_video_frames_with_annotation_scenes(
         )
 
     for frame, annotation_scene in zip(video_frames, annotation_scenes):
-        if type(annotation_scene) == AnnotationScene:
+        if type(annotation_scene) is AnnotationScene:
             name = "Annotation"
-        elif type(annotation_scene) == Prediction:
+        elif type(annotation_scene) is Prediction:
             name = "Prediction"
         else:
             raise ValueError(
@@ -173,3 +173,104 @@ def show_video_frames_with_annotation_scenes(
         cv2.waitKey(1)
     else:
         out_writer.release()
+
+
+def pad_image_and_put_caption(
+    image: np.ndarray,
+    run_name: str,
+    model_1: str,
+    model_1_score: float,
+    model_2: Optional[str] = None,
+    model_2_score: Optional[float] = None,
+    fps: Optional[int] = None,
+) -> np.ndarray:
+    """
+    Pad the image with white and put the caption on it.
+
+    :param image: Numpy array containing the image to be padded.
+    :param run_name: Experiment description, used as the first caption line.
+    :param model_1: Name of the model 1.
+    :param model_1_score: Score of the model 1, rendered with two decimals.
+    :param model_2: Name of the model 2; its caption line is skipped when empty.
+    :param model_2_score: Score of the model 2; a score of 0 is still displayed.
+    :param fps: FPS of the inference; left out of the caption when None.
+    :return: Padded image with caption.
+    """
+    # Scale the font with the image width (scale 1.0 at 1280 px)
+    text_scale = round(image.shape[1] / 1280, 1)
+    thickness = int(text_scale / 1.5)
+    (_, label_height), baseline = cv2.getTextSize(
+        "Test caption", cv2.FONT_HERSHEY_SIMPLEX, text_scale, thickness
+    )
+    universal_padding = 2
+    bottom_padding_pre_line = label_height + baseline
+    # Prepare image captions
+    caption_lines = [
+        run_name + ("" if fps is None else f" @{fps} fps"),
+        f"Model 1: {model_1}, score {model_1_score:.2f}",
+    ]
+    if model_2 and model_2_score is not None:
+        caption_lines.append(f"Model 2: {model_2}, score {model_2_score:.2f}")
+    # Pad the image; the bottom border reserves one text line per caption
+    padded_image = cv2.copyMakeBorder(
+        image,
+        top=universal_padding,
+        bottom=universal_padding + bottom_padding_pre_line * len(caption_lines),
+        left=universal_padding,
+        right=universal_padding,
+        borderType=cv2.BORDER_CONSTANT,
+        value=(255, 255, 255),
+    )
+    # Put text
+    for line_number, text_line in enumerate(caption_lines):
+        cv2.putText(
+            padded_image,
+            text_line,
+            (0, image.shape[0] + bottom_padding_pre_line * (line_number + 1)),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            text_scale,
+            (0, 0, 0),
+            thickness,
+        )
+    return padded_image
+
+
+def concat_prediction_results(results: List[List[np.ndarray]]) -> np.ndarray:
+    """
+    Stitch a grid of prediction images into a single image.
+
+    :param results: Grid of images as a list of rows; rows narrower than the widest
+        one are padded with white on the right. Heights must match within a row.
+    :return: Numpy array containing the concatenated results.
+    """
+    # Validate row heights and measure the total pixel width of every row
+    row_widths = []
+    for row_number, row in enumerate(results):
+        total_width = sum(image.shape[1] for image in row)
+        row_widths.append(total_width)
+        distinct_heights = {image.shape[0] for image in row}
+        if len(distinct_heights) > 1:
+            raise ValueError(f"Row {row_number} has images with different heights!")
+    # Concatenate images
+    widest_row = max(row_widths)
+    stacked_rows = []
+    for row in results:
+        row_image = cv2.hconcat(row)
+        if row_image.shape[1] < widest_row:
+            # Fill the remaining width with a white block
+            row_image = cv2.hconcat(
+                [
+                    row_image,
+                    np.full(
+                        (
+                            row_image.shape[0],
+                            widest_row - row_image.shape[1],
+                            row_image.shape[2],
+                        ),
+                        255,
+                        dtype=np.uint8,
+                    ),
+                ]
+            )
+        stacked_rows.append(row_image)
+    return cv2.vconcat(stacked_rows)

From a1eca52c93a496e22393cd83c19dc4e124ba7e86 Mon Sep 17 00:00:00 2001
From: Igor Davidyuk <igor.davidyuk@intel.com>
Date: Mon, 29 Jan 2024 12:28:07 +0100
Subject: [PATCH 5/9] add nightly tests

Signed-off-by: Igor Davidyuk <igor.davidyuk@intel.com>
---
 tests/nightly/test_classification.py              | 3 +++
 tests/nightly/test_detection_to_classification.py | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/tests/nightly/test_classification.py b/tests/nightly/test_classification.py
index 079e9975..c0c7baa5 100644
--- a/tests/nightly/test_classification.py
+++ b/tests/nightly/test_classification.py
@@ -66,3 +66,6 @@ def test_benchmarking(
             repeats=2,
         )
         pd.DataFrame(results)
+        benchmarker.compare_predictions(
+            working_directory=fxt_temp_directory, throughput_benchmark_results=results
+        )
diff --git a/tests/nightly/test_detection_to_classification.py b/tests/nightly/test_detection_to_classification.py
index 65d751d4..d7fa8c0f 100644
--- a/tests/nightly/test_detection_to_classification.py
+++ b/tests/nightly/test_detection_to_classification.py
@@ -73,3 +73,6 @@ def test_benchmarking(
             repeats=2,
         )
         pd.DataFrame(results)
+        benchmarker.compare_predictions(
+            working_directory=fxt_temp_directory, throughput_benchmark_results=results
+        )

From a77e70c0f45a7f4bc9e12e6fcb81556bdd67ce21 Mon Sep 17 00:00:00 2001
From: Igor Davidyuk <igor.davidyuk@intel.com>
Date: Mon, 29 Jan 2024 12:45:44 +0100
Subject: [PATCH 6/9] fix typing

Signed-off-by: Igor Davidyuk <igor.davidyuk@intel.com>
---
 geti_sdk/benchmarking/benchmarker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/geti_sdk/benchmarking/benchmarker.py b/geti_sdk/benchmarking/benchmarker.py
index 3e4e510d..34e8e0f7 100644
--- a/geti_sdk/benchmarking/benchmarker.py
+++ b/geti_sdk/benchmarking/benchmarker.py
@@ -700,7 +700,7 @@ def run_throughput_benchmark(
     def _predict_using_active_model(
         self,
         numpy_image: np.ndarray,
-    ) -> dict[str, Any]:
+    ) -> Dict[str, Any]:
         """
         Predict on platform using the active model.
 

From 05500d1d9e31207fa5004b3c29526ca41660f942 Mon Sep 17 00:00:00 2001
From: Igor Davidyuk <igor.davidyuk@intel.com>
Date: Mon, 29 Jan 2024 18:04:58 +0100
Subject: [PATCH 7/9] use predict_image for on-platform inference

Signed-off-by: Igor Davidyuk <igor.davidyuk@intel.com>
---
 geti_sdk/benchmarking/benchmarker.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/geti_sdk/benchmarking/benchmarker.py b/geti_sdk/benchmarking/benchmarker.py
index 34e8e0f7..c19b260b 100644
--- a/geti_sdk/benchmarking/benchmarker.py
+++ b/geti_sdk/benchmarking/benchmarker.py
@@ -707,22 +707,14 @@ def _predict_using_active_model(
         :param numpy_image: Numpy array containing the image to be predicted on.
         :return: Dictionary containing the prediction results.
         """
-        # upload the image
-        image_client = ImageClient(
-            session=self.geti.session,
-            workspace_id=self.geti.workspace_id,
-            project=self.project,
-        )
-        sc_image = image_client.upload_image(numpy_image)
-        # Load the pixel data to visualize the image later on
-        sc_image.get_data(self.geti.session)
-
+        # Predict on the Platform
         prediction_client = PredictionClient(
             session=self.geti.session,
             workspace_id=self.geti.workspace_id,
             project=self.project,
         )
-        platform_prediction = prediction_client.get_image_prediction(sc_image)
+        platform_prediction = prediction_client.predict_image(numpy_image)
+
         # load active models info
         active_models = self.model_client.get_all_active_models()
         result: dict[str, Any] = {}

From b6836044c3715203bca1aff01df44f0e449f62a8 Mon Sep 17 00:00:00 2001
From: Igor Davidyuk <igor.davidyuk@intel.com>
Date: Mon, 29 Jan 2024 18:24:51 +0100
Subject: [PATCH 8/9] add unit test

Signed-off-by: Igor Davidyuk <igor.davidyuk@intel.com>
---
 .../unit/benchmarking/test_benchmarker.py     | 70 +++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/tests/pre-merge/unit/benchmarking/test_benchmarker.py b/tests/pre-merge/unit/benchmarking/test_benchmarker.py
index 07a73980..04f95069 100644
--- a/tests/pre-merge/unit/benchmarking/test_benchmarker.py
+++ b/tests/pre-merge/unit/benchmarking/test_benchmarker.py
@@ -15,6 +15,7 @@
 import csv
 from pathlib import Path
 
+import numpy as np
 import pytest
 from pytest_mock import MockerFixture
 
@@ -328,3 +329,72 @@ def test_throughput_benchmark(
         assert deployment.load_inference_models.call_count == number_of_runs
         # For each model infer is called: 1 Warm-up call, 1 time estimation call and `frames * repeats` for benchmark
         assert deployment.infer.call_count == number_of_runs * (2 + frames * repeats)
+
+    def test_compare_predictions(
+        self,
+        fxt_benchmarker: Benchmarker,
+        mocker: MockerFixture,
+        fxt_temp_directory: str,
+    ):
+        """Check that compare_predictions saves the comparison image to disk."""
+        # Arrange
+        # A real 10x10 RGB image: np.array((10, 10, 3)) would be a 3-element vector
+        mock_image = np.zeros((10, 10, 3), dtype=np.uint8)
+        mocked_prediction_client = mocker.patch(
+            "geti_sdk.benchmarking.benchmarker.PredictionClient",
+        )
+        _ = mocker.patch.object(fxt_benchmarker.geti, "deploy_project")
+        _ = mocker.patch(
+            "geti_sdk.benchmarking.benchmarker.cv2.getTextSize",
+            return_value=((10, 10), 10),
+        )
+        _ = mocker.patch(
+            "geti_sdk.benchmarking.benchmarker.cv2.copyMakeBorder",
+            return_value=mock_image,
+        )
+        _ = mocker.patch("geti_sdk.benchmarking.benchmarker.cv2.putText")
+        mock_show_image_with_annotation_scene = mocker.patch(
+            "geti_sdk.benchmarking.benchmarker.show_image_with_annotation_scene",
+            return_value=mock_image,
+        )
+        mock_pad_image_and_put_caption = mocker.patch(
+            "geti_sdk.benchmarking.benchmarker.pad_image_and_put_caption",
+        )
+        mock_concat_prediction_results = mocker.patch(
+            "geti_sdk.benchmarking.benchmarker.concat_prediction_results",
+        )
+        fxt_benchmarker.prepare_benchmark(fxt_temp_directory)
+        deployment = mocker.MagicMock()
+        deployment.project.name = fxt_benchmarker.project.name
+        deployment.models = [
+            mocker.MagicMock(),
+        ]
+        _ = mocker.patch(
+            "geti_sdk.benchmarking.benchmarker.Deployment.from_folder",
+            return_value=deployment,
+        )
+
+        results_file = Path(fxt_temp_directory) / "comparison.jpg"
+
+        # Act
+        fxt_benchmarker.compare_predictions(
+            working_directory=fxt_temp_directory,
+            image=mock_image,
+            saved_image_name=results_file.stem,
+        )
+
+        # Assert
+        assert results_file.is_file()
+        mocked_prediction_client.return_value.predict_image.assert_called_once_with(
+            mock_image
+        )
+        assert (
+            mock_show_image_with_annotation_scene.call_count
+            == mock_pad_image_and_put_caption.call_count
+            == (
+                # Calls for deployments + online prediction call
+                len(fxt_benchmarker.models) * len(fxt_benchmarker.precision_levels)
+                + 1
+            )
+        )
+        mock_concat_prediction_results.assert_called_once()

From 990b141433a5c57610e1ca5bb2a7acffa1c22c5d Mon Sep 17 00:00:00 2001
From: Igor Davidyuk <igor.davidyuk@intel.com>
Date: Mon, 29 Jan 2024 18:35:20 +0100
Subject: [PATCH 9/9] save benchmark result to file every run

Signed-off-by: Igor Davidyuk <igor.davidyuk@intel.com>
---
 geti_sdk/benchmarking/benchmarker.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/geti_sdk/benchmarking/benchmarker.py b/geti_sdk/benchmarking/benchmarker.py
index c19b260b..b871a4bc 100644
--- a/geti_sdk/benchmarking/benchmarker.py
+++ b/geti_sdk/benchmarking/benchmarker.py
@@ -687,14 +687,20 @@ def run_throughput_benchmark(
                 result_row["total frames"] = f"{frames * repeats}"
                 result_row["source"] = deployment_folder
                 result_row.update(get_system_info(device=target_device))
+
+                # Write results to file
+                fieldnames = list(result_row.keys())
+                if not results:  # First row
+                    with open(results_file, "w", newline="") as csvfile:
+                        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+                        writer.writeheader()
+                        writer.writerow(result_row)
+                else:  # Append
+                    with open(results_file, "a", newline="") as csvfile:
+                        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+                        writer.writerow(result_row)
                 results.append(result_row)
 
-        # Write results to file
-        with open(results_file, "w", newline="") as csvfile:
-            fieldnames = list(results[0].keys())
-            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-            writer.writeheader()
-            writer.writerows(results)
         return results
 
     def _predict_using_active_model(