From d6e59cde4f5db74c29e00f414761f5e0ad00ce05 Mon Sep 17 00:00:00 2001
From: heyufan1995 <heyufan1995@gmail.com>
Date: Tue, 27 Aug 2024 15:35:23 -0400
Subject: [PATCH 1/9] Fix transpose and patch coords bug

Signed-off-by: heyufan1995 <heyufan1995@gmail.com>
---
 monai/apps/vista3d/sampler.py  | 15 ++++++++-------
 monai/networks/nets/vista3d.py |  7 +++++--
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index b7aeb89a2e..7bc091f013 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -58,16 +58,16 @@ def sample_prompt_pairs(
         labels: [1, 1, H, W, D], ground truth labels.
         label_set: the label list for the specific dataset. Note if 0 is included in label_set,
             it will be added into automatic branch training. Recommend removing 0 from label_set
-            for multi-partially-labeled-dataset training, and adding 0 for finetuning specific dataset.
-            The reason is region with 0 in one partially labeled dataset may contain foregrounds in
-            another dataset.
+            for multi-partially-labeled-dataset training, and adding 0 for finetuning specific dataset. 
+            The reason is region with 0 in one partially labeled dataset may contain foregrounds in 
+            another dataset. 
         max_prompt: int, max number of total prompt, including foreground and background.
         max_foreprompt: int, max number of prompt from foreground.
         max_backprompt: int, max number of prompt from background.
         max_point: maximum number of points for each object.
         include_background: if include 0 into training prompt. If included, background 0 is treated
-            the same as foreground. Always be False for multi-partial-dataset training. If needed,
-            can be true for finetuning specific dataset, .
+            the same as foreground and points will be sampled. Can be true only if user want to segment 
+            background 0 with point clicks, otherwise always be false. 
         drop_label_prob: probability to drop label prompt.
         drop_point_prob: probability to drop point prompt.
         point_sampler: sampler to augment masks with supervoxel.
@@ -76,12 +76,13 @@ def sample_prompt_pairs(
     Returns:
         label_prompt: [B, 1]. The classes used for training automatic segmentation.
         point: [B, N, 3]. The corresponding points for each class.
-        Note that background label prompt requires matching point as well ([0,0,0] is used).
+            Note that background label prompt requires matching point as well ([0,0,0] is used).
         point_label: [B, N]. The corresponding point labels for each point (negative or positive).
-        -1 is used for padding the background label prompt and will be ignored.
+            -1 is used for padding the background label prompt and will be ignored.
         prompt_class: [B, 1], exactly the same with label_prompt for label indexing for training loss.
         label_prompt can be None, and prompt_class is used to identify point classes.
     """
+
     # class label number
     if not labels.shape[0] == 1:
         raise ValueError("only support batch size 1")
diff --git a/monai/networks/nets/vista3d.py b/monai/networks/nets/vista3d.py
index 9148e36542..979a090df0 100644
--- a/monai/networks/nets/vista3d.py
+++ b/monai/networks/nets/vista3d.py
@@ -336,11 +336,11 @@ def set_auto_grad(self, auto_freeze: bool = False, point_freeze: bool = False):
     def forward(
         self,
         input_images: torch.Tensor,
+        patch_coords: Sequence[slice] | None = None,
         point_coords: torch.Tensor | None = None,
         point_labels: torch.Tensor | None = None,
         class_vector: torch.Tensor | None = None,
         prompt_class: torch.Tensor | None = None,
-        patch_coords: Sequence[slice] | None = None,
         labels: torch.Tensor | None = None,
         label_set: Sequence[int] | None = None,
         prev_mask: torch.Tensor | None = None,
@@ -421,7 +421,10 @@ def forward(
                     point_coords, point_labels = None, None
 
         if point_coords is None and class_vector is None:
-            return self.NINF_VALUE + torch.zeros([bs, 1, *image_size], device=device)
+            logits = self.NINF_VALUE + torch.zeros([bs, 1, *image_size], device=device)
+            if transpose:
+                logits = logits.transpose(1, 0)
+            return logits
 
         if self.image_embeddings is not None and kwargs.get("keep_cache", False) and class_vector is None:
             out, out_auto = self.image_embeddings, None

From 1b119766144a9d3bfea417fc456f75c5857f0bac Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 27 Aug 2024 19:38:26 +0000
Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 monai/apps/vista3d/sampler.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index 7bc091f013..6ede500997 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -58,16 +58,16 @@ def sample_prompt_pairs(
         labels: [1, 1, H, W, D], ground truth labels.
         label_set: the label list for the specific dataset. Note if 0 is included in label_set,
             it will be added into automatic branch training. Recommend removing 0 from label_set
-            for multi-partially-labeled-dataset training, and adding 0 for finetuning specific dataset. 
-            The reason is region with 0 in one partially labeled dataset may contain foregrounds in 
-            another dataset. 
+            for multi-partially-labeled-dataset training, and adding 0 for finetuning specific dataset.
+            The reason is region with 0 in one partially labeled dataset may contain foregrounds in
+            another dataset.
         max_prompt: int, max number of total prompt, including foreground and background.
         max_foreprompt: int, max number of prompt from foreground.
         max_backprompt: int, max number of prompt from background.
         max_point: maximum number of points for each object.
         include_background: if include 0 into training prompt. If included, background 0 is treated
-            the same as foreground and points will be sampled. Can be true only if user want to segment 
-            background 0 with point clicks, otherwise always be false. 
+            the same as foreground and points will be sampled. Set to True only if the user wants to segment
+            background 0 with point clicks; otherwise it should always be False.
         drop_label_prob: probability to drop label prompt.
         drop_point_prob: probability to drop point prompt.
         point_sampler: sampler to augment masks with supervoxel.

From a4920306489eb6b2c6f90ec38ca5ea57c55f8daf Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 10:45:05 +0800
Subject: [PATCH 3/9] fix doc build

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 monai/apps/vista3d/sampler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index 6ede500997..ff9f99542e 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -80,7 +80,7 @@ def sample_prompt_pairs(
         point_label: [B, N]. The corresponding point labels for each point (negative or positive).
             -1 is used for padding the background label prompt and will be ignored.
         prompt_class: [B, 1], exactly the same with label_prompt for label indexing for training loss.
-        label_prompt can be None, and prompt_class is used to identify point classes.
+            label_prompt can be None, and prompt_class is used to identify point classes.
     """
 
     # class label number

From 67341661fb0866e8f8d1cb26f7317c2943a1146a Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 10:55:47 +0800
Subject: [PATCH 4/9] fix format

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 monai/apps/vista3d/sampler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index ff9f99542e..05c69b0307 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -22,6 +22,7 @@
 
 __all__ = ["sample_prompt_pairs"]
 
+
 ENABLE_SPECIAL = True
 SPECIAL_INDEX = (23, 24, 25, 26, 27, 57, 128)
 MERGE_LIST = {
@@ -82,7 +83,6 @@ def sample_prompt_pairs(
         prompt_class: [B, 1], exactly the same with label_prompt for label indexing for training loss.
             label_prompt can be None, and prompt_class is used to identify point classes.
     """
-
     # class label number
     if not labels.shape[0] == 1:
         raise ValueError("only support batch size 1")

From fc9d4c3b9b89ac63080084da5acb96869d24126e Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 11:24:40 +0800
Subject: [PATCH 5/9] fix doc

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 monai/apps/vista3d/sampler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index 05c69b0307..21a0ac54b5 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -20,9 +20,6 @@
 import torch
 from torch import Tensor
 
-__all__ = ["sample_prompt_pairs"]
-
-
 ENABLE_SPECIAL = True
 SPECIAL_INDEX = (23, 24, 25, 26, 27, 57, 128)
 MERGE_LIST = {
@@ -31,6 +28,8 @@
     132: [57],  # overlap with trachea merge into airway
 }
 
+__all__ = ["sample_prompt_pairs"]
+
 
 def _get_point_label(id: int) -> tuple[int, int]:
     if id in SPECIAL_INDEX and ENABLE_SPECIAL:
@@ -83,6 +82,7 @@ def sample_prompt_pairs(
         prompt_class: [B, 1], exactly the same with label_prompt for label indexing for training loss.
             label_prompt can be None, and prompt_class is used to identify point classes.
     """
+
     # class label number
     if not labels.shape[0] == 1:
         raise ValueError("only support batch size 1")

From 4f781aacf7b62df38b803fb68f5aa6ea70763862 Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 11:41:32 +0800
Subject: [PATCH 6/9] fix doc

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 docs/requirements.txt         | 1 -
 monai/apps/vista3d/sampler.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index ff94f7b6de..fc72be6b9e 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -35,7 +35,6 @@ pydicom
 h5py
 nni; platform_system == "Linux"
 optuna
-opencv-python-headless
 onnx>=1.13.0
 onnxruntime; python_version <= '3.10'
 zarr
diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index 21a0ac54b5..80927f190e 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -37,7 +37,6 @@ def _get_point_label(id: int) -> tuple[int, int]:
     else:
         return 0, 1
 
-
 def sample_prompt_pairs(
     labels: Tensor,
     label_set: Sequence[int],

From d643d703b653f565b8b6c7023ea74d82e669ef2f Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:19:29 +0800
Subject: [PATCH 7/9] fix format

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 docs/requirements.txt         |  2 ++
 monai/apps/vista3d/sampler.py | 20 +++++++++++++-------
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index fc72be6b9e..7307d8e5f9 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -35,9 +35,11 @@ pydicom
 h5py
 nni; platform_system == "Linux"
 optuna
+opencv-python-headless
 onnx>=1.13.0
 onnxruntime; python_version <= '3.10'
 zarr
 huggingface_hub
 pyamg>=5.0.0
 packaging
+polygraphy
diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index 80927f190e..ec84fafcf6 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -73,13 +73,19 @@ def sample_prompt_pairs(
         point_sampler_kwargs: arguments for point_sampler.
 
     Returns:
-        label_prompt: [B, 1]. The classes used for training automatic segmentation.
-        point: [B, N, 3]. The corresponding points for each class.
-            Note that background label prompt requires matching point as well ([0,0,0] is used).
-        point_label: [B, N]. The corresponding point labels for each point (negative or positive).
-            -1 is used for padding the background label prompt and will be ignored.
-        prompt_class: [B, 1], exactly the same with label_prompt for label indexing for training loss.
-            label_prompt can be None, and prompt_class is used to identify point classes.
+        tuple:
+            - label_prompt (Tensor | None): Tensor of shape [B, 1] containing the classes used for 
+              training automatic segmentation.
+            - point (Tensor | None): Tensor of shape [B, N, 3] representing the corresponding points 
+              for each class. Note that background label prompts require matching points as well 
+              (e.g., [0, 0, 0] is used).
+            - point_label (Tensor | None): Tensor of shape [B, N] representing the corresponding point 
+              labels for each point (negative or positive). -1 is used for padding the background 
+              label prompt and will be ignored.
+            - prompt_class (Tensor | None): Tensor of shape [B, 1], exactly the same as label_prompt 
+              for label indexing during training. If label_prompt is None, prompt_class is used to 
+              identify point classes.
+
     """
 
     # class label number

From af53f6fe6f56ddc563e7c9de78c6ec256a4940b9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 28 Aug 2024 05:20:02 +0000
Subject: [PATCH 8/9] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 monai/apps/vista3d/sampler.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index ec84fafcf6..c1429e1d53 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -74,16 +74,16 @@ def sample_prompt_pairs(
 
     Returns:
         tuple:
-            - label_prompt (Tensor | None): Tensor of shape [B, 1] containing the classes used for 
+            - label_prompt (Tensor | None): Tensor of shape [B, 1] containing the classes used for
               training automatic segmentation.
-            - point (Tensor | None): Tensor of shape [B, N, 3] representing the corresponding points 
-              for each class. Note that background label prompts require matching points as well 
+            - point (Tensor | None): Tensor of shape [B, N, 3] representing the corresponding points
+              for each class. Note that background label prompts require matching points as well
               (e.g., [0, 0, 0] is used).
-            - point_label (Tensor | None): Tensor of shape [B, N] representing the corresponding point 
-              labels for each point (negative or positive). -1 is used for padding the background 
+            - point_label (Tensor | None): Tensor of shape [B, N] representing the corresponding point
+              labels for each point (negative or positive). -1 is used for padding the background
               label prompt and will be ignored.
-            - prompt_class (Tensor | None): Tensor of shape [B, 1], exactly the same as label_prompt 
-              for label indexing during training. If label_prompt is None, prompt_class is used to 
+            - prompt_class (Tensor | None): Tensor of shape [B, 1], exactly the same as label_prompt
+              for label indexing during training. If label_prompt is None, prompt_class is used to
               identify point classes.
 
     """

From 3e0d115d93fe22779c02ab2c91a1bae2dafc3773 Mon Sep 17 00:00:00 2001
From: YunLiu <55491388+KumoLiu@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:11:59 +0800
Subject: [PATCH 9/9] fix format

Signed-off-by: YunLiu <55491388+KumoLiu@users.noreply.github.com>
---
 monai/apps/vista3d/sampler.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/monai/apps/vista3d/sampler.py b/monai/apps/vista3d/sampler.py
index ec84fafcf6..17b2d34911 100644
--- a/monai/apps/vista3d/sampler.py
+++ b/monai/apps/vista3d/sampler.py
@@ -37,6 +37,7 @@ def _get_point_label(id: int) -> tuple[int, int]:
     else:
         return 0, 1
 
+
 def sample_prompt_pairs(
     labels: Tensor,
     label_set: Sequence[int],
@@ -74,16 +75,16 @@ def sample_prompt_pairs(
 
     Returns:
         tuple:
-            - label_prompt (Tensor | None): Tensor of shape [B, 1] containing the classes used for 
+            - label_prompt (Tensor | None): Tensor of shape [B, 1] containing the classes used for
               training automatic segmentation.
-            - point (Tensor | None): Tensor of shape [B, N, 3] representing the corresponding points 
-              for each class. Note that background label prompts require matching points as well 
+            - point (Tensor | None): Tensor of shape [B, N, 3] representing the corresponding points
+              for each class. Note that background label prompts require matching points as well
               (e.g., [0, 0, 0] is used).
-            - point_label (Tensor | None): Tensor of shape [B, N] representing the corresponding point 
-              labels for each point (negative or positive). -1 is used for padding the background 
+            - point_label (Tensor | None): Tensor of shape [B, N] representing the corresponding point
+              labels for each point (negative or positive). -1 is used for padding the background
               label prompt and will be ignored.
-            - prompt_class (Tensor | None): Tensor of shape [B, 1], exactly the same as label_prompt 
-              for label indexing during training. If label_prompt is None, prompt_class is used to 
+            - prompt_class (Tensor | None): Tensor of shape [B, 1], exactly the same as label_prompt
+              for label indexing during training. If label_prompt is None, prompt_class is used to
               identify point classes.
 
     """