Project-MONAI · yiheng-wang-nv · Aug 9, 2024 · Aug 6, 2024 · Aug 6, 2024 · Aug 7, 2024
diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml
@@ -101,6 +101,7 @@ jobs:
         python -m pip install --pre -U itk
     - name: Install the dependencies
       run: |
+        find  /opt/hostedtoolcache/* -maxdepth 0 ! -name 'Python' -exec rm -rf {} \;
         python -m pip install --user --upgrade pip wheel
         python -m pip install torch==1.13.1 torchvision==0.14.1
         cat "requirements-dev.txt"

diff --git a/monai/networks/blocks/mlp.py b/monai/networks/blocks/mlp.py
@@ -11,12 +11,15 @@
 
 from __future__ import annotations
 
+from typing import Union
+
 import torch.nn as nn
 
 from monai.networks.layers import get_act_layer
+from monai.networks.layers.factories import split_args
 from monai.utils import look_up_option
 
-SUPPORTED_DROPOUT_MODE = {"vit", "swin"}
+SUPPORTED_DROPOUT_MODE = {"vit", "swin", "vista3d"}
 
 
 class MLPBlock(nn.Module):
@@ -39,7 +42,7 @@ def __init__(
                 https://github.com/google-research/vision_transformer/blob/main/vit_jax/models.py#L87
                 "swin" corresponds to one instance as implemented in
                 https://github.com/microsoft/Swin-Transformer/blob/main/models/swin_mlp.py#L23
-
+                "vista3d" mode does not use dropout.
 
         """
 
@@ -48,15 +51,24 @@ def __init__(
         if not (0 <= dropout_rate <= 1):
             raise ValueError("dropout_rate should be between 0 and 1.")
         mlp_dim = mlp_dim or hidden_size
-        self.linear1 = nn.Linear(hidden_size, mlp_dim) if act != "GEGLU" else nn.Linear(hidden_size, mlp_dim * 2)
+        act_name, _ = split_args(act)
+        self.linear1 = nn.Linear(hidden_size, mlp_dim) if act_name != "GEGLU" else nn.Linear(hidden_size, mlp_dim * 2)
         self.linear2 = nn.Linear(mlp_dim, hidden_size)
         self.fn = get_act_layer(act)
-        self.drop1 = nn.Dropout(dropout_rate)
+        # Use Union[nn.Dropout, nn.Identity] for type annotations
+        self.drop1: Union[nn.Dropout, nn.Identity]
+        self.drop2: Union[nn.Dropout, nn.Identity]
+
         dropout_opt = look_up_option(dropout_mode, SUPPORTED_DROPOUT_MODE)
         if dropout_opt == "vit":
+            self.drop1 = nn.Dropout(dropout_rate)
             self.drop2 = nn.Dropout(dropout_rate)
         elif dropout_opt == "swin":
+            self.drop1 = nn.Dropout(dropout_rate)
             self.drop2 = self.drop1
+        elif dropout_opt == "vista3d":
+            self.drop1 = nn.Identity()
+            self.drop2 = nn.Identity()
         else:
             raise ValueError(f"dropout_mode should be one of {SUPPORTED_DROPOUT_MODE}")
 

diff --git a/monai/transforms/utils.py b/monai/transforms/utils.py
@@ -22,6 +22,7 @@
 
 import numpy as np
 import torch
+import torch.nn.functional as F
 
 import monai
 from monai.config import DtypeLike, IndexSelection
@@ -65,6 +66,8 @@
     min_version,
     optional_import,
     pytorch_after,
+    unsqueeze_right,
+    unsqueeze_left
 )
 from monai.utils.enums import TransformBackends
 from monai.utils.type_conversion import (
@@ -103,6 +106,10 @@
     "generate_spatial_bounding_box",
     "get_extreme_points",
     "get_largest_connected_component_mask",
+    "get_largest_connected_component_mask_point",
+    "convert_points_to_disc",
+    "sample_points_from_label",
+    "erode3d",
     "remove_small_objects",
     "img_bounds",
     "in_bounds",
@@ -1171,6 +1178,176 @@ def get_largest_connected_component_mask(
 
     return convert_to_dst_type(out, dst=img, dtype=out.dtype)[0]
 
+def get_largest_connected_component_mask_point(
+    img_pos: NdarrayTensor,
+    img_neg: NdarrayTensor,
+    pos_val: "Sequence[int]" = (1, 3),
+    neg_val: "Sequence[int]" = (0, 2),
+    point_coords: "NdarrayTensor | None" = None,
+    point_labels: "NdarrayTensor | None" = None,
+    margins: int = 3,
+) -> NdarrayTensor:
+    """
+    Gets the connected components of `img_pos` / `img_neg` that are located next to the given points.
+
+    Args:
+        img_pos: mask searched around positive points; it is indexed as [B, 1, H, W, D].
+        img_neg: mask searched around negative points; same layout as `img_pos`.
+        pos_val: point label values treated as positive.
+        neg_val: point label values treated as negative; any other label (e.g. -1 padding) is skipped.
+        point_coords: [B, N, 3] point coordinates (torch tensor); a ValueError is raised if None.
+        point_labels: [B, N] label of each point; a ValueError is raised if None.
+        margins: number of window half-widths (0 to `margins` - 1) tried around each point.
+    """
+    if point_coords is None or point_labels is None:
+        raise ValueError("point_coords and point_labels must both be provided.")
+    img_pos_, *_ = convert_data_type(img_pos, np.ndarray)
+    img_neg_, *_ = convert_data_type(img_neg, np.ndarray)
+    features_pos = measure.label(img_pos_, connectivity=3)
+    features_neg = measure.label(img_neg_, connectivity=3)
+    outs = np.zeros_like(img_pos_)
+    for bs in range(point_coords.shape[0]):
+        for i, p in enumerate(point_coords[bs]):
+            if point_labels[bs, i] in pos_val:
+                features = features_pos
+            elif point_labels[bs, i] in neg_val:
+                features = features_neg
+            else:
+                continue  # neither positive nor negative, e.g. a -1 padding point
+            center = p.round().int().tolist()  # does not depend on the margin, so computed once
+            for margin in range(margins):  # widen the window until it touches a component
+                lo = [max(c - margin, 0) for c in center]
+                hi = [min(c + margin + 1, s) for c, s in zip(center, features.shape[-3:])]
+                window = features[bs, 0, lo[0] : hi[0], lo[1] : hi[1], lo[2] : hi[2]]
+                if (window > 0).any():
+                    outs[bs] += features[bs] == window.max()
+                    break
+    outs[outs > 1] = 1
+    return convert_to_dst_type(outs, dst=img_pos, dtype=outs.dtype)[0]
+
+def convert_points_to_disc(image_size, point, point_label, radius=2, disc=False):
+    """
+    Convert 3D point coordinates into an image mask. The returned mask has the spatial size
+    `image_size` and the batch dimension of `point`. Each point is drawn as a ball sized by
+    `radius`; negative points go to the first output channel, positive points to the second.
+
+    Args:
+        image_size: spatial size (H, W, D) of the output mask.
+        point: [b, N, 3] point coordinates, a torch tensor or a numpy array.
+        point_label: [b, N]; 0 or 2 means negative points, 1 or 3 means positive points.
+            Labels of -1 or lower are skipped, any other value is drawn as positive.
+        radius: radius of the disc, or standard deviation of the gaussian.
+        disc: if True draw a binary disc, otherwise a gaussian centred on the point.
+
+    Returns:
+        Tensor of shape [b, 2, H, W, D] on the device of `point`.
+    """
+    if not torch.is_tensor(point):
+        point = torch.from_numpy(point)
+    n_batch, n_point = point.shape[:2]
+    masks = torch.zeros([n_batch, 2, image_size[0], image_size[1], image_size[2]], device=point.device)
+    # one axis per spatial dimension, in (H, W, D) order so the grid has the layout of `masks`
+    axes = [torch.arange(0, image_size[i], dtype=torch.float32, device=point.device) for i in range(3)]
+    # [3, H, W, D] -> [b, 2, 3, H, W, D]; `expand` broadcasts the grid without copying it
+    coords = unsqueeze_left(torch.stack(torch.meshgrid(*axes), dim=0), 6).expand(n_batch, 2, -1, -1, -1, -1)
+    for b in range(n_batch):
+        for n in range(n_point):
+            if point_label[b, n] > -1:
+                channel = 0 if (point_label[b, n] == 0 or point_label[b, n] == 2) else 1
+                # [3] -> [3, 1, 1, 1] so that the point broadcasts against the [3, H, W, D] grid
+                point_bn = unsqueeze_right(point[b, n], 4)
+                pow_diff = torch.pow(coords[b, channel] - point_bn, 2)
+                if disc:
+                    masks[b, channel] += pow_diff.sum(0) < radius**2
+                else:
+                    masks[b, channel] += torch.exp(-pow_diff.sum(0) / (2 * radius**2))
+    return masks
+
+def sample_points_from_label(
+    labels, label_set=None, max_ppoint=1, max_npoint=0, device="cpu", use_center=False
+):
+    """Sample points from labels.
+
+    Args:
+        labels: [1, 1, H, W, D] label tensor; only batch size 1 is supported.
+        label_set: label values to sample for, must match values in `labels`. Required.
+        max_ppoint: maximum positive point samples per label.
+        max_npoint: maximum negative point samples per label.
+        device: returned tensor device.
+        use_center: if True pick the positive points closest to their mean, else in random order.
+
+    Returns:
+        point: point coordinates of [len(label_set), max_ppoint + max_npoint, 3].
+        point_label: [len(label_set), max_ppoint + max_npoint]; 1 is positive, 0 negative, -1 padding.
+
+    Raises:
+        ValueError: if the batch size is not 1 or `label_set` is None.
+    """
+    if labels.shape[0] != 1:  # raised explicitly, an `assert` would vanish under `python -O`
+        raise ValueError("only support batch size 1")
+    if label_set is None:
+        raise ValueError("label_set must be provided.")
+    labels = labels[0, 0]
+    unique_labels = labels.unique().cpu().numpy().tolist()
+    n_total = max_ppoint + max_npoint
+    _point = []
+    _point_label = []
+    for label_id in label_set:
+        if label_id not in unique_labels:  # label absent from `labels`: emit padding only
+            _point.append(torch.zeros(n_total, 3).to(device))
+            _point_label.append(torch.zeros(n_total).to(device) - 1)
+            continue
+        plabels = labels == int(label_id)
+        # sample from the largest component of the eroded mask first
+        plabelpoints = torch.nonzero(get_largest_connected_component_mask(erode3d(plabels))).to(device)
+        if len(plabelpoints) == 0:  # nothing survived the erosion: use every voxel of the label
+            plabelpoints = torch.nonzero(plabels).to(device)
+        nlabelpoints = torch.nonzero(~plabels).to(device)
+        n_pos = min(len(plabelpoints), max_ppoint)
+        n_neg = min(len(nlabelpoints), max_npoint)
+        pad = n_total - n_pos - n_neg
+        if use_center:
+            pmean = plabelpoints.float().mean(0)
+            _, sorted_indices = torch.sort(((plabelpoints - pmean) ** 2).sum(-1))
+        else:
+            sorted_indices = list(range(len(plabelpoints)))
+            random.shuffle(sorted_indices)
+        points = [plabelpoints[sorted_indices[i]] for i in range(n_pos)]
+        points += random.choices(nlabelpoints, k=n_neg)
+        _point.append(torch.stack(points + [torch.tensor([0, 0, 0], device=device)] * pad))
+        _point_label.append(torch.tensor([1] * n_pos + [0] * n_neg + [-1] * pad).to(device))
+    return torch.stack(_point), torch.stack(_point_label)
+
+def erode3d(input_tensor, erosion=3):
+    """
+    Erode a 3D mask with a box-shaped structuring element.
+
+    The padded mask is convolved with a box of ones and a position is kept only where the sum
+    equals the box volume, i.e. where a 0/1 mask is set under the whole box. Padding uses ones,
+    so the area outside of the volume never removes a voxel.
+
+    Args:
+        input_tensor: mask of shape [H, W, D], cast to float for the convolution.
+        erosion: box size, expanded to one value per spatial axis by `ensure_tuple_rep`.
+
+    Returns:
+        Tensor holding 1.0 at kept positions and 0.0 elsewhere.
+    """
+    size_h, size_w, size_d = ensure_tuple_rep(erosion, 3)
+    kernel = torch.ones(1, 1, size_h, size_w, size_d).to(input_tensor.device)
+
+    # `F.pad` takes the widths of the last dimension first, hence the reversed axis order
+    pad_widths = tuple(size // 2 for size in (size_d, size_w, size_h) for _ in range(2))
+    volume = F.pad(input_tensor.float()[None, None], pad_widths, mode="constant", value=1.0)
+
+    # every output value is the sum of the mask under the box placed at that position
+    hits = F.conv3d(volume, kernel, padding=0)
+
+    # keep a position only when the whole box was covered
+    eroded = torch.where(hits == kernel.sum(), 1.0, 0.0)
+
+    return eroded[0, 0]
+
 
 def remove_small_objects(
     img: NdarrayTensor,

diff --git a/tests/test_mlp.py b/tests/test_mlp.py
@@ -15,10 +15,12 @@
 
 import numpy as np
 import torch
+import torch.nn as nn
 from parameterized import parameterized
 
 from monai.networks import eval_mode
 from monai.networks.blocks.mlp import MLPBlock
+from monai.networks.layers.factories import split_args
 
 TEST_CASE_MLP = []
 for dropout_rate in np.linspace(0, 1, 4):
@@ -31,6 +33,14 @@
             ]
             TEST_CASE_MLP.append(test_case)
 
+# test different activation layers
+TEST_CASE_ACT = []
+for act in ["GELU", "GEGLU", ("GEGLU", {})]:  # type: ignore
+    TEST_CASE_ACT.append([{"hidden_size": 128, "mlp_dim": 0, "act": act}, (2, 512, 128), (2, 512, 128)])
+
+# test different dropout modes
+TEST_CASE_DROP = [["vit", nn.Dropout], ["swin", nn.Dropout], ["vista3d", nn.Identity]]
+
 
 class TestMLPBlock(unittest.TestCase):
 
@@ -45,6 +55,24 @@ def test_ill_arg(self):
         with self.assertRaises(ValueError):
             MLPBlock(hidden_size=128, mlp_dim=512, dropout_rate=5.0)
 
+    @parameterized.expand(TEST_CASE_ACT)
+    def test_act(self, input_param, input_shape, expected_shape):
+        # check the output shape, and that `linear1` doubles its output width for GEGLU only
+        block = MLPBlock(**input_param)
+        with eval_mode(block):
+            output = block(torch.randn(input_shape))
+            self.assertEqual(output.shape, expected_shape)
+        # `act` is either a name or a (name, args) pair (see TEST_CASE_ACT); compare the name only
+        act_name, _ = split_args(input_param["act"])
+        width_factor = 2 if act_name == "GEGLU" else 1
+        self.assertEqual(block.linear1.in_features, block.linear1.out_features // width_factor)
+
+    @parameterized.expand(TEST_CASE_DROP)
+    def test_dropout_mode(self, dropout_mode, dropout_layer):
+        # both dropout slots must hold the layer type expected for `dropout_mode`
+        block = MLPBlock(hidden_size=128, mlp_dim=512, dropout_rate=0.1, dropout_mode=dropout_mode)
+        self.assertTrue(all(isinstance(layer, dropout_layer) for layer in (block.drop1, block.drop2)))
+
 
 if __name__ == "__main__":
     unittest.main()