From de9cbaf00b563931e027ec42cf3f5f9526538522 Mon Sep 17 00:00:00 2001 From: gs-olive <113141689+gs-olive@users.noreply.github.com> Date: Tue, 6 Dec 2022 18:42:15 -0800 Subject: [PATCH 1/2] fix: Bug in `aten::where` with differing-shape inputs - Behavior of Torch-TRT differing from that of Torch in the case where the input tensors to `aten::where` have different rank - Torch automatically broadcasts tensors to the highest-rank variant whereas the TRT Select layer requires tensors of the same rank and throws an error - Add dimension checking and unsqueeze operator to ensure broadcasting is enabled - Add test case to catch error --- core/conversion/converters/impl/select.cpp | 33 +++++++++++++++++++ .../conversion/converters/test_select.cpp | 32 ++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/core/conversion/converters/impl/select.cpp b/core/conversion/converters/impl/select.cpp index 5877e13210..2041b7a226 100644 --- a/core/conversion/converters/impl/select.cpp +++ b/core/conversion/converters/impl/select.cpp @@ -736,8 +736,41 @@ auto select_registrations TORCHTRT_UNUSED = {"aten::where.self(Tensor condition, Tensor self, Tensor other) -> (Tensor)", [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { auto condition = args[0].ITensorOrFreeze(ctx); + auto c_nbDims = condition->getDimensions().nbDims; auto x = args[1].ITensorOrFreeze(ctx); + auto x_nbDims = x->getDimensions().nbDims; auto y = args[2].ITensorOrFreeze(ctx); + auto y_nbDims = y->getDimensions().nbDims; + + // Get maximum rank of all input tensors + auto max_nbDims = std::max(c_nbDims, std::max(x_nbDims, y_nbDims)); + + // TensorRT requires all inputs to Select layers to have the same rank, so for each + // tensor input, ensure that its rank is equal to the maximum number of dimensions + // If not, left-pad the tensor dimension with 1s until the max rank is achieved + auto add_reshape = [&ctx, &max_nbDims](nvinfer1::ITensor*& tensor) { + nvinfer1::Dims dimensions = 
tensor->getDimensions(); + + // If the rank of this tensor is smaller than the max rank, use reshape + if (dimensions.nbDims < max_nbDims) { + auto shuffle_layer = ctx->net->addShuffle(*tensor); + + // For each dimension from the rank of the smaller tensor to the max rank, + // unsqueeze dimensions by 1 + for (auto i = dimensions.nbDims; i < max_nbDims; i++) { + dimensions = util::unsqueezeDims(dimensions, 0, 1, false); + } + + // Reshape to the unsqueezed dimensions + shuffle_layer->setReshapeDimensions(dimensions); + tensor = shuffle_layer->getOutput(0); + } + }; + + // Apply reshape to each tensor input + add_reshape(condition); + add_reshape(x); + add_reshape(y); auto layer = ctx->net->addSelect(*condition, *x, *y); diff --git a/tests/core/conversion/converters/test_select.cpp b/tests/core/conversion/converters/test_select.cpp index 40c5f11843..0e007271ec 100644 --- a/tests/core/conversion/converters/test_select.cpp +++ b/tests/core/conversion/converters/test_select.cpp @@ -1224,3 +1224,35 @@ TEST(Converters, WhereConvertsCorrectly) { ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); } + +TEST(Converters, WhereConvertsMismatchedShapesCorrectly) { + const auto graph = R"IR( + graph(%condition : Tensor, + %x : Tensor, + %y : Tensor): + %out : Tensor = aten::where(%condition, %x, %y) + return (%out))IR"; + + auto g = std::make_shared<torch::jit::Graph>(); + + torch::jit::parseIR(graph, g.get()); + + // As per Torch behavior, the input Tensors are expected to be broadcasted + // along their respective dimension in the largest-rank Tensor provided + auto condition = at::randint(0, 2, {7, 5}, {at::kCUDA}).to(torch::kBool); + auto x = at::randn({2, 7, 5}, {at::kCUDA}); + auto y = at::randn({5}, {at::kCUDA}); + + auto jit_condition = at::clone(condition); + auto jit_x = at::clone(x); + auto jit_y = at::clone(y); + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); + auto jit_results = 
torch_tensorrt::tests::util::RunGraph(g, params, {jit_condition, jit_x, jit_y}); + + auto trt_condition = at::clone(condition); + auto trt_x = at::clone(x); + auto trt_y = at::clone(y); + auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_condition, trt_x, trt_y}); + + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); +} From c4f36cb40f746b43f27dcb1196e9161553f60446 Mon Sep 17 00:00:00 2001 From: gs-olive <113141689+gs-olive@users.noreply.github.com> Date: Thu, 8 Dec 2022 11:37:40 -0800 Subject: [PATCH 2/2] Refactor `aten::where` implementation to use `addPadding` --- core/conversion/converters/impl/select.cpp | 31 +++++----------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/core/conversion/converters/impl/select.cpp b/core/conversion/converters/impl/select.cpp index 2041b7a226..6f4bbb9d67 100644 --- a/core/conversion/converters/impl/select.cpp +++ b/core/conversion/converters/impl/select.cpp @@ -736,41 +736,22 @@ auto select_registrations TORCHTRT_UNUSED = {"aten::where.self(Tensor condition, Tensor self, Tensor other) -> (Tensor)", [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool { auto condition = args[0].ITensorOrFreeze(ctx); - auto c_nbDims = condition->getDimensions().nbDims; + auto condition_nbDims = condition->getDimensions().nbDims; auto x = args[1].ITensorOrFreeze(ctx); auto x_nbDims = x->getDimensions().nbDims; auto y = args[2].ITensorOrFreeze(ctx); auto y_nbDims = y->getDimensions().nbDims; // Get maximum rank of all input tensors - auto max_nbDims = std::max(c_nbDims, std::max(x_nbDims, y_nbDims)); + auto max_nbDims = std::max(condition_nbDims, std::max(x_nbDims, y_nbDims)); // TensorRT requires all inputs to Select layers to have the same rank, so for each // tensor input, ensure that its rank is equal to the maximum number of dimensions // If not, left-pad the tensor dimension with 1s until the max rank is achieved - auto add_reshape = 
[&ctx, &max_nbDims](nvinfer1::ITensor*& tensor) { - nvinfer1::Dims dimensions = tensor->getDimensions(); - - // If the rank of this tensor is smaller than the max rank, use reshape - if (dimensions.nbDims < max_nbDims) { - auto shuffle_layer = ctx->net->addShuffle(*tensor); - - // For each dimension from the rank of the smaller tensor to the max rank, - // unsqueeze dimensions by 1 - for (auto i = dimensions.nbDims; i < max_nbDims; i++) { - dimensions = util::unsqueezeDims(dimensions, 0, 1, false); - } - - // Reshape to the unsqueezed dimensions - shuffle_layer->setReshapeDimensions(dimensions); - tensor = shuffle_layer->getOutput(0); - } - }; - - // Apply reshape to each tensor input - add_reshape(condition); - add_reshape(x); - add_reshape(y); + condition = + addPadding(ctx, n, condition, max_nbDims, /*bool trailing =*/false, /*bool use_zeros =*/false); + x = addPadding(ctx, n, x, max_nbDims, /*bool trailing =*/false, /*bool use_zeros =*/false); + y = addPadding(ctx, n, y, max_nbDims, /*bool trailing =*/false, /*bool use_zeros =*/false); auto layer = ctx->net->addSelect(*condition, *x, *y);