Skip to content

Commit

Permalink
Merge pull request #792 from guoruoqian/fix_pooling
Browse files Browse the repository at this point in the history
Feat: support aten::adaptive_max_pool1d, aten::adaptive_avg_pool3d and aten::adaptive_max_pool3d operators and fix issue #791
  • Loading branch information
narendasan authored Jan 31, 2022
2 parents 726b031 + 143fc3b commit 0ac503e
Show file tree
Hide file tree
Showing 3 changed files with 278 additions and 19 deletions.
47 changes: 28 additions & 19 deletions core/conversion/converters/impl/pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,13 @@ bool GlobalPoolingConverter(
nvinfer1::PoolingType pool_type) {
auto in = args[0].ITensorOrFreeze(ctx);
nvinfer1::Dims dims = in->getDimensions();
// Generate a bitmask of all 1s except the last 2 bits (N and C axes)
// Generate a bitmask of all 1s except the last 2 bits (N and C axes) when dims.nbDims > 2
uint32_t reduceAxes = ((1 << dims.nbDims) - 1) & ~0b11;
// Generate a bitmask of all 1s except the last 1 bits (N axes) when dims.nbDims == 2. `aten::adaptive_avg_pool1d`'s
// input can be (N, C, L) or (C, L).
if (dims.nbDims == 2) {
reduceAxes = ((1 << dims.nbDims) - 1) & ~0b1;
}
auto* new_layer = ctx->net->addReduce(
*in,
pool_type == nvinfer1::PoolingType::kMAX ? nvinfer1::ReduceOperation::kMAX : nvinfer1::ReduceOperation::kAVG,
Expand All @@ -36,7 +41,8 @@ bool AdaptivePoolingConverter(
ConversionCtx* ctx,
const torch::jit::Node* n,
args& args,
nvinfer1::PoolingType pool_type) {
nvinfer1::PoolingType pool_type,
const std::string& mode) {
auto in = args[0].ITensorOrFreeze(ctx);
auto out_size = util::toDims(args[1].unwrapToIntList());

Expand All @@ -47,15 +53,7 @@ bool AdaptivePoolingConverter(
}

auto orig_dims = in->getDimensions();
bool expandDims = (orig_dims.nbDims < 4);
TORCHTRT_CHECK(orig_dims.nbDims > 2, "Unable to create pooling layer from node: " << *n);
if (expandDims) {
in = addPadding(ctx, n, in, 4, false, false);
}

if (out_size.nbDims == 1) {
out_size = util::unsqueezeDims(out_size, 0, 1);
}
TORCHTRT_CHECK(orig_dims.nbDims > 1, "Unable to create pooling layer from node: " << *n);

auto in_shape = util::toVec(in->getDimensions());
nvinfer1::ILayer* new_layer = nullptr;
Expand Down Expand Up @@ -89,10 +87,6 @@ bool AdaptivePoolingConverter(
int32_t use_scales_casted = 0;
f.emplace_back(nvinfer1::PluginField("use_scales", &use_scales_casted, nvinfer1::PluginFieldType::kINT32, 1));

std::string mode = "adaptive_avg_pool2d";
if (pool_type == nvinfer1::PoolingType::kMAX) {
mode = "adaptive_max_pool2d";
}
f.emplace_back(nvinfer1::PluginField("mode", &mode, nvinfer1::PluginFieldType::kCHAR, 1));

fc.nbFields = f.size();
Expand All @@ -109,7 +103,7 @@ bool AdaptivePoolingConverter(
TORCHTRT_CHECK(new_layer, "Unable to create pooling (interpolation) plugin from node" << *n);

new_layer->setName(util::node_info(n).c_str());
auto layer_output = addUnpadding(ctx, n, new_layer->getOutput(0), orig_dims.nbDims, false, false);
auto layer_output = new_layer->getOutput(0);

ctx->AssociateValueAndTensor(n->outputs()[0], layer_output);
LOG_DEBUG("Output tensor shape: " << layer_output->getDimensions());
Expand Down Expand Up @@ -237,15 +231,30 @@ auto pooling_registrations TORCHTRT_UNUSED =
}})
.pattern({"aten::adaptive_avg_pool1d(Tensor self, int[1] output_size) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
return AdaptivePoolingConverter(ctx, n, args, nvinfer1::PoolingType::kAVERAGE);
return AdaptivePoolingConverter(
ctx, n, args, nvinfer1::PoolingType::kAVERAGE, "adaptive_avg_pool1d");
}})
.pattern({"aten::adaptive_max_pool1d(Tensor self, int[2] output_size) -> (Tensor, Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
return AdaptivePoolingConverter(ctx, n, args, nvinfer1::PoolingType::kMAX, "adaptive_max_pool1d");
}})
.pattern({"aten::adaptive_avg_pool2d(Tensor self, int[2] output_size) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
return AdaptivePoolingConverter(ctx, n, args, nvinfer1::PoolingType::kAVERAGE);
return AdaptivePoolingConverter(
ctx, n, args, nvinfer1::PoolingType::kAVERAGE, "adaptive_avg_pool2d");
}})
.pattern({"aten::adaptive_max_pool2d(Tensor self, int[2] output_size) -> (Tensor, Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
return AdaptivePoolingConverter(ctx, n, args, nvinfer1::PoolingType::kMAX);
return AdaptivePoolingConverter(ctx, n, args, nvinfer1::PoolingType::kMAX, "adaptive_max_pool2d");
}})
.pattern({"aten::adaptive_avg_pool3d(Tensor self, int[3] output_size) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
return AdaptivePoolingConverter(
ctx, n, args, nvinfer1::PoolingType::kAVERAGE, "adaptive_avg_pool3d");
}})
.pattern({"aten::adaptive_max_pool3d(Tensor self, int[3] output_size) -> (Tensor, Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
return AdaptivePoolingConverter(ctx, n, args, nvinfer1::PoolingType::kMAX, "adaptive_max_pool3d");
}});
} // namespace
} // namespace impl
Expand Down
8 changes: 8 additions & 0 deletions core/plugins/impl/interpolate_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,10 +289,18 @@ int InterpolatePlugin::enqueue(
out = at::upsample_bilinear2d(input, {size_[0], size_[1]}, align_corners_);
} else if (mode_ == "trilinear") {
out = at::upsample_trilinear3d(input, {size_[0], size_[1], size_[2]}, align_corners_);
} else if (mode_ == "adaptive_avg_pool1d") {
out = at::adaptive_avg_pool1d(input, {size_[0]});
} else if (mode_ == "adaptive_max_pool1d") {
out = std::get<0>(at::adaptive_max_pool1d(input, {size_[0]}));
} else if (mode_ == "adaptive_avg_pool2d") {
out = at::adaptive_avg_pool2d(input, {size_[0], size_[1]});
} else if (mode_ == "adaptive_max_pool2d") {
out = std::get<0>(at::adaptive_max_pool2d(input, {size_[0], size_[1]}));
} else if (mode_ == "adaptive_avg_pool3d") {
out = at::adaptive_avg_pool3d(input, {size_[0], size_[1], size_[2]});
} else if (mode_ == "adaptive_max_pool3d") {
out = std::get<0>(at::adaptive_max_pool3d(input, {size_[0], size_[1], size_[2]}));
}
}

Expand Down
242 changes: 242 additions & 0 deletions tests/core/conversion/converters/test_pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,32 @@ TEST(Converters, ATenAdaptiveAvgPool2DConvertsCorrectly) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenAdaptiveAvgPool2DGlobalPoolingConvertsCorrectly) {
  const auto graph = R"IR(
      graph(%0 : Tensor):
        %2 : int = prim::Constant[value=1]()
        %3 : int = prim::Constant[value=1]()
        %6 : int[] = prim::ListConstruct(%2, %3)
        %10 : Tensor = aten::adaptive_avg_pool2d(%0, %6)
        return (%10))IR";

  auto parsed_g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, parsed_g.get());

  // Output size (1, 1) is the global-pooling case; PyTorch's
  // adaptive_avg_pool2d accepts a 4D (N, C, H, W) or 3D (C, H, W) input.
  auto input = at::randint(-5, 5, {64, 16, 32, 32}, at::kCUDA);

  auto jit_input = at::clone(input);
  auto jit_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto jit_results = torch_tensorrt::tests::util::RunGraph(parsed_g, jit_params, {jit_input});

  auto trt_input = at::clone(input);
  auto trt_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(parsed_g, trt_params, {trt_input});

  // Compare the TensorRT engine's output against the TorchScript reference.
  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenAdaptiveAvgPool2DConvertsCorrectlyWithDynamicInput) {
const auto graph = R"IR(
graph(%0 : Tensor):
Expand Down Expand Up @@ -488,6 +514,110 @@ TEST(Converters, ATenAdaptiveAvgPool1DConvertsCorrectly) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 1.0));
}

TEST(Converters, ATenAdaptiveAvgPool1DGlobalPoolingConvertsCorrectly) {
  const auto graph =
      R"IR(
      graph(%0 : Tensor):
        %2 : int = prim::Constant[value=1]()
        %6 : int[] = prim::ListConstruct(%2)
        %10 : Tensor = aten::adaptive_avg_pool1d(%0, %6)
        return (%10))IR";

  auto parsed_g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, parsed_g.get());

  // Output size 1 is the global-pooling case; PyTorch's adaptive_avg_pool1d
  // accepts a 3D (N, C, L) or 2D (C, L) input — this exercises the 2D path.
  auto input = at::randint(-5, 5, {3, 16}, at::kCUDA);

  auto jit_input = at::clone(input);
  auto jit_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto jit_results = torch_tensorrt::tests::util::RunGraph(parsed_g, jit_params, {jit_input});

  auto trt_input = at::clone(input);
  auto trt_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(parsed_g, trt_params, {trt_input});

  // Compare the TensorRT engine's output against the TorchScript reference.
  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenAdaptiveAvgPool1DUsingPluginConvertsCorrectly) {
  const auto graph =
      R"IR(
      graph(%0 : Tensor):
        %2 : int = prim::Constant[value=3]()
        %6 : int[] = prim::ListConstruct(%2)
        %10 : Tensor = aten::adaptive_avg_pool1d(%0, %6)
        return (%10))IR";

  auto parsed_g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, parsed_g.get());

  // Non-unit output size (3) forces the interpolate-plugin path rather than
  // the global-pooling reduce path. adaptive_avg_pool1d accepts a 3D (N, C, L)
  // or 2D (C, L) input.
  auto input = at::randint(-5, 5, {1, 3, 16}, at::kCUDA);

  auto jit_input = at::clone(input);
  auto jit_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto jit_results = torch_tensorrt::tests::util::RunGraph(parsed_g, jit_params, {jit_input});

  auto trt_input = at::clone(input);
  auto trt_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(parsed_g, trt_params, {trt_input});

  // Compare the TensorRT engine's output against the TorchScript reference.
  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenAdaptiveMaxPool1DGlobalPoolingConvertsCorrectly) {
  const auto graph =
      R"IR(
      graph(%0 : Tensor):
        %2 : int = prim::Constant[value=1]()
        %6 : int[] = prim::ListConstruct(%2)
        %10 : Tensor, %11 : Tensor = aten::adaptive_max_pool1d(%0, %6)
        return (%10, %11))IR";

  auto parsed_g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, parsed_g.get());

  // Output size 1 is the global-pooling case; PyTorch's adaptive_max_pool1d
  // accepts a 3D (N, C, L) or 2D (C, L) input.
  auto input = at::randint(-5, 5, {1, 3, 16}, at::kCUDA);

  auto jit_input = at::clone(input);
  auto jit_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto jit_results = torch_tensorrt::tests::util::RunGraph(parsed_g, jit_params, {jit_input});

  auto trt_input = at::clone(input);
  auto trt_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(parsed_g, trt_params, {trt_input});

  // Only the pooled values (output 0) are compared; the indices output is not
  // produced by the TRT lowering.
  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenAdaptiveMaxPool1DUsingPluginConvertsCorrectly) {
  const auto graph =
      R"IR(
      graph(%0 : Tensor):
        %2 : int = prim::Constant[value=3]()
        %6 : int[] = prim::ListConstruct(%2)
        %10 : Tensor, %11 : Tensor = aten::adaptive_max_pool1d(%0, %6)
        return (%10, %11))IR";

  auto parsed_g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, parsed_g.get());

  // Non-unit output size (3) forces the interpolate-plugin path.
  // adaptive_max_pool1d accepts a 3D (N, C, L) or 2D (C, L) input.
  auto input = at::randint(-5, 5, {1, 3, 16}, at::kCUDA);

  auto jit_input = at::clone(input);
  auto jit_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto jit_results = torch_tensorrt::tests::util::RunGraph(parsed_g, jit_params, {jit_input});

  auto trt_input = at::clone(input);
  auto trt_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(parsed_g, trt_params, {trt_input});

  // Only the pooled values (output 0) are compared; the indices output is not
  // produced by the TRT lowering.
  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenAdaptiveMaxPool2DConvertsCorrectly) {
const auto graph = R"IR(
graph(%0 : Tensor):
Expand Down Expand Up @@ -539,3 +669,115 @@ TEST(Converters, ATenAdaptiveMaxPool2DConvertsCorrectlyWithDynamicInput) {

ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenAdaptiveAvgPool3DGlobalPoolingConvertsCorrectly) {
  const auto graph =
      R"IR(
      graph(%0 : Tensor):
        %2 : int = prim::Constant[value=1]()
        %3 : int = prim::Constant[value=1]()
        %4 : int = prim::Constant[value=1]()
        %6 : int[] = prim::ListConstruct(%2, %3, %4)
        %10 : Tensor = aten::adaptive_avg_pool3d(%0, %6)
        return (%10))IR";

  auto parsed_g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, parsed_g.get());

  // Output size (1, 1, 1) is the global-pooling case; PyTorch's
  // adaptive_avg_pool3d accepts a 5D (N, C, D, H, W) or 4D (C, D, H, W) input.
  auto input = at::randint(-5, 5, {4, 5, 3, 15, 16}, at::kCUDA);

  auto jit_input = at::clone(input);
  auto jit_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto jit_results = torch_tensorrt::tests::util::RunGraph(parsed_g, jit_params, {jit_input});

  auto trt_input = at::clone(input);
  auto trt_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(parsed_g, trt_params, {trt_input});

  // Compare the TensorRT engine's output against the TorchScript reference.
  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenAdaptiveAvgPool3DUsingPluginConvertsCorrectly) {
  const auto graph =
      R"IR(
      graph(%0 : Tensor):
        %2 : int = prim::Constant[value=7]()
        %3 : int = prim::Constant[value=6]()
        %4 : int = prim::Constant[value=5]()
        %6 : int[] = prim::ListConstruct(%2, %3, %4)
        %10 : Tensor = aten::adaptive_avg_pool3d(%0, %6)
        return (%10))IR";

  auto parsed_g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, parsed_g.get());

  // Non-unit output size (7, 6, 5) forces the interpolate-plugin path.
  // adaptive_avg_pool3d accepts a 5D (N, C, D, H, W) or 4D (C, D, H, W) input.
  auto input = at::randint(-5, 5, {4, 5, 3, 15, 16}, at::kCUDA);

  auto jit_input = at::clone(input);
  auto jit_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto jit_results = torch_tensorrt::tests::util::RunGraph(parsed_g, jit_params, {jit_input});

  auto trt_input = at::clone(input);
  auto trt_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(parsed_g, trt_params, {trt_input});

  // Compare the TensorRT engine's output against the TorchScript reference.
  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenAdaptiveMaxPool3DGlobalPoolingConvertsCorrectly) {
  const auto graph =
      R"IR(
      graph(%0 : Tensor):
        %2 : int = prim::Constant[value=1]()
        %3 : int = prim::Constant[value=1]()
        %4 : int = prim::Constant[value=1]()
        %6 : int[] = prim::ListConstruct(%2, %3, %4)
        %10 : Tensor, %11 : Tensor = aten::adaptive_max_pool3d(%0, %6)
        return (%10, %11))IR";

  auto parsed_g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, parsed_g.get());

  // Output size (1, 1, 1) is the global-pooling case; PyTorch's
  // adaptive_max_pool3d accepts a 5D (N, C, D, H, W) or 4D (C, D, H, W)
  // input — this exercises the unbatched 4D path.
  auto input = at::randint(-5, 5, {5, 3, 15, 16}, at::kCUDA);

  auto jit_input = at::clone(input);
  auto jit_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto jit_results = torch_tensorrt::tests::util::RunGraph(parsed_g, jit_params, {jit_input});

  auto trt_input = at::clone(input);
  auto trt_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(parsed_g, trt_params, {trt_input});

  // Only the pooled values (output 0) are compared; the indices output is not
  // produced by the TRT lowering.
  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenAdaptiveMaxPool3DUsingPluginConvertsCorrectly) {
  const auto graph =
      R"IR(
      graph(%0 : Tensor):
        %2 : int = prim::Constant[value=7]()
        %3 : int = prim::Constant[value=8]()
        %4 : int = prim::Constant[value=9]()
        %6 : int[] = prim::ListConstruct(%2, %3, %4)
        %10 : Tensor, %11 : Tensor = aten::adaptive_max_pool3d(%0, %6)
        return (%10, %11))IR";

  auto parsed_g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, parsed_g.get());

  // Non-unit output size (7, 8, 9) forces the interpolate-plugin path.
  // adaptive_max_pool3d accepts a 5D (N, C, D, H, W) or 4D (C, D, H, W) input.
  auto input = at::randint(-5, 5, {4, 5, 3, 15, 16}, at::kCUDA);

  auto jit_input = at::clone(input);
  auto jit_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto jit_results = torch_tensorrt::tests::util::RunGraph(parsed_g, jit_params, {jit_input});

  auto trt_input = at::clone(input);
  auto trt_params = torch_tensorrt::core::ir::get_static_params(parsed_g->inputs(), {});
  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(parsed_g, trt_params, {trt_input});

  // Only the pooled values (output 0) are compared; the indices output is not
  // produced by the TRT lowering.
  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

0 comments on commit 0ac503e

Please sign in to comment.