diff --git a/cinn/frontend/net_builder.cc b/cinn/frontend/net_builder.cc
index 402af7e95b..4bf76eea63 100644
--- a/cinn/frontend/net_builder.cc
+++ b/cinn/frontend/net_builder.cc
@@ -95,6 +95,14 @@ Variable NetBuilder::Cast(const Variable& operand, const std::string& dtype) {
   return instr.GetOutput(0);
 }
 
+Variable NetBuilder::Squeeze(const Variable& operand, const std::vector<int>& axes) {
+  Instruction instr("squeeze", {operand});
+  instr.SetAttr("axes", axes);
+  InferShape(instr);
+  AppendInstruction(instr);
+  return instr.GetOutput(0);
+}
+
 Variable NetBuilder::Conv2d(const Variable& a,
                             const Variable& b,
                             const std::vector<int>& strides,
diff --git a/cinn/frontend/net_builder.h b/cinn/frontend/net_builder.h
index 07d6f93453..e4c7355d18 100755
--- a/cinn/frontend/net_builder.h
+++ b/cinn/frontend/net_builder.h
@@ -97,6 +97,12 @@ class NetBuilder : public BaseBuilder {
    */
   Variable Cast(const Variable& operand, const std::string& dtype);
 
+  /**
+   * Remove dimensions of size 1 from `operand`. `axes` lists the dimensions to
+   * remove; when it is empty, every dimension of size 1 is removed.
+   */
+  Variable Squeeze(const Variable& operand, const std::vector<int>& axes);
+
   /**
    * The convolution2D layer calculates the output based on the input, filter
    * and strides, paddings, dilations, groups parameters.
diff --git a/cinn/frontend/net_builder_test.cc b/cinn/frontend/net_builder_test.cc
index 6effa13184..5ea3422c35 100644
--- a/cinn/frontend/net_builder_test.cc
+++ b/cinn/frontend/net_builder_test.cc
@@ -293,5 +293,58 @@ TEST(net_build, program_execute_cast) {
   }
 }
 
+// Squeezes a {4, 1, 7, 1} float32 input along `axes` on the default host target, then checks that the output has
+// `expected_shape` and holds the input's elements in the same order.
+void RunSqueezeTest(const std::vector<int>& axes, const std::vector<int>& expected_shape) {
+  const std::vector<int> input_shape = {4, 1, 7, 1};
+
+  NetBuilder builder("net_builder");
+  Placeholder input = builder.CreateInput(Float(32), input_shape, "In");
+  Variable output   = builder.Squeeze(input, axes);
+  auto program      = builder.Build();
+
+  Target target = common::DefaultHostTarget();
+
+  auto graph = std::make_shared<hlir::framework::Graph>(program, target);
+  auto scope = BuildScope(target, graph);
+  hlir::framework::GraphCompiler gc(target, scope, graph);
+  auto runtime_program = gc.Build();
+
+  scope->Var<hlir::framework::Tensor>(std::string(input.id()));
+  scope->Var<hlir::framework::Tensor>(std::string(output->id));
+
+  auto input_tensor = scope->GetTensor(std::string(input.id()));
+  SetRandData(input_tensor, target);
+  float* input_data = input_tensor->mutable_data<float>(target);
+
+  runtime_program->Execute();
+
+  auto output_tensor       = scope->GetTensor(std::string(output->id));
+  const auto& output_shape = output_tensor->shape().data();
+  ASSERT_EQ(output_shape.size(), expected_shape.size());
+  for (size_t i = 0; i < expected_shape.size(); ++i) {
+    EXPECT_EQ(output_shape[i], expected_shape[i]) << "dimension " << i;
+  }
+
+  // Squeeze only removes dimensions of size 1, so the flattened element order is the same on both sides.
+  int numel = 1;
+  for (int dim : input_shape) {
+    numel *= dim;
+  }
+  float* output_data = output_tensor->mutable_data<float>(target);
+  for (int i = 0; i < numel; ++i) {
+    VLOG(6) << "output_data[" << i << "] = " << output_data[i];
+    EXPECT_EQ(input_data[i], output_data[i]) << "index " << i;
+  }
+}
+
+TEST(net_build, program_execute_squeeze_case1) { RunSqueezeTest({1}, {4, 7, 1}); }
+
+TEST(net_build, program_execute_squeeze_case2) { RunSqueezeTest({1, 3}, {4, 7}); }
+
+TEST(net_build, program_execute_squeeze_case3) { RunSqueezeTest({}, {4, 7}); }
+
+TEST(net_build, program_execute_squeeze_negative_axes) { RunSqueezeTest({-3, -1}, {4, 7}); }
+
 }  // namespace frontend
 }  // namespace cinn
diff --git a/cinn/hlir/op/contrib/CMakeLists.txt b/cinn/hlir/op/contrib/CMakeLists.txt
index 52e948e5f0..98f9811eb3 100644
--- a/cinn/hlir/op/contrib/CMakeLists.txt
+++ b/cinn/hlir/op/contrib/CMakeLists.txt
@@ -2,8 +2,10 @@ core_gather_headers()
 
 gather_srcs(cinnapi_src SRCS
     cast.cc
+    squeeze.cc
     clip.cc
-  )
+    )
 
 cc_test(test_cast SRCS cast_test.cc DEPS cinncore)
+cc_test(test_squeeze SRCS squeeze_test.cc DEPS cinncore)
 cc_test(test_clip SRCS clip_test.cc DEPS cinncore)
diff --git a/cinn/hlir/op/contrib/squeeze.cc b/cinn/hlir/op/contrib/squeeze.cc
new file mode 100644
index 0000000000..5ce3daccbd
--- /dev/null
+++ b/cinn/hlir/op/contrib/squeeze.cc
@@ -0,0 +1,214 @@
+// Copyright (c) 2022 CINN Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cinn/hlir/op/contrib/squeeze.h"
+
+#include <gflags/gflags.h>
+
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "cinn/common/cas.h"
+#include "cinn/common/common.h"
+#include "cinn/common/context.h"
+#include "cinn/common/macros.h"
+#include "cinn/hlir/framework/node.h"
+#include "cinn/hlir/framework/op.h"
+#include "cinn/hlir/framework/op_strategy.h"
+#include "cinn/hlir/pe/elementwise.h"
+#include "cinn/ir/ir.h"
+#include "cinn/ir/ir_base.h"
+#include "cinn/ir/tensor.h"
+#include "cinn/lang/builtin.h"
+#include "cinn/lang/compute.h"
+
+DECLARE_bool(cinn_ir_schedule);
+
+namespace cinn {
+namespace hlir {
+namespace op {
+
+using common::CINNValue;
+using common::CINNValuePack;
+
+namespace {
+
+// Converts `axis` to a non-negative index into a shape of `rank` dimensions. A negative `axis` counts from the last
+// dimension, so -1 refers to dimension `rank - 1`. Values outside [-rank, rank) abort.
+int NormalizeSqueezeAxis(int axis, int rank) {
+  CHECK(axis >= -rank && axis < rank)
+      << "The squeeze axis " << axis << " is out of range for a tensor with " << rank << " dimensions.";
+  return axis < 0 ? axis + rank : axis;
+}
+
+}  // namespace
+
+// Builds a tensor named `name` that holds the elements of `A` with size-1 dimensions removed. `axes` lists the
+// dimensions to remove, each of which must have size 1; when `axes` is empty, every size-1 dimension is removed.
+ir::Tensor Squeeze(const ir::Tensor &A, const std::vector<int> &axes, const std::string &name) {
+  std::vector<Expr> A_expr_shape = A->shape;
+  const int rank                 = static_cast<int>(A_expr_shape.size());
+
+  std::unordered_set<int> axes_index;
+  for (int axis : axes) {
+    axes_index.insert(NormalizeSqueezeAxis(axis, rank));
+  }
+
+  std::vector<Expr> new_expr_shape;
+  for (int i = 0; i < rank; ++i) {
+    CHECK(A_expr_shape[i].is_constant()) << "Input tensor's shape should be constant value.";
+    if (axes.empty()) {
+      if (A_expr_shape[i] != Expr(1)) {
+        new_expr_shape.push_back(A_expr_shape[i]);
+      }
+    } else if (axes_index.count(i)) {
+      CHECK_EQ(A_expr_shape[i], Expr(1)) << "Only dimensions of size 1 can be squeezed, but axis " << i << " is not.";
+    } else {
+      new_expr_shape.push_back(A_expr_shape[i]);
+    }
+  }
+
+  auto res = Compute(
+      new_expr_shape,
+      [=](const std::vector<Expr> &indices) {
+        // Flatten the output index into a single row-major offset.
+        Expr offset = Expr(0);
+        for (int i = 0; i < indices.size(); i++) {
+          offset = offset * new_expr_shape[i] + indices[i];
+        }
+        // Split the offset back into one index per dimension of A, starting from the last dimension.
+        std::vector<Expr> indices_a;
+        for (int i = A_expr_shape.size() - 1; i >= 0; i--) {
+          auto temp = offset % A_expr_shape[i];
+          indices_a.insert(indices_a.begin(), common::AutoSimplify(temp));
+          offset = (offset - temp) / A_expr_shape[i];
+        }
+        return lang::Identity(A(indices_a));
+      },
+      name);
+  return res;
+}
+
+// Creates the op strategy for squeeze: a compute function that applies Squeeze() to the first input tensor using the
+// "axes" attribute, registered together with the schedule returned by framework::GetInjectiveScheduleFunc.
+std::shared_ptr<framework::OpStrategy> StrategyForSqueeze(const framework::NodeAttr &attrs,
+                                                          const std::vector<ir::Tensor> &inputs,
+                                                          const std::vector<Type> &out_type,
+                                                          const std::vector<std::vector<int>> &output_shapes,
+                                                          const Target &target) {
+  CHECK(attrs.attr_store.count("axes")) << "find no attr of axes";
+  std::vector<int> axes = absl::get<std::vector<int>>(attrs.attr_store.at("axes"));
+
+  framework::CINNCompute squeeze_compute([=](lang::Args args, lang::RetValue *ret) {
+    CHECK(!args.empty()) << "The input arguments of Squeeze compute is empty! Please check.\n";
+    CINNValuePack a = args[0];
+    CHECK_GE(a.size(), 1U) << "at least 1 input tensors for Squeeze compute\n";
+    Expr A = a[0];
+    CHECK(A.as_tensor());
+    CHECK(!output_shapes.empty());
+    auto tensor_A = A.as_tensor_ref();
+    auto stages   = CreateStages({tensor_A});
+    VLOG(3) << "A shape: " << utils::Join(tensor_A->shape, ", ")
+            << ", output_shapes: " << utils::Join(output_shapes[0], ", ");
+    ir::Tensor out = Squeeze(tensor_A, axes, UniqName("Squeeze_out"));
+    std::vector<CINNValue> res;
+    stages->InsertLazily(out);
+    res.push_back(CINNValue(out));
+    CHECK(!out_type.empty()) << "Output type of Squeeze is empty! Please check.\n";
+    res.push_back(CINNValue(stages));
+    *ret = CINNValuePack{res};
+  });
+
+  auto strategy = std::make_shared<framework::OpStrategy>();
+  strategy->AddImpl(
+      squeeze_compute, framework::GetInjectiveScheduleFunc(output_shapes, target), "strategy.squeeze.x86", 1);
+  return strategy;
+}
+
+// Infers the output shape of squeeze from the single input shape and the optional "axes" attribute. Every listed axis
+// must name a dimension of size 1 (negative values count from the last dimension); when "axes" is missing or empty,
+// every dimension of size 1 is dropped. The remaining dimensions keep their order.
+std::vector<std::vector<int>> InferShapeForSqueeze(const std::vector<std::vector<int>> &inputs_shape,
+                                                   const framework::AttrMapType &attrs) {
+  CHECK_EQ(inputs_shape.size(), 1U) << "The input's shape size should be 1! Please check again.";
+  std::vector<int> axes;
+  auto axes_iter = attrs.find("axes");
+  if (axes_iter != attrs.end()) {
+    axes = absl::get<std::vector<int>>(axes_iter->second);
+  }
+
+  const std::vector<int> &input_shape = inputs_shape[0];
+  const int rank                      = static_cast<int>(input_shape.size());
+
+  std::unordered_set<int> squeezed_axes;
+  for (int axis : axes) {
+    const int index = NormalizeSqueezeAxis(axis, rank);
+    CHECK_EQ(input_shape[index], 1)
+        << "Only dimensions of size 1 can be squeezed, but axis " << axis << " has size " << input_shape[index] << ".";
+    squeezed_axes.insert(index);
+  }
+
+  std::vector<int> output_shape;
+  for (int i = 0; i < rank; ++i) {
+    const bool squeezed = axes.empty() ? input_shape[i] == 1 : squeezed_axes.count(i) > 0;
+    if (squeezed) {
+      continue;
+    }
+    CHECK_GT(input_shape[i], 0) << "Unsupported dimension " << input_shape[i] << " in the input shape of squeeze.";
+    output_shape.push_back(input_shape[i]);
+  }
+
+  CHECK(!output_shape.empty()) << "infer_shape for squeeze turns out to be empty. Please check\n";
+  std::vector<std::vector<int>> res{output_shape};
+  return res;
+}
+
+// The output of squeeze has the same type as its first input.
+std::vector<Type> InferDtypeForSqueeze(const std::vector<Type> &inputs_type, const framework::AttrMapType &attrs) {
+  CHECK(!inputs_type.empty()) << "The input's type size is 0! Please check again.";
+  std::vector<Type> res{inputs_type[0]};
+  return res;
+}
+
+// Checks that there is exactly one input shape and one input layout, then returns the input layouts for both entries.
+std::vector<std::vector<std::string>> InferLayoutForSqueeze(const std::vector<framework::shape_t> &input_shapes,
+                                                            const std::vector<std::string> &input_layouts,
+                                                            const framework::NodeAttr &attrs,
+                                                            const Target &target) {
+  CHECK_EQ(input_shapes.size(), 1U) << "The input's shape size is not 1! Please check again.";
+  CHECK_EQ(input_layouts.size(), 1U) << "The input's layout size is not 1! Please check again.";
+  return {input_layouts, input_layouts};
+}
+
+}  // namespace op
+}  // namespace hlir
+}  // namespace cinn
+
+CINN_REGISTER_HELPER(squeeze_ops) {
+  CINN_REGISTER_OP(squeeze)
+      .describe("Squeeze.")
+      .set_num_inputs(1)
+      .set_num_outputs(1)
+      .set_attr<cinn::hlir::framework::StrategyFunction>("CINNStrategy", cinn::hlir::op::StrategyForSqueeze)
+      .set_attr("infershape", MakeOpFunction(cinn::hlir::op::InferShapeForSqueeze))
+      .set_attr("inferdtype", MakeOpFunction(cinn::hlir::op::InferDtypeForSqueeze))
+#ifndef CINN_WITH_CUDA
+      .set_attr("inferlayout", MakeOpFunction(cinn::hlir::op::InferLayoutForSqueeze))
+#endif
+      .set_support_level(4);
+
+  return true;
+}
diff --git a/cinn/hlir/op/contrib/squeeze.h b/cinn/hlir/op/contrib/squeeze.h
new file mode 100644
index 0000000000..b22d80b245
--- /dev/null
+++ b/cinn/hlir/op/contrib/squeeze.h
@@ -0,0 +1,32 @@
+// Copyright (c) 2022 CINN Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "cinn/ir/ir.h"
+#include "cinn/ir/ir_base.h"
+#include "cinn/ir/tensor.h"
+
+namespace cinn {
+namespace hlir {
+namespace op {
+// Squeezes A: removes the size-1 dimensions listed in `axes`, or every size-1 dimension when `axes` is empty.
+ir::Tensor Squeeze(const ir::Tensor& A, const std::vector<int>& axes, const std::string& name);
+
+}  // namespace op
+}  // namespace hlir
+}  // namespace cinn
diff --git a/cinn/hlir/op/contrib/squeeze_test.cc b/cinn/hlir/op/contrib/squeeze_test.cc
new file mode 100644
index 0000000000..2823f2327d
--- /dev/null
+++ b/cinn/hlir/op/contrib/squeeze_test.cc
@@ -0,0 +1,69 @@
+// Copyright (c) 2022 CINN Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cinn/hlir/op/contrib/squeeze.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include <string>
+#include <vector>
+
+#include "cinn/backends/codegen_c.h"
+#include "cinn/backends/codegen_c_x86.h"
+#include "cinn/backends/codegen_cuda_dev.h"
+#include "cinn/common/context.h"
+#include "cinn/lang/lower.h"
+#include "cinn/lang/placeholder.h"
+#include "cinn/poly/stage.h"
+
+namespace cinn {
+namespace hlir {
+namespace op {
+
+TEST(GenerateCode_Cpu, Squeeze) {
+  common::Context::Global().ResetNameId();
+
+  common::Target target = common::DefaultHostTarget();
+
+  ir::Expr n(4);
+  ir::Expr c(1);
+  ir::Expr h(28);
+  ir::Expr w(1);
+
+  lang::Placeholder<float> in("in", {n, c, h, w});
+  ir::Tensor res = Squeeze(in, {1, 3}, "test_squeeze_out");
+  ASSERT_EQ(res->shape.size(), 2UL);  // Axes 1 and 3 are removed, which leaves {n, h}.
+  poly::StageMap stages = poly::CreateStages({res});
+  std::vector<ir::LoweredFunc> funcs =
+      lang::LowerVec("TestGenerateCodeCpu_Squeeze", stages, {res}, {}, {}, nullptr, target, true);
+
+  VLOG(6) << "Expr before CPU codegen:";
+  VLOG(6) << funcs[0]->body;
+
+  ir::Module::Builder builder("Squeeze_Module", target);
+  for (auto& f : funcs) {
+    builder.AddFunction(f);
+  }
+
+  backends::CodeGenCX86 codegen(target, backends::CodeGenCX86::Feature::AVX512);
+  codegen.SetInlineBuiltinCodes(false);
+  std::string code = codegen.Compile(builder.Build(), backends::CodeGenC::OutputKind::CImpl);
+  VLOG(6) << "Cpu Codegen result:";
+  VLOG(6) << code;
+}
+
+}  // namespace op
+}  // namespace hlir
+}  // namespace cinn
diff --git a/cinn/hlir/op/use_ops.h b/cinn/hlir/op/use_ops.h
index e0715b202c..96403dce89 100644
--- a/cinn/hlir/op/use_ops.h
+++ b/cinn/hlir/op/use_ops.h
@@ -23,5 +23,6 @@ CINN_USE_REGISTER(broadcast_grad_ops)
 CINN_USE_REGISTER(elementwise_ops)
 CINN_USE_REGISTER(transform_ops)
 CINN_USE_REGISTER(cast_ops)
+CINN_USE_REGISTER(squeeze_ops)
 CINN_USE_REGISTER(reduce_ops)
 CINN_USE_REGISTER(clip_ops)
diff --git a/cinn/pybind/frontend.cc b/cinn/pybind/frontend.cc
index ea825b48d8..58e468bd1d 100644
--- a/cinn/pybind/frontend.cc
+++ b/cinn/pybind/frontend.cc
@@ -455,6 +455,8 @@ void BindFrontend(pybind11::module *m) {
           "all", &NetBuilder::ReduceAll, py::arg("x"), py::arg("axis") = std::vector<int>{}, py::arg("keepdim") = false)
       .def(
           "any", &NetBuilder::ReduceAny, py::arg("x"), py::arg("axis") = std::vector<int>{}, py::arg("keepdim") = false)
+      .def("reduce_sum", &NetBuilder::ReduceSum, py::arg("x"), py::arg("dim"), py::arg("keep_dim") = false)
+      .def("squeeze", &NetBuilder::Squeeze, py::arg("a"), py::arg("axes") = std::vector<int>{})
       .def("conv2d",
            &NetBuilder::Conv2d,
            py::arg("a"),
diff --git a/python/tests/ops/test_squeeze_op.py b/python/tests/ops/test_squeeze_op.py
new file mode 100644
index 0000000000..34eb92a516
--- /dev/null
+++ b/python/tests/ops/test_squeeze_op.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2022 CINN Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cinn
+import numpy as np
+import paddle
+import unittest
+
+from cinn.frontend import *
+from cinn.common import *
+from op_test import OpTest, OpTestTool
+
+
+class TestSqueezeOp(OpTest):
+    def setUp(self):
+        self.init_case()
+
+    def init_case(self):
+        self.inputs = {"x": np.random.random([2, 3, 1, 1]).astype("float32")}
+        self.axes = [2, 3]  # Squeeze both size-1 dimensions explicitly.
+
+    def build_paddle_program(self, target):
+        x = paddle.to_tensor(self.inputs["x"], stop_gradient=True)
+        out = paddle.squeeze(x, self.axes)
+        self.paddle_outputs = [out]
+
+    def build_cinn_program(self, target):
+        builder = NetBuilder("squeeze_test")
+        x = builder.create_input(Float(32), self.inputs["x"].shape, "x")
+        out = builder.squeeze(x, self.axes)
+
+        prog = builder.build()
+        res = self.get_cinn_output(prog, target, [x], [self.inputs["x"]],
+                                   [out])
+        self.cinn_outputs = [res[0]]
+
+    def test_check_results(self):
+        self.check_outputs_and_grads()
+
+
+class TestSqueezeCase1(TestSqueezeOp):
+    def init_case(self):
+        self.inputs = {"x": np.random.random([2, 3, 1, 1]).astype("float32")}
+        self.axes = [2]  # Squeeze only one of the two size-1 dimensions.
+
+
+class TestSqueezeCase2(TestSqueezeOp):
+    def init_case(self):
+        self.inputs = {"x": np.random.random([2, 3, 1, 1]).astype("float32")}
+        self.axes = []  # No axes given: the CINN op squeezes every size-1 dimension.
+
+
+if __name__ == "__main__":
+    unittest.main()