diff --git a/lite/kernels/host/CMakeLists.txt b/lite/kernels/host/CMakeLists.txt index 05de2abf071..9810278effe 100644 --- a/lite/kernels/host/CMakeLists.txt +++ b/lite/kernels/host/CMakeLists.txt @@ -61,7 +61,7 @@ add_kernel(strided_slice_compute_host Host extra SRCS strided_slice_compute.cc D add_kernel(tile_compute_host Host extra SRCS tile_compute.cc DEPS ${lite_kernel_deps}) add_kernel(topk_v2_compute_host Host extra SRCS topk_v2_compute.cc DEPS ${lite_kernel_deps}) add_kernel(fill_any_like_compute_host Host extra SRCS fill_any_like_compute.cc DEPS ${lite_kernel_deps}) - +add_kernel(tril_triu_compute_host Host extra SRCS tril_triu_compute.cc DEPS ${lite_kernel_deps}) if(LITE_BUILD_EXTRA AND LITE_WITH_x86) lite_cc_test(test_where_index_compute_host SRCS where_index_compute.cc DEPS where_index_compute_host) diff --git a/lite/kernels/host/expand_as_compute.cc b/lite/kernels/host/expand_as_compute.cc index a291068abe3..4edb9504c54 100644 --- a/lite/kernels/host/expand_as_compute.cc +++ b/lite/kernels/host/expand_as_compute.cc @@ -30,7 +30,6 @@ void ExpandAsCompute::Run() { const T* src = x->template data(); T* dst = out->template mutable_data(); - // int dims = expand_times.size(); for (int i = 0; i < target->dims().size(); ++i) { int times = target->dims()[i] / x->dims()[i]; expand_times.push_back(times); @@ -75,12 +74,29 @@ REGISTER_LITE_KERNEL(expand_as, kHost, kFloat, kAny, expand_as_float, def) {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny))}) - .BindInput("Target", + .BindInput("target_tensor", {LiteType::GetTensorTy(TARGET(kHost), - PRECISION(kFloat), + PRECISION(kAny), DATALAYOUT(kAny))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny))}) .Finalize(); + +using expand_as_int64 = + paddle::lite::kernels::host::ExpandAsCompute; +REGISTER_LITE_KERNEL(expand_as, kHost, kFloat, kAny, expand_as_int64, int64) + .BindInput("X", + {LiteType::GetTensorTy(TARGET(kHost), + PRECISION(kInt64), + 
DATALAYOUT(kAny))}) + .BindInput("target_tensor", + {LiteType::GetTensorTy(TARGET(kHost), + PRECISION(kAny), + DATALAYOUT(kAny))}) + .BindOutput("Out", + {LiteType::GetTensorTy(TARGET(kHost), + PRECISION(kInt64), + DATALAYOUT(kAny))}) + .Finalize(); diff --git a/lite/kernels/host/tril_triu_compute.cc b/lite/kernels/host/tril_triu_compute.cc new file mode 100644 index 00000000000..c88dec80be0 --- /dev/null +++ b/lite/kernels/host/tril_triu_compute.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/host/tril_triu_compute.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace host { + +template +void TrilTriu(const T* in, + const int64_t diagonal, + const bool lower, + const int64_t h, + const int64_t w, + T* out) { + int64_t size = h * w; + for (int64_t idx = 0; idx < size; idx++) { + const int64_t row = idx / w; + const int64_t col = idx % w; + const bool mask = lower ? (col - row > diagonal) : (col - row < diagonal); + out[idx] = mask ? 
0 : in[idx]; + } + return; +} + +template +void TrilTriuCompute::Run() { + auto& param = this->template Param(); + const lite::Tensor* x = param.x; + lite::Tensor* out = param.out; + int64_t diagonal = param.diagonal; + bool lower = param.lower; + + const T* x_data = x->template data(); + T* out_data = out->template mutable_data(); + auto x_dims = x->dims(); + int64_t h = x_dims[x_dims.size() - 2]; + int64_t w = x_dims[x_dims.size() - 1]; + int64_t n = x_dims.production() / h / w; + + for (int64_t i = 0; i < n; i++) { + TrilTriu(x_data, diagonal, lower, h, w, out_data); + x_data += h * w; + out_data += h * w; + } + return; +} + +} // namespace host +} // namespace kernels +} // namespace lite +} // namespace paddle + +using TrilTriuFloat32 = paddle::lite::kernels::host::TrilTriuCompute; +REGISTER_LITE_KERNEL(tril_triu, kHost, kAny, kNCHW, TrilTriuFloat32, float32) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat))}) + .BindOutput("Out", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat))}) + .Finalize(); diff --git a/lite/kernels/host/tril_triu_compute.h b/lite/kernels/host/tril_triu_compute.h new file mode 100644 index 00000000000..461beba2f1c --- /dev/null +++ b/lite/kernels/host/tril_triu_compute.h @@ -0,0 +1,37 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include "lite/core/kernel.h" +#include "lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace host { + +template +class TrilTriuCompute : public KernelLite { + public: + using param_t = operators::TrilTriuParam; + + void Run() override; + + virtual ~TrilTriuCompute() = default; +}; + +} // namespace host +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/lite/operators/CMakeLists.txt b/lite/operators/CMakeLists.txt index 6da165c9473..b591fb99371 100644 --- a/lite/operators/CMakeLists.txt +++ b/lite/operators/CMakeLists.txt @@ -144,6 +144,7 @@ add_operator(tensor_array_to_tensor_op extra SRCS tensor_array_to_tensor_op.cc D add_operator(expand_v2_op_lite extra SRCS expand_v2_op.cc DEPS ${op_DEPS}) add_operator(tile_op extra SRCS tile_op.cc DEPS ${op_DEPS}) add_operator(sum_op extra SRCS sum_op.cc DEPS ${op_DEPS}) +add_operator(tril_triu_op extra SRCS tril_triu_op.cc DEPS ${op_DEPS}) # for OCR specific add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS}) diff --git a/lite/operators/expand_as_op.cc b/lite/operators/expand_as_op.cc index 992e987d901..0f4203635d7 100644 --- a/lite/operators/expand_as_op.cc +++ b/lite/operators/expand_as_op.cc @@ -48,7 +48,7 @@ bool ExpandAsOpLite::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) { auto Out_name = opdesc.Output("Out").front(); param_.X = GetVar(scope, X_name); param_.Out = GetMutableVar(scope, Out_name); - auto Target_name = opdesc.Input("Target").front(); + auto Target_name = opdesc.Input("target_tensor").front(); param_.Target = GetVar(scope, Target_name); return true; } diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h index b1d777d1bf0..6ef08baaf53 100644 --- a/lite/operators/op_params.h +++ b/lite/operators/op_params.h @@ -646,6 +646,14 @@ struct TransposeParam : ParamBase { } }; +struct TrilTriuParam : ParamBase { + const lite::Tensor* x{nullptr}; + lite::Tensor* out{nullptr}; + + int 
diagonal{0}; + bool lower{true}; +}; + /// ----------------------- element wise operators ---------------------- struct ElementwiseParam : ParamBase { const lite::Tensor* X{}; diff --git a/lite/operators/tril_triu_op.cc b/lite/operators/tril_triu_op.cc new file mode 100644 index 00000000000..819bffe7b99 --- /dev/null +++ b/lite/operators/tril_triu_op.cc @@ -0,0 +1,48 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/operators/tril_triu_op.h" +#include "lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace operators { + +bool TrilTriuOp::CheckShape() const { + CHECK(param_.x); + CHECK(param_.out); + return true; +} + +bool TrilTriuOp::InferShapeImpl() const { + CHECK_GE(param_.x->dims().size(), 2UL); + param_.out->Resize(param_.x->dims()); + param_.out->set_lod(param_.x->lod()); + return true; +} + +bool TrilTriuOp::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) { + param_.x = scope->FindTensor(op_desc.Input("X").front()); + param_.out = scope->FindMutableTensor(op_desc.Output("Out").front()); + + param_.diagonal = op_desc.GetAttr("diagonal"); + param_.lower = op_desc.GetAttr("lower"); + return true; +} + +} // namespace operators +} // namespace lite +} // namespace paddle + +REGISTER_LITE_OP(tril_triu, paddle::lite::operators::TrilTriuOp); diff --git a/lite/operators/tril_triu_op.h b/lite/operators/tril_triu_op.h new file mode 100644 index 00000000000..90d38397f65 --- /dev/null +++ b/lite/operators/tril_triu_op.h @@ -0,0 +1,45 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include +#include "lite/core/op_lite.h" +#include "lite/core/scope.h" +#include "lite/utils/all.h" + +namespace paddle { +namespace lite { +namespace operators { + +class TrilTriuOp : public OpLite { + public: + TrilTriuOp() {} + explicit TrilTriuOp(const std::string &op_type) : OpLite(op_type) {} + + bool CheckShape() const override; + + bool InferShapeImpl() const override; + + bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override; + + void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } + std::string DebugString() const override { return "tril_triu"; } + + private: + mutable TrilTriuParam param_; +}; + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt index 9585143448f..7bbefba8db1 100644 --- a/lite/tests/kernels/CMakeLists.txt +++ b/lite/tests/kernels/CMakeLists.txt @@ -99,6 +99,7 @@ if(LITE_BUILD_EXTRA) lite_cc_test(test_kernel_sequence_expand_as_compute SRCS sequence_expand_as_compute_test.cc DEPS ${test_kernel_deps}) lite_cc_test(test_kernel_sin_compute SRCS sin_compute_test.cc DEPS arena_framework ${test_kernel_deps}) lite_cc_test(test_kernel_cos_compute SRCS cos_compute_test.cc DEPS arena_framework ${test_kernel_deps}) + lite_cc_test(test_kernel_tril_triu_compute SRCS tril_triu_compute_test.cc DEPS arena_framework ${test_kernel_deps}) lite_cc_test(test_kernel_pad3d_compute SRCS pad3d_compute_test.cc DEPS arena_framework ${test_kernel_deps}) lite_cc_test(test_kernel_select_input_compute SRCS select_input_compute_test.cc DEPS arena_framework ${test_kernel_deps}) # lite_cc_test(test_kernel_tensor_array_to_tensor_compute SRCS tensor_array_to_tensor_compute_test.cc DEPS arena_framework ${test_kernel_deps}) diff --git a/lite/tests/kernels/expand_as_compute_test.cc b/lite/tests/kernels/expand_as_compute_test.cc index 193a8fd59e5..252aea323a2 100644 --- a/lite/tests/kernels/expand_as_compute_test.cc +++ 
b/lite/tests/kernels/expand_as_compute_test.cc @@ -16,10 +16,12 @@ #include "lite/api/paddle_use_kernels.h" #include "lite/api/paddle_use_ops.h" #include "lite/core/arena/framework.h" +#include "lite/tests/utils/fill_data.h" namespace paddle { namespace lite { +template class ExpandAsComputeTester : public arena::TestCase { protected: // common attributes for this op. @@ -55,8 +57,8 @@ class ExpandAsComputeTester : public arena::TestCase { out_shape[i] *= expand_times_[i]; } out->Resize(out_shape); - float* out_data = out->mutable_data(); - const float* input_data = input->data(); + T* out_data = out->template mutable_data(); + const T* input_data = input->template data(); std::vector in_stride(in_shape.size(), 1), out_stride(out_shape.size(), 1); for (int i = in_shape.size() - 2; i >= 0; --i) { @@ -78,30 +80,49 @@ class ExpandAsComputeTester : public arena::TestCase { void PrepareOpDesc(cpp::OpDesc* op_desc) { op_desc->SetType("expand_as"); op_desc->SetInput("X", {x_}); - op_desc->SetInput("Target", {target_}); + op_desc->SetInput("target_tensor", {target_}); op_desc->SetOutput("Out", {out_}); } void PrepareData() override { - std::vector in_data(dims_.production()); - std::vector target_data(target_dims_.production()); - for (int i = 0; i < dims_.production(); ++i) { - in_data[i] = i; - } - for (int i = 0; i < target_dims_.production(); ++i) { - target_data[i] = i; - } + std::vector in_data(dims_.production()); + fill_data_rand(in_data.data(), + static_cast(-10), + static_cast(10), + dims_.production()); SetCommonTensor(x_, dims_, in_data.data()); + + std::vector target_data(target_dims_.production()); + fill_data_rand(target_data.data(), + static_cast(-10), + static_cast(10), + target_dims_.production()); SetCommonTensor(target_, target_dims_, target_data.data()); + return; } }; +template void test_expand_as_3dim(Place place, float abs_error) { + auto precision = lite_api::PrecisionTypeTrait::Type(); + std::string alias("def"); + switch (precision) { + case 
lite_api::PrecisionType::kFloat: + alias = std::string("def"); + break; + case lite_api::PrecisionType::kInt64: + alias = std::string("int64"); + break; + default: + LOG(FATAL) << "unsupported precision: " + << lite_api::PrecisionToStr(precision); + } + for (int C : {3}) { for (int H : {2}) { for (int W : {4}) { - std::unique_ptr tester(new ExpandAsComputeTester( - place, "def", DDim({C, H, W}), DDim({C * 2, H * 3, W * 1}))); + std::unique_ptr tester(new ExpandAsComputeTester( + place, alias, DDim({C, H, W}), DDim({C * 2, H * 3, W * 1}))); arena::Arena arena(std::move(tester), place, abs_error); arena.TestPrecision(); } @@ -109,16 +130,31 @@ void test_expand_as_3dim(Place place, float abs_error) { } } +template void test_expand_as_4dim(Place place, float abs_error) { + auto precision = lite_api::PrecisionTypeTrait::Type(); + std::string alias("def"); + switch (precision) { + case lite_api::PrecisionType::kFloat: + alias = std::string("def"); + break; + case lite_api::PrecisionType::kInt64: + alias = std::string("int64"); + break; + default: + LOG(FATAL) << "unsupported precision: " + << lite_api::PrecisionToStr(precision); + } + for (int N : {2}) { for (int C : {3}) { for (int H : {2}) { for (int W : {4}) { std::unique_ptr tester( - new ExpandAsComputeTester(place, - "def", - DDim({N, C, H, W}), - DDim({N * 2, C * 3, H * 1, W * 4}))); + new ExpandAsComputeTester(place, + alias, + DDim({N, C, H, W}), + DDim({N * 2, C * 3, H * 1, W * 4}))); arena::Arena arena(std::move(tester), place, abs_error); arena.TestPrecision(); } @@ -130,19 +166,17 @@ void test_expand_as_4dim(Place place, float abs_error) { TEST(ExpandAs, precision) { float abs_error = 1e-5; Place place; -#if defined(LITE_WITH_NPU) - place = TARGET(kNPU); - abs_error = 1e-2; // Using fp16 in NPU -#elif defined(LITE_WITH_ARM) - place = TARGET(kHost); -#elif defined(LITE_WITH_X86) +#if defined(LITE_WITH_ARM) || defined(LITE_WITH_X86) place = TARGET(kHost); #else return; #endif - test_expand_as_3dim(place, 
abs_error); - test_expand_as_4dim(place, abs_error); + test_expand_as_3dim(place, abs_error); + test_expand_as_4dim(place, abs_error); + + test_expand_as_3dim(place, abs_error); + test_expand_as_4dim(place, abs_error); } } // namespace lite diff --git a/lite/tests/kernels/tril_triu_compute_test.cc b/lite/tests/kernels/tril_triu_compute_test.cc new file mode 100644 index 00000000000..e88a1cb3b51 --- /dev/null +++ b/lite/tests/kernels/tril_triu_compute_test.cc @@ -0,0 +1,132 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include "lite/api/paddle_use_kernels.h" +#include "lite/api/paddle_use_ops.h" +#include "lite/core/arena/framework.h" +#include "lite/tests/utils/fill_data.h" + +namespace paddle { +namespace lite { + +template +class TrilTriuComputeTester : public arena::TestCase { + protected: + std::string x_ = "X"; + std::string out_ = "Out"; + DDim x_dims_; + int diagonal_{0}; + bool lower_{true}; + + public: + TrilTriuComputeTester(const Place& place, + const std::string& alias, + const DDim& x_dims, + const int diagonal = 0, + const bool lower = true) + : TestCase(place, alias), + x_dims_(x_dims), + diagonal_(diagonal), + lower_(lower) {} + + void RunBaseline(Scope* scope) override { + auto* out = scope->NewTensor(out_); + auto* x = scope->FindTensor(x_); + out->Resize(x_dims_); + out->set_lod(x->lod()); + + auto* x_data = x->template data(); + auto* out_data = out->template mutable_data(); + auto h = x_dims_[x_dims_.size() - 2]; + auto w = x_dims_[x_dims_.size() - 1]; + auto n = x_dims_.production() / h / w; + + for (int64_t i = 0; i < n; i++) { + for (int64_t idx = 0; idx < h * w; idx++) { + auto row = idx / w; + auto col = idx % w; + bool mask = lower_ ? (col - row > diagonal_) : (col - row < diagonal_); + out_data[idx] = mask ? 
0 : x_data[idx]; + } + x_data += h * w; + out_data += h * w; + } + return; + } + + void PrepareOpDesc(cpp::OpDesc* op_desc) { + op_desc->SetType("tril_triu"); + op_desc->SetInput("X", {x_}); + op_desc->SetOutput("Out", {out_}); + op_desc->SetAttr("diagonal", diagonal_); + op_desc->SetAttr("lower", lower_); + return; + } + + void PrepareData() override { + std::vector din(x_dims_.production()); + fill_data_rand(din.data(), + static_cast(-10), + static_cast(10), + x_dims_.production()); + SetCommonTensor(x_, x_dims_, din.data()); + return; + } +}; + +template +void TestTrilTriuHelper(Place place, + float abs_error, + const std::vector x_dims, + const int diagonal = 0, + const bool lower = true) { + auto precision = lite_api::PrecisionTypeTrait::Type(); + std::string alias("def"); + switch (precision) { + case lite_api::PrecisionType::kFloat: + alias = std::string("float32"); + break; + default: + LOG(FATAL) << "unsupported precision: " + << lite_api::PrecisionToStr(precision); + } + + std::unique_ptr tester(new TrilTriuComputeTester( + place, alias, DDim(x_dims), diagonal, lower)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); +} + +TEST(tril_triu, precision) { + Place place; + float abs_error = 1e-5; +#if defined(LITE_WITH_ARM) || defined(LITE_WITH_X86) + place = TARGET(kHost); +#else + return; +#endif + + for (auto x_shape : + std::vector>{{3, 4}, {5, 6, 7}, {5, 6, 7, 8}}) { + for (auto lower : {true, false}) { + for (auto diagonal : {-1, 0, 2}) { + TestTrilTriuHelper(place, abs_error, x_shape, diagonal, lower); + } + } + } +} + +} // namespace lite +} // namespace paddle