diff --git a/paddle/fluid/framework/op_compatible_info.cc b/paddle/fluid/framework/op_compatible_info.cc
index 37c8dd22c174a..d710e76c0c111 100644
--- a/paddle/fluid/framework/op_compatible_info.cc
+++ b/paddle/fluid/framework/op_compatible_info.cc
@@ -79,8 +79,6 @@ void OpCompatibleMap::InitOpCompatibleMap() {
   op_compatible_map_["dpsgd"] = {"1.6.0", OpCompatibleType::DEFIN_NOT};
   op_compatible_map_["eye"] = {"1.6.0", OpCompatibleType::DEFIN_NOT};
   op_compatible_map_["fill_any_like"] = {"1.6.0", OpCompatibleType::DEFIN_NOT};
-  op_compatible_map_["filter_by_instag"] = {"1.6.0",
-                                            OpCompatibleType::DEFIN_NOT};
   op_compatible_map_["hard_swish"] = {"1.6.0", OpCompatibleType::DEFIN_NOT};
   op_compatible_map_["gather_nd"] = {"1.6.0", OpCompatibleType::DEFIN_NOT};
   op_compatible_map_["instance_norm"] = {"1.6.0", OpCompatibleType::DEFIN_NOT};
@@ -91,7 +89,6 @@ void OpCompatibleMap::InitOpCompatibleMap() {
   op_compatible_map_["multiclass_nms2"] = {"1.6.0",
                                            OpCompatibleType::DEFIN_NOT};
   op_compatible_map_["one_hot_v2"] = {"1.6.0", OpCompatibleType::DEFIN_NOT};
-  op_compatible_map_["prroi_pool"] = {"1.6.0", OpCompatibleType::DEFIN_NOT};
   op_compatible_map_["pull_box_sparse"] = {"1.6.0",
                                            OpCompatibleType::DEFIN_NOT};
   op_compatible_map_["scatter_nd_add"] = {"1.6.0", OpCompatibleType::DEFIN_NOT};
diff --git a/paddle/fluid/operators/filter_by_instag_op.cc b/paddle/fluid/operators/filter_by_instag_op.cc
deleted file mode 100644
index 02735a1ee5be0..0000000000000
--- a/paddle/fluid/operators/filter_by_instag_op.cc
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
- -#include "paddle/fluid/operators/filter_by_instag_op.h" - -#include - -#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" -#include "paddle/fluid/framework/var_type_inference.h" - -namespace paddle { -namespace operators { -class FilterByInstagOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("Ins"), - true, - platform::errors::InvalidArgument("Input(Ins) should be not null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("Ins_tag"), - true, - platform::errors::InvalidArgument( - "Input(Ins_tag) should be not null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("Filter_tag"), - true, - platform::errors::InvalidArgument( - "Input(Filter_tag) should be not null.")); - - PADDLE_ENFORCE_EQ( - ctx->HasOutput("Out"), - true, - platform::errors::InvalidArgument("Output(Out) should be not null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("LossWeight"), - true, - platform::errors::InvalidArgument( - "Output(LossWeight) shoudl not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("IndexMap"), - true, - platform::errors::InvalidArgument( - "Output(IndexMap) should be not null.")); - - auto x1_dims = ctx->GetInputDim("Ins"); // batch_size * vec - - ctx->SetOutputDim("Out", phi::make_ddim({-1, x1_dims[1]})); - ctx->SetOutputDim("LossWeight", phi::make_ddim({-1, 1})); - ctx->SetOutputDim("IndexMap", phi::make_ddim({-1, 2})); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Ins"); - return phi::KernelKey(data_type, ctx.device_context().GetPlace()); - } -}; - -class FilterByInstagOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Ins", "(phi::DenseTensor) embeded tensor"); - AddInput("Ins_tag", "(phi::DenseTensor) ins tag list"); - AddInput("Filter_tag", "(1D Tensor) filter tag list"); - AddAttr("is_lod", "is Ins with LoD info or not, default True"); - AddAttr("out_val_if_empty", - "if the output after filter is empty, the output value") - .SetDefault(0); - AddOutput("Out", "(phi::DenseTensor) embeded tensor filtered by instag"); - AddOutput("LossWeight", "(Tensor) loss weight."); - AddOutput("IndexMap", - "(phi::DenseTensor) mapping from Out rows to X1 rows"); - AddComment(R"DOC( -Filter By Instag Op - -This operator is used to filter embeded ins. - -There are 3 inputs. First is embeded ins, Second is tags for ins, -Third is tags to filter. - -There are 3 outputs. First is filtered embeded ins, Second is Loss Weight, -Third is the IndexMap from Out line number to X1 line number. 
-)DOC"); - } -}; - -class FilterByInstagOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("IndexMap"), - true, - platform::errors::InvalidArgument( - "Input(IndexMap) should be not null")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Grad Input(Out) should be not null")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Ins"), - true, - platform::errors::InvalidArgument("Input(Ins) should be not null")); - PADDLE_ENFORCE_EQ(ctx->HasInput("LossWeight"), - true, - platform::errors::InvalidArgument( - "Input(LossWeight) should be not null")); - PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("Ins")), - true, - platform::errors::InvalidArgument( - "Grad Output(Ins) should be not null")); - - auto grad_out_dims = ctx->GetInputDim(framework::GradVarName("Out")); - auto x1_dims = ctx->GetInputDim("Ins"); - ctx->SetOutputDim(framework::GradVarName("Ins"), - phi::make_ddim({x1_dims[0], grad_out_dims[1]})); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")); - return phi::KernelKey(data_type, ctx.device_context().GetPlace()); - } -}; - -template -class FilterByInstagGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("filter_by_instag_grad"); - op->SetInput("IndexMap", this->Output("IndexMap")); - op->SetInput("Ins", this->Input("Ins")); - op->SetAttrMap(this->Attrs()); - op->SetInput("LossWeight", this->Output("LossWeight")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("Ins"), this->InputGrad("Ins")); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(filter_by_instag, - ops::FilterByInstagOp, - ops::FilterByInstagOpMaker, - ops::FilterByInstagGradOpMaker, - ops::FilterByInstagGradOpMaker); - -REGISTER_OPERATOR(filter_by_instag_grad, ops::FilterByInstagOpGrad); - -PD_REGISTER_STRUCT_KERNEL(filter_by_instag, - CPU, - ALL_LAYOUT, - ops::FilterByInstagKernel, - float, - double, - int32_t, - int64_t) {} - -PD_REGISTER_STRUCT_KERNEL(filter_by_instag_grad, - CPU, - ALL_LAYOUT, - ops::FilterByInstagGradKernel, - float, - double, - int32_t, - int64_t) {} diff --git a/paddle/fluid/operators/filter_by_instag_op.cu b/paddle/fluid/operators/filter_by_instag_op.cu deleted file mode 100644 index 4449044acb89b..0000000000000 --- a/paddle/fluid/operators/filter_by_instag_op.cu +++ /dev/null @@ -1,639 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11000 - -#include "paddle/fluid/operators/filter_by_instag_op.h" - -#if defined(PADDLE_WITH_CUDA) -#include -#endif - -#include -#include - -#include -#include -#include -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/phi/core/mixed_vector.h" - -#if defined(PADDLE_WITH_CUDA) -namespace cg = cooperative_groups; -#endif - -namespace paddle { -namespace operators { - -using SelectedRows = phi::SelectedRows; - -template -using Vector = phi::Vector; - -#define WARP_SIZE 32 -#define MAX_WARP_NUM 32 - -#if defined(PADDLE_WITH_CUDA) - -template -__global__ void filter_copy_fuse_kernel(const size_t N, - const int ins_per_thread, - size_t* x1_lods_data, - size_t* x2_lods_data, - const int64_t* x2_data, - const int64_t* x3_data, - int64_t filter_tag_size, - T* out_data, - int64_t* map_data, - size_t* map_lods_data, - size_t* out_lods_data, - size_t* out_idx_data, - const T* x1_data, - int x1_embed_size, - float* loss_weight_data, - float fill_value) { - // N is instance num - // one threads for ins_per_thread instances - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - cg::thread_block b = cg::this_thread_block(); - cg::thread_block_tile g = cg::tiled_partition(b); - - int gid = idx / WARP_SIZE; - - // general use - int thread_num = - (N + (ins_per_thread - 1)) / ins_per_thread; // real thread num - int total_warp_num = thread_num / WARP_SIZE; // 30 - int remain_thread_num = thread_num % WARP_SIZE; // 16 - - int warp_thread_num = -1; - if (gid < total_warp_num) { - warp_thread_num = WARP_SIZE; - } else { - warp_thread_num = remain_thread_num; - } - - int group_num = total_warp_num; - if (remain_thread_num > 0) { - group_num = total_warp_num + 1; - } - - if (gid >= group_num) return; - - int ins_start = idx * ins_per_thread; - int ins_end = (idx + 1) * ins_per_thread; - - if (N < ins_end) ins_end = N; - - int flag_data[5]; - int prefix_sum_data[5]; - int prefix_sum_data2[5]; - - __shared__ int shr[MAX_WARP_NUM]; - __shared__ int shr2[MAX_WARP_NUM]; - __shared__ int shr3[MAX_WARP_NUM]; - - for (int p = ins_start; p < ins_end; p++) { - int ins_tag_start = x2_lods_data[p]; - int ins_tag_end = x2_lods_data[p + 1]; - flag_data[p - ins_start] = 0; - // filter logic - int i = ins_tag_start; - for (; i < ins_tag_end; i++) { - int64_t ins_tag = x2_data[i]; - int j = 0; - for (; j < filter_tag_size; j++) { - if (x3_data[j] == ins_tag) break; - } - // if ins_tag in filter tag - if (j < filter_tag_size) { - flag_data[p - ins_start] = 1; - break; - } - } - } - - int sum_addr = 0; - int sum_flag = 0; - int sum_out_lods = 0; - - int local_addr = 0; - int local_flag = 0; - int local_out_lods = 0; - - if (ins_start < ins_end) { - for (int p = ins_start; p < ins_end; p++) { - int previous = -1; - if (p == ins_start) { - previous = 0; - } else { - previous = prefix_sum_data[p - ins_start - 1]; - } - - prefix_sum_data[p - ins_start] = - previous + - flag_data[p - ins_start] * (x1_lods_data[p + 1] - x1_lods_data[p]); - } - - local_addr = prefix_sum_data[ins_end - 1 - ins_start]; - sum_addr = local_addr; - - for (int p = ins_start; p < ins_end; p++) { - local_flag += flag_data[p - ins_start]; - } - sum_flag = local_flag; - - for (int p = ins_start; p < ins_end; p++) { - local_out_lods += - flag_data[p - 
ins_start] * (x1_lods_data[p + 1] - x1_lods_data[p]); - } - - sum_out_lods = local_out_lods; - } - - for (int i = 1; i < warp_thread_num; i *= 2) { - int temp_addr = g.shfl_up(sum_addr, i); - int temp_flag = g.shfl_up(sum_flag, i); - int temp_out_lods = g.shfl_up(sum_out_lods, i); - - if (g.thread_rank() >= i) { - sum_addr += temp_addr; - sum_flag += temp_flag; - sum_out_lods += temp_out_lods; - } - } - - if (g.thread_rank() == warp_thread_num - 1) { - shr[gid] = sum_addr; - shr2[gid] = sum_flag; - shr3[gid] = sum_out_lods; - } - - b.sync(); - - int sum_addr2 = 0; - int sum_flag2 = 0; - int sum_out_lods2 = 0; - - // communicate between warp - if (g.thread_rank() < group_num) { - sum_addr2 = shr[g.thread_rank()]; - sum_flag2 = shr2[g.thread_rank()]; - sum_out_lods2 = shr3[g.thread_rank()]; - } - - for (int i = 1; i < group_num; i *= 2) { - int temp_addr2 = g.shfl_up(sum_addr2, i); - int temp_flag2 = g.shfl_up(sum_flag2, i); - int temp_out_lods2 = g.shfl_up(sum_out_lods2, i); - - if (g.thread_rank() >= i) { - sum_addr2 += temp_addr2; - sum_flag2 += temp_flag2; - sum_out_lods2 += temp_out_lods2; - } - } - - int sum_addr3 = g.shfl(sum_addr2, gid); - int sum_flag3 = g.shfl(sum_flag2, gid); - int sum_out_lods3 = g.shfl(sum_out_lods2, gid); - - int p_flag; - int p_addr; - int p_out_lods; - - if (ins_start < ins_end) { - p_addr = sum_addr3 - shr[gid] + sum_addr - local_addr; - p_flag = sum_flag3 - shr2[gid] + sum_flag - local_flag; - p_out_lods = sum_out_lods3 - shr3[gid] + sum_out_lods - local_out_lods; - - for (int p = ins_start; p < ins_end; p++) { - if (ins_start == p) { - prefix_sum_data2[p - ins_start] = p_addr; - } else { - prefix_sum_data2[p - ins_start] = - prefix_sum_data2[p - ins_start - 1] + - flag_data[p - ins_start - 1] * - (x1_lods_data[p] - x1_lods_data[p - 1]); - } - } - - if (gid == 0 && g.thread_rank() == group_num - 1) { - *out_idx_data = (sum_flag2 + 1); - map_lods_data[sum_flag2] = sum_flag2; - } - } - - int sum_out_lods4 = g.shfl(sum_out_lods2 + 1, group_num - 1); - - if (ins_start < ins_end) { - int out_lods_idx = p_flag + 1; - for (int p = ins_start; p < ins_end; p++) { - if (flag_data[p - ins_start] == 1) { - size_t batch_len = x1_lods_data[p + 1] - x1_lods_data[p]; - int t = out_lods_idx - 1; - int previous; - if (out_lods_idx == p_flag + 1) { - previous = p_out_lods; - } else { - previous = out_lods_data[t]; - } - map_data[t * 3] = (int64_t)previous; - map_data[t * 3 + 1] = x1_lods_data[p]; - map_lods_data[t] = t; - out_lods_data[out_lods_idx] = previous + batch_len; - map_data[t * 3 + 2] = batch_len; - out_lods_idx++; - } - } - - // fill loss_weight_data - if (sum_out_lods4 > 1) { - int out_data_num = sum_out_lods4 - 1; - int out_start = ins_start; - if (out_start < out_data_num) { - int out_end = ins_end >= out_data_num ? 
out_data_num : ins_end; - for (int p = out_start; p < out_end; p++) { - loss_weight_data[p] = fill_value; - } - } - } - - for (int p = ins_start; p < ins_end; p++) { - // copy logic - if (flag_data[p - ins_start] == 1) { - auto output_start_idx = prefix_sum_data2[p - ins_start]; - T* dst = out_data + output_start_idx * x1_embed_size; - const T* src_start = x1_data + x1_lods_data[p] * x1_embed_size; - const T* src_end = x1_data + x1_lods_data[p + 1] * x1_embed_size; - for (const T* j = src_start; j != src_end; dst++, j++) { - *dst = *j; - } - } - } - } - - b.sync(); -} - -template -__global__ void copy_grad_kernel(const size_t N, - const int ins_per_thread, - const T* out_grad_data, - T* x1_grad_data, - const int64_t* map_data, - int x1_embed_size) { - // N is instance num - // one threads for one instance - int idx = blockIdx.x * blockDim.x + threadIdx.x; - int ins_start = idx * ins_per_thread; - int ins_end = (idx + 1) * ins_per_thread; - if (ins_start >= N) { - return; - } - if (ins_end > N) ins_end = N; - for (int p = ins_start; p < ins_end; p++) { - T* dst = x1_grad_data + map_data[p * 3 + 1] * x1_embed_size; - const T* src_start = out_grad_data + map_data[p * 3] * x1_embed_size; - const T* src_end = - out_grad_data + (map_data[p * 3] + map_data[p * 3 + 2]) * x1_embed_size; - - for (const T* j = src_start; j != src_end; dst++, j++) { - *dst = *j; - } - } -} - -#endif - -template -class FilterByInstagGPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { -#if defined(PADDLE_WITH_CUDA) - - auto gpu_place = context.GetPlace(); - - gpuStream_t current_stream = context.cuda_device_context().stream(); - - int max_thread_num_per_block = 1024; - // context.cuda_device_context().GetMaxThreadsPerBlock(); - // X1 is global FC output - // Dim [batch size, embedding size] - const phi::DenseTensor* x1 = context.Input("Ins"); - bool is_lod = context.Attr("is_lod"); - - int is_x1_lod = -1; - if (is_lod) - is_x1_lod = 1; - else - is_x1_lod = 0; - - int64_t out_val_if_empty = context.Attr("out_val_if_empty"); - size_t x1_embed_size = x1->dims()[1]; - // X2 is ins tag list - // LoD [[0, Sum(ins1), Sum(ins1, ins2), ... 
]] - const phi::DenseTensor* x2 = context.Input("Ins_tag"); - // expected auto = const int64_t - const int64_t* x2_data = x2->data(); - - // X3 is local fc tag list - // LoD [[0, Sum(fc1), Sum(fc1, fc2) ...]] - const phi::DenseTensor* x3 = context.Input("Filter_tag"); - const int64_t* x3_data = x3->data(); - - Vector x2_lods; - if (x2->lod().size() != 0) { // lod_level = 1 - x2_lods = x2->lod()[0]; - } else { // lod_level = 0 - const size_t x2_lods_size = x2->dims()[0]; - const size_t instag_per_num = x2->dims()[1]; - // x2_lods.resize(x2->dims()[0] + 1); - // move to cuda - x2_lods.push_back(0); - for (size_t i = 0; i < x2_lods_size; i++) { - x2_lods.push_back(x2_lods.back() + instag_per_num); - } - } - - const size_t x2_lods_size = x2_lods.size() - 1; - phi::MixVector mixv_x2_lods(&x2_lods); - - size_t* x2_lods_data = mixv_x2_lods.CUDAMutableData(gpu_place); - - Vector x1_lods; - if (!is_x1_lod) { - x1_lods.push_back(0); - for (int i = 0; i < x1->dims()[0]; i++) { - x1_lods.push_back(i + 1); - } - } else { - // x1_lods = context.Input("Ins")->lod()[0]; - // new: lod_level=0 => lod() return {} - if (x1->lod().size() != 0) { // lod_level = 1 - x1_lods = x1->lod()[0]; - } else { // lod_level = 0 - // x1_lods.resize(x1->dims()[0] + 1); - // move to cuda - x1_lods.push_back(0); - for (int i = 0; i < x1->dims()[0]; i++) { - x1_lods.push_back(i + 1); - } - } - } - - phi::MixVector mixv_x1_lods(&x1_lods); - - size_t* x1_lods_data = mixv_x1_lods.CUDAMutableData(gpu_place); - auto* x1_data = x1->data(); - - // set output value - // for those whose ins been dropout, set 0 for whole lines. - // otherwise, copy whole line - // Dim [local fc count, batch size, embedding size] - phi::DenseTensor* out = context.Output("Out"); - phi::DenseTensor* map = context.Output("IndexMap"); - phi::DenseTensor* loss_weight = - context.Output("LossWeight"); - - int out_first = x1_lods.back(); - - out->Resize(phi::make_ddim({(int64_t)out_first, (int64_t)x1_embed_size})); - map->Resize(phi::make_ddim({(int64_t)x2_lods_size, 3})); - loss_weight->Resize(phi::make_ddim({(int64_t)x2_lods_size, 1})); - - T* out_data = out->mutable_data(gpu_place); - int64_t* map_data = map->mutable_data(gpu_place); - float* loss_weight_data = loss_weight->mutable_data(gpu_place); - - int block_size = max_thread_num_per_block; - int ins_per_thread = (x2_lods_size + block_size - 1) / block_size; - dim3 block_dim(block_size); - dim3 grid_dim(1); - - Vector out_lods(x2_lods_size + 1, 0); - Vector map_lods(x2_lods_size + 1, 0); - - phi::MixVector mixv_out_lods(&out_lods); - phi::MixVector mixv_map_lods(&map_lods); - - // thrust::device_vector out_idx(1); - Vector out_idx(1, 0); - phi::MixVector mixv_out_idx(&out_idx); - - size_t* out_idx_data = mixv_out_idx.CUDAMutableData(gpu_place); - size_t* out_lods_data = mixv_out_lods.CUDAMutableData(gpu_place); - size_t* map_lods_data = mixv_map_lods.CUDAMutableData(gpu_place); - - float fill_value = 1.0; - - filter_copy_fuse_kernel<<>>( - x2_lods_size, - ins_per_thread, - x1_lods_data, - x2_lods_data, - x2_data, - x3_data, - x3->numel(), - out_data, - map_data, - map_lods_data, - out_lods_data, - out_idx_data, - x1_data, - x1_embed_size, - loss_weight_data, - fill_value); - - platform::GpuStreamSync(current_stream); - - mixv_out_lods.resize(mixv_out_idx[0]); - - if (mixv_out_lods.size() - 1 > 0) { - out->Resize(phi::make_ddim( - {(int64_t)mixv_out_lods.back(), (int64_t)x1_embed_size})); - - map->Resize(phi::make_ddim({(int64_t)mixv_out_lods.size() - 1, 3})); - loss_weight->Resize( - 
phi::make_ddim({(int64_t)mixv_out_lods.size() - 1, 1})); - - } else { - out->Resize(phi::make_ddim({1, (int64_t)x1_embed_size})); - map->Resize(phi::make_ddim({1, 3})); - loss_weight->Resize(phi::make_ddim({1, 1})); - } - - if (mixv_out_lods.size() - 1 > 0) { - map_lods.resize(mixv_out_lods.size()); - - mixv_map_lods.CopyToCPU(); - - std::vector> map_lod_info; - map_lod_info.emplace_back(map_lods); - - map->set_lod(map_lod_info); - loss_weight->set_lod(map_lod_info); - - mixv_out_lods.CopyToCPU(); - std::vector> out_lod_info; - out_lod_info.emplace_back(out_lods); - out->set_lod(out_lod_info); - - } else { - Vector map_lods(2, 0); - phi::MixVector mixv_map_lods(&map_lods); - thrust::device_ptr map_data_ptr(map_data); - - map_data_ptr[0] = 0; - map_data_ptr[1] = 1; - map_data_ptr[2] = 1; - - mixv_map_lods[0] = 0; - mixv_map_lods[1] = 1; - mixv_out_lods.push_back(1); - - mixv_map_lods.CopyToCPU(); - mixv_out_lods.CopyToCPU(); - - std::vector> map_lod_info; - map_lod_info.emplace_back(map_lods); - map->set_lod(map_lod_info); - - loss_weight->set_lod(map_lod_info); - - std::vector> out_lod_info; - out_lod_info.emplace_back(out_lods); - out->set_lod(out_lod_info); - - thrust::device_ptr out_data_ptr(out_data); - - // gpu kernel - if (std::is_same::value) { - thrust::fill(out_data_ptr, - out_data_ptr + out->numel(), - static_cast(out_val_if_empty)); - } else if (std::is_same::value) { - thrust::fill(out_data_ptr, - out_data_ptr + out->numel(), - static_cast(out_val_if_empty)); - } else if (std::is_same::value) { - thrust::fill(out_data_ptr, - out_data_ptr + out->numel(), - static_cast(out_val_if_empty)); - } else { - thrust::fill(out_data_ptr, - out_data_ptr + out->numel(), - static_cast(out_val_if_empty)); - } - - thrust::device_ptr loss_weight_data_ptr(loss_weight_data); - loss_weight_data_ptr[0] = 0; - } - -#endif - } -}; - -template -class FilterByInstagGradGPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { -#if defined(PADDLE_WITH_CUDA) - - auto gpu_place = context.GetPlace(); - gpuStream_t current_stream = context.cuda_device_context().stream(); - auto max_thread_num_per_block = 1024; - auto* output_grad = - context.Input(framework::GradVarName("Out")); - auto* x1_grad = - context.Output(framework::GradVarName("Ins")); - auto* loss_weight = context.Input("LossWeight"); - auto* mmap = context.Input("IndexMap"); - auto* x1 = context.Input("Ins"); - - x1_grad->set_lod(context.Input("Ins")->lod()); - x1_grad->Resize(x1->dims()); - - auto* mmap_data = mmap->data(); - // expected auto = T - auto* output_grad_data = output_grad->data(); - auto* loss_weight_data = loss_weight->data(); - - // expected auto = T - auto* x1_grad_data = x1_grad->mutable_data(gpu_place); - thrust::device_ptr x1_grad_data_ptr(x1_grad_data); - thrust::device_ptr loss_weight_data_ptr(loss_weight_data); - - thrust::fill( - x1_grad_data_ptr, x1_grad_data_ptr + x1->dims()[0] * x1->dims()[1], 0); - - if (loss_weight->numel() != 1 || loss_weight_data_ptr[0] != 0) { - auto output_dims = output_grad->dims(); - int x1_embed_size = output_dims[1]; - - // one thread for multi-instances - int block_size = max_thread_num_per_block; - - size_t N = mmap->dims()[0]; - dim3 block_dim(block_size); - - dim3 grid_dim((N + block_size - 1) / block_size); - - const int ins_per_thread = 1; - - copy_grad_kernel<<>>( - N, - ins_per_thread, - output_grad_data, - x1_grad_data, - mmap_data, - x1_embed_size); - - cudaStreamSynchronize(current_stream); - } - -#endif - } -}; - 
-} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL(filter_by_instag, - GPU, - ALL_LAYOUT, - ops::FilterByInstagGPUKernel, - float, - double, - int32_t, - int64_t) {} - -PD_REGISTER_STRUCT_KERNEL(filter_by_instag_grad, - GPU, - ALL_LAYOUT, - ops::FilterByInstagGradGPUKernel, - float, - double, - int32_t, - int64_t) {} diff --git a/paddle/fluid/operators/filter_by_instag_op.h b/paddle/fluid/operators/filter_by_instag_op.h deleted file mode 100644 index 04dc713a4dcc9..0000000000000 --- a/paddle/fluid/operators/filter_by_instag_op.h +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/memory/memcpy.h" -#include "paddle/phi/core/mixed_vector.h" - -namespace paddle { -namespace operators { -using SelectedRows = phi::SelectedRows; - -template -using Vector = phi::Vector; - -template -class FilterByInstagKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - // X1 is global FC output - // Dim [batch size, embedding size] - auto* x1 = context.Input("Ins"); - bool is_x1_lod = context.Attr("is_lod"); - int64_t out_val_if_empty = context.Attr("out_val_if_empty"); - // X2 is ins tag list - // LoD [[0, Sum(ins1), Sum(ins1, ins2), ... ]] - auto* x2 = context.Input("Ins_tag"); - // X3 is local fc tag list - // LoD [[0, Sum(fc1), Sum(fc1, fc2) ...]] - auto* x3 = context.Input("Filter_tag"); - - std::unordered_set filter_tag; - auto* x3_data = x3->data(); - size_t len = x3->dims()[0]; - for (size_t i = 0; i < len; i++) { - filter_tag.insert(x3_data[i]); - } - - // expected auto = const int64_t - auto* x2_data = x2->data(); - // e.g get [0, 1, 2, 3, ...] 
- // size_t x2_lods_size = x2->dims()[0]; - // size_t instag_num_per_ins = x2->dims()[1]; - - Vector x2_lods(1, 0); - if (x2->lod().size() != 0) { // lod_level = 1 - x2_lods = x2->lod()[0]; - } else { // lod_level = 0 - const size_t x2_lods_size = x2->dims()[0]; - const size_t instag_num_per_ins = x2->dims()[1]; - for (size_t i = 0; i < x2_lods_size; i++) { - x2_lods.push_back(x2_lods.back() + instag_num_per_ins); - } - } - - Vector x1_lods(1, 0); - if (!is_x1_lod) { - for (int i = 0; i < x1->dims()[0]; i++) { - x1_lods.push_back(i + 1); - } - } else { - // new: lod_level=0 => lod() return {} - if (x1->lod().size() != 0) { - x1_lods = x1->lod()[0]; - } else { - for (int i = 0; i < x1->dims()[0]; i++) { - x1_lods.push_back(i + 1); - } - } - } - std::unordered_map mmap_aux; - Vector out_lods(1, 0); - for (size_t i = 0; i < x2_lods.size() - 1; i++) { - for (size_t j = x2_lods[i]; j < x2_lods[i + 1]; j++) { - if (filter_tag.find(x2_data[j]) != filter_tag.end()) { - size_t batch_len = x1_lods[i + 1] - x1_lods[i]; - mmap_aux[out_lods.back()] = x1_lods[i]; - out_lods.push_back(out_lods.back() + batch_len); - break; - } - } - } - // set output value - // for those whose ins been dropout, set 0 for whole lines. - // otherwise, copy whole line - // Dim [local fc count, batch size, embedding size] - phi::DenseTensor* out = context.Output("Out"); - phi::DenseTensor* map = context.Output("IndexMap"); - phi::DenseTensor* loss_weight = - context.Output("LossWeight"); - // expected auto = const T - auto* x1_data = x1->data(); - // expected auto = T - size_t x1_embed_size = x1->dims()[1]; - if (out_lods.size() - 1 > 0) { - out->Resize( - phi::make_ddim({(int64_t)out_lods.back(), (int64_t)x1_embed_size})); - map->Resize(phi::make_ddim({(int64_t)out_lods.size() - 1, 3})); - loss_weight->Resize(phi::make_ddim({(int64_t)out_lods.size() - 1, 1})); - } else { - out->Resize(phi::make_ddim({1, (int64_t)x1_embed_size})); - map->Resize(phi::make_ddim({1, 3})); - loss_weight->Resize(phi::make_ddim({1, 1})); - } - auto* out_data = out->mutable_data(context.GetPlace()); - auto* map_data = map->mutable_data(context.GetPlace()); - auto* loss_weight_data = - loss_weight->mutable_data(context.GetPlace()); - if (out_lods.size() - 1 > 0) { - Vector map_lods; - for (size_t i = 0; i < out_lods.size() - 1; i++) { - map_data[i * 3] = (int64_t)out_lods[i]; - map_data[i * 3 + 1] = mmap_aux[map_data[i * 3]]; - map_data[i * 3 + 2] = out_lods[i + 1] - out_lods[i]; - map_lods.push_back(i); - } - map_lods.push_back(out_lods.size() - 1); - std::vector> map_lod_info; - map_lod_info.push_back(map_lods); - - map->set_lod(map_lod_info); - loss_weight->set_lod(map_lod_info); - std::vector> out_lod_info; - out_lod_info.push_back(out_lods); - out->set_lod(out_lod_info); - memset(out_data, 0, out->numel() * sizeof(T)); - for (int i = 0; i < loss_weight->numel(); i++) { - loss_weight_data[i] = 1; - } - - for (size_t i = 0; i < out_lods.size() - 1; i++) { - size_t pos = out_lods[i]; - for (int k = map_data[i * 3 + 1]; - k < map_data[i * 3 + 1] + map_data[i * 3 + 2]; - k++) { - memcpy(out_data + pos * x1_embed_size, - x1_data + k * x1_embed_size, - x1_embed_size * sizeof(T)); - ++pos; - } - } - } else { - Vector map_lods; - map_data[0] = 0; - map_data[1] = 1; - map_data[2] = 1; - map_lods.push_back(0); - map_lods.push_back(1); - out_lods.push_back(1); - std::vector> map_lod_info; - map_lod_info.push_back(map_lods); - map->set_lod(map_lod_info); - loss_weight->set_lod(map_lod_info); - std::vector> out_lod_info; - out_lod_info.push_back(out_lods); - 
out->set_lod(out_lod_info); - for (int64_t oi = 0; oi < out->numel(); ++oi) { - if (std::is_same::value) { - out_data[oi] = (int32_t)out_val_if_empty; - } else if (std::is_same::value) { - out_data[oi] = (int64_t)out_val_if_empty; - } else if (std::is_same::value) { - out_data[oi] = static_cast(out_val_if_empty); - } else { - out_data[oi] = static_cast(out_val_if_empty); - } - } - loss_weight_data[0] = 0; - } - } -}; - -template -class FilterByInstagGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* output_grad = - context.Input(framework::GradVarName("Out")); - auto* x1_grad = - context.Output(framework::GradVarName("Ins")); - auto* loss_weight = context.Input("LossWeight"); - auto* mmap = context.Input("IndexMap"); - auto* x1 = context.Input("Ins"); - x1_grad->set_lod(context.Input("Ins")->lod()); - x1_grad->Resize(x1->dims()); - auto mmap_data = mmap->data(); - // expected auto = T - auto* output_grad_data = output_grad->data(); - - auto* loss_weight_data = loss_weight->data(); - // expected auto = T - auto* x1_grad_data = x1_grad->mutable_data(context.GetPlace()); - memset(x1_grad_data, 0, x1->dims()[0] * x1->dims()[1] * sizeof(T)); - if (loss_weight->numel() != 1 || loss_weight_data[0] != 0) { - auto output_dims = output_grad->dims(); - for (int i = 0; i < mmap->dims()[0]; i++) { - int src_ln = mmap_data[i * 3], dst_ln = mmap_data[i * 3 + 1]; - int line_cnt = mmap_data[i * 3 + 2]; - for (int l = 0; l < line_cnt; l++) { - for (int j = 0; j < output_dims[1]; j++) { - x1_grad_data[(dst_ln + l) * output_dims[1] + j] = - output_grad_data[(src_ln + l) * output_dims[1] + j]; - } - } - } - } - } -}; -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/prroi_pool_op.cc b/paddle/fluid/operators/prroi_pool_op.cc deleted file mode 100644 index 0f0dbf3c6888a..0000000000000 --- a/paddle/fluid/operators/prroi_pool_op.cc +++ /dev/null @@ -1,214 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/prroi_pool_op.h" - -#include - -namespace paddle { -namespace operators { - -class PRROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(Tensor), " - "the input of PRROIPoolOp. " - "The format of input tensor is NCHW. Where N is the batch size, " - "C is the number of input channels, " - "H is the height of the input feature map, and " - "W is the width."); - AddInput("ROIs", - "(phi::DenseTensor), " - "ROIs (Regions of Interest) to pool over. " - "should be a 2-D phi::DenseTensor of shape (num_rois, 4) " - "given as [(x1, y1, x2, y2), ...]. " - "where (x1, y1) is the top left coordinates, and " - "(x2, y2) is the bottom right coordinates. 
" - "The roi batch index can be calculated from LoD."); - AddInput("BatchRoINums", - "(Tensor), " - "1-D tensor with shape [N], the number of" - " rois for each image in batch, where N is the batch size") - .AsDispensable(); - AddOutput("Out", - "(Tensor), " - "the output of PRROIPoolOp is a 4-D Tensor with shape " - "(num_rois, output_channels, pooled_h, pooled_w)."); - AddAttr("spatial_scale", - "(float, default 1.0), " - "Multiplicative spatial scale factor " - "to translate ROI coords from their input scale " - "to the scale used when pooling.") - .SetDefault(1.0); - AddAttr("pooled_height", - "(int, default 1), " - "the pooled output height.") - .SetDefault(1); - AddAttr("pooled_width", - "(int, default 1), " - "the pooled output width.") - .SetDefault(1); - AddComment(R"Doc( -**PRROIPool Operator** - -Precise region of interest pooling (also known as PRROIPooling) is to perform - bilinear interpolation average pooling method for RoI Pooling. - -Please refer to https://arxiv.org/abs/1807.11590 for more details. - - )Doc"); - } -}; - -class PRROIPoolOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "prroi_pool"); - OP_INOUT_CHECK(ctx->HasInput("ROIs"), "Input", "ROIs", "prroi_pool"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Input", "Out", "prroi_pool"); - - auto input_dims = ctx->GetInputDim("X"); - auto rois_dims = ctx->GetInputDim("ROIs"); - - PADDLE_ENFORCE_EQ(input_dims.size(), - 4, - platform::errors::InvalidArgument( - "The format of input tensor is NCHW")); - PADDLE_ENFORCE_EQ( - rois_dims.size(), - 2, - platform::errors::InvalidArgument( - "ROIs should be a 2-D phi::DenseTensor of shape (num_rois, 4) " - "given as [(x1, y1, x2, y2), ...]")); - PADDLE_ENFORCE_EQ( - rois_dims[1], - 4, - platform::errors::InvalidArgument( - "ROIs should be a 2-D phi::DenseTensor of shape (num_rois, 4) " - "given as [(x1, y1, x2, y2), ...]")); - int pooled_height = ctx->Attrs().Get("pooled_height"); - int pooled_width = ctx->Attrs().Get("pooled_width"); - float spatial_scale = ctx->Attrs().Get("spatial_scale"); - - PADDLE_ENFORCE_GT(pooled_height, - 0, - platform::errors::InvalidArgument( - "The pooled output height must be greater than 0")); - PADDLE_ENFORCE_GT(pooled_width, - 0, - platform::errors::InvalidArgument( - "The pooled output width must be greater than 0")); - PADDLE_ENFORCE_GT(spatial_scale, - 0.0f, - platform::errors::InvalidArgument( - "The spatial scale must greater than 0.")); - - auto out_dims = input_dims; - out_dims[0] = rois_dims[0]; - out_dims[1] = input_dims[1]; - out_dims[2] = pooled_height; - out_dims[3] = pooled_width; - - if (ctx->HasInput("BatchRoINums")) { - auto rois_batch_index = ctx->GetInputDim("BatchRoINums"); - PADDLE_ENFORCE_EQ(rois_batch_index[0], - input_dims[0], - platform::errors::InvalidArgument( - "The length of BatchRoINums should equal to " - "first dim of inputs(X)")); - } - ctx->SetOutputDim("Out", out_dims); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -class PRROIPoolGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - 
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), - "Input", - framework::GradVarName("Out"), - "prroi_pool"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), - "Output", - framework::GradVarName("X"), - "prroi_pool"); - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); - ctx->SetOutputDim(framework::GradVarName("ROIs"), ctx->GetInputDim("ROIs")); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -template -class PRROIPoolGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("prroi_pool_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("Out", this->Output("Out")); - op->SetInput("ROIs", this->Input("ROIs")); - op->SetInput("BatchRoINums", this->Input("BatchRoINums")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetOutput(framework::GradVarName("ROIs"), this->InputGrad("ROIs")); - op->SetAttrMap(this->Attrs()); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(prroi_pool, - ops::PRROIPoolOp, - ops::PRROIPoolOpMaker, - ops::PRROIPoolGradMaker, - ops::PRROIPoolGradMaker); -REGISTER_OPERATOR(prroi_pool_grad, ops::PRROIPoolGradOp); - -PD_REGISTER_STRUCT_KERNEL(prroi_pool, - CPU, - ALL_LAYOUT, - ops::CPUPRROIPoolOpKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_STRUCT_KERNEL(prroi_pool_grad, - CPU, - ALL_LAYOUT, - ops::CPUPRROIPoolGradOpKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/prroi_pool_op.cu b/paddle/fluid/operators/prroi_pool_op.cu deleted file mode 100644 index 5d1243964279b..0000000000000 --- a/paddle/fluid/operators/prroi_pool_op.cu +++ /dev/null @@ -1,439 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/prroi_pool_op.h" - -namespace paddle { -namespace operators { - -static constexpr int kNumCUDAThreads = 512; -static constexpr int kNumMaximumNumBlocks = 4096; - -static inline int NumBlocks(const int N) { - return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, - kNumMaximumNumBlocks); -} - -template -__global__ void GPUPRROIPoolForward(const int nthreads, - const T* input_data, - const T* input_rois, - const float spatial_scale, - const int input_channels, - const int height, - const int width, - const int output_channels, - const int pooled_height, - const int pooled_width, - const int* rois_batch_id_data, - T* output_data) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - int offset = blockDim.x * gridDim.x; - for (size_t i = index; i < nthreads; i += offset) { - // The output is in order (n, c, ph, pw) - int pw = i % pooled_width; - int ph = (i / pooled_width) % pooled_height; - int c = (i / pooled_width / pooled_height) % output_channels; - int n = i / pooled_width / pooled_height / output_channels; - - // set roi_batch_id - int roi_batch_id = rois_batch_id_data[n]; - - // [start, end) interval for spatial sampling - const T* offset_input_rois = input_rois + n * 4; - T roi_start_w = static_cast(offset_input_rois[0]) * spatial_scale; - T roi_start_h = static_cast(offset_input_rois[1]) * spatial_scale; - T roi_end_w = static_cast(offset_input_rois[2]) * spatial_scale; - T roi_end_h = static_cast(offset_input_rois[3]) * spatial_scale; - - T roi_width = max(roi_end_w - roi_start_w, static_cast(0.0)); - T roi_height = max(roi_end_h - roi_start_h, static_cast(0.0)); - - // Compute w and h at input feature map - T bin_size_h = roi_height / static_cast(pooled_height); - T bin_size_w = roi_width / static_cast(pooled_width); - - T win_start_w = roi_start_w + bin_size_w * pw; - T win_start_h = roi_start_h + bin_size_h * ph; - T win_end_w = win_start_w + bin_size_w; - T win_end_h = win_start_h + bin_size_h; - - T win_size = max(static_cast(0.0), bin_size_w * bin_size_h); - int input_channel = c; - const T* offset_input_data = - input_data + - (roi_batch_id * input_channels + input_channel) * height * width; - - if (win_size > static_cast(0.0)) { - int s_w = floor(win_start_w); - int e_w = ceil(win_end_w); - int s_h = floor(win_start_h); - int e_h = ceil(win_end_h); - T sum_out = 0; - - for (int w_iter = s_w; w_iter < e_w; ++w_iter) { - for (int h_iter = s_h; h_iter < e_h; ++h_iter) { - sum_out += PrRoIPoolingMatCalculation( - offset_input_data, - h_iter, - w_iter, - h_iter + 1, - w_iter + 1, - max(win_start_h, static_cast(h_iter)), - max(win_start_w, static_cast(w_iter)), - min(win_end_h, static_cast(h_iter) + static_cast(1.0)), - min(win_end_w, static_cast(w_iter) + static_cast(1.0)), - height, - width); - } - } - output_data[i] = sum_out / win_size; - } else { - output_data[i] = 0.; - } - } -} - -template -__global__ void GPUPRROIPoolBackward(const int nthreads, - const T* in_data, - const T* input_rois, - const T* output_grad_data, - const float spatial_scale, - const int input_channels, - const int height, - const int width, - const int output_channels, - const int pooled_height, - const int pooled_width, - const int* rois_batch_id_data, - T* input_grad_data, - const T* out_data, - T* input_roi_grad_data) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - int offset = blockDim.x * gridDim.x; - for (int i = index; i < nthreads; i += offset) { - // The output is in order (n, c, ph, pw) - int pw = i % pooled_width; - int ph = (i / 
pooled_width) % pooled_height; - int c = (i / pooled_width / pooled_height) % output_channels; - int n = i / pooled_width / pooled_height / output_channels; - - // set roi_batch_id - int roi_batch_id = rois_batch_id_data[n]; - int input_channel = c; - int input_offset = - (roi_batch_id * input_channels + input_channel) * height * width; - T* offset_input_grad_data = input_grad_data + input_offset; - const T* offset_output_grad_data = output_grad_data + i; - - // [start, end) interval for spatial sampling - const T* offset_input_rois = input_rois + n * 4; - T roi_start_w = static_cast(offset_input_rois[0]) * spatial_scale; - T roi_start_h = static_cast(offset_input_rois[1]) * spatial_scale; - T roi_end_w = static_cast(offset_input_rois[2]) * spatial_scale; - T roi_end_h = static_cast(offset_input_rois[3]) * spatial_scale; - T* offset_input_roi_grad_data = input_roi_grad_data + n * 4; - - T roi_width = max(roi_end_w - roi_start_w, static_cast(0.0)); - T roi_height = max(roi_end_h - roi_start_h, static_cast(0.0)); - - // Compute w and h at input feature map - T bin_size_h = roi_height / static_cast(pooled_height); - T bin_size_w = roi_width / static_cast(pooled_width); - - T win_start_w = roi_start_w + bin_size_w * pw; - T win_start_h = roi_start_h + bin_size_h * ph; - T win_end_w = win_start_w + bin_size_w; - T win_end_h = win_start_h + bin_size_h; - - T win_size = max(static_cast(0.0), bin_size_w * bin_size_h); - int s_w = floor(win_start_w); - int e_w = ceil(win_end_w); - int s_h = floor(win_start_h); - int e_h = ceil(win_end_h); - - T sum_out = win_size == static_cast(0.) - ? static_cast(0.) - : *offset_output_grad_data / win_size; - - for (int w_iter = s_w; w_iter < e_w; ++w_iter) { - for (int h_iter = s_h; h_iter < e_h; ++h_iter) { - PrRoIPoolingMatDistributeDiff( - offset_input_grad_data, - sum_out, - h_iter, - w_iter, - h_iter + 1, - w_iter + 1, - max(win_start_h, static_cast(h_iter)), - max(win_start_w, static_cast(w_iter)), - min(win_end_h, static_cast(h_iter) + static_cast(1.0)), - min(win_end_w, static_cast(w_iter) + static_cast(1.0)), - height, - width); - } - } - - const T* offset_out_data = out_data + i; - const T* offset_in_data = in_data + input_offset; - PrRoIPoolingCoorBackward(s_w, - e_w, - s_h, - e_h, - width, - height, - win_start_w, - win_start_h, - win_end_w, - win_end_h, - pw, - ph, - pooled_width, - pooled_height, - win_size, - spatial_scale, - offset_in_data, - offset_out_data, - offset_input_roi_grad_data, - offset_output_grad_data); - } -} - -template -class GPUPRROIPoolOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* rois = ctx.Input("ROIs"); - auto* out = ctx.Output("Out"); - - auto pooled_height = ctx.Attr("pooled_height"); - auto pooled_width = ctx.Attr("pooled_width"); - auto spatial_scale = ctx.Attr("spatial_scale"); - - auto in_dims = in->dims(); - int batch_size = in_dims[0]; - int input_channels = in_dims[1]; - auto output_channels = input_channels; - int height = in_dims[2]; - int width = in_dims[3]; - - int rois_num = rois->dims()[0]; - if (rois_num == 0) return; - - // set rois batch id - phi::DenseTensor rois_batch_id_list; - rois_batch_id_list.Resize({rois_num}); - int* rois_batch_id_data = - rois_batch_id_list.mutable_data(platform::CPUPlace()); - - if (ctx.HasInput("BatchRoINums") || rois->lod().empty()) { - auto* batchroinum = ctx.Input("BatchRoINums"); - phi::DenseTensor batch_index_cpu; - framework::TensorCopySync( - *batchroinum, 
platform::CPUPlace(), &batch_index_cpu); - - int rois_batch_size = batchroinum->dims()[0]; - auto* batch_index = batch_index_cpu.data(); - size_t c = 0; - for (int n = 0; n < rois_batch_size; ++n) { - for (int64_t k = 0; k < batch_index[n]; ++k) { - rois_batch_id_data[c] = n; - c = c + 1; - } - } - - } else { - auto rois_lod = rois->lod().back(); - int rois_batch_size = rois_lod.size() - 1; - PADDLE_ENFORCE_EQ( - rois_batch_size, - batch_size, - platform::errors::InvalidArgument( - "The rois_batch_size and input(X) batch_size must be the same.")); - int rois_num_with_lod = rois_lod[rois_batch_size]; - PADDLE_ENFORCE_EQ( - rois_num, - rois_num_with_lod, - platform::errors::InvalidArgument( - "The rois_num from input and lod must be the same.")); - - for (int n = 0; n < rois_batch_size; ++n) { - for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { - rois_batch_id_data[i] = n; - } - } - } - - int output_size = out->numel(); - int blocks = NumBlocks(output_size); - int threads = kNumCUDAThreads; - - auto cplace = platform::CPUPlace(); - auto& dev_ctx = ctx.cuda_device_context(); - int bytes = rois_batch_id_list.numel() * sizeof(int); - auto roi_ptr = memory::Alloc( - dev_ctx.GetPlace(), - bytes, - phi::Stream(reinterpret_cast(dev_ctx.stream()))); - int* roi_id_data = reinterpret_cast(roi_ptr->ptr()); - const auto gplace = ctx.GetPlace(); - memory::Copy(gplace, - roi_id_data, - cplace, - rois_batch_id_data, - bytes, - dev_ctx.stream()); - - // call cuda kernel function - GPUPRROIPoolForward<<>>( - output_size, - in->data(), - rois->data(), - spatial_scale, - input_channels, - height, - width, - output_channels, - pooled_height, - pooled_width, - roi_id_data, - out->mutable_data(ctx.GetPlace())); - } -}; - -template -class GPUPRROIPoolGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* rois = ctx.Input("ROIs"); - auto* out = ctx.Input("Out"); - - auto* output_grad = - ctx.Input(framework::GradVarName("Out")); - auto* input_grad = - ctx.Output(framework::GradVarName("X")); - auto* input_roi_grad = - ctx.Output(framework::GradVarName("ROIs")); - - auto pooled_height = ctx.Attr("pooled_height"); - auto pooled_width = ctx.Attr("pooled_width"); - auto spatial_scale = ctx.Attr("spatial_scale"); - - int rois_num = rois->dims()[0]; - int input_channels = in->dims()[1]; - auto output_channels = input_channels; - int height = in->dims()[2]; - int width = in->dims()[3]; - - if (input_grad || input_roi_grad) { - // set roi batch id - phi::DenseTensor rois_batch_id_list; - rois_batch_id_list.Resize({rois_num}); - int* rois_batch_id_data = - rois_batch_id_list.mutable_data(platform::CPUPlace()); - - if (ctx.HasInput("BatchRoINums") || rois->lod().empty()) { - auto* batchroinum = ctx.Input("BatchRoINums"); - phi::DenseTensor batch_index_cpu; - framework::TensorCopySync( - *batchroinum, platform::CPUPlace(), &batch_index_cpu); - - int rois_batch_size = batchroinum->dims()[0]; - auto* batch_index = batch_index_cpu.data(); - size_t c = 0; - for (int n = 0; n < rois_batch_size; ++n) { - for (int64_t k = 0; k < batch_index[n]; ++k) { - rois_batch_id_data[c] = n; - c = c + 1; - } - } - } else { - PADDLE_ENFORCE_EQ(rois->lod().empty(), - false, - platform::errors::InvalidArgument( - "the lod of Input ROIs should not be empty when " - "BatchRoINums is None!")); - auto rois_lod = rois->lod().back(); - int rois_batch_size = rois_lod.size() - 1; - for (int n = 0; n < rois_batch_size; ++n) { - for (size_t i = 
rois_lod[n]; i < rois_lod[n + 1]; ++i) { - rois_batch_id_data[i] = n; - } - } - } - - auto cplace = platform::CPUPlace(); - auto& dev_ctx = ctx.cuda_device_context(); - int bytes = rois_batch_id_list.numel() * sizeof(int); - auto roi_ptr = memory::Alloc( - dev_ctx.GetPlace(), - bytes, - phi::Stream(reinterpret_cast(dev_ctx.stream()))); - int* roi_id_data = reinterpret_cast(roi_ptr->ptr()); - const auto gplace = ctx.GetPlace(); - memory::Copy(gplace, - roi_id_data, - cplace, - rois_batch_id_data, - bytes, - dev_ctx.stream()); - - input_grad->mutable_data(ctx.GetPlace()); - phi::funcs::SetConstant set_zero; - set_zero(ctx.cuda_device_context(), input_grad, static_cast(0)); - input_roi_grad->mutable_data(ctx.GetPlace()); - set_zero(ctx.cuda_device_context(), input_roi_grad, static_cast(0)); - - int output_grad_size = output_grad->numel(); - int blocks = NumBlocks(output_grad_size); - int threads = kNumCUDAThreads; - - if (output_grad_size > 0) { - GPUPRROIPoolBackward<<>>( - output_grad_size, - in->data(), - rois->data(), - output_grad->data(), - spatial_scale, - input_channels, - height, - width, - output_channels, - pooled_height, - pooled_width, - roi_id_data, - input_grad->mutable_data(ctx.GetPlace()), - out->data(), - input_roi_grad->mutable_data(ctx.GetPlace())); - } - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL( - prroi_pool, GPU, ALL_LAYOUT, ops::GPUPRROIPoolOpKernel, float, double) {} -PD_REGISTER_STRUCT_KERNEL(prroi_pool_grad, - GPU, - ALL_LAYOUT, - ops::GPUPRROIPoolGradOpKernel, - float, - double) {} diff --git a/paddle/fluid/operators/prroi_pool_op.h b/paddle/fluid/operators/prroi_pool_op.h deleted file mode 100644 index e2417a071ce88..0000000000000 --- a/paddle/fluid/operators/prroi_pool_op.h +++ /dev/null @@ -1,653 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/math_function.h" -#if defined(__NVCC__) || defined(__HIPCC__) -#include "paddle/phi/backends/gpu/gpu_primitives.h" -#endif - -namespace paddle { -namespace operators { - -template -inline HOSTDEVICE T PrRoIPoolingGetData(const T* data, - const int h, - const int w, - const int height, - const int width) { - bool overflow = (h < 0) || (w < 0) || (h >= height) || (w >= width); - T retVal = overflow ? 
0.0f : data[h * width + w]; - return retVal; -} - -template -inline HOSTDEVICE T PrRoIPoolingMatCalculation(const T* this_data, - const int s_h, - const int s_w, - const int e_h, - const int e_w, - const T y0, - const T x0, - const T y1, - const T x1, - const int h0, - const int w0) { - T alpha, beta, lim_alpha, lim_beta, tmp; - T sum_out = 0; - - alpha = x0 - static_cast(s_w); - beta = y0 - static_cast(s_h); - lim_alpha = x1 - static_cast(s_w); - lim_beta = y1 - static_cast(s_h); - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - sum_out += PrRoIPoolingGetData(this_data, s_h, s_w, h0, w0) * tmp; - - alpha = static_cast(e_w) - x1; - lim_alpha = static_cast(e_w) - x0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - sum_out += PrRoIPoolingGetData(this_data, s_h, e_w, h0, w0) * tmp; - - alpha = x0 - static_cast(s_w); - beta = static_cast(e_h) - y1; - lim_alpha = x1 - static_cast(s_w); - lim_beta = static_cast(e_h) - y0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - sum_out += PrRoIPoolingGetData(this_data, e_h, s_w, h0, w0) * tmp; - - alpha = static_cast(e_w) - x1; - lim_alpha = static_cast(e_w) - x0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - sum_out += PrRoIPoolingGetData(this_data, e_h, e_w, h0, w0) * tmp; - - return sum_out; -} - -#if defined(__NVCC__) || defined(__HIPCC__) -template -DEVICE void PrRoIPoolingDistributeDiff(T* diff, - const T top_diff, - const int h, - const int w, - const int height, - const int width, - const T coeff) { - bool overflow = (h < 0) || (w < 0) || (h >= height) || (w >= width); - if (!overflow) { - phi::CudaAtomicAdd(diff + h * width + w, top_diff * coeff); - } -} -#else -template -inline HOSTDEVICE void PrRoIPoolingDistributeDiff(T* diff, - const T top_diff, - const int h, - const int w, - const int height, - const int width, - const T coeff) { - bool overflow = (h < 0) || (w < 0) || (h >= height) || (w >= width); - if (!overflow) { - *(diff + h * width + w) += top_diff * coeff; - } -} -#endif - -template -HOSTDEVICE void PrRoIPoolingMatDistributeDiff(T* diff, - const T top_diff, - const int s_h, - const int s_w, - const int e_h, - const int e_w, - const T y0, - const T x0, - const T y1, - const T x1, - const int h0, - const int w0) { - T alpha, beta, lim_alpha, lim_beta, tmp; - - alpha = x0 - static_cast(s_w); - beta = y0 - static_cast(s_h); - lim_alpha = x1 - static_cast(s_w); - lim_beta = y1 - static_cast(s_h); - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - PrRoIPoolingDistributeDiff(diff, top_diff, s_h, s_w, h0, w0, tmp); - - alpha = static_cast(e_w) - x1; - lim_alpha = static_cast(e_w) - x0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - PrRoIPoolingDistributeDiff(diff, top_diff, s_h, e_w, h0, w0, tmp); - - alpha = x0 - static_cast(s_w); - beta = static_cast(e_h) - y1; - lim_alpha = x1 - static_cast(s_w); - lim_beta = static_cast(e_h) - y0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - 
alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - PrRoIPoolingDistributeDiff(diff, top_diff, e_h, s_w, h0, w0, tmp); - - alpha = static_cast(e_w) - x1; - lim_alpha = static_cast(e_w) - x0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - PrRoIPoolingDistributeDiff(diff, top_diff, e_h, e_w, h0, w0, tmp); -} - -#if defined(__NVCC__) || defined(__HIPCC__) -template -DEVICE void AccumulateRois(T* offset, T data) { - phi::CudaAtomicAdd(offset, data); -} -#else -template -inline HOSTDEVICE void AccumulateRois(T* offset, T data) { - *offset += data; -} -#endif - -#if defined(__NVCC__) || defined(__HIPCC__) -template -DEVICE T MaxFunctor(const T x, const T y) { - return max(x, y); -} -template -DEVICE T MinFunctor(const T x, const T y) { - return min(x, y); -} -#else -template -inline HOSTDEVICE T MaxFunctor(const T x, const T y) { - return std::max(x, y); -} -template -inline HOSTDEVICE T MinFunctor(const T x, const T y) { - return std::max(x, y); -} -#endif - -template -inline HOSTDEVICE static T PrRoIPoolingGetCoeff(T dh, T dw) { - dw = dw > 0 ? dw : -dw; - dh = dh > 0 ? dh : -dh; - return (1.0f - dh) * (1.0f - dw); -} - -template -inline HOSTDEVICE static T PrRoIPoolingInterpolation( - const T* data, const H h, const W w, const int height, const int width) { - T retVal = 0.0f; - int h1 = floorf(h); - int w1 = floorf(w); - retVal += - PrRoIPoolingGetData(data, h1, w1, height, width) * - PrRoIPoolingGetCoeff(h - static_cast(h1), w - static_cast(w1)); - h1 = floorf(h) + 1; - w1 = floorf(w); - retVal += - PrRoIPoolingGetData(data, h1, w1, height, width) * - PrRoIPoolingGetCoeff(h - static_cast(h1), w - static_cast(w1)); - h1 = floorf(h); - w1 = floorf(w) + 1; - retVal += - PrRoIPoolingGetData(data, h1, w1, height, width) * - PrRoIPoolingGetCoeff(h - static_cast(h1), w - static_cast(w1)); - h1 = floorf(h) + 1; - w1 = floorf(w) + 1; - retVal += - PrRoIPoolingGetData(data, h1, w1, height, width) * - PrRoIPoolingGetCoeff(h - static_cast(h1), w - static_cast(w1)); - return retVal; -} - -template -inline HOSTDEVICE T PrRoIPoolingSingleCoorIntegral(T s, T t, T c1, T c2) { - return 0.5f * (t * t - s * s) * c2 + - (t - 0.5f * t * t - s + 0.5f * s * s) * c1; -} - -template -inline HOSTDEVICE void PrRoIPoolingCoorBackward(int s_w, - int e_w, - int s_h, - int e_h, - int width, - int height, - T win_start_w, - T win_start_h, - T win_end_w, - T win_end_h, - int pw, - int ph, - const int pooled_width, - const int pooled_height, - T win_size, - const float spatial_scale, - const T* this_bottom_data, - const T* this_top_data, - T* this_data_grad, - const T* this_out_grad) { - T g_x1_y = 0.f; - T g_x2_y = 0.f; - T g_x_y1 = 0.f; - T g_x_y2 = 0.f; - - for (int h_iter = s_h; h_iter < e_h; ++h_iter) { - g_x1_y += PrRoIPoolingSingleCoorIntegral( - MaxFunctor(win_start_h, static_cast(h_iter)) - h_iter, - MinFunctor(win_end_h, static_cast(h_iter + 1)) - h_iter, - PrRoIPoolingInterpolation( - this_bottom_data, h_iter, win_start_w, height, width), - PrRoIPoolingInterpolation( - this_bottom_data, h_iter + 1, win_start_w, height, width)); - - g_x2_y += PrRoIPoolingSingleCoorIntegral( - MaxFunctor(win_start_h, static_cast(h_iter)) - h_iter, - MinFunctor(win_end_h, static_cast(h_iter + 1)) - h_iter, - PrRoIPoolingInterpolation( - this_bottom_data, h_iter, win_end_w, height, width), - PrRoIPoolingInterpolation( - this_bottom_data, h_iter + 1, win_end_w, height, 
width)); - } - - for (int w_iter = s_w; w_iter < e_w; ++w_iter) { - g_x_y1 += PrRoIPoolingSingleCoorIntegral( - MaxFunctor(win_start_w, static_cast(w_iter)) - w_iter, - MinFunctor(win_end_w, static_cast(w_iter + 1)) - w_iter, - PrRoIPoolingInterpolation( - this_bottom_data, win_start_h, w_iter, height, width), - PrRoIPoolingInterpolation( - this_bottom_data, win_start_h, w_iter + 1, height, width)); - - g_x_y2 += PrRoIPoolingSingleCoorIntegral( - MaxFunctor(win_start_w, static_cast(w_iter)) - w_iter, - MinFunctor(win_end_w, static_cast(w_iter + 1)) - w_iter, - PrRoIPoolingInterpolation( - this_bottom_data, win_end_h, w_iter, height, width), - PrRoIPoolingInterpolation( - this_bottom_data, win_end_h, w_iter + 1, height, width)); - } - - float partial_x1 = -g_x1_y + (win_end_h - win_start_h) * (*this_top_data); - float partial_y1 = -g_x_y1 + (win_end_w - win_start_w) * (*this_top_data); - float partial_x2 = g_x2_y - (win_end_h - win_start_h) * (*this_top_data); - float partial_y2 = g_x_y2 - (win_end_w - win_start_w) * (*this_top_data); - - partial_x1 = partial_x1 / win_size * spatial_scale; - partial_x2 = partial_x2 / win_size * spatial_scale; - partial_y1 = partial_y1 / win_size * spatial_scale; - partial_y2 = partial_y2 / win_size * spatial_scale; - - AccumulateRois( - this_data_grad + 0, - (partial_x1 * (1.0 - static_cast(pw) / pooled_width) + - partial_x2 * (1.0 - static_cast(pw + 1) / pooled_width)) * - (*this_out_grad)); - AccumulateRois( - this_data_grad + 1, - (partial_y1 * (1.0 - static_cast(ph) / pooled_height) + - partial_y2 * (1.0 - static_cast(ph + 1) / pooled_height)) * - (*this_out_grad)); - AccumulateRois(this_data_grad + 2, - (partial_x2 * static_cast(pw + 1) / pooled_width + - partial_x1 * static_cast(pw) / pooled_width) * - (*this_out_grad)); - AccumulateRois(this_data_grad + 3, - (partial_y2 * static_cast(ph + 1) / pooled_height + - partial_y1 * static_cast(ph) / pooled_height) * - (*this_out_grad)); -} - -template -class CPUPRROIPoolOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* rois = ctx.Input("ROIs"); - auto* out = ctx.Output("Out"); - - auto pooled_height = ctx.Attr("pooled_height"); - auto pooled_width = ctx.Attr("pooled_width"); - auto spatial_scale = ctx.Attr("spatial_scale"); - auto in_dims = in->dims(); - int batch_size = in_dims[0]; - int input_channels = in_dims[1]; - auto output_channels = input_channels; - int height = in_dims[2]; - int width = in_dims[3]; - int rois_num = rois->dims()[0]; - if (rois_num == 0) return; - - auto in_stride = phi::stride(in_dims); - auto out_stride = phi::stride(out->dims()); - - const T* input_data = in->data(); - - phi::DenseTensor rois_batch_id_list; - rois_batch_id_list.Resize({rois_num}); - int* rois_batch_id_data = - rois_batch_id_list.mutable_data(ctx.GetPlace()); - if (ctx.HasInput("BatchRoINums") || rois->lod().empty()) { - auto* batchroinum = ctx.Input("BatchRoINums"); - auto* batch_index = batchroinum->data(); - int rois_batch_size = batchroinum->dims()[0]; - size_t c = 0; - for (int n = 0; n < rois_batch_size; ++n) { - for (int64_t k = 0; k < batch_index[n]; ++k) { - rois_batch_id_data[c] = n; - c = c + 1; - } - } - } else { - PADDLE_ENFORCE_EQ(rois->lod().empty(), - false, - platform::errors::InvalidArgument( - "The lod of Input ROIs should not be empty when " - "BatchRoINums is None!")); - auto rois_lod = rois->lod().back(); - int rois_batch_size = rois_lod.size() - 1; - PADDLE_ENFORCE_EQ(rois_batch_size, 
- batch_size, - platform::errors::InvalidArgument( - "The rois_batch_size and input(X)'s " - "batch_size should be the same but received" - "rois_batch_size: %d and batch_size: %d", - rois_batch_size, - batch_size)); - int rois_num_with_lod = rois_lod[rois_batch_size]; - PADDLE_ENFORCE_EQ( - rois_num_with_lod, - rois_num, - platform::errors::InvalidArgument("The rois_num from input should be " - "equal to the rois_num from lod, " - "but received rois_num from input: " - "%d and the rois_num from lod: %d.", - rois_num_with_lod, - rois_num)); - - // calculate batch id index for each roi according to LoD - for (int n = 0; n < rois_batch_size; ++n) { - for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { - rois_batch_id_data[i] = n; - } - } - } - - T* output_data = out->mutable_data(ctx.GetPlace()); - const T* input_rois = rois->data(); - // calculate prroipooling, parallel processing can be implemented per ROI - for (int n = 0; n < rois_num; ++n) { - // set roi batch id - int roi_batch_id = rois_batch_id_data[n]; - - // [start, end) interval for spatial sampling - const T* offset_input_rois = input_rois + n * 4; - T roi_start_w = static_cast(offset_input_rois[0]) * spatial_scale; - T roi_start_h = static_cast(offset_input_rois[1]) * spatial_scale; - T roi_end_w = static_cast(offset_input_rois[2]) * spatial_scale; - T roi_end_h = static_cast(offset_input_rois[3]) * spatial_scale; - - T roi_width = std::max(roi_end_w - roi_start_w, static_cast(0.0)); - T roi_height = std::max(roi_end_h - roi_start_h, static_cast(0.0)); - - // Compute w and h at input feature map - T bin_size_h = roi_height / static_cast(pooled_height); - T bin_size_w = roi_width / static_cast(pooled_width); - T win_size = std::max(static_cast(0.0), bin_size_w * bin_size_h); - - // calculate each pixel of the output feature map. 
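// In the nested loops that follow, each output element (n, c, ph, pw) is the
// exact integral of the bilinearly interpolated input feature over the bin
// window [win_start_h, win_end_h) x [win_start_w, win_end_w), accumulated
// cell by cell via PrRoIPoolingMatCalculation and then normalized by the bin
// area win_size; bins with zero area produce an output of 0.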
- int out_roi_offset = n * out_stride[0]; - for (int c = 0; c < output_channels; ++c) { - // per category - int out_plane_offset = out_roi_offset + c * out_stride[1]; - for (int ph = 0; ph < pooled_height; ++ph) { - int out_row_offset = out_plane_offset + ph * out_stride[2]; - for (int pw = 0; pw < pooled_width; ++pw) { - // calculate w and h at input feature map - T win_start_h = static_cast(ph) * bin_size_h + roi_start_h; - T win_start_w = static_cast(pw) * bin_size_w + roi_start_w; - T win_end_h = win_start_h + bin_size_h; - T win_end_w = win_start_w + bin_size_w; - // Add roi offsets and clip to input boundaries - int s_w = std::floor(win_start_w); - int e_w = std::ceil(win_end_w); - int s_h = std::floor(win_start_h); - int e_h = std::ceil(win_end_h); - - int output_index = out_row_offset + pw; - int input_channel = c; - int input_plane_offset = - roi_batch_id * in_stride[0] + input_channel * in_stride[1]; - const T* offset_input_data = input_data + input_plane_offset; - T sum_out = 0.; - - if (win_size > static_cast(0.0)) { - for (int w_iter = s_w; w_iter < e_w; ++w_iter) { - for (int h_iter = s_h; h_iter < e_h; ++h_iter) { - sum_out += PrRoIPoolingMatCalculation( - offset_input_data, - h_iter, - w_iter, - h_iter + 1, - w_iter + 1, - std::max(win_start_h, static_cast(h_iter)), - std::max(win_start_w, static_cast(w_iter)), - std::min(win_end_h, - static_cast(h_iter) + static_cast(1.0)), - std::min(win_end_w, - static_cast(w_iter) + static_cast(1.0)), - height, - width); - } - } - - output_data[output_index] = sum_out / win_size; - } else { - output_data[output_index] = 0.; - } - } - } - } - } - } -}; - -template -class CPUPRROIPoolGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* out = ctx.Input("Out"); - auto* rois = ctx.Input("ROIs"); - auto* output_grad = - ctx.Input(framework::GradVarName("Out")); - auto* input_grad = - ctx.Output(framework::GradVarName("X")); - auto* input_roi_grad = - ctx.Output(framework::GradVarName("ROIs")); - - auto pooled_height = ctx.Attr("pooled_height"); - auto pooled_width = ctx.Attr("pooled_width"); - auto spatial_scale = ctx.Attr("spatial_scale"); - - if (input_grad || input_roi_grad) { - auto in_dims = in->dims(); - auto* in_data = in->data(); - auto* out_data = out->data(); - - int input_channels = in_dims[1]; - auto output_channels = input_channels; - int height = in_dims[2]; - int width = in_dims[3]; - int rois_num = rois->dims()[0]; - - // set roi batch id - phi::DenseTensor rois_batch_id_list; - rois_batch_id_list.Resize({rois_num}); - int* rois_batch_id_data = - rois_batch_id_list.mutable_data(ctx.GetPlace()); - if (ctx.HasInput("BatchRoINums") || rois->lod().empty()) { - auto* batchroinum = ctx.Input("BatchRoINums"); - auto* batch_index = batchroinum->data(); - int rois_batch_size = batchroinum->dims()[0]; - size_t c = 0; - for (int n = 0; n < rois_batch_size; ++n) { - for (int64_t k = 0; k < batch_index[n]; ++k) { - rois_batch_id_data[c] = n; - c = c + 1; - } - } - } else { - auto rois_lod = rois->lod().back(); - int rois_batch_size = rois_lod.size() - 1; - // calculate batch id index for each roi according to LoD - for (int n = 0; n < rois_batch_size; ++n) { - for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { - rois_batch_id_data[i] = n; - } - } - } - - const T* input_rois = rois->data(); - const T* output_grad_data = output_grad->data(); - - input_grad->mutable_data(ctx.GetPlace()); - 
input_roi_grad->mutable_data(ctx.GetPlace()); - // set gradient of X to be 0. before backpropagate. - phi::funcs::SetConstant set_zero; - set_zero(ctx.template device_context(), - input_grad, - static_cast(0)); - set_zero(ctx.template device_context(), - input_roi_grad, - static_cast(0)); - - T* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); - T* input_roi_grad_data = input_roi_grad->mutable_data(ctx.GetPlace()); - - // backpropagate gradient per output pixel - int output_grad_size = output_grad->numel(); - for (int i = 0; i < output_grad_size; ++i) { - // The output is in order (n, c, ph, pw) - int pw = i % pooled_width; - int ph = (i / pooled_width) % pooled_height; - int c = (i / pooled_width / pooled_height) % output_channels; - int n = i / pooled_width / pooled_height / output_channels; - - // set roi_batch_id - int roi_batch_id = rois_batch_id_data[n]; - int input_channel = c; - int input_offset = - (roi_batch_id * input_channels + input_channel) * height * width; - T* offset_input_grad_data = input_grad_data + input_offset; - const T* offset_output_grad_data = output_grad_data + i; - const T* offset_out_data = out_data + i; - - // [start, end) interval for spatial sampling - const T* offset_input_rois = input_rois + n * 4; - T roi_start_w = static_cast(offset_input_rois[0]) * spatial_scale; - T roi_start_h = static_cast(offset_input_rois[1]) * spatial_scale; - T roi_end_w = static_cast(offset_input_rois[2]) * spatial_scale; - T roi_end_h = static_cast(offset_input_rois[3]) * spatial_scale; - T* offset_input_roi_grad_data = input_roi_grad_data + n * 4; - - T roi_width = std::max(roi_end_w - roi_start_w, static_cast(0.0)); - T roi_height = std::max(roi_end_h - roi_start_h, static_cast(0.0)); - - // Compute w and h at input feature map - T bin_size_h = roi_height / static_cast(pooled_height); - T bin_size_w = roi_width / static_cast(pooled_width); - - T win_start_w = roi_start_w + bin_size_w * pw; - T win_start_h = roi_start_h + bin_size_h * ph; - T win_end_w = win_start_w + bin_size_w; - T win_end_h = win_start_h + bin_size_h; - - T win_size = std::max(static_cast(0.0), bin_size_w * bin_size_h); - - T sum_out = win_size == static_cast(0.) - ? static_cast(0.) 
- : *offset_output_grad_data / win_size; - - int s_w = std::floor(win_start_w); - int e_w = std::ceil(win_end_w); - int s_h = std::floor(win_start_h); - int e_h = std::ceil(win_end_h); - - for (int w_iter = s_w; w_iter < e_w; ++w_iter) { - for (int h_iter = s_h; h_iter < e_h; ++h_iter) { - PrRoIPoolingMatDistributeDiff( - offset_input_grad_data, - sum_out, - h_iter, - w_iter, - h_iter + 1, - w_iter + 1, - std::max(win_start_h, static_cast(h_iter)), - std::max(win_start_w, static_cast(w_iter)), - std::min(win_end_h, - static_cast(h_iter) + static_cast(1.0)), - std::min(win_end_w, - static_cast(w_iter) + static_cast(1.0)), - height, - width); - } - } - - const T* offset_in_data = in_data + input_offset; - PrRoIPoolingCoorBackward(s_w, - e_w, - s_h, - e_h, - width, - height, - win_start_w, - win_start_h, - win_end_w, - win_end_h, - pw, - ph, - pooled_width, - pooled_height, - win_size, - spatial_scale, - offset_in_data, - offset_out_data, - offset_input_roi_grad_data, - offset_output_grad_data); - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/random_crop_op.cc b/paddle/fluid/operators/random_crop_op.cc deleted file mode 100644 index 62e805e323f84..0000000000000 --- a/paddle/fluid/operators/random_crop_op.cc +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
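The prroi_pool kernels and helpers deleted above compute, for every output bin, the integral of the bilinearly interpolated feature map over the continuous bin window, divided by the bin area. The sketch below is only a rough illustration of that quantity, not the removed implementation: the helper names BilinearAt and PrRoIBin are made up here, and the integral is approximated by dense sampling instead of the closed-form per-cell weights used by PrRoIPoolingMatCalculation.

#include <cmath>
#include <cstdio>
#include <vector>

// Bilinear interpolation at a continuous coordinate (h, w); out-of-range taps
// read 0, matching the overflow handling of PrRoIPoolingGetData.
float BilinearAt(const std::vector<float>& data, int height, int width, float h, float w) {
  auto at = [&](int y, int x) -> float {
    return (y < 0 || x < 0 || y >= height || x >= width) ? 0.f : data[y * width + x];
  };
  const int h0 = static_cast<int>(std::floor(h));
  const int w0 = static_cast<int>(std::floor(w));
  const float dh = h - h0, dw = w - w0;
  return at(h0, w0) * (1 - dh) * (1 - dw) + at(h0, w0 + 1) * (1 - dh) * dw +
         at(h0 + 1, w0) * dh * (1 - dw) + at(h0 + 1, w0 + 1) * dh * dw;
}

// Average of the interpolated feature over [hs, he) x [ws, we), approximated on a grid.
float PrRoIBin(const std::vector<float>& data, int height, int width,
               float hs, float he, float ws, float we, int samples = 64) {
  if ((he - hs) <= 0.f || (we - ws) <= 0.f) return 0.f;  // empty bin -> 0, as in the removed kernel
  double sum = 0.0;
  for (int i = 0; i < samples; ++i)
    for (int j = 0; j < samples; ++j)
      sum += BilinearAt(data, height, width,
                        hs + (he - hs) * (i + 0.5f) / samples,
                        ws + (we - ws) * (j + 0.5f) / samples);
  return static_cast<float>(sum / (samples * samples));  // mean == integral / bin area
}

int main() {
  std::vector<float> feat(4 * 4, 1.f);  // 4x4 single-channel feature map of ones
  // Any bin fully inside a constant map averages to the constant value.
  std::printf("%f\n", PrRoIBin(feat, 4, 4, 0.5f, 2.5f, 0.5f, 2.5f));
  return 0;
}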
-#include "paddle/fluid/operators/random_crop_op.h" - -namespace paddle { -namespace operators { - -class RandomCropOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override { - auto shape = ctx->Attrs().Get>("shape"); - auto x_dim = ctx->GetInputDim("X"); - PADDLE_ENFORCE_GT( - x_dim.size(), - static_cast(shape.size()), - platform::errors::InvalidArgument( - "The dimensions of Input(X) must be greater than the length of " - "Attr(shape)," - "But received dimensions of Input(X) is [%d], receivecd length" - "of Attr(shape) is [%d].", - x_dim.size(), - static_cast(shape.size()))); - auto out_dim = phi::vectorize(x_dim); - for (size_t i = 1; i <= shape.size(); ++i) { - size_t x_i = x_dim.size() - i; - size_t shape_i = shape.size() - i; - if (ctx->IsRuntime() || - (x_dim[static_cast(x_i)] > 0 && shape[shape_i] > 0)) { - PADDLE_ENFORCE_GE( - x_dim[x_i], - shape[shape_i], - platform::errors::InvalidArgument( - "The dimensions of Input(X) must be larger than Attr(shape)," - "But received dimensions of Input(X) is [%d], received" - "size of Attr(shape) is [%d].", - x_dim[x_i], - shape[shape_i])); - } - out_dim[x_i] = shape[shape_i]; - } - ctx->SetOutputDim("Out", phi::make_ddim(out_dim)); - } - - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "A batch of instances to random crop."); - AddInput("Seed", "The random seed."); - AddOutput("Out", "The cropped instance batch."); - AddOutput("SeedOut", "The random seed after random cropping.") - .AsIntermediate(); - AddAttr>("shape", "The shape of a cropped instance."); - AddAttr("startup_seed", - "If the input 'Seed' is not initialized, the 'startup_seed' " - "will be used to replace it. Even so, the seed after random " - "crop will also be outputed to the 'SeedOut'.") - .SetDefault(0); - AddComment(R"DOC( - This operator takes a batch of instance, and do random cropping on each instance. - It means that cropping positions differs on each instance, which is determined - by an uniform random generator. All cropped instances have the same shape, which - is determined by the operator's attribute 'shape'. - )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - random_crop, - ops::RandomCropOp, - ops::RandomCropOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL(random_crop, - CPU, - ALL_LAYOUT, - ops::RandomCropKernel, - float, - int, - double, - uint8_t, - int16_t) {} diff --git a/paddle/fluid/operators/random_crop_op.cu b/paddle/fluid/operators/random_crop_op.cu deleted file mode 100644 index 33182dff93fa4..0000000000000 --- a/paddle/fluid/operators/random_crop_op.cu +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/random_crop_op.h" - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL(random_crop, - GPU, - ALL_LAYOUT, - ops::RandomCropKernel, - float, - int, - double, - uint8_t, - int16_t) {} diff --git a/paddle/fluid/operators/random_crop_op.h b/paddle/fluid/operators/random_crop_op.h deleted file mode 100644 index fc625826b9a91..0000000000000 --- a/paddle/fluid/operators/random_crop_op.h +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/for_range.h" -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include -#endif - -namespace paddle { -namespace operators { - -template -struct Random; - -template <> -struct Random { - using Engine = std::minstd_rand; - - template - using UniformIntDist = std::uniform_int_distribution; -}; - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -template <> -struct Random { - using Engine = thrust::minstd_rand; - - template - using UniformIntDist = thrust::uniform_int_distribution; -}; -#endif - -template -HOSTDEVICE inline void StridedMemcpy(const T* x, - const size_t* x_dims, - T* out, - const size_t* out_dims, - int i, - int rank, - size_t prod_x_remain, - size_t prod_out_remain, - const size_t* offsets) { - size_t x_dim_i = x_dims[i]; - size_t out_dim_i = out_dims[i]; - size_t x_stride = prod_x_remain / x_dim_i; - size_t out_stride = prod_out_remain / out_dim_i; - size_t offset_i = offsets[i]; - - if (i == rank - 1) { - x += offset_i; - for (size_t j = 0; j < out_dim_i; ++j) { - *out++ = *x++; - } - } else { - x += offset_i * x_stride; - for (size_t j = 0; j < out_dim_i; ++j) { - StridedMemcpy( - x, x_dims, out, out_dims, i + 1, rank, x_stride, out_stride, offsets); - x += x_stride; - out += out_stride; - } - } -} - -template -struct RandomCropFunctor { - const T* x_; - T* out_; - size_t x_dims_[9]; - size_t out_dims_[9]; - int num_batchsize_dims_; - int rank_; - int64_t seed_; - - size_t prod_batchsize_dims_; - size_t prod_x_ins_dims_; - size_t prod_out_ins_dims_; - - RandomCropFunctor(const T* x, - T* out, - const framework::DDim& x_dims, - const framework::DDim& out_dims, - int num_batchsize_dims, - int64_t seed) - : x_(x), - out_(out), - num_batchsize_dims_(num_batchsize_dims), - rank_(x_dims.size()), - seed_(seed) { - PADDLE_ENFORCE_EQ( - x_dims.size(), - out_dims.size(), - platform::errors::InvalidArgument( - 
"The dimensions of Input(X) must equal to be the dimensions" - "of Output(Out), but received dimensions of Input(X) is [%d]," - "received dimensions of Output(Out) is [%d].", - x_dims.size(), - out_dims.size())); - PADDLE_ENFORCE_GT( - rank_, - num_batchsize_dims_, - platform::errors::InvalidArgument( - "The dimensions of Input(X) must be greater than the diff" - "value of Input(X)'s dimensions minus Atrr(shape)'s dimensions," - "But received Input(X)'s dimensions is [%d], received value of" - "Input(X)'s dimensions minus Attr(shape)'s dimensions is [%d].", - rank_, - num_batchsize_dims_)); - prod_batchsize_dims_ = 1; - prod_x_ins_dims_ = 1; - prod_out_ins_dims_ = 1; - for (size_t i = 0; i < static_cast(rank_); ++i) { - size_t x_dim_i = x_dims[i]; - size_t out_dim_i = out_dims[i]; - x_dims_[i] = x_dim_i; - out_dims_[i] = out_dim_i; - if (i < static_cast(num_batchsize_dims_)) { - PADDLE_ENFORCE_EQ( - x_dim_i, - out_dim_i, - platform::errors::InvalidArgument( - "The first [%d] dimension value of Input(X) and Output(Out)" - "must be equal, but received the [%d] dimension value of" - "Input(X) and Output(Out) respectively are [%d] and [%d].", - num_batchsize_dims_, - i, - x_dim_i, - out_dim_i)); - prod_batchsize_dims_ *= x_dim_i; - } else { - prod_x_ins_dims_ *= x_dim_i; - prod_out_ins_dims_ *= out_dim_i; - } - } - } - - HOSTDEVICE void operator()(size_t ins_idx) { - typename Random::Engine engine(seed_); - engine.discard(ins_idx * (rank_ - num_batchsize_dims_)); - size_t offsets[9] = {}; - for (int i = num_batchsize_dims_; i < rank_; ++i) { - typename Random::template UniformIntDist dist( - 0, x_dims_[i] - out_dims_[i]); - offsets[i - num_batchsize_dims_] = dist(engine); - } - - const T* x = x_ + ins_idx * prod_x_ins_dims_; - T* out = out_ + ins_idx * prod_out_ins_dims_; - - StridedMemcpy(x, - x_dims_ + num_batchsize_dims_, - out, - out_dims_ + num_batchsize_dims_, - 0, - rank_ - num_batchsize_dims_, - prod_x_ins_dims_, - prod_out_ins_dims_, - offsets); - } -}; - -template -class RandomCropKernel : public framework::OpKernel { - public: - virtual void Compute(const framework::ExecutionContext& ctx) const { - int64_t seed = 0; - auto& seed_tensor = GET_DATA_SAFELY( - ctx.Input("Seed"), "Input", "Seed", "RandomCrop"); - if (seed_tensor.IsInitialized()) { - if (platform::is_cpu_place(seed_tensor.place())) { - seed = *seed_tensor.template data(); - } else { - LOG(WARNING) << "It is slow to place seed in GPU memory. 
Please verify " - "your program"; - phi::DenseTensor cpu_seed; - framework::TensorCopySync(seed_tensor, platform::CPUPlace(), &cpu_seed); - seed = *cpu_seed.data(); - } - } else { - VLOG(5) << "WARNING: The input 'Seed' is not initialized, use attribute " - "'startup_seed' instead."; - seed = ctx.Attr("startup_seed"); - } - auto shape = ctx.Attr>("shape"); - auto& x = GET_DATA_SAFELY( - ctx.Input("X"), "Input", "X", "RandomCrop"); - auto& out = GET_DATA_SAFELY( - ctx.Output("Out"), "Output", "Out", "RandomCrop"); - - int num_batchsize_dims = x.dims().size() - shape.size(); - RandomCropFunctor functor( - x.template data(), - out.template mutable_data(ctx.GetPlace()), - x.dims(), - out.dims(), - num_batchsize_dims, - seed); - platform::ForRange for_range( - ctx.template device_context(), - functor.prod_batchsize_dims_); - - for_range(functor); - - Random::Engine engine(seed); - engine.discard(functor.prod_batchsize_dims_ * - (functor.rank_ - functor.num_batchsize_dims_)); - *ctx.Output("SeedOut")->mutable_data( - phi::make_ddim({1}), platform::CPUPlace()) = engine(); - } -}; - -// TODO(fengjiayi): Backward of random crop op - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/space_to_depth_op.cc b/paddle/fluid/operators/space_to_depth_op.cc deleted file mode 100644 index da0c2e4a3cbb2..0000000000000 --- a/paddle/fluid/operators/space_to_depth_op.cc +++ /dev/null @@ -1,242 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
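The random_crop files deleted above draw, for every instance in the batch, an independent offset in each cropped dimension from a uniform integer distribution over [0, x_dim - out_dim] and then copy the selected window (StridedMemcpy). A rough CPU-only sketch of that per-instance scheme, with made-up shapes and without the Seed/SeedOut bookkeeping:

#include <cstdio>
#include <random>
#include <vector>

int main() {
  const int batch = 2, in_h = 4, in_w = 4, out_h = 2, out_w = 2;
  std::vector<int> x(batch * in_h * in_w);
  for (int i = 0; i < static_cast<int>(x.size()); ++i) x[i] = i;
  std::vector<int> out(batch * out_h * out_w);

  const unsigned seed = 42;  // stands in for the 'Seed' input / 'startup_seed' attribute
  for (int n = 0; n < batch; ++n) {
    // One engine per instance, advanced so each instance draws its own offsets,
    // mirroring engine.discard(ins_idx * num_cropped_dims) in the removed functor.
    std::minstd_rand engine(seed);
    engine.discard(static_cast<unsigned long long>(n) * 2);  // 2 cropped dims: H and W
    std::uniform_int_distribution<int> dist_h(0, in_h - out_h);
    std::uniform_int_distribution<int> dist_w(0, in_w - out_w);
    const int off_h = dist_h(engine), off_w = dist_w(engine);

    for (int i = 0; i < out_h; ++i)
      for (int j = 0; j < out_w; ++j)
        out[(n * out_h + i) * out_w + j] =
            x[(n * in_h + off_h + i) * in_w + off_w + j];
    std::printf("instance %d cropped at offset (%d, %d)\n", n, off_h, off_w);
  }
  return 0;
}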
*/ - -#include "paddle/fluid/operators/space_to_depth_op.h" - -#include -#include -#include - -#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" - -namespace paddle { -namespace operators { - -class SpaceToDepthOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), - platform::errors::InvalidArgument( - "Input(X) of SpaceToDepthOp should not be null.")); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - platform::errors::InvalidArgument( - "Output(Out) of SpaceToDepthOp should not be null.")); - - auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ( - x_dims.size(), - 4, - platform::errors::InvalidArgument("input should be a 4D tensor")); - auto blocksize = ctx->Attrs().Get("blocksize"); - - PADDLE_ENFORCE_GT(blocksize, - 1, - platform::errors::InvalidArgument( - "The blocksize should be Greater than 1")); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_GT(x_dims[1], - 0, - platform::errors::InvalidArgument( - "input channel should be Greater than 0")); - PADDLE_ENFORCE_GT(x_dims[2], - 0, - platform::errors::InvalidArgument( - "input Height should be Greater than 0")); - PADDLE_ENFORCE_GT(x_dims[3], - 0, - platform::errors::InvalidArgument( - "input Width should be Greater than 0")); - - PADDLE_ENFORCE_EQ( - x_dims[1] % (blocksize * blocksize), - 0, - platform::errors::InvalidArgument( - "input channel should be divisible of the square of " - "SpaceToDepthOp blocksize")); - PADDLE_ENFORCE_EQ(x_dims[2] % (blocksize), - 0, - platform::errors::InvalidArgument( - "input Height should be divisible of the square of " - "SpaceToDepthOp blocksize")); - PADDLE_ENFORCE_EQ(x_dims[3] % (blocksize), - 0, - platform::errors::InvalidArgument( - "input Width should be divisible of the square of " - "SpaceToDepthOp blocksize")); - } else { - if (x_dims[1] != -1) { - PADDLE_ENFORCE_GT(x_dims[1], - 0, - platform::errors::InvalidArgument( - "input channel should be Greater than 0")); - PADDLE_ENFORCE_EQ( - x_dims[1] % (blocksize * blocksize), - 0, - platform::errors::InvalidArgument( - "input channel should be divisible of the square of " - "SpaceToDepthOp blocksize")); - } - if (x_dims[2] != -1) { - PADDLE_ENFORCE_GT(x_dims[2], - 0, - platform::errors::InvalidArgument( - "input Height should be Greater than 0")); - PADDLE_ENFORCE_EQ( - x_dims[2] % (blocksize), - 0, - platform::errors::InvalidArgument( - "input Height should be divisible of the square of " - "SpaceToDepthOp blocksize")); - } - - if (x_dims[3] != -1) { - PADDLE_ENFORCE_GT(x_dims[3], - 0, - platform::errors::InvalidArgument( - "input Width should be Greater than 0")); - - PADDLE_ENFORCE_EQ( - x_dims[3] % (blocksize), - 0, - platform::errors::InvalidArgument( - "input Width should be divisible of the square of " - "SpaceToDepthOp blocksize")); - } - } - - VLOG(3) << "SpaceToDepthOp operator x.shape=" << x_dims - << "Attribute blocksize" << blocksize << std::endl; - - std::vector output_shape(4, 0); // [B,C,H,W] - output_shape[0] = x_dims[0]; - output_shape[1] = x_dims[1] * blocksize * blocksize; - output_shape[2] = x_dims[2] / blocksize; - output_shape[3] = x_dims[3] / blocksize; - - auto out_dims = phi::make_ddim(output_shape); - - ctx->SetOutputDim("Out", out_dims); - - if (x_dims[0] == out_dims[0]) { - // Only pass LoD when the first dimension of output and Input(X) - // are the same. 
- ctx->ShareLoD("X", /*->*/ "Out"); - } - } -}; - -class SpaceToDepthOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(Tensor). The input should be a 4D tensor B * C * W * H of " - "SpaceToDepthOp " - "operator."); - AddOutput("Out", - "(Tensor), The output should be a 4D tensor B * C2 * W2 * H2 of " - "SpaceToDepthOp operator."); - AddAttr( - "blocksize", - "(int64_t, default 2) blocksize used to do change Space To Depth.") - .SetDefault(2) - .GreaterThan(1); - AddComment(R"DOC( - reorg operator used in Yolo v2. - The equation is: C2 = C1/blocksize * blocksize, W2 = W1 * blocksize + offset % blocksize, H2 = H1 * blocksize + offset / blocksize, - - Reshape Input(X) into the shape according to Attr(blocksize). The - data in Input(X) are unchanged. - - Examples: - - 1. Given a 4-D tensor Input(X) with a shape [128, 2048, 26, 26], and the blocksize is 2, the reorg operator will transform Input(X) - into a 4-D tensor with shape [128, 2048, 13, 13] and leaving Input(X)'s data unchanged. - - )DOC"); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(SpaceToDepthGradOpNoBufferVarsInferer, "X"); - -template -class SpaceToDepthGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("space_to_depth_grad"); - - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetInput("X", this->Input("X")); - - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - - op->SetAttrMap(this->Attrs()); - } -}; - -class SpaceToDepthGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE( - ctx->HasInput("X"), - platform::errors::InvalidArgument("Input(X) shouldn't be null.")); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), - platform::errors::InvalidArgument( - "Input(Out@GRAD) shouldn't be null.")); - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - ctx.GetPlace()); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OPERATOR(space_to_depth, - ops::SpaceToDepthOp, - ops::SpaceToDepthOpMaker, - ops::SpaceToDepthGradOpMaker, - ops::SpaceToDepthGradOpMaker); -REGISTER_OPERATOR(space_to_depth_grad, - ops::SpaceToDepthGradOp, - ops::SpaceToDepthGradOpNoBufferVarsInferer); -PD_REGISTER_STRUCT_KERNEL(space_to_depth, - CPU, - ALL_LAYOUT, - ops::SpaceToDepthKernel, - int, - int64_t, - float, - double) {} -PD_REGISTER_STRUCT_KERNEL(space_to_depth_grad, - CPU, - ALL_LAYOUT, - ops::SpaceToDepthGradKernel, - int, - int64_t, - float, - double) {} diff --git a/paddle/fluid/operators/space_to_depth_op.cu b/paddle/fluid/operators/space_to_depth_op.cu deleted file mode 100644 index 7f62509ee7d2e..0000000000000 --- a/paddle/fluid/operators/space_to_depth_op.cu +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
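The space_to_depth (reorg) operator deleted above rearranges spatial blocks of size blocksize x blocksize into the channel dimension. A small sketch of the output-shape rule as enforced by SpaceToDepthOp::InferShape above, with illustrative dimensions:

#include <array>
#include <cassert>
#include <cstdio>

std::array<long, 4> SpaceToDepthShape(const std::array<long, 4>& in, long blocksize) {
  assert(blocksize > 1);                                      // blocksize must be greater than 1
  assert(in[1] % (blocksize * blocksize) == 0);               // C divisible by blocksize^2
  assert(in[2] % blocksize == 0 && in[3] % blocksize == 0);   // H and W divisible by blocksize
  return {in[0], in[1] * blocksize * blocksize, in[2] / blocksize, in[3] / blocksize};
}

int main() {
  const auto out = SpaceToDepthShape({128, 2048, 26, 26}, 2);
  std::printf("[%ld, %ld, %ld, %ld]\n", out[0], out[1], out[2], out[3]);  // [128, 8192, 13, 13]
  return 0;
}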
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/space_to_depth_op.h" - -namespace plat = paddle::platform; -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL(space_to_depth, - GPU, - ALL_LAYOUT, - ops::SpaceToDepthKernel, - int, - int64_t, - float, - double) {} -PD_REGISTER_STRUCT_KERNEL(space_to_depth_grad, - GPU, - ALL_LAYOUT, - ops::SpaceToDepthGradKernel, - int, - int64_t, - float, - double) {} diff --git a/paddle/fluid/operators/space_to_depth_op.h b/paddle/fluid/operators/space_to_depth_op.h deleted file mode 100644 index 18ff67c6132be..0000000000000 --- a/paddle/fluid/operators/space_to_depth_op.h +++ /dev/null @@ -1,131 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ -#ifndef PADDLE_FLUID_OPERATORS_SPACE_TO_DEPTH_OP_H_ -#define PADDLE_FLUID_OPERATORS_SPACE_TO_DEPTH_OP_H_ -#endif // PADDLE_FLUID_OPERATORS_SPACE_TO_DEPTH_OP_H_ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/for_range.h" - -namespace paddle { -namespace operators { - -template -class space_to_depth_compute { - public: - HOSTDEVICE space_to_depth_compute(const T *x, - int64_t w, - int64_t h, - int64_t c, - int64_t batch, - int64_t blocksize, - int64_t forward, - T *out) - : x_(x), - w_(w), - h_(h), - c_(c), - batch_(batch), - blocksize_(blocksize), - forward_(forward), - out_(out) {} - - HOSTDEVICE void operator()(int64_t in_index) { - int64_t out_c = c_ / (blocksize_ * blocksize_); - // calculate each dim position with index of tensor - int64_t b = in_index / (c_ * h_ * w_); - int64_t k = (in_index % (c_ * h_ * w_)) / (h_ * w_); - int64_t j = ((in_index % (c_ * h_ * w_)) % (h_ * w_)) / w_; - int64_t i = ((in_index % (c_ * h_ * w_)) % (h_ * w_)) % w_; - - int64_t c2 = k % out_c; - int64_t offset = k / out_c; - int64_t w2 = i * blocksize_ + offset % blocksize_; - int64_t h2 = j * blocksize_ + offset / blocksize_; - int64_t out_index = - w2 + w_ * blocksize_ * (h2 + h_ * blocksize_ * (c2 + out_c * b)); - if (forward_) - out_[out_index] = x_[in_index]; - else - out_[in_index] = x_[out_index]; - } - - private: - const T *x_; - int64_t w_, h_, c_, batch_, blocksize_, forward_; - T *out_; -}; - -template -class SpaceToDepthKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto *out = context.Output("Out"); - auto *x = context.Input("X"); - auto blocksize = context.Attr("blocksize"); - auto in_dims = x->dims(); - out->mutable_data(context.GetPlace(), x->type()); - - auto out_dims = out->dims(); - auto B = in_dims[0]; - auto C = in_dims[1]; - 
auto H = in_dims[2]; - auto W = in_dims[3]; - platform::ForRange for_range( - context.template device_context(), - static_cast(x->numel())); - - auto *x_data = x->data(); - auto *out_data = out->data(); - paddle::operators::space_to_depth_compute computer( - x_data, W, H, C, B, blocksize, 1, out_data); - for_range(computer); - - out->Resize(out_dims); - } -}; - -template -class SpaceToDepthGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto *d_out = - context.Input(framework::GradVarName("Out")); - auto *d_x = context.Output(framework::GradVarName("X")); - auto blocksize = context.Attr("blocksize"); - auto in_dims = d_x->dims(); - d_x->mutable_data(context.GetPlace(), d_out->type()); - - auto B = in_dims[0]; - auto C = in_dims[1]; - auto H = in_dims[2]; - auto W = in_dims[3]; - - platform::ForRange for_range( - context.template device_context(), - static_cast(d_x->numel())); - - auto *dx_data = d_x->data(); - auto *dout_data = d_out->data(); - - paddle::operators::space_to_depth_compute computer( - dout_data, W, H, C, B, blocksize, 0, dx_data); - for_range(computer); - - d_x->Resize(in_dims); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/squared_l2_distance_op.cc b/paddle/fluid/operators/squared_l2_distance_op.cc deleted file mode 100644 index 0f2f727dd9135..0000000000000 --- a/paddle/fluid/operators/squared_l2_distance_op.cc +++ /dev/null @@ -1,232 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/squared_l2_distance_op.h" - -#include - -#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" - -namespace paddle { -namespace operators { - -class SquaredL2DistanceOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SquaredL2DistanceOp"); - OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "SquaredL2DistanceOp"); - OP_INOUT_CHECK(ctx->HasOutput("sub_result"), - "Output", - "sub_result", - "SquaredL2DistanceOp"); - OP_INOUT_CHECK( - ctx->HasOutput("Out"), "Output", "Out", "SquaredL2DistanceOp"); - - auto x_dims = ctx->GetInputDim("X"); - auto y_dims = ctx->GetInputDim("Y"); - - PADDLE_ENFORCE_EQ(phi::arity(x_dims), - phi::arity(y_dims), - platform::errors::InvalidArgument( - "Input(X) and Input(X) of SquaredL2DistanceOp should " - "have same dimensions. " - "But received X's shape = [%s] and Y's shape = [%s], " - "the dimensions are %d and %d respectively", - x_dims, - y_dims, - phi::arity(x_dims), - phi::arity(y_dims))); - - int rank = phi::arity(x_dims); - PADDLE_ENFORCE_GE( - rank, - 2, - platform::errors::InvalidArgument( - "Input dimensions of SquaredL2DistanceOp should be at least 2." 
- "But received shape = [%s] and dimension is %d.", - x_dims, - rank)); - bool check = true; - if ((!ctx->IsRuntime()) && - (phi::product(x_dims) <= 0 || phi::product(y_dims) <= 0)) { - check = false; - } - if (check) { - PADDLE_ENFORCE_EQ( - product(x_dims) / x_dims[0], - product(y_dims) / y_dims[0], - platform::errors::InvalidArgument( - "Input(X) and Input(Y) of SquaredL2DistanceOp should " - "have same dimensions." - "But received X's shape = [%s] and Y's shape = [%s]" - ", the products are %d and %d respectively", - x_dims, - y_dims, - product(x_dims) / x_dims[0], - product(y_dims) / y_dims[0])); - } - check = true; - if ((!ctx->IsRuntime()) && (y_dims[0] <= 0 || x_dims[0] <= 0)) { - check = false; - } - if (check) { - PADDLE_ENFORCE_EQ( - y_dims[0] == 1 || y_dims[0] == x_dims[0], - true, - platform::errors::InvalidArgument( - "First dimension of Input(Y) of SquaredL2DistanceOp " - "must be equal to 1 or to first dimension of Input(X)." - "But received X's shape = [%s] and Y's shape = [%s]," - "the first dimensions are %d and %d respectively", - x_dims, - y_dims, - x_dims[0], - y_dims[0])); - } - ctx->SetOutputDim("sub_result", {x_dims[0], product(x_dims) / x_dims[0]}); - ctx->SetOutputDim("Out", {x_dims[0], 1}); - ctx->ShareLoD("X", /*->*/ "Out"); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(SquaredL2DistanceGradOpNoBufferVarsInferer, - "X", - "Y"); - -template -class SquaredL2DistanceGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("squared_l2_distance_grad"); - - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetInput("sub_result", this->Output("sub_result")); - op->SetInput("X", this->Input("X")); - op->SetInput("Y", this->Input("Y")); - - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetOutput(framework::GradVarName("Y"), this->InputGrad("Y")); - - op->SetAttrMap(this->Attrs()); - } -}; - -class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor) Input of SquaredL2DistanceOp."); - AddInput("Y", "(Tensor) Target of SquaredL2DistanceOp."); - AddOutput("sub_result", - "(Tensor) Buffering subtraction result which " - "will be reused in backward.") - .AsIntermediate(); - AddOutput("Out", "(Tensor) Squared l2 distance between input and target."); - AddComment(R"DOC( -SquaredL2Distance operator - -This operator will cacluate the squared L2 distance for the input and -the target. Number of distance value will be equal to the first dimension -of input. First dimension of the target could be equal to the input or to 1. -If the first dimension of target is 1, the operator will broadcast target's -first dimension to input's first dimension. During backward propagation, -the user can decide whether to calculate the gradient of the input or -the target or both. - -Both the input X and Y can carry the LoD (Level of Details) information. -However, the output only shares the LoD information with input X. 
- )DOC"); - } -}; - -class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("sub_result"), - "Input", - "sub_result", - "SquaredL2DistanceGradOp"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), - "Input", - "Out@GRAD", - "SquaredL2DistanceGradOp"); - auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); - auto x_dims = ctx->GetInputDim("X"); - auto y_dims = ctx->GetInputDim("Y"); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - out_dims[0], - x_dims[0], - platform::errors::InvalidArgument( - "First dimension of output gradient and Input(X) " - "of SquaredL2DistanceGradOp must be equal " - "But received X's shape = [%s] and grad's shape = [%s], " - "the first dimensions are %d and %d respectively", - x_dims, - out_dims, - x_dims[0], - out_dims[0])); - PADDLE_ENFORCE_EQ(out_dims[1], - 1, - platform::errors::InvalidArgument( - "Second dimension of output gradient of " - "SquaredL2DistanceGradOp must be 1. " - "But received grad's shape = [%s], " - "with second dimension %d", - out_dims, - out_dims[1])); - } - auto x_grad_name = framework::GradVarName("X"); - auto y_grad_name = framework::GradVarName("Y"); - if (ctx->HasOutput(x_grad_name)) ctx->SetOutputDim(x_grad_name, x_dims); - if (ctx->HasOutput(y_grad_name)) ctx->SetOutputDim(y_grad_name, y_dims); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "sub_result"), - ctx.GetPlace()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - squared_l2_distance, - ops::SquaredL2DistanceOp, - ops::SquaredL2DistanceOpMaker, - ops::SquaredL2DistanceGradOpMaker, - ops::SquaredL2DistanceGradOpMaker); -REGISTER_OPERATOR(squared_l2_distance_grad, - ops::SquaredL2DistanceGradOp, - ops::SquaredL2DistanceGradOpNoBufferVarsInferer); - -PD_REGISTER_STRUCT_KERNEL( - squared_l2_distance, CPU, ALL_LAYOUT, ops::SquaredL2DistanceKernel, float) { -} -PD_REGISTER_STRUCT_KERNEL(squared_l2_distance_grad, - CPU, - ALL_LAYOUT, - ops::SquaredL2DistanceGradKernel, - float) {} diff --git a/paddle/fluid/operators/squared_l2_distance_op.cu b/paddle/fluid/operators/squared_l2_distance_op.cu deleted file mode 100644 index 4411df4d9ab7f..0000000000000 --- a/paddle/fluid/operators/squared_l2_distance_op.cu +++ /dev/null @@ -1,24 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#include "paddle/fluid/operators/squared_l2_distance_op.h" - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL( - squared_l2_distance, GPU, ALL_LAYOUT, ops::SquaredL2DistanceKernel, float) { -} -PD_REGISTER_STRUCT_KERNEL(squared_l2_distance_grad, - GPU, - ALL_LAYOUT, - ops::SquaredL2DistanceGradKernel, - float) {} diff --git a/paddle/fluid/operators/squared_l2_distance_op.h b/paddle/fluid/operators/squared_l2_distance_op.h deleted file mode 100644 index 18039835c55c3..0000000000000 --- a/paddle/fluid/operators/squared_l2_distance_op.h +++ /dev/null @@ -1,132 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class SquaredL2DistanceKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("X"); - auto* in1 = context.Input("Y"); - auto* out0 = context.Output("sub_result"); - auto* out1 = context.Output("Out"); - - auto in0_dims = in0->dims(); - auto in1_dims = in1->dims(); - - int cols = in0->numel() / in0_dims[0]; - // reduce dimensions except the first - auto x = framework::EigenMatrix::From( - *in0, phi::make_ddim({in0_dims[0], cols})); - auto y = framework::EigenMatrix::From( - *in1, phi::make_ddim({in1_dims[0], cols})); - - out0->mutable_data(context.GetPlace()); - out1->mutable_data(context.GetPlace()); - auto sub_result = framework::EigenMatrix::From(*out0); - auto z = framework::EigenVector::Flatten(*out1); - - auto& place = - *context.template device_context().eigen_device(); - auto x_dims = x.dimensions(); - auto y_dims = y.dimensions(); - // buffer the substraction result - if (y_dims[0] == 1 && x_dims[0] > y_dims[0]) { - sub_result.device(place) = - x - - y.broadcast(Eigen::array({{static_cast(x_dims[0]), 1}})); - } else { - sub_result.device(place) = x - y; - } - auto sub_res_pow2 = sub_result * sub_result; - z.device(place) = sub_res_pow2.sum(Eigen::array({{1}})); - } -}; - -template -class SquaredL2DistanceGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("sub_result"); - auto* in1 = context.Input(framework::GradVarName("Out")); - auto* x_g = context.Output(framework::GradVarName("X")); - auto* y_g = context.Output(framework::GradVarName("Y")); - - PADDLE_ENFORCE_NOT_NULL( - x_g, - platform::errors::NotFound( - "variable(%s) cannot be found " - "in scope for operator 'squared_l2_distance_grad'.", - framework::GradVarName("X"))); - PADDLE_ENFORCE_NOT_NULL( - y_g, - platform::errors::NotFound( - "variable(%s) cannot be found " - "in scope for operator 'squared_l2_distance_grad'.", - framework::GradVarName("Y"))); - - auto sub_result = framework::EigenMatrix::From(*in0); - auto out_grad = framework::EigenMatrix::From(*in1); - - auto x_dims = x_g->dims(); 
- auto y_dims = y_g->dims(); - - int cols = x_g->numel() / x_dims[0]; - // calculate gradient - auto grad_mat = 2 * - (out_grad.broadcast(Eigen::array({{1, cols}}))) * - sub_result; - - // propagate back to input - auto& eigen_place = - *context.template device_context().eigen_device(); - - x_g->mutable_data(context.GetPlace()); - // eigen matrix - auto x_grad = framework::EigenMatrix::From( - *x_g, phi::make_ddim({x_dims[0], cols})); - // dimensions are same with subResult - x_grad.device(eigen_place) = grad_mat; - - y_g->mutable_data(context.GetPlace()); - - PADDLE_ENFORCE_GE(sub_result.dimensions()[0], - y_dims[0], - platform::errors::InvalidArgument( - "First dimension of gradient must be greater or " - "equal than first dimension of target. But received " - "gradient dimension = %d and target dimension is %d.", - sub_result.dimensions()[0], - y_dims[0])); - - if (sub_result.dimensions()[0] == y_dims[0]) { - auto y_grad = framework::EigenMatrix::From( - *y_g, phi::make_ddim({y_dims[0], cols})); - y_grad.device(eigen_place) = -1 * grad_mat; - } else { - auto col_sum_res = -1 * (grad_mat.sum(Eigen::array({{0}}))); - auto y_grad = framework::EigenVector::Flatten(*y_g); - y_grad.device(eigen_place) = col_sum_res; - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/tree_conv_op.cc b/paddle/fluid/operators/tree_conv_op.cc deleted file mode 100644 index 7265d966b9e2a..0000000000000 --- a/paddle/fluid/operators/tree_conv_op.cc +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/tree_conv_op.h" - -#include -#include - -namespace paddle { -namespace operators { -class TreeConvOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("NodesVector", - "(Tensor) The feature vector of every node on the tree. " - "The shape of the feature vector must be " - "[max_tree_node_size, feature_size]."); - AddInput("EdgeSet", - "(Tensor) The Edges of Tree. The edge must be directional. " - "The shape of the edge set must be [max_tree_node_size, 2]."); - AddInput("Filter", - "(Tensor) The feature detector. " - "The shape of the filter is " - "[feature_size, 3, output_size, num_filters]."); - AddOutput("Out", - "(Tensor) The feature vector of subtrees. " - "The shape of the output tensor is [max_tree_node_size, " - "output_size, num_filters]. " - "The output tensor could be a new feature " - "vector for next tree convolution layers."); - AddAttr("max_depth", - "(int, default: 2) The depth of feature detector.") - .SetDefault(2) - .GreaterThan(1); - AddComment(R"DOC( -**Tree-Based Convolution Operator** - -Tree-Based Convolution is a kind of convolution based on tree structure. -Tree-Based Convolution is a part of Tree-Based Convolution Neural Network(TBCNN), -which is used to classify tree structures, such as Abstract Syntax Tree. 
-Tree-Based Convolution proposed a kind of data structure called continuous binary tree, -which regards multiway tree as binary tree. -The paper of Tree-Based Convolution Operator is here: -https://arxiv.org/abs/1409.5718v1 -)DOC"); - } -}; -class TreeConvOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("NodesVector"), "Input", "NodesVector", "TreeConv"); - OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "TreeConv"); - OP_INOUT_CHECK(ctx->HasInput("EdgeSet"), "Input", "EdgeSet", "TreeConv"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "TreeConv"); - - auto edge_dims = ctx->GetInputDim("EdgeSet"); - auto vector_dims = ctx->GetInputDim("NodesVector"); - auto filter_dims = ctx->GetInputDim("Filter"); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(edge_dims[2], - 2, - platform::errors::InvalidArgument( - "Input(EdgeSet) dim[2] should be 2. " - "But received Input(EdgeSet) dim[2] is %d.", - edge_dims[2])); - } else { - if (edge_dims[2] != -1) { - PADDLE_ENFORCE_EQ(edge_dims[2], - 2, - platform::errors::InvalidArgument( - "Input(EdgeSet) dim[2] should be 2. " - "But received Input(EdgeSet) dim[2] is %d.", - edge_dims[2])); - } - } - PADDLE_ENFORCE_EQ(edge_dims.size(), - 3, - platform::errors::InvalidArgument( - "The dimension of EdgeSet Tensor should be 3. " - "But received the dimension of EdgeSet Tensor is %d.", - edge_dims.size())); - PADDLE_ENFORCE_EQ( - vector_dims.size(), - 3, - platform::errors::InvalidArgument( - "The dimension of NodesVector Tensor should be 3. " - "But received the dimension of NodesVector Tensor is %d.", - vector_dims.size())); - PADDLE_ENFORCE_EQ(filter_dims.size(), - 4, - platform::errors::InvalidArgument( - "The dimension of Filter Tensor should be 4. " - "But received the dimension of Filter Tensor is %d.", - filter_dims.size())); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(filter_dims[1], - 3, - platform::errors::InvalidArgument( - "Input(Filter) dim[1] should be 3. " - "But received Input(Filter) dim[1] is %d.", - filter_dims[1])); - PADDLE_ENFORCE_EQ( - filter_dims[0], - vector_dims[2], - platform::errors::InvalidArgument( - "Input(Filter) dim[0] must equal to Input(NodesVector) dim[2]. " - "But received Input(Filter) dim[0] = %d, Input(NodesVector) " - "dim[2] = %d.", - filter_dims[0], - vector_dims[2])); - } else { - if (filter_dims[1] != -1) { - PADDLE_ENFORCE_EQ(filter_dims[1], - 3, - platform::errors::InvalidArgument( - "Input(Filter) dim[1] should be 3. " - "But received Input(Filter) dim[1] is %d.", - filter_dims[1])); - } - - if (filter_dims[0] != -1 && vector_dims[2] != -1) { - PADDLE_ENFORCE_EQ( - filter_dims[0], - vector_dims[2], - platform::errors::InvalidArgument( - "Input(Filter) dim[0] must equal to Input(NodesVector) dim[2]. 
" - "But received Input(Filter) dim[0] = %d, Input(NodesVector) " - "dim[2] = %d.", - filter_dims[0], - vector_dims[2])); - } - } - auto output_dims = phi::make_ddim( - {vector_dims[0], vector_dims[1], filter_dims[2], filter_dims[3]}); - ctx->SetOutputDim("Out", output_dims); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "NodesVector"), - ctx.GetPlace()); - } -}; - -template -class TreeConvGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("tree_conv_grad"); - - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetInput("Filter", this->Input("Filter")); - op->SetInput("EdgeSet", this->Input("EdgeSet")); - op->SetInput("NodesVector", this->Input("NodesVector")); - - op->SetOutput(framework::GradVarName("NodesVector"), - this->InputGrad("NodesVector")); - op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter")); - - op->SetAttrMap(this->Attrs()); - } -}; - -class TreeConvGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "grad_TreeConv"); - OP_INOUT_CHECK( - ctx->HasInput("EdgeSet"), "Input", "EdgeSet", "grad_TreeConv"); - OP_INOUT_CHECK( - ctx->HasInput("NodesVector"), "Input", "NodesVector", "grad_TreeConv"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), - "Input", - framework::GradVarName("Out"), - "grad_TreeConv"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("NodesVector")), - "Output", - framework::GradVarName("NodesVector"), - "grad_TreeConv"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Filter")), - "Output", - framework::GradVarName("Filter"), - "grad_TreeConv"); - - auto vectors_dims = ctx->GetInputDim("NodesVector"); - auto filter_dims = ctx->GetInputDim("Filter"); - if (ctx->HasOutput(framework::GradVarName("Filter"))) { - ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims); - } - if (ctx->HasOutput(framework::GradVarName("NodesVector"))) { - ctx->SetOutputDim(framework::GradVarName("NodesVector"), vectors_dims); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "NodesVector"), - ctx.GetPlace()); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(tree_conv, - ops::TreeConvOp, - ops::TreeConvOpMaker, - ops::TreeConvGradOpMaker, - ops::TreeConvGradOpMaker); - -REGISTER_OPERATOR(tree_conv_grad, ops::TreeConvGradOp); - -PD_REGISTER_STRUCT_KERNEL( - tree_conv, CPU, ALL_LAYOUT, ops::TreeConvKernel, float, double) {} -PD_REGISTER_STRUCT_KERNEL( - tree_conv_grad, CPU, ALL_LAYOUT, ops::TreeConvGradKernel, float, double) {} diff --git a/paddle/fluid/operators/tree_conv_op.cu b/paddle/fluid/operators/tree_conv_op.cu deleted file mode 100644 index 1bfcb94013c2b..0000000000000 --- a/paddle/fluid/operators/tree_conv_op.cu +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/operators/tree_conv_op.h"
-
-namespace ops = paddle::operators;
-
-PD_REGISTER_STRUCT_KERNEL(
-    tree_conv, GPU, ALL_LAYOUT, ops::TreeConvKernel, float, double) {}
-PD_REGISTER_STRUCT_KERNEL(
-    tree_conv_grad, GPU, ALL_LAYOUT, ops::TreeConvGradKernel, float, double) {}
diff --git a/paddle/fluid/operators/tree_conv_op.h b/paddle/fluid/operators/tree_conv_op.h
deleted file mode 100644
index 18fd5bea29d30..0000000000000
--- a/paddle/fluid/operators/tree_conv_op.h
+++ /dev/null
@@ -1,148 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/tree2col.h"
-#include "paddle/phi/kernels/funcs/blas/blas.h"
-
-namespace paddle {
-namespace operators {
-using DDim = framework::DDim;
-template <typename T, typename DeviceContext>
-class TreeConvKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    math::Tree2ColFunctor<DeviceContext, T> tree2col;
-    phi::funcs::SetConstant<DeviceContext, T> constant;
-
-    auto *Edges = ctx.Input<phi::DenseTensor>("EdgeSet");
-    auto *Embeddings = ctx.Input<phi::DenseTensor>("NodesVector");
-    auto *Filter = ctx.Input<phi::DenseTensor>("Filter");
-    auto *output_emb = ctx.Output<phi::DenseTensor>("Out");
-    int max_depth = ctx.Attr<int>("max_depth");
-
-    auto &dev_ctx = ctx.template device_context<DeviceContext>();
-    auto blas = phi::funcs::GetBlas<DeviceContext, T>(dev_ctx);
-
-    phi::DenseTensor W;
-    W.ShareDataWith(*Filter);
-    W.Resize(phi::flatten_to_2d(Filter->dims(), 2));
-
-    int batch_size = static_cast<int>(Edges->dims()[0]);
-    int n = static_cast<int>(Embeddings->dims()[1]);
-    int out_size = static_cast<int>(Filter->dims()[2]);
-    int num_filters = static_cast<int>(Filter->dims()[3]);
-    output_emb->mutable_data<T>({batch_size, n, out_size, num_filters},
-                                ctx.GetPlace());
-
-    auto edge_set_slicedim = phi::slice_ddim(
-        Edges->dims(), 1, static_cast<int>(Edges->dims().size()));
-
-    auto embedding_slicedim = phi::slice_ddim(
-        Embeddings->dims(), 1, static_cast<int>(Embeddings->dims().size()));
-
-    auto output_slicedim = phi::slice_ddim(
-        output_emb->dims(), 1, static_cast<int>(output_emb->dims().size()));
-
-    output_slicedim = phi::flatten_to_2d(output_slicedim, 1);
-
-    for (int idx = 0; idx < batch_size; idx++) {
-      auto edge_set = Edges->Slice(idx, idx + 1).Resize(edge_set_slicedim);
-      auto embeddings =
-          Embeddings->Slice(idx, idx + 1).Resize(embedding_slicedim);
-      auto out_vec = output_emb->Slice(idx, idx + 1).Resize(output_slicedim);
-
phi::DenseTensor patch; - tree2col(dev_ctx, edge_set, embeddings, &patch, max_depth); - constant(dev_ctx, &out_vec, 0); - blas.MatMul(patch, W, &out_vec); - } - } -}; -template -class TreeConvGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *out_g = ctx.Input(framework::GradVarName("Out")); - auto *in_g = - ctx.Output(framework::GradVarName("NodesVector")); - auto *filter_g = - ctx.Output(framework::GradVarName("Filter")); - int max_depth = ctx.Attr("max_depth"); - auto *Embeddings = ctx.Input("NodesVector"); - auto *edges = ctx.Input("EdgeSet"); - auto *Filter = ctx.Input("Filter"); - math::Tree2ColFunctor tree2col; - math::Col2TreeFunctor col2tree; - phi::funcs::SetConstant constant; - auto &dev_ctx = ctx.template device_context(); - auto blas = phi::funcs::GetBlas(dev_ctx); - - phi::DenseTensor W; - W.ShareDataWith(*Filter); - W.Resize(phi::flatten_to_2d(Filter->dims(), 1)); - - int batch_size = static_cast(Embeddings->dims()[0]); - - auto edge_set_slicedim = phi::slice_ddim( - edges->dims(), 1, static_cast(edges->dims().size())); - - auto embedding_slicedim = phi::slice_ddim( - Embeddings->dims(), 1, static_cast(Embeddings->dims().size())); - - auto out_grad_dims = phi::slice_ddim( - out_g->dims(), 1, static_cast(out_g->dims().size())); - out_grad_dims = phi::flatten_to_2d(out_grad_dims, 1); - if (filter_g) { - filter_g->mutable_data(Filter->dims(), ctx.GetPlace()); - phi::DenseTensor f_g; - f_g.ShareDataWith(*filter_g); - f_g.Resize(phi::flatten_to_2d(Filter->dims(), 2)); - constant(dev_ctx, filter_g, 0); - for (int batch_id = 0; batch_id < batch_size; batch_id++) { - auto edge_set = - edges->Slice(batch_id, batch_id + 1).Resize(edge_set_slicedim); - auto embeddings = Embeddings->Slice(batch_id, batch_id + 1) - .Resize(embedding_slicedim); - auto out_grad = - out_g->Slice(batch_id, batch_id + 1).Resize(out_grad_dims); - phi::DenseTensor patch; - tree2col(dev_ctx, edge_set, embeddings, &patch, max_depth); - blas.MatMul(patch, true, out_grad, false, T(1.0), &f_g, T(1.0)); - } - } - if (in_g) { - auto input_grad_dims = phi::slice_ddim( - in_g->dims(), 1, static_cast(in_g->dims().size())); - in_g->mutable_data(Embeddings->dims(), ctx.GetPlace()); - constant(dev_ctx, in_g, 0); - for (int batch_id = 0; batch_id < batch_size; batch_id++) { - auto edge_set = - edges->Slice(batch_id, batch_id + 1).Resize(edge_set_slicedim); - auto out_grad = - out_g->Slice(batch_id, batch_id + 1).Resize(out_grad_dims); - auto in_grad = - in_g->Slice(batch_id, batch_id + 1).Resize(input_grad_dims); - phi::DenseTensor in_grad_temp; - col2tree(dev_ctx, edge_set, out_grad, &in_grad_temp, max_depth); - blas.MatMul(in_grad_temp, false, W, true, &in_grad); - } - } - } -}; -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/unity_build_rule.cmake b/paddle/fluid/operators/unity_build_rule.cmake index 3cfb53ea14b65..1b9b91e3efc41 100644 --- a/paddle/fluid/operators/unity_build_rule.cmake +++ b/paddle/fluid/operators/unity_build_rule.cmake @@ -97,8 +97,7 @@ register_unity_group( fill_constant_batch_size_like_op.cc fill_constant_op.cc fill_op.cc - fill_zeros_like_op.cc - filter_by_instag_op.cc) + fill_zeros_like_op.cc) register_unity_group( cc flatten_op.cc @@ -203,7 +202,6 @@ register_unity_group( positive_negative_pair_op.cc prelu_op.cc print_op.cc - prroi_pool_op.cc psroi_pool_op.cc pull_box_extended_sparse_op.cc pull_box_sparse_op.cc @@ -215,7 +213,6 @@ register_unity_group( quantize_op.cc 
mkldnn/quantize_mkldnn_op.cc queue_generator_op.cc - random_crop_op.cc range_op.cc rank_attention_op.cc rank_loss_op.cc @@ -258,7 +255,6 @@ register_unity_group( slice_op.cc) register_unity_group( cc - space_to_depth_op.cc spectral_norm_op.cc split_lod_tensor_op.cc split_op.cc @@ -283,7 +279,6 @@ register_unity_group( trace_op.cc transpose_op.cc mkldnn/transpose_mkldnn_op.cc - tree_conv_op.cc unbind_op.cc unfold_op.cc) register_unity_group( @@ -328,7 +323,7 @@ register_unity_group( unbind_op.cu.cc unpool_op.cu.cc unsqueeze_op.cu.cc) -register_unity_group(cc arg_max_op.cc arg_min_op.cc squared_l2_distance_op.cc) +register_unity_group(cc arg_max_op.cc arg_min_op.cc) register_unity_group( cc linear_chain_crf_op.cc @@ -441,8 +436,7 @@ register_unity_group( masked_select_op.cu lstmp_op.cu shuffle_channel_op.cu - softmax_cudnn_op.cu - squared_l2_distance_op.cu) + softmax_cudnn_op.cu) register_unity_group( cu conv_shift_op.cu @@ -474,18 +468,11 @@ register_unity_group( partial_sum_op.cu pixel_shuffle_op.cu prelu_op.cu - prroi_pool_op.cu run_program_op.cu pull_box_extended_sparse_op.cu pull_box_sparse_op.cu) -register_unity_group( - cu - random_crop_op.cu - range_op.cu - reverse_op.cu - partial_concat_op.cu - kldiv_loss_op.cu - instance_norm_op.cu) +register_unity_group(cu range_op.cu reverse_op.cu partial_concat_op.cu + kldiv_loss_op.cu instance_norm_op.cu) register_unity_group( cu roi_align_op.cu @@ -509,7 +496,6 @@ register_unity_group( slice_op.cu) register_unity_group( cu - space_to_depth_op.cu spectral_norm_op.cu split_op.cu split_selected_rows_op.cu @@ -517,14 +503,8 @@ register_unity_group( sum_op.cu temporal_shift_op.cu arg_max_op.cu) -register_unity_group( - cu - row_conv_op.cu - tree_conv_op.cu - tril_triu_op.cu - unfold_op.cu - arg_min_op.cu - crop_tensor_op.cu) +register_unity_group(cu row_conv_op.cu tril_triu_op.cu unfold_op.cu + arg_min_op.cu crop_tensor_op.cu) register_unity_group( cu smooth_l1_loss_op.cu diff --git a/test/legacy_test/CMakeLists.txt b/test/legacy_test/CMakeLists.txt index 7aa25386076e5..6aa04cf4a6896 100644 --- a/test/legacy_test/CMakeLists.txt +++ b/test/legacy_test/CMakeLists.txt @@ -523,9 +523,7 @@ set(TEST_OPS_WITH_GC test_mean_op test_pad2d_op test_scatter_op - test_slice_op - test_space_to_depth_op - test_squared_l2_distance_op) + test_slice_op) foreach(TEST_OP ${TEST_OPS_WITH_GC}) list(REMOVE_ITEM TEST_OPS ${TEST_OP}) @@ -1047,7 +1045,6 @@ set_tests_properties(test_sigmoid_cross_entropy_with_logits_op set_tests_properties(test_imperative_optimizer_v2 PROPERTIES TIMEOUT 150) set_tests_properties(test_partial_sum_op PROPERTIES TIMEOUT 120) set_tests_properties(test_cond PROPERTIES TIMEOUT 120) -set_tests_properties(test_space_to_depth_op PROPERTIES TIMEOUT 200) set_tests_properties(test_sgd_op PROPERTIES TIMEOUT 250) set_tests_properties(test_parallel_executor_seresnext_base_gpu PROPERTIES TIMEOUT 120) diff --git a/test/legacy_test/test_filter_by_instag_op.py b/test/legacy_test/test_filter_by_instag_op.py deleted file mode 100644 index 211889feaa06b..0000000000000 --- a/test/legacy_test/test_filter_by_instag_op.py +++ /dev/null @@ -1,279 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""This is unit test of Test filter_instag Op.""" - -import unittest - -import numpy as np -from op_test import OpTest - -"""This is Test Case 1""" - - -class TestFilterByInstagOp(OpTest): - def setUp(self): - self.op_type = 'filter_by_instag' - x1 = np.zeros((36, 4), dtype=np.float64) - for i in range(36): - for j in range(4): - x1[i, j] = i - x1_lod = [[1, 2, 3, 4, 5, 6, 7, 8]] - - x2 = np.array([[1], [2], [1], [2], [1], [2], [1], [2]]).astype('int64') - x2_lod = [[1, 1, 1, 1, 1, 1, 1, 1]] - - x3 = np.array([2]).astype('int64') - - out = np.zeros((20, 4), dtype=np.float64) - out_lod = [[2, 4, 6, 8]] - start_num_lst = [1, 6, 15, 28] - - ln = 0 - for i in range(4): - start = start_num_lst[i] - len = out_lod[0][i] - for j in range(len): - cur = start + j - for k in range(4): - out[ln, k] = cur - ln += 1 - - mmap = np.array([[0, 1, 2], [2, 6, 4], [6, 15, 6], [12, 28, 8]]).astype( - 'int64' - ) - mmap_lod = [[1, 1, 1, 1]] - - loss_weight = np.array([[1], [1], [1], [1]]).astype('double') - - self.inputs = { - 'Ins': (x1, x1_lod), - 'Ins_tag': (x2, x2_lod), - 'Filter_tag': x3, - } - self.outputs = { - 'Out': (out, out_lod), - 'LossWeight': (loss_weight, mmap_lod), - 'IndexMap': (mmap, mmap_lod), - } - - self.attrs = {'is_lod': True, 'out_val_if_empty': 0} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['Ins'], 'Out', no_grad_set={'Ins_tag', 'Filter_tag'}) - - -"""This is Test Case 2""" - - -class TestFilterByInstagOp2(OpTest): - def setUp(self): - self.op_type = 'filter_by_instag' - - x1 = np.random.random((4, 36)).astype('double') - x1_lod = [[1, 1, 1, 1]] - - x2 = np.array([[2], [1], [2], [1]]).astype('int64') - x2_lod = [[1, 1, 1, 1]] - - x3 = np.array([1]).astype('int64') - - out = np.zeros([2, 36]).astype('double') - out[0] = x1[1] - out[1] = x1[3] - out_lod = [[1, 1]] - - mmap = np.array([[0, 1, 1], [1, 3, 1]]).astype('int64') - mmap_lod = [[1, 1]] - - loss_weight = np.array([[1], [1]]).astype('double') - self.inputs = { - 'Ins': (x1, x1_lod), - 'Ins_tag': (x2, x2_lod), - 'Filter_tag': x3, - } - - self.outputs = { - 'Out': (out, out_lod), - 'LossWeight': (loss_weight, mmap_lod), - 'IndexMap': (mmap, mmap_lod), - } - self.attrs = {'is_lod': True, 'out_val_if_empty': 0} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['Ins'], 'Out', no_grad_set={'Ins_tag', 'Filter_tag'}) - - -"""This is Test Case 3""" - - -class TestFilterByInstagOp3(OpTest): - def setUp(self): - self.op_type = 'filter_by_instag' - - x1 = np.random.random((4, 36)).astype('double') - x1_lod = [[1, 1, 1, 1]] - - x2 = np.array([[2], [1], [2], [1]]).astype('int64') - x2_lod = [[1, 1, 1, 1]] - - x3 = np.array([3]).astype('int64') - - out = np.zeros((1, 36)).astype('double') - out_lod = [[1]] - - mmap = np.array([[0, 1, 1]]).astype('int64') - mmap_lod = [[1]] - - loss_weight = np.array([[0]]).astype('double') - self.inputs = { - 'Ins': (x1, x1_lod), - 'Ins_tag': (x2, x2_lod), - 'Filter_tag': x3, - } - self.outputs = { - 'Out': (out, out_lod), - 'LossWeight': (loss_weight, mmap_lod), - 'IndexMap': (mmap, 
mmap_lod), - } - self.attrs = {'is_lod': True, 'out_val_if_empty': 0} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['Ins'], 'Out', no_grad_set={'Ins_tag', 'Filter_tag'}) - - -"""This is Test Case 4""" - - -class TestFilterByInstagOp4(OpTest): - def setUp(self): - self.op_type = 'filter_by_instag' - - x1 = np.random.random((4, 36)).astype('double') - - x2 = np.array([[2], [1], [2], [1]]).astype('int64') - x2_lod = [[1, 1, 1, 1]] - - x3 = np.array([3]).astype('int64') - - out = np.zeros((1, 36)).astype('double') - out_lod = [[1]] - - mmap = np.array([[0, 1, 1]]).astype('int64') - mmap_lod = [[1]] - - loss_weight = np.array([[0]]).astype('double') - self.inputs = { - 'Ins': x1, - 'Ins_tag': (x2, x2_lod), - 'Filter_tag': x3, - } - self.outputs = { - 'Out': (out, out_lod), - 'LossWeight': (loss_weight, mmap_lod), - 'IndexMap': (mmap, mmap_lod), - } - self.attrs = {'is_lod': False, 'out_val_if_empty': 0} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['Ins'], 'Out', no_grad_set={'Ins_tag', 'Filter_tag'}) - - -class TestFilterByInstagOp6(OpTest): - def setUp(self): - self.op_type = 'filter_by_instag' - - x1 = np.random.random((4, 36)).astype('int64') - - x2 = np.array([[2], [1], [2], [1]]).astype('int64') - x2_lod = [[1, 1, 1, 1]] - - x3 = np.array([3]).astype('int64') - - out = np.zeros((1, 36)).astype('double') - out_lod = [[1]] - - mmap = np.array([[0, 1, 1]]).astype('int64') - mmap_lod = [[1]] - - loss_weight = np.array([[0]]).astype('double') - self.inputs = { - 'Ins': x1, - 'Ins_tag': (x2, x2_lod), - 'Filter_tag': x3, - } - self.outputs = { - 'Out': (out, out_lod), - 'LossWeight': (loss_weight, mmap_lod), - 'IndexMap': (mmap, mmap_lod), - } - self.attrs = {'is_lod': False, 'out_val_if_empty': 0} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - pass - - -class TestFilterByInstagOp7(OpTest): - def setUp(self): - self.op_type = 'filter_by_instag' - - x1 = np.random.random((4, 36)).astype('int32') - - x2 = np.array([[2], [1], [2], [1]]).astype('int64') - x2_lod = [[1, 1, 1, 1]] - - x3 = np.array([3]).astype('int64') - - out = np.zeros((1, 36)).astype('double') - out_lod = [[1]] - - mmap = np.array([[0, 1, 1]]).astype('int64') - mmap_lod = [[1]] - - loss_weight = np.array([[0]]).astype('double') - self.inputs = { - 'Ins': x1, - 'Ins_tag': (x2, x2_lod), - 'Filter_tag': x3, - } - self.outputs = { - 'Out': (out, out_lod), - 'LossWeight': (loss_weight, mmap_lod), - 'IndexMap': (mmap, mmap_lod), - } - self.attrs = {'is_lod': False, 'out_val_if_empty': 0} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_random_crop_op.py b/test/legacy_test/test_random_crop_op.py deleted file mode 100644 index 08355378207c1..0000000000000 --- a/test/legacy_test/test_random_crop_op.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
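For reference, the deleted filter_by_instag tests pin the expected outputs down numerically. The following is a minimal numpy restatement of Test Case 2 above (an illustration with hypothetical variable names, not code from the repository): rows of Ins whose tag appears in Filter_tag are kept, IndexMap records (output offset, input offset, length), and LossWeight is 1 for every surviving row.

    # Hypothetical numpy restatement of Test Case 2: tags [2, 1, 2, 1], filter [1].
    import numpy as np

    x1 = np.random.random((4, 36)).astype('double')      # Ins, one row per instance
    ins_tag = np.array([2, 1, 2, 1])                      # Ins_tag
    filter_tag = np.array([1])                            # Filter_tag

    kept = np.flatnonzero(np.isin(ins_tag, filter_tag))   # rows 1 and 3 survive
    out = x1[kept]                                        # expected 'Out'
    index_map = np.stack(
        [np.arange(len(kept)), kept, np.ones_like(kept)], axis=1)
    loss_weight = np.ones((len(kept), 1))

    assert (index_map == np.array([[0, 1, 1], [1, 3, 1]])).all()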
-# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - - -class TestRandomCropOp(OpTest): - def setUp(self): - to_crop = np.array( - [[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]] * 5 - ).astype(np.int32) - self.possible_res = [ - np.array([[1, 2, 3], [5, 6, 7]]).astype(np.int32), - np.array([[2, 3, 4], [6, 7, 8]]).astype(np.int32), - np.array([[5, 6, 7], [9, 10, 11]]).astype(np.int32), - np.array([[6, 7, 8], [10, 11, 12]]).astype(np.int32), - ] - self.op_type = "random_crop" - self.inputs = {'X': to_crop, 'Seed': np.array([10]).astype('int64')} - self.outputs = {'Out': np.array([]), 'SeedOut': np.array([])} - self.attrs = {'shape': [2, 3]} - - def test_check_output(self): - self.check_output_customized(self.verify_output) - - def verify_output(self, outs): - out = np.array(outs[1]) - for ins in out[:]: - is_equal = [(ins == res).all() for res in self.possible_res] - self.assertIn(True, is_equal) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/test_space_to_depth_op.py b/test/legacy_test/test_space_to_depth_op.py deleted file mode 100644 index c7cd6cae179db..0000000000000 --- a/test/legacy_test/test_space_to_depth_op.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
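For reference, the deleted random_crop test only asserts that every cropped instance matches one of the enumerated 2x3 windows of the 3x4 input. A minimal sketch of that membership check, assuming a numpy Generator as a stand-in for the operator's Seed input (illustrative only, not the operator implementation):

    # Mirror of TestRandomCropOp.verify_output: a 2x3 crop of the 3x4 image must
    # equal one of the four possible windows listed in possible_res.
    import numpy as np

    image = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype=np.int32)
    crop_h, crop_w = 2, 3
    windows = [image[i:i + crop_h, j:j + crop_w]
               for i in range(image.shape[0] - crop_h + 1)
               for j in range(image.shape[1] - crop_w + 1)]

    rng = np.random.default_rng(10)            # stand-in for the op's Seed input
    i = rng.integers(0, image.shape[0] - crop_h + 1)
    j = rng.integers(0, image.shape[1] - crop_w + 1)
    crop = image[i:i + crop_h, j:j + crop_w]
    assert any((crop == w).all() for w in windows)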
- -import unittest - -import numpy as np -from op_test import OpTest - -from paddle import base - - -class TestSpaceToDepthOp(OpTest): - @staticmethod - def helper(in_, width, height, channel, batch, blocksize, forward, out_): - channel_out = channel // (blocksize * blocksize) - for b in range(batch): - for k in range(channel): - for j in range(height): - for i in range(width): - in_index = i + width * (j + height * (k + channel * b)) - channel2 = k % channel_out - offset = k // channel_out - width2 = i * blocksize + offset % blocksize - height2 = j * blocksize + offset // blocksize - out_index = width2 + width * blocksize * ( - height2 - + height * blocksize * (channel2 + channel_out * b) - ) - if forward: - out_[out_index] = in_[in_index] - else: - out_[in_index] = in_[out_index] - - def setUp(self): - self.init_data() - - self.op_type = "space_to_depth" - self.inputs = {"X": self.x} - self.helper( - self.x_1d, - self.x.shape[3], - self.x.shape[2], - self.x.shape[1], - self.x.shape[0], - self.blocksize, - self.forward, - self.out_1d, - ) - self.out = np.reshape(self.out_1d, self.infered_shape) - self.attrs = {"blocksize": self.blocksize} - self.outputs = {"Out": self.out} - - def init_data(self): - self.ori_shape = (32, 12, 6, 6) - self.infered_shape = (32, 48, 3, 3) - self.one_d_len = 32 * 48 * 3 * 3 - - self.blocksize = 2 - self.x = np.random.random(self.ori_shape).astype('float64') - self.x_1d = np.reshape(self.x, self.one_d_len) - self.out = np.zeros(self.infered_shape).astype('float64') - self.out_1d = np.reshape(self.out, self.one_d_len) - self.forward = 1 - - def test_check_output(self): - place = ( - base.core.CUDAPlace(0) - if base.core.is_compiled_with_cuda() - else base.core.CPUPlace() - ) - self.check_output_with_place( - place=place, atol=1e-5, no_check_set=None, equal_nan=False - ) - - def test_check_grad(self): - place = ( - base.core.CUDAPlace(0) - if base.core.is_compiled_with_cuda() - else base.core.CPUPlace() - ) - self.check_grad_with_place(place, ['X'], 'Out') - - -class TestSpaceToDepthOpBasic(TestSpaceToDepthOp): - def init_data(self): - self.ori_shape = (32, 8, 6, 6) - self.infered_shape = (32, 32, 3, 3) - self.one_d_len = 32 * 32 * 3 * 3 - - self.blocksize = 2 - self.x = np.random.random(self.ori_shape).astype('float64') - self.x_1d = np.reshape(self.x, self.one_d_len) - self.out = np.zeros(self.infered_shape).astype('float64') - self.out_1d = np.reshape(self.out, self.one_d_len) - self.forward = 1 - - -class TestSpaceToDepthOpDoubleBasic(TestSpaceToDepthOp): - def init_data(self): - self.ori_shape = (32, 8, 6, 6) - self.infered_shape = (32, 32, 3, 3) - self.one_d_len = 32 * 32 * 3 * 3 - - self.blocksize = 2 - self.x = np.random.random(self.ori_shape).astype('float64') - self.x_1d = np.reshape(self.x, self.one_d_len) - self.out = np.zeros(self.infered_shape).astype('float64') - self.out_1d = np.reshape(self.out, self.one_d_len) - self.forward = 1 - - -class TestSpaceToDepthOpWithStride3(TestSpaceToDepthOp): - def init_data(self): - self.ori_shape = (32, 9, 6, 6) - self.infered_shape = (32, 81, 2, 2) - self.one_d_len = 32 * 81 * 2 * 2 - - self.blocksize = 3 - self.x = np.random.random(self.ori_shape).astype('float64') - self.x_1d = np.reshape(self.x, self.one_d_len) - self.out = np.zeros(self.infered_shape).astype('float64') - self.out_1d = np.reshape(self.out, self.one_d_len) - self.forward = 1 - - -class TestSpaceToDepthOpWithNotSquare(TestSpaceToDepthOp): - def init_data(self): - self.ori_shape = (32, 9, 9, 6) - self.infered_shape = (32, 81, 3, 2) - 
self.one_d_len = 32 * 81 * 3 * 2 - - self.blocksize = 3 - self.x = np.random.random(self.ori_shape).astype('float64') - self.x_1d = np.reshape(self.x, self.one_d_len) - self.out = np.zeros(self.infered_shape).astype('float64') - self.out_1d = np.reshape(self.out, self.one_d_len) - self.forward = 1 - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_squared_l2_distance_op.py b/test/legacy_test/test_squared_l2_distance_op.py deleted file mode 100644 index 579681ab0c098..0000000000000 --- a/test/legacy_test/test_squared_l2_distance_op.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - - -class TestSquaredL2DistanceOp_f0(OpTest): - def setUp(self): - self.op_type = "squared_l2_distance" - self.inputs = { - 'X': np.random.uniform(0.1, 0.6, (5, 20)).astype("float32"), - 'Y': np.random.uniform(0.1, 0.6, (5, 20)).astype("float32"), - } - sub_res = self.inputs['X'] - self.inputs['Y'] - output = sub_res * sub_res - self.outputs = { - 'sub_result': sub_res, - 'Out': np.expand_dims(output.sum(1), 1), - } - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['X', 'Y'], 'Out') - - -class TestSquaredL2DistanceOp_f1(OpTest): - def setUp(self): - self.op_type = "squared_l2_distance" - self.inputs = { - 'X': np.random.uniform(0.1, 0.6, (2, 3)).astype("float32"), - 'Y': np.random.uniform(0.1, 0.6, (1, 3)).astype("float32"), - } - sub_res = self.inputs['X'] - self.inputs['Y'] - output = sub_res * sub_res - self.outputs = { - 'sub_result': sub_res, - 'Out': np.expand_dims(output.sum(1), 1), - } - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['X', 'Y'], 'Out') - - -class TestSquaredL2DistanceOp_f2(OpTest): - def setUp(self): - self.op_type = "squared_l2_distance" - self.inputs = { - 'X': np.random.uniform(0.1, 0.6, (2, 3, 4)).astype("float32"), - 'Y': np.random.uniform(0.1, 0.6, (1, 3, 4)).astype("float32"), - } - sub_res = self.inputs['X'] - self.inputs['Y'] - sub_res = sub_res.reshape((2, 3 * 4)) - output = sub_res * sub_res - self.outputs = { - 'sub_result': sub_res, - 'Out': np.expand_dims(output.sum(1), 1), - } - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['X', 'Y'], 'Out') - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/test_tree_conv_op.py b/test/legacy_test/test_tree_conv_op.py deleted file mode 100644 index e05ee1a4d4cdf..0000000000000 --- a/test/legacy_test/test_tree_conv_op.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
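For reference, the deleted squared_l2_distance tests and the gradient kernel removed earlier in this patch form a simple forward/backward pair. A minimal numpy sketch of both, assuming the broadcast case used in the f1/f2 tests (illustrative only, not repository code):

    # Forward, as in the deleted tests: sub_result = X - Y, Out[i] = sum_j sub[i, j]**2.
    # Backward, as in the deleted grad kernel: dX = 2 * dOut * sub_result, and dY is
    # either -dX row-for-row, or the column sum of -dX when Y was broadcast over rows.
    import numpy as np

    X = np.random.uniform(0.1, 0.6, (5, 20)).astype('float32')
    Y = np.random.uniform(0.1, 0.6, (1, 20)).astype('float32')   # broadcast case

    sub = X - Y
    out = (sub * sub).sum(axis=1, keepdims=True)

    d_out = np.ones_like(out)                  # upstream gradient
    d_x = 2.0 * d_out * sub
    d_y = -d_x if Y.shape[0] == X.shape[0] else -d_x.sum(axis=0, keepdims=True)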
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - - -def collect_node_patch(og, max_depth): - """ - The naive method to construct patches - :param og: original graph - :param max_depth: the depth of convolution filters - :return: convolution patches - """ - - def gen(node, max_depth): - collected = [(node, 1, 1, 0, max_depth)] - - def recurse_helper(node, depth): - if depth > max_depth: - return - l = len(og[node]) - for idx, c in enumerate(og[node], 1): - if depth + 1 < max_depth: - collected.append((c, idx, l, depth + 1, max_depth)) - recurse_helper(c, depth + 1) - - recurse_helper(node, 0) - return collected - - res = [] - for u in range(1, len(og)): - lis = gen(u, max_depth) - if len(lis) > 0: - res.append(lis) - return res - - -class TestTreeConvOp(OpTest): - def setUp(self): - self.n = 17 - self.fea_size = 3 - self.output_size = 1 - self.max_depth = 2 - self.batch_size = 2 - self.num_filters = 1 - adj_array = [ - 1, - 2, - 1, - 3, - 1, - 4, - 1, - 5, - 2, - 6, - 2, - 7, - 2, - 8, - 4, - 9, - 4, - 10, - 5, - 11, - 6, - 12, - 6, - 13, - 9, - 14, - 9, - 15, - 9, - 16, - 9, - 17, - ] - adj = np.array(adj_array).reshape((1, self.n - 1, 2)).astype('int32') - adj = np.tile(adj, (self.batch_size, 1, 1)) - self.op_type = 'tree_conv' - vectors = np.random.random( - (self.batch_size, self.n, self.fea_size) - ).astype('float64') - self.inputs = { - 'EdgeSet': adj, - 'NodesVector': vectors, - 'Filter': np.random.random( - (self.fea_size, 3, self.output_size, self.num_filters) - ).astype('float64'), - } - self.attrs = {'max_depth': self.max_depth} - vectors = [] - for i in range(self.batch_size): - vector = self.get_output_naive(i) - vectors.append(vector) - self.outputs = { - 'Out': np.array(vectors).astype('float64'), - } - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad( - ['NodesVector', 'Filter'], 'Out', max_relative_error=0.5 - ) - - def get_output_naive(self, batch_id): - og = [[] for i in range(1, self.n + 2)] - st = np.array(self.inputs['EdgeSet'][batch_id]).tolist() - for e in st: - og[e[0]].append(e[1]) - patches = collect_node_patch(og, self.max_depth) - W = np.array(self.inputs['Filter']).astype('float64') - W = np.transpose(W, axes=[1, 0, 2, 3]) - vec = [] - for i, patch in enumerate(patches, 1): - result = np.zeros((1, W.shape[2], W.shape[3])) - for v in patch: - eta_t = float(v[4] - v[3]) / float(v[4]) - eta_l = (1.0 - eta_t) * ( - 0.5 if v[2] == 1 else float(v[1] - 1.0) / float(v[2] - 1.0) - ) - eta_r = (1.0 - eta_t) * (1.0 - eta_l) - x = self.inputs['NodesVector'][batch_id][v[0] - 1] - eta = ( - np.array([eta_l, eta_r, eta_t]) - .reshape((3, 1)) - .astype('float64') - ) - Wconvi = np.tensordot(eta, W, axes=([0], [0])) - x = np.array(x).reshape((1, 1, self.fea_size)) - res = np.tensordot(x, Wconvi, axes=2) - result = result + res - vec.append(result) - vec = np.concatenate(vec, axis=0) - vec = np.concatenate( - [ - vec, - np.zeros( - (self.n - vec.shape[0], W.shape[2], W.shape[3]), - dtype='float64', - ), - ], - axis=0, - ) - return vec - - -if __name__ == "__main__": - unittest.main() diff --git 
a/test/white_list/check_shape_white_list.py b/test/white_list/check_shape_white_list.py index db5a710867277..c01909ba97f9d 100644 --- a/test/white_list/check_shape_white_list.py +++ b/test/white_list/check_shape_white_list.py @@ -23,8 +23,6 @@ 'matmul', 'scatter', 'soft_relu', - 'squared_l2_distance', - 'tree_conv', 'cvm', 'cudnn_lstm', 'rnn', diff --git a/test/white_list/compile_vs_runtime_white_list.py b/test/white_list/compile_vs_runtime_white_list.py index a00c1a720aa5c..0c74eb327a853 100644 --- a/test/white_list/compile_vs_runtime_white_list.py +++ b/test/white_list/compile_vs_runtime_white_list.py @@ -29,7 +29,6 @@ 'gru', 'rpn_target_assign', 'retinanet_target_assign', - 'filter_by_instag', 'im2sequence', 'generate_proposal_labels', 'detection_map', diff --git a/test/white_list/no_grad_set_white_list.py b/test/white_list/no_grad_set_white_list.py index 33960cf4c64d3..81bb85bb784d3 100644 --- a/test/white_list/no_grad_set_white_list.py +++ b/test/white_list/no_grad_set_white_list.py @@ -44,7 +44,6 @@ 'elementwise_pow', 'elementwise_fmin', 'elementwise_fmax', - 'filter_by_instag', 'fused_elemwise_activation', 'fused_emb_seq_pool', 'fused_embedding_seq_pool', diff --git a/test/white_list/op_accuracy_white_list.py b/test/white_list/op_accuracy_white_list.py index 49b501e765b54..b3855c417ec40 100644 --- a/test/white_list/op_accuracy_white_list.py +++ b/test/white_list/op_accuracy_white_list.py @@ -68,7 +68,6 @@ 'smooth_l1_loss', 'softmax', 'spectral_norm', - 'squared_l2_distance', 'squared_l2_norm', 'tanh', 'mish', diff --git a/tools/enforce/count_enforce_by_file.sh b/tools/enforce/count_enforce_by_file.sh index fafc3516904d8..c79d486c62838 100644 --- a/tools/enforce/count_enforce_by_file.sh +++ b/tools/enforce/count_enforce_by_file.sh @@ -53,7 +53,6 @@ fi FILE_WHITE_LIST="\ box_clip_op.cc \ box_clip_op.h \ - random_crop_op.h \ elementwise_op_function.cu.h \ fused_elemwise_activation_op.cc \ auc_op.cu \ diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index c22938e27d150..14f5e4c4bd1ec 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -298,7 +298,6 @@ 'test_ps_dispatcher', 'test_analyzer_rnn2', 'test_multi_gru_seq_fuse_pass', - 'test_filter_by_instag_op', 'test_switch', 'test_matmul_transpose_reshape_fuse_pass', 'test_mkldnn_caching', @@ -915,7 +914,6 @@ 'test_transformer', 'test_for_enumerate', 'test_variable_trans_func', - 'test_squared_l2_distance_op', 'test_quantize_transpiler_v2', 'test_im2sequence_op', 'test_reader_reset', @@ -1101,7 +1099,6 @@ 'test_empty_like_op', 'test_imperative_layer_children', 'nccl_op_test', - 'test_tree_conv_op', 'test_share_data_op', 'test_ir_memory_optimize_transformer', 'test_math_op_patch', @@ -1382,7 +1379,6 @@ 'test_complex_abs', 'test_gradient_accmulator', 'test_instance_norm_op_v2', - 'test_random_crop_op', 'test_mobile_net', 'test_parallel_executor_transformer', 'test_tensor_scalar_type_promotion_dynamic', @@ -1530,7 +1526,6 @@ 'test_imperative_transformer_sorted_gradient', 'test_bicubic_interp_v2_op', 'test_rank_attention_op', - 'test_space_to_depth_op', 'test_image_classification', 'test_custom_relu_op_setup', 'test_sgd_op', @@ -1870,7 +1865,6 @@ 'test_fleet', 'test_flags_use_mkldnn', 'test_flags_mkldnn_ops_on_off', - 'test_filter_by_instag_op', 'test_fetch_var', 'test_fetch_handler', 'test_feed_fetch_method', @@ -2659,7 +2653,6 @@ 'test_unfold_op', 'test_conv_bn_fuse_pass', 'test_truncated_gaussian_random_op', - 'test_tree_conv_op', 'test_traced_layer_err_msg', 'test_unique_with_counts', 
'test_auc_single_pred_op', @@ -2691,7 +2684,6 @@ 'test_optimizer', 'test_deformable_conv_op', 'test_py_reader_push_pop', - 'test_random_crop_op', 'test_shuffle_channel_op', 'test_center_loss', 'test_temporal_shift_op', @@ -2708,13 +2700,10 @@ 'test_top_k_op', 'test_batch_fc_op', 'test_tensor_scalar_type_promotion_static', - 'test_squared_l2_distance_op', 'test_bicubic_interp_op', 'test_spp_op', - 'test_space_to_depth_op', 'test_callbacks', 'test_sigmoid_focal_loss_op', - 'test_collect_fpn_proposals_op', 'test_sequence_unpad_op', 'test_conv1d_transpose_layer', 'test_sequence_pool', diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index a7e7ad08e3ab1..228218e46ecf4 100755 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -39,8 +39,6 @@ 'test_sequence_scatter_op', 'test_sequence_slice_op', 'test_slice_op', - 'test_space_to_depth_op', - 'test_squared_l2_distance_op', 'test_accuracy_op', 'test_activation_nn_grad', 'test_adadelta_op', @@ -202,7 +200,6 @@ 'test_fill_constant_op', 'test_fill_op', 'test_fill_zeros_like_op', - 'test_filter_by_instag_op', 'test_flatten2_op', 'test_flatten_contiguous_range_op', 'test_flatten_op', @@ -412,7 +409,6 @@ 'test_queue', 'test_randint_op', 'test_randn_op', - 'test_random_crop_op', 'test_randperm_op', 'test_range', 'test_rank_loss_op', @@ -492,7 +488,6 @@ 'test_trace_op', 'test_trainable', 'test_transpose_op', - 'test_tree_conv_op', 'test_tril_triu_op', 'test_trilinear_interp_op', 'test_trilinear_interp_v2_op', diff --git a/tools/windows/run_unittests.sh b/tools/windows/run_unittests.sh index e7c05f2768a83..2b8073c8b5ccf 100644 --- a/tools/windows/run_unittests.sh +++ b/tools/windows/run_unittests.sh @@ -217,7 +217,6 @@ long_time_test="^test_gru_op$|\ ^test_gather_op$|\ ^test_gather_nd_op$|\ ^test_sequence_conv$|\ -^test_space_to_depth_op$|\ ^test_activation_nn_grad$|\ ^test_activation_op$|\ ^test_bicubic_interp_v2_op$|\