diff --git a/paddle/fluid/operators/fill_op.cc b/paddle/fluid/operators/fill_op.cc index bcb7081847111..aeefe07d348e9 100644 --- a/paddle/fluid/operators/fill_op.cc +++ b/paddle/fluid/operators/fill_op.cc @@ -78,9 +78,3 @@ REGISTER_OPERATOR( ops::FillOpVarTypeInference, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); -REGISTER_OP_CPU_KERNEL(fill, - ops::FillKernel, - ops::FillKernel, - ops::FillKernel, - ops::FillKernel, - ops::FillKernel); diff --git a/paddle/fluid/operators/fill_op.cu.cc b/paddle/fluid/operators/fill_op.cu.cc deleted file mode 100644 index 1a22f53898cf0..0000000000000 --- a/paddle/fluid/operators/fill_op.cu.cc +++ /dev/null @@ -1,24 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/fill_op.h" - -namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(fill, - ops::FillKernel, - ops::FillKernel, - ops::FillKernel, - ops::FillKernel, - ops::FillKernel, - ops::FillKernel); diff --git a/paddle/fluid/operators/fill_op.h b/paddle/fluid/operators/fill_op.h index 5f4f993bec20b..086ad44e024b0 100644 --- a/paddle/fluid/operators/fill_op.h +++ b/paddle/fluid/operators/fill_op.h @@ -42,42 +42,5 @@ struct FillOpVisitor { const std::vector &value_; }; -template -class FillKernel : public framework::OpKernel { - public: - void Compute(const paddle::framework::ExecutionContext &ctx) const override { - auto &out = GET_DATA_SAFELY( - ctx.Output("Out"), "Output", "Out", "Fill"); - out.Resize(phi::make_ddim(ctx.Attr>("shape"))); - auto dtype = - static_cast(ctx.Attr("dtype")); - auto phi_dtype = framework::TransToPhiDataType(dtype); - platform::CPUPlace cpu; - auto force_cpu = ctx.Attr("force_cpu"); - out.mutable_data(force_cpu ? cpu : ctx.GetPlace(), phi_dtype); - - phi::DenseTensor tensor; - - if (force_cpu || platform::is_cpu_place(ctx.GetPlace())) { - tensor.ShareDataWith(out); - } else { - // Always make tensor in CPU memory. - tensor.Resize(out.dims()); - tensor.mutable_data(cpu, phi_dtype); - } - - framework::VisitDataType( - dtype, FillOpVisitor(&tensor, ctx.Attr>("value"))); - - if (!force_cpu && platform::is_gpu_place(ctx.GetPlace())) { - // Copy tensor to out - framework::TensorCopy( - tensor, - ctx.GetPlace(), - ctx.template device_context(), - &out); - } - } -}; } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.h b/paddle/fluid/operators/fused/fused_bn_activation_op.h index c82c9aca413c3..3b04eb1bc59ed 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.h +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.h @@ -88,17 +88,5 @@ class FusedBatchNormActOpInferVarType } }; -template -class FusedBatchNormActKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override; -}; - -template -class FusedBatchNormActGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override; -}; - } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index 337a4e3178956..29d2807b23970 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -402,231 +402,6 @@ static inline void GetPaddings(int* paddings, std::copy(pads.begin(), pads.end(), paddings); } } - -template -class Pad2dCPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - std::array pads; - GetPaddings(pads.data(), context); - auto mode = context.Attr("mode"); - auto data_format = context.Attr("data_format"); - T value = static_cast(context.Attr("pad_value")); - - auto* x = context.Input("X"); - auto in_dims = x->dims(); - const T* in_data = x->data(); - - auto* out = context.Output("Out"); - if (data_format == "NCHW") { - out->Resize({in_dims[0], - in_dims[1], - in_dims[2] + pads[0] + pads[1], - in_dims[3] + pads[2] + pads[3]}); - } else { - out->Resize({in_dims[0], - in_dims[1] + pads[0] + pads[1], - in_dims[2] + pads[2] + pads[3], - in_dims[3]}); - } - auto out_dims = out->dims(); - T* out_data = out->mutable_data(context.GetPlace()); - - const int pad_top = pads[0]; - const int pad_left = pads[2]; - const int num = static_cast(in_dims[0]); - if (data_format == "NCHW") { - const int channels = static_cast(in_dims[1]); - const int in_height = static_cast(in_dims[2]); - const int in_width = static_cast(in_dims[3]); - const int out_height = static_cast(out_dims[2]); - const int out_width = static_cast(out_dims[3]); - if (mode == "reflect") { - Pad2DReflectNCHW(in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - out_data); - } else if (mode == "edge") { - Pad2DEdgeNCHW(in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - out_data); - } else { - Pad2DConstNCHW(in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - value, - out_data); - } - } else { - const int channels = static_cast(in_dims[3]); - const int in_height = static_cast(in_dims[1]); - const int in_width = static_cast(in_dims[2]); - const int out_height = static_cast(out_dims[1]); - const int out_width = static_cast(out_dims[2]); - if (mode == "reflect") { - Pad2DReflectNHWC(in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - out_data); - } else if (mode == "edge") { - Pad2DEdgeNHWC(in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - out_data); - } else { - Pad2DConstNHWC(in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - value, - out_data); - } - } - } -}; - -template -class Pad2dGradCPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - std::array pads; - GetPaddings(pads.data(), context); - auto mode = context.Attr("mode"); - auto data_format = context.Attr("data_format"); - auto* d_out = - context.Input(framework::GradVarName("Out")); - auto* d_in = context.Output(framework::GradVarName("X")); - auto d_in_dims = d_in->dims(); - auto d_out_dims = d_out->dims(); - const T* d_out_data = d_out->data(); - T* d_in_data = d_in->mutable_data(context.GetPlace()); - phi::funcs::SetConstant set_zero; - set_zero(context.template device_context(), - d_in, - static_cast(0)); - const int pad_top = pads[0]; - const int pad_left = pads[2]; - const int num = static_cast(d_in_dims[0]); - if (data_format == "NCHW") { - const int channels = static_cast(d_in_dims[1]); - const int in_height = static_cast(d_in_dims[2]); - const int in_width = static_cast(d_in_dims[3]); - const int out_height = static_cast(d_out_dims[2]); - const int out_width = static_cast(d_out_dims[3]); - if (mode == "reflect") { - Pad2DGradReflectNCHW(d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } else if (mode == "edge") { - Pad2DGradEdgeNCHW(d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } else { - Pad2DGradConstNCHW(d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } - } else { - const int channels = static_cast(d_in_dims[3]); - const int in_height = static_cast(d_in_dims[1]); - const int in_width = static_cast(d_in_dims[2]); - const int out_height = static_cast(d_out_dims[1]); - const int out_width = static_cast(d_out_dims[2]); - if (mode == "reflect") { - Pad2DGradReflectNHWC(d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } else if (mode == "edge") { - Pad2DGradEdgeNHWC(d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } else { - Pad2DGradConstNHWC(d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } - } - } -}; - class Pad2dOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -872,8 +647,3 @@ REGISTER_OPERATOR(pad2d, REGISTER_OPERATOR(pad2d_grad, ops::Pad2dOpGrad, ops::Pad2dOpGradNoNeedBufferVarsInferer); - -PD_REGISTER_STRUCT_KERNEL( - pad2d, CPU, ALL_LAYOUT, ops::Pad2dCPUKernel, float, double, int, int64_t) {} -PD_REGISTER_STRUCT_KERNEL( - pad2d_grad, CPU, ALL_LAYOUT, ops::Pad2dGradCPUKernel, float, double) {} diff --git a/paddle/fluid/operators/pad2d_op.cu b/paddle/fluid/operators/pad2d_op.cu deleted file mode 100644 index b8263ea6bb169..0000000000000 --- a/paddle/fluid/operators/pad2d_op.cu +++ /dev/null @@ -1,636 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/phi/backends/gpu/gpu_primitives.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -using phi::PADDLE_CUDA_NUM_THREADS; - -template -__global__ void Pad2DConstNCHW(const int nthreads, - const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T value, - T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int nc = index / out_width; - const int out_w = index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - out_data[index] = - (in_h < 0 || in_w < 0 || in_h >= in_height || in_w >= in_width) - ? value - : in_data[(nc * in_height + in_h) * in_width + in_w]; - } -} - -template -__global__ void Pad2DConstNHWC(const int nthreads, - const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T value, - T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int n = index / channels; - const int c = index % channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - const int in_h = out_h - pad_top; - const int in_w = out_w - pad_left; - out_data[index] = - (in_h < 0 || in_w < 0 || in_h >= in_height || in_w >= in_width) - ? value - : in_data[((n * in_height + in_h) * in_width + in_w) * channels + - c]; - } -} - -template -__global__ void Pad2DReflectNCHW(const int nthreads, - const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int nc = index / out_width; - const int out_w = index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - in_h = max(in_h, -in_h); // reflect by 0 - in_h = min(in_h, 2 * in_height - in_h - 2); // reflect by in_height - in_w = max(in_w, -in_w); // reflect by 0 - in_w = min(in_w, 2 * in_width - in_w - 2); // reflect by in_width - out_data[index] = in_data[(nc * in_height + in_h) * in_width + in_w]; - } -} - -template -__global__ void Pad2DReflectNHWC(const int nthreads, - const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int n = index / channels; - const int c = index % channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - in_h = max(in_h, -in_h); - in_h = min(in_h, 2 * in_height - in_h - 2); - in_w = max(in_w, -in_w); - in_w = min(in_w, 2 * in_width - in_w - 2); - out_data[index] = - in_data[((n * in_height + in_h) * in_width + in_w) * channels + c]; - } -} - -template -__global__ void Pad2DEdgeNCHW(const int nthreads, - const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int nc = index / out_width; - const int out_w = index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - int in_h = min(in_height - 1, max(out_h - pad_top, 0)); - int in_w = min(in_width - 1, max(out_w - pad_left, 0)); - out_data[index] = in_data[(nc * in_height + in_h) * in_width + in_w]; - } -} - -template -__global__ void Pad2DEdgeNHWC(const int nthreads, - const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int n = index / channels; - const int c = index % channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - int in_h = min(in_height - 1, max(out_h - pad_top, 0)); - int in_w = min(in_width - 1, max(out_w - pad_left, 0)); - out_data[index] = - in_data[((n * in_height + in_h) * in_width + in_w) * channels + c]; - } -} - -template -__global__ void Pad2DGradConstNCHW(const int in_size, - T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - const T* d_out_data) { - CUDA_KERNEL_LOOP(in_index, in_size) { - int nc = in_index / in_width; - const int out_w = in_index % in_width + pad_left; - const int out_h = nc % in_height + pad_top; - nc /= in_height; - d_in_data[in_index] = - d_out_data[(nc * out_height + out_h) * out_width + out_w]; - } -} - -template -__global__ void Pad2DGradConstNHWC(const int in_size, - T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - const T* d_out_data) { - CUDA_KERNEL_LOOP(in_index, in_size) { - int n = in_index / channels; - const int c = in_index % channels; - const int out_w = n % in_width + pad_left; - n /= in_width; - const int out_h = n % in_height + pad_top; - n /= in_height; - d_in_data[in_index] = - d_out_data[((n * out_height + out_h) * out_width + out_w) * channels + - c]; - } -} - -template -__global__ void Pad2DGradReflectNCHW(const int out_size, - T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - const T* d_out_data) { - CUDA_KERNEL_LOOP(out_index, out_size) { - int nc = out_index / out_width; - const int out_w = out_index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - in_h = max(in_h, -in_h); - in_w = max(in_w, -in_w); - in_h = min(in_h, 2 * in_height - in_h - 2); - in_w = min(in_w, 2 * in_width - in_w - 2); - phi::CudaAtomicAdd(&d_in_data[(nc * in_height + in_h) * in_width + in_w], - d_out_data[out_index]); - } -} - -template -__global__ void Pad2DGradReflectNHWC(const int out_size, - T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - const T* d_out_data) { - CUDA_KERNEL_LOOP(out_index, out_size) { - const int c = out_index % channels; - int n = out_index / channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - in_h = max(in_h, -in_h); - in_w = max(in_w, -in_w); - in_h = min(in_h, in_height * 2 - in_h - 2); - in_w = min(in_w, in_width * 2 - in_w - 2); - phi::CudaAtomicAdd( - &d_in_data[((n * in_height + in_h) * in_width + in_w) * channels + c], - d_out_data[out_index]); - } -} - -template -__global__ void Pad2DGradEdgeNCHW(const int out_size, - T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - const T* d_out_data) { - CUDA_KERNEL_LOOP(out_index, out_size) { - int nc = out_index / out_width; - const int out_w = out_index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - const int in_h = min(in_height - 1, max(out_h - pad_top, 0)); - const int in_w = min(in_width - 1, max(out_w - pad_left, 0)); - phi::CudaAtomicAdd(&d_in_data[(nc * in_height + in_h) * in_width + in_w], - d_out_data[out_index]); - } -} - -template -__global__ void Pad2DGradEdgeNHWC(const int out_size, - T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - const T* d_out_data) { - CUDA_KERNEL_LOOP(out_index, out_size) { - const int c = out_index % channels; - int n = out_index / channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - const int in_h = min(in_height - 1, max(out_h - pad_top, 0)); - const int in_w = min(in_width - 1, max(out_w - pad_left, 0)); - phi::CudaAtomicAdd( - &d_in_data[((n * in_height + in_h) * in_width + in_w) * channels + c], - d_out_data[out_index]); - } -} - -static inline void GetPaddings(int* paddings, - const framework::ExecutionContext& context) { - auto* paddings_t = context.Input("Paddings"); - if (paddings_t) { - phi::DenseTensor pads; - framework::TensorCopySync(*paddings_t, platform::CPUPlace(), &pads); - auto pads_data = pads.data(); - paddings[0] = pads_data[0]; - paddings[1] = pads_data[1]; - paddings[2] = pads_data[2]; - paddings[3] = pads_data[3]; - } else { - auto pads = context.Attr>("paddings"); - std::copy(pads.begin(), pads.end(), paddings); - } -} - -template -class Pad2dCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - int pads[4]; - GetPaddings(pads, context); - auto mode = context.Attr("mode"); - auto data_format = context.Attr("data_format"); - T value = static_cast(context.Attr("pad_value")); - - auto* x = context.Input("X"); - auto in_dims = x->dims(); - const T* in_data = x->data(); - auto* out = context.Output("Out"); - auto out_dims = out->dims(); - if (data_format == "NCHW") { - out_dims[0] = in_dims[0]; - out_dims[1] = in_dims[1]; - out_dims[2] = in_dims[2] + pads[0] + pads[1]; - out_dims[3] = in_dims[3] + pads[2] + pads[3]; - } else { - out_dims[0] = in_dims[0]; - out_dims[1] = in_dims[1] + pads[0] + pads[1]; - out_dims[2] = in_dims[2] + pads[2] + pads[3]; - out_dims[3] = in_dims[3]; - } - T* out_data = out->mutable_data(out_dims, context.GetPlace()); - const int pad_top = pads[0]; - const int pad_left = pads[2]; - const int num = in_dims[0]; - - auto stream = context.cuda_device_context().stream(); - int block = PADDLE_CUDA_NUM_THREADS; - const int out_size = out->numel(); - int grid = (out_size + block - 1) / block; - - if (data_format == "NCHW") { - const int channels = in_dims[1]; - const int in_height = in_dims[2]; - const int in_width = in_dims[3]; - const int out_height = out_dims[2]; - const int out_width = out_dims[3]; - if (mode == "reflect") { - Pad2DReflectNCHW<<>>(out_size, - in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - out_data); - } else if (mode == "edge") { - Pad2DEdgeNCHW<<>>(out_size, - in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - out_data); - } else { - Pad2DConstNCHW<<>>(out_size, - in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - value, - out_data); - } - } else { - const int channels = in_dims[3]; - const int in_height = in_dims[1]; - const int in_width = in_dims[2]; - const int out_height = out_dims[1]; - const int out_width = out_dims[2]; - if (mode == "reflect") { - Pad2DReflectNHWC<<>>(out_size, - in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - out_data); - } else if (mode == "edge") { - Pad2DEdgeNHWC<<>>(out_size, - in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - out_data); - } else { - Pad2DConstNHWC<<>>(out_size, - in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - value, - out_data); - } - } - } -}; - -template -class Pad2dGradCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - int pads[4]; - GetPaddings(pads, context); - auto mode = context.Attr("mode"); - auto data_format = context.Attr("data_format"); - auto* d_out = - context.Input(framework::GradVarName("Out")); - auto* d_in = context.Output(framework::GradVarName("X")); - auto d_in_dims = d_in->dims(); - auto d_out_dims = d_out->dims(); - const T* d_out_data = d_out->data(); - T* d_in_data = d_in->mutable_data(context.GetPlace()); - - phi::funcs::SetConstant set_zero; - set_zero(context.template device_context(), - d_in, - static_cast(0)); - - const int pad_top = pads[0]; - const int pad_left = pads[2]; - const int num = d_in_dims[0]; - - auto stream = context.cuda_device_context().stream(); - int block = PADDLE_CUDA_NUM_THREADS; - const int out_size = d_out->numel(); - const int in_size = d_in->numel(); - int grid = (out_size + block - 1) / block; - - if (data_format == "NCHW") { - const int channels = d_in_dims[1]; - const int in_height = d_in_dims[2]; - const int in_width = d_in_dims[3]; - const int out_height = d_out_dims[2]; - const int out_width = d_out_dims[3]; - if (mode == "reflect") { - Pad2DGradReflectNCHW<<>>(out_size, - d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } else if (mode == "edge") { - Pad2DGradEdgeNCHW<<>>(out_size, - d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } else { - grid = (in_size + block - 1) / block; - Pad2DGradConstNCHW<<>>(in_size, - d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } - } else { - const int channels = d_in_dims[3]; - const int in_height = d_in_dims[1]; - const int in_width = d_in_dims[2]; - const int out_height = d_out_dims[1]; - const int out_width = d_out_dims[2]; - if (mode == "reflect") { - Pad2DGradReflectNHWC<<>>(out_size, - d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } else if (mode == "edge") { - Pad2DGradEdgeNHWC<<>>(out_size, - d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } else { - grid = (in_size + block - 1) / block; - Pad2DGradConstNHWC<<>>(in_size, - d_in_data, - num, - channels, - in_height, - in_width, - out_height, - out_width, - pad_top, - pad_left, - d_out_data); - } - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -PD_REGISTER_STRUCT_KERNEL(pad2d, - GPU, - ALL_LAYOUT, - ops::Pad2dCUDAKernel, - float, - double, - int, - int64_t, - plat::float16) {} -PD_REGISTER_STRUCT_KERNEL(pad2d_grad, - GPU, - ALL_LAYOUT, - ops::Pad2dGradCUDAKernel, - float, - double, - plat::float16) {} diff --git a/test/ir/inference/test_mkldnn_pad2d_op.py b/test/ir/inference/test_mkldnn_pad2d_op.py deleted file mode 100644 index cb3b14ab3355d..0000000000000 --- a/test/ir/inference/test_mkldnn_pad2d_op.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -from functools import partial - -import hypothesis.strategies as st -import numpy as np -from auto_scan_test import MkldnnAutoScanTest -from hypothesis import given -from program_config import OpConfig, ProgramConfig, TensorConfig - - -class TestOneDNNPad2DOp(MkldnnAutoScanTest): - def sample_program_configs(self, *args, **kwargs): - def generate_input(*args, **kwargs): - return np.random.random(kwargs['in_shape']).astype(np.float32) - - pad3d_op = OpConfig( - type="pad2d", - inputs={"X": ["input_data"]}, - outputs={"Out": ["output_data"]}, - attrs={ - "mode": "constant", - "data_format": kwargs['data_format'], - "paddings": kwargs['paddings'], - }, - ) - - program_config = ProgramConfig( - ops=[pad3d_op], - weights={}, - inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input, *args, **kwargs) - ), - }, - outputs=["output_data"], - ) - - yield program_config - - def sample_predictor_configs(self, program_config): - config = self.create_inference_config(use_mkldnn=True) - yield config, (1e-5, 1e-5) - - @given( - data_format=st.sampled_from(['NCHW', 'NHWC']), - in_shape=st.sampled_from( - [[2, 3, 4, 5], [1, 4, 1, 3], [4, 3, 2, 1], [1, 1, 1, 1]] - ), - paddings=st.sampled_from( - [[0, 0, 0, 0], [1, 2, 0, 1], [2, 5, 11, 3], [0, 5, 0, 1]] - ), - ) - def test(self, *args, **kwargs): - self.run_test(quant=False, *args, **kwargs) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/CMakeLists.txt b/test/legacy_test/CMakeLists.txt index 8d88d7db8de1d..96a15b04ab8a2 100644 --- a/test/legacy_test/CMakeLists.txt +++ b/test/legacy_test/CMakeLists.txt @@ -521,7 +521,6 @@ set(TEST_OPS_WITH_GC test_lod_reset_op test_lookup_table_op test_mean_op - test_pad2d_op test_scatter_op test_slice_op) diff --git a/test/legacy_test/test_fill_op.py b/test/legacy_test/test_fill_op.py deleted file mode 100644 index 679ee25e041ab..0000000000000 --- a/test/legacy_test/test_fill_op.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op import Operator -from op_test import OpTest, convert_float_to_uint16 - -import paddle -from paddle import base -from paddle.base import core - - -class TestFillOp1(OpTest): - def setUp(self): - self.op_type = "fill" - self.init_dtype() - self.init_shape() - self.init_value() - self.inputs = {} - self.attrs = { - 'value': self.val.flatten().tolist(), - 'shape': self.shape, - 'dtype': int(core.VarDesc.VarType.FP64), - 'force_cpu': False, - } - self.outputs = {'Out': self.val.astype('float64')} - - def init_shape(self): - self.shape = [100, 200] - - def init_value(self): - self.val = np.random.random(size=self.shape) - - def init_dtype(self): - self.dtype = np.float64 - - def test_check_output(self): - self.check_output() - - -class TestFillOp2(OpTest): - def setUp(self): - self.op_type = "fill" - self.val = np.random.random(size=[100, 200]) - self.inputs = {} - self.attrs = { - 'value': self.val.flatten().tolist(), - 'shape': [100, 200], - 'dtype': int(core.VarDesc.VarType.FP64), - 'force_cpu': True, - } - self.outputs = {'Out': self.val.astype('float64')} - - def test_check_output(self): - self.check_output() - - -class TestFillOp3(unittest.TestCase): - def check_with_place(self, place, f_cpu): - scope = core.Scope() - # create Out Variable - out = scope.var('Out').get_tensor() - - # create and run fill_op operator - val = np.random.random(size=[300, 200]) - fill_op = Operator( - "fill", - value=val.flatten(), - shape=[300, 200], - dtype=int(core.VarDesc.VarType.FP32), - force_cpu=f_cpu, - Out='Out', - ) - fill_op.run(scope, place) - - # get result from Out - result_array = np.array(out) - full_array = np.array(val, 'float32') - - np.testing.assert_array_equal(result_array, full_array) - - def test_fill_op(self): - places = [core.CPUPlace()] - if core.is_compiled_with_cuda(): - places.append(core.CUDAPlace(0)) - - for place in places: - self.check_with_place(place, True) - self.check_with_place(place, False) - - -class TestFillFP16OP(TestFillOp1): - def init_dtype(self): - self.dtype = np.float16 - - -class TestFillInf(TestFillOp1): - def init_value(self): - self.val = np.full(fill_value=np.inf, shape=self.shape) - - -class TestFillOpError(unittest.TestCase): - def test_errors(self): - with base.dygraph.base.guard(): - - def test_nan_fill_value(): - tensor = paddle.zeros(shape=[100, 200]) - tensor.fill_(np.nan) - - self.assertRaises(ValueError, test_nan_fill_value) - - -@unittest.skipIf( - not core.is_compiled_with_cuda() - or not core.is_bfloat16_supported(core.CUDAPlace(0)), - "core is not complied with CUDA and not support the bfloat16", -) -class TestFillBF16OP(OpTest): - def setUp(self): - self.op_type = "fill" - self.dtype = np.uint16 - val = np.random.random(size=[100, 200]) - self.inputs = {} - self.attrs = { - 'value': val.flatten().tolist(), - 'shape': [100, 200], - 'dtype': int(core.VarDesc.VarType.BF16), - 'force_cpu': False, - } - self.outputs = {'Out': convert_float_to_uint16(val)} - - def test_check_output(self): - place = core.CUDAPlace(0) - self.check_output_with_place(place) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_pad2d_op.py b/test/legacy_test/test_pad2d_op.py deleted file mode 100644 index 8c1545ec718bc..0000000000000 --- a/test/legacy_test/test_pad2d_op.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - - -class TestPad2dOp(OpTest): - def setUp(self): - self.pad_value = 0.0 - self.variable_paddings = False - self.initTestCase() - self.op_type = "pad2d" - self.inputs = {'X': np.random.random(self.shape).astype("float64")} - self.attrs = {} - if self.variable_paddings: - self.attrs['paddings'] = [] - self.inputs['Paddings'] = ( - np.array(self.paddings).flatten().astype("int32") - ) - else: - self.attrs['paddings'] = ( - np.array(self.paddings).flatten().astype("int32") - ) - self.attrs['pad_value'] = self.pad_value - self.attrs['mode'] = self.mode - self.attrs['data_format'] = self.data_format - if self.data_format == "NCHW": - paddings = [ - (0, 0), - (0, 0), - (self.paddings[0], self.paddings[1]), - (self.paddings[2], self.paddings[3]), - ] - else: - paddings = [ - (0, 0), - (self.paddings[0], self.paddings[1]), - (self.paddings[2], self.paddings[3]), - (0, 0), - ] - if self.mode == "constant": - out = np.pad( - self.inputs['X'], - paddings, - mode=self.mode, - constant_values=self.pad_value, - ) - else: - out = np.pad(self.inputs['X'], paddings, mode=self.mode) - self.outputs = {'Out': out} - - def test_check_output(self): - self.check_output(check_dygraph=False) - - def test_check_grad_normal(self): - self.check_grad(['X'], 'Out', check_dygraph=False) - - def initTestCase(self): - self.shape = (2, 3, 4, 5) - self.paddings = [0, 1, 2, 3] - self.mode = "constant" - self.data_format = "NCHW" - self.pad_value = 0.0 - - -class TestCase1(TestPad2dOp): - def initTestCase(self): - self.shape = (2, 3, 4, 5) - self.paddings = [0, 1, 2, 3] - self.mode = "reflect" - self.data_format = "NCHW" - - -class TestCase2(TestPad2dOp): - def initTestCase(self): - self.shape = (2, 3, 4, 5) - self.paddings = [0, 1, 2, 3] - self.mode = "edge" - self.data_format = "NCHW" - - -class TestCase3(TestPad2dOp): - def initTestCase(self): - self.shape = (2, 4, 4, 4) - self.paddings = [0, 1, 2, 3] - self.mode = "reflect" - self.data_format = "NHWC" - - -class TestCase4(TestPad2dOp): - def initTestCase(self): - self.shape = (2, 4, 4, 4) - self.paddings = [0, 1, 2, 3] - self.mode = "edge" - self.data_format = "NHWC" - - -class TestCase5(TestPad2dOp): - def initTestCase(self): - self.shape = (2, 4, 4, 4) - self.paddings = [0, 1, 2, 3] - self.mode = "constant" - self.pad_value = 1.2 - self.data_format = "NHWC" - - -class TestCase6(TestPad2dOp): - def initTestCase(self): - self.shape = (2, 4, 4, 4) - self.paddings = [0, 1, 2, 3] - self.mode = "constant" - self.pad_value = 1.2 - self.data_format = "NHWC" - self.variable_paddings = True - - -class TestCase7(TestPad2dOp): - def initTestCase(self): - self.shape = (2, 3, 4, 5) - self.paddings = [0, 1, 2, 3] - self.mode = "reflect" - self.data_format = "NCHW" - self.variable_paddings = True - - -if __name__ == '__main__': - unittest.main() diff --git a/test/xpu/test_fill_op_xpu.py b/test/xpu/test_fill_op_xpu.py deleted file mode 100644 index 8adb6fc08b998..0000000000000 --- a/test/xpu/test_fill_op_xpu.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from get_test_cover_info import ( - XPUOpTestWrapper, - create_test_class, - get_xpu_op_support_types, -) -from op import Operator -from op_test_xpu import XPUOpTest - -import paddle -from paddle.base import core - -paddle.enable_static() - - -class XPUTestFillOp(XPUOpTestWrapper): - def __init__(self): - self.op_name = 'fill' - self.use_dynamic_create_class = False - - class TestFillOp1(XPUOpTest): - def setUp(self): - self.op_type = "fill" - val = np.random.random(size=[100, 200]) - self.inputs = {} - self.attrs = { - 'value': val.flatten().tolist(), - 'shape': [100, 200], - 'dtype': int(core.VarDesc.VarType.FP64), - 'force_cpu': False, - } - self.outputs = {'Out': val.astype('float64')} - - def test_check_output(self): - self.check_output_with_place(paddle.XPUPlace(0)) - - class TestFillOp2(XPUOpTest): - def setUp(self): - self.op_type = "fill" - val = np.random.random(size=[100, 200]) - self.inputs = {} - self.attrs = { - 'value': val.flatten().tolist(), - 'shape': [100, 200], - 'dtype': int(core.VarDesc.VarType.FP64), - 'force_cpu': True, - } - self.outputs = {'Out': val.astype('float64')} - - def test_check_output(self): - self.check_output() - - class TestFillOp3(unittest.TestCase): - def check_with_place(self, place, f_cpu): - scope = core.Scope() - # create Out Variable - out = scope.var('Out').get_tensor() - - # create and run fill_op operator - val = np.random.random(size=[300, 200]) - fill_op = Operator( - "fill", - value=val.flatten(), - shape=[300, 200], - dtype=int(core.VarDesc.VarType.FP32), - force_cpu=f_cpu, - Out='Out', - ) - fill_op.run(scope, place) - - # get result from Out - result_array = np.array(out) - full_array = np.array(val, 'float32') - - np.testing.assert_array_equal(result_array, full_array) - - def test_fill_op(self): - places = [core.CPUPlace()] - if core.is_compiled_with_xpu(): - places.append(core.XPUPlace(0)) - - for place in places: - self.check_with_place(place, True) - self.check_with_place(place, False) - - -support_types = get_xpu_op_support_types('fill') -for stype in support_types: - create_test_class(globals(), XPUTestFillOp, stype) - -if __name__ == '__main__': - unittest.main() diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index b73e4dd5dc154..aa06b939934e7 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -1069,7 +1069,6 @@ 'test_deformable_conv_v1_op', 'test_complex_grad_accumulated', 'test_sequence_mask', - 'test_fill_op', 'test_imperative_deepcf', 'test_multiply', 'test_partial_program', @@ -1310,7 +1309,6 @@ 'test_sequence_expand_as', 'test_full_name_usage', 'test_glu', - 'test_pad2d_op', 'test_read_file', 'test_erf_op', 'test_sequence_unpad_op', @@ -2577,7 +2575,6 @@ 'test_ir_fc_fuse_pass', 'test_fleet_base_single', 'test_multiprocess_dataloader_iterable_dataset_dynamic', - 'test_fill_op', 'test_slice_op', 'test_cond', 'test_ema', @@ -2711,7 +2708,6 @@ 'test_generate_proposals_v2_op', 'test_pad_constant_like', 'test_grid_sample_function', - 'test_pad2d_op', 'test_huber_loss_op', 'test_one_hot_op', 'test_normal', diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index c863c1e408f2e..60b800da70460 100755 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -27,7 +27,6 @@ 'test_lookup_table_op', 'test_lookup_table_bf16_op', 'test_lookup_table_v2_bf16_op', - 'test_pad2d_op', 'test_scatter_op', 'test_sequence_concat', 'test_sequence_conv', @@ -197,7 +196,6 @@ 'test_fetch_var', 'test_fill_any_like_op', 'test_fill_constant_op', - 'test_fill_op', 'test_fill_zeros_like_op', 'test_flatten2_op', 'test_flatten_contiguous_range_op',