From 4168872895d632be255fae24dbeb6ea6d6952255 Mon Sep 17 00:00:00 2001 From: lijiaqi Date: Wed, 1 Sep 2021 08:30:44 +0000 Subject: [PATCH 1/2] add fft_c2r op --- paddle/fluid/operators/spectral_op.cc | 11 +++++- paddle/fluid/operators/spectral_op.cu | 51 ++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/spectral_op.cc b/paddle/fluid/operators/spectral_op.cc index bad23c8e7f5fba..62fbe663d946d5 100644 --- a/paddle/fluid/operators/spectral_op.cc +++ b/paddle/fluid/operators/spectral_op.cc @@ -840,7 +840,16 @@ template struct FFTC2RFunctor { void operator()(const platform::CPUDeviceContext& ctx, const Tensor* x, Tensor* out, const std::vector& axes, - FFTNormMode normalization, bool forward) {} + FFTNormMode normalization, bool forward) { + auto input = x; + if (axes->dims() > 1) { + auto c2c_dims = axes.slice(0, axes->dims() - 1); + input = FFTC2CFunctor(ctx, x, out, c2c_dims, normalization, forward); + axes = axes.slice(axes->dims() - 1); + } + exec_fft(ctx, input, out, axes, + normalization, forward); + } }; #elif defined(PADDLE_WITH_POCKETFFT) diff --git a/paddle/fluid/operators/spectral_op.cu b/paddle/fluid/operators/spectral_op.cu index 2b163ff37b9a70..d616b572b17acf 100644 --- a/paddle/fluid/operators/spectral_op.cu +++ b/paddle/fluid/operators/spectral_op.cu @@ -278,7 +278,7 @@ struct KeyHash { value ^= ptr[i]; value *= 0x01000193; } - return (size_t)value; + return static_cast(value); } }; @@ -689,6 +689,19 @@ void exec_normalization(const DeviceContext& ctx, const Tensor* in, Tensor* out, } // anonymous namespace +// Use the optimized path to perform single R2C or C2R if transformation dim is +// supported by cuFFT +bool use_optimized_cufft_path(const std::vector& axes) { + // For performance reason, when axes starts with (0, 1), do not use the + // optimized path. 
+ if (axes.size() > kMaxCUFFTNdim || + (axes.size() >= 2 && axes[0] == 0 && axes[1] == 1)) { + return false; + } else { + return true; + } +} + template struct FFTC2CFunctor { void operator()(const platform::CUDADeviceContext& ctx, const Tensor* X, @@ -730,6 +743,33 @@ struct FFTC2CFunctor { } }; +template +struct FFTC2RFunctor { + void operator()(const platform::CUDADeviceContext& ctx, const Tensor* X, + Tensor* out, const std::vector& axes, + FFTNormMode normalization, bool forward) { + framework::Tensor* p_out = out; + std::vector in_dims = framework::vectorize(X->dims()); + std::vector out_dims(in_dims.begin(), in_dims.end()); + out_dims[axes.size() - 1] = (out->dims()).value_or(X->dims()); + // framework::slice_ddim(axes->dims(),0, axes.size()-1) + + if (use_optimized_cufft_path(axes)) { + framework::Tensor temp_tensor; + exec_fft(ctx, p_out, temp_tensor, + out_dims, axes, forward); + } else { + temp_tensor = FFTC2CFunctor(ctx, X, out, axes.assign(0, axes->dims() - 1), + FFTNormMode::none, forward); + + exec_fft( + ctx, p_out, temp_tensor, out_dims, axes.size() - 1, forward); + } + exec_normalization( + ctx, p_out, out, normalization, out_dims, axes); + } +}; + } // namespace operators } // namespace paddle @@ -742,3 +782,12 @@ REGISTER_OP_CUDA_KERNEL( fft_c2c_grad, ops::FFTC2CGradKernel, ops::FFTC2CGradKernel); + +REGISTER_OP_CUDA_KERNEL( + fft_c2r, ops::FFTC2RKernel, + ops::FFTC2RKernel); + +REGISTER_OP_CUDA_KERNEL( + fft_c2r_grad, + ops::FFTC2RGradKernel, + ops::FFTC2RGradKernel); From c8b96e55da952d2329a4572fc18ba858c84b577d Mon Sep 17 00:00:00 2001 From: lijiaqi Date: Thu, 2 Sep 2021 20:39:30 +0000 Subject: [PATCH 2/2] last fft c2r functor --- paddle/fluid/operators/spectral_op.cc | 42 ++++++++++--- paddle/fluid/operators/spectral_op.cu | 88 ++++++++++++++++++++++----- paddle/fluid/operators/spectral_op.h | 10 +-- python/paddle/tensor/fft.py | 26 +++----- 4 files changed, 121 insertions(+), 45 deletions(-) diff --git 
a/paddle/fluid/operators/spectral_op.cc b/paddle/fluid/operators/spectral_op.cc index 62fbe663d946d5..4cfed63273fab9 100644 --- a/paddle/fluid/operators/spectral_op.cc +++ b/paddle/fluid/operators/spectral_op.cc @@ -240,6 +240,16 @@ class FFTC2ROpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("normalization", "fft_norm_type, the fft normalization type."); AddAttr("forward", "bool, the fft direction."); + AddAttr( + "last_dim_size", + "int, Length of the transformed " + "axis of the output. For n output points, last_dim_size//2 + 1 input" + " points are necessary. If the input is longer than this," + " it is cropped. If it is shorter than this, it is padded" + " with zeros. If last_dim_size is not given, it is taken to be 2*(m-1)" + " where m is the length of the input along the axis " + "specified by axis.") + .SetDefault(0L); AddComment(R"DOC( // add doc here )DOC"); @@ -259,10 +269,15 @@ class FFTC2ROp : public framework::OperatorWithKernel { "Output(%s) of FFTC2ROp should not be null.", "Out")); const auto axes = ctx->Attrs().Get>("axes"); + const int64_t last_dim_size = ctx->Attrs().Get("last_dim_size"); framework::DDim out_dim(ctx->GetInputDim("X")); const int64_t last_fft_axis = axes.back(); - const int64_t last_fft_dim_size = out_dim.at(last_fft_axis); - out_dim.at(last_fft_axis) = (last_fft_dim_size - 1) * 2; + if (last_dim_size == 0) { + const int64_t last_fft_dim_size = out_dim.at(last_fft_axis); + out_dim.at(last_fft_axis) = (last_fft_dim_size - 1) * 2; + } else { + out_dim.at(last_fft_axis) = last_dim_size; + } ctx->SetOutputDim("Out", out_dim); } @@ -841,14 +856,21 @@ struct FFTC2RFunctor { void operator()(const platform::CPUDeviceContext& ctx, const Tensor* x, Tensor* out, const std::vector& axes, FFTNormMode normalization, bool forward) { - auto input = x; - if (axes->dims() > 1) { - auto c2c_dims = axes.slice(0, axes->dims() - 1); - input = FFTC2CFunctor(ctx, x, out, c2c_dims, normalization, forward); - axes = 
axes.slice(axes->dims() - 1); + if (axes.size() > 1) { + const std::vector c2c_dims(axes.begin(), axes.end() - 1); + Tensor temp; + temp.mutable_data(x->dims(), ctx.GetPlace()); + + FFTC2CFunctor c2c_functor; + c2c_functor(ctx, x, &temp, c2c_dims, normalization, forward); + + const std::vector new_axes{axes.back()}; + exec_fft(ctx, &temp, out, new_axes, + normalization, forward); + } else { + exec_fft(ctx, x, out, axes, + normalization, forward); } - exec_fft(ctx, input, out, axes, - normalization, forward); } }; @@ -964,7 +986,7 @@ struct FFTC2RFunctor { [](int64_t s) { return s * data_size; }); } - const auto* in_data = reinterpret_cast(x->data()); + const auto* in_data = reinterpret_cast(x->data()); auto* out_data = out->data(); // well, we have to use std::vector here std::vector axes_(axes.size()); diff --git a/paddle/fluid/operators/spectral_op.cu b/paddle/fluid/operators/spectral_op.cu index d616b572b17acf..1bb1e633928875 100644 --- a/paddle/fluid/operators/spectral_op.cu +++ b/paddle/fluid/operators/spectral_op.cu @@ -34,6 +34,25 @@ using ScalarType = framework::proto::VarType::Type; const int64_t kMaxCUFFTNdim = 3; const int64_t kMaxDataNdim = kMaxCUFFTNdim + 1; +std::ostream& operator<<(std::ostream& os, FFTTransformType fft_type) { + std::string repr; + switch (fft_type) { + case FFTTransformType::C2C: + repr = "C2C"; + break; + case FFTTransformType::C2R: + repr = "C2R"; + break; + case FFTTransformType::R2C: + repr = "R2C"; + break; + default: + repr = "UNK"; + } + os << repr; + return os; +} + static inline std::string get_cufft_error_info(cufftResult error) { switch (error) { case CUFFT_SUCCESS: @@ -431,7 +450,10 @@ class PlanLRUCache { // Execute a pre-planned transform static void exec_cufft_plan(const CuFFTConfig& config, void* in_data, void* out_data, bool forward) { + std::cout << "config address:" << &config << std::endl; auto& plan = config.plan(); + std::cout << "inside exec_cufft_plan ==============--------" << std::endl; +// 
std::cout<<"plan ==============--------"<< *plan << std::endl; #ifdef __HIPCC__ auto value_type = config.data_type(); if (value_type == framework::proto::VarType::FP32) { @@ -450,6 +472,8 @@ static void exec_cufft_plan(const CuFFTConfig& config, void* in_data, case FFTTransformType::C2R: { CUFFT_CHECK(hipfftExecC2R(plan, static_cast(in_data), static_cast(out_data))); + std::cout << "inside FFTTransformType ==============--------" + << std::endl; return; } } @@ -478,8 +502,17 @@ static void exec_cufft_plan(const CuFFTConfig& config, void* in_data, PADDLE_THROW(platform::errors::InvalidArgument( "hipFFT only support transforms of type float32 and float64")); #else + std::cout << "after __HIPCC__ ==============--------" << std::endl; + std::cout << "plan: " << plan << std::endl; + std::cout << "input pointer: " << in_data << std::endl; + std::cout << "output pointer: " << out_data << std::endl; + size_t ws = 0; + cufftGetSize(plan, &ws); + std::cout << "workspace size: " << ws << std::endl; + CUFFT_CHECK(cufftXtExec(plan, in_data, out_data, forward ? 
CUFFT_FORWARD : CUFFT_INVERSE)); + std::cout << "end end end __HIPCC__ end ==============--------" << std::endl; #endif } @@ -605,6 +638,11 @@ void exec_fft(const DeviceContext& ctx, Tensor* out, const Tensor* X, PlanKey Key(framework::vectorize(input.dims()), framework::vectorize(output.dims()), signal_size, fft_type, value_type); + std::cout << "input.dims()" << input.dims() << std::endl; + std::cout << "output.dims()" << output.dims() << std::endl; + std::cout << "signal_size" << framework::make_ddim(signal_size) << std::endl; + std::cout << "fft_type" << fft_type << std::endl; + std::cout << "value_type" << value_type << std::endl; PlanLRUCache& plan_cache = cufft_get_plan_cache(static_cast( (reinterpret_cast(&tensor_place))->GetDeviceId())); std::unique_lock guard(plan_cache.mutex, std::defer_lock); @@ -632,7 +670,6 @@ void exec_fft(const DeviceContext& ctx, Tensor* out, const Tensor* X, // execute transform plan exec_cufft_plan(*config, input.data(), output.data(), forward); - // Inverting output by reshape and transpose to original batch and dimension output.Resize(framework::make_ddim(reshape_out_sizes)); out->Resize(framework::make_ddim(out_sizes)); @@ -644,7 +681,9 @@ void exec_fft(const DeviceContext& ctx, Tensor* out, const Tensor* X, reverse_dim_permute); } */ + std::cout << "before TransCompute" << std::endl; TransCompute(ndim, ctx, output, out, reverse_dim_permute); + std::cout << "after TransCompute" << std::endl; } // Calculates the normalization constant and applies it in-place to out @@ -743,33 +782,45 @@ struct FFTC2CFunctor { } }; -template -struct FFTC2RFunctor { +template +struct FFTC2RFunctor { void operator()(const platform::CUDADeviceContext& ctx, const Tensor* X, Tensor* out, const std::vector& axes, FFTNormMode normalization, bool forward) { - framework::Tensor* p_out = out; std::vector in_dims = framework::vectorize(X->dims()); - std::vector out_dims(in_dims.begin(), in_dims.end()); - out_dims[axes.size() - 1] = 
(out->dims()).value_or(X->dims()); - // framework::slice_ddim(axes->dims(),0, axes.size()-1) + // std::vector out_dims(in_dims.begin(), in_dims.end()); + // out_dims[axes.back()] = out->dims(); + std::vector out_dims = framework::vectorize(out->dims()); + std::cout << "axes: " << framework::make_ddim(axes) << std::endl; if (use_optimized_cufft_path(axes)) { - framework::Tensor temp_tensor; - exec_fft(ctx, p_out, temp_tensor, - out_dims, axes, forward); + std::cout << "befor exec --------" << std::endl; + std::cout << "out dims: " << out->dims() << out->type() << std::endl; + std::cout << "in dims: " << X->dims() << X->type() << std::endl; + exec_fft(ctx, out, X, out_dims, axes, + forward); } else { - temp_tensor = FFTC2CFunctor(ctx, X, out, axes.assign(0, axes->dims() - 1), - FFTNormMode::none, forward); + framework::Tensor temp_tensor; + const std::vector dims(axes.begin(), axes.end() - 1); + + FFTC2CFunctor c2c_functor; + c2c_functor(ctx, X, &temp_tensor, dims, FFTNormMode::none, forward); exec_fft( - ctx, p_out, temp_tensor, out_dims, axes.size() - 1, forward); + ctx, out, &temp_tensor, out_dims, {axes.back()}, forward); } exec_normalization( - ctx, p_out, out, normalization, out_dims, axes); + ctx, out, out, normalization, out_dims, axes); } }; +template +struct FFTR2CFunctor { + void operator()(const platform::CUDADeviceContext& ctx, const Tensor* X, + Tensor* out, const std::vector& axes, + FFTNormMode normalization, bool forward, bool onesided) {} +}; + } // namespace operators } // namespace paddle @@ -791,3 +842,12 @@ REGISTER_OP_CUDA_KERNEL( fft_c2r_grad, ops::FFTC2RGradKernel, ops::FFTC2RGradKernel); + +REGISTER_OP_CUDA_KERNEL( + fft_r2c, ops::FFTR2CKernel, + ops::FFTR2CKernel); + +REGISTER_OP_CUDA_KERNEL( + fft_r2c_grad, + ops::FFTR2CGradKernel, + ops::FFTR2CGradKernel); diff --git a/paddle/fluid/operators/spectral_op.h b/paddle/fluid/operators/spectral_op.h index fa8de8faafde49..9059e3e49b37be 100644 --- a/paddle/fluid/operators/spectral_op.h +++ 
b/paddle/fluid/operators/spectral_op.h @@ -31,10 +31,10 @@ enum class FFTNormMode : int64_t { FFTNormMode get_norm_from_string(const std::string& norm, bool forward); // Enum representing the FFT type -enum class FFTTransformType : int8_t { - C2C, // Complex-to-complex - R2C, // Real-to-complex - C2R, // Complex-to-real +enum class FFTTransformType : int64_t { + C2C = 0, // Complex-to-complex + R2C, // Real-to-complex + C2R, // Complex-to-real }; // Create transform type enum from bools representing if input and output are @@ -99,7 +99,7 @@ template struct FFTC2RFunctor { void operator()(const DeviceContext& ctx, const Tensor* X, Tensor* out, const std::vector& axes, FFTNormMode normalization, - bool forward, bool onesided); + bool forward); }; template diff --git a/python/paddle/tensor/fft.py b/python/paddle/tensor/fft.py index ff154d73064846..5c99a5e8d60341 100644 --- a/python/paddle/tensor/fft.py +++ b/python/paddle/tensor/fft.py @@ -409,16 +409,16 @@ def fftn_c2r(x, s, axes, norm, forward): raise ValueError( "Unexpected norm: {}. Norm should be forward, backward or ortho". 
form(norm)) + s = list(s) if s is not None else None rank = x.ndim if axes is None: if s is None: axes = list(range(rank)) - s = paddle.shape(x) else: fft_ndims = len(s) axes = list(range(rank - fft_ndims, rank)) else: - axes_ = axes.copy() + axes_ = list(axes) for i in len(axes_): if axes_[i] < -rank or axes_[i] >= rank: raise ValueError( @@ -427,26 +427,20 @@ def fftn_c2r(x, s, axes, norm, forward): if axes_[i] < 0: axes_[i] += rank axes = axes_ - axes.sort() - if s is None: - shape = paddle.shape(x) - s = [shape[axis] for axis in axes] - else: - assert len(axes) == len(s) + op_type = 'fft_c2r' if in_dygraph_mode(): - attrs = ('s', s, 'axes', axes, 'normalization', norm, 'forward', - forward) + attrs = ('axes', axes, 'normalization', norm, 'forward', forward) + if s: + # Forward the requested output length only when `s` was given. + attrs += ('last_dim_size', s[-1]) out = getattr(_C_ops, op_type)(x, *attrs) else: inputs = {'X': [x], } - attrs = { - 's': s, - 'axes': axes, - 'normalization': norm, - 'forward': forward - } + attrs = {'axes': axes, 'normalization': norm, 'forward': forward} + if s: + attrs["last_dim_size"] = s[-1] check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], op_type) helper = LayerHelper(op_type, **locals())