Commit

fix HIP bug
zyfncg committed Mar 29, 2022
1 parent 1fd5e60 commit f539ed8
Showing 6 changed files with 81 additions and 80 deletions.
66 changes: 33 additions & 33 deletions paddle/phi/kernels/cpu/rnn_functor.h
@@ -43,11 +43,11 @@ inline void SwapPoniter(DenseTensor** a, DenseTensor** b) {
}

template <typename T>
-void create_mask_matrix(const CPUContext& dev_ctx,
-const DenseTensor* sequence_length,
-DenseTensor* mask_matrix,
-const bool& is_reverse,
-int* min_seq_len) {
+void CreateMaskMatrix(const CPUContext& dev_ctx,
+const DenseTensor* sequence_length,
+DenseTensor* mask_matrix,
+const bool& is_reverse,
+int* min_seq_len) {
const auto& seq_len_vec =
paddle::operators::GetDataFromTensor<int>(sequence_length);
const int table_width = mask_matrix->dims()[0];
@@ -80,11 +80,11 @@ void create_mask_matrix(const CPUContext& dev_ctx,
}

template <typename TensorType>
-void reset_parameter_vector(const std::vector<TensorType>& raw_params_vec,
-int num_layers,
-int gate_num,
-bool is_bidirec,
-std::vector<std::vector<DenseTensor>>* params_vec) {
+void ResetParameterVector(const std::vector<TensorType>& raw_params_vec,
+int num_layers,
+int gate_num,
+bool is_bidirec,
+std::vector<std::vector<DenseTensor>>* params_vec) {
// the parameter raw seuquence is [FWhi, FWhh, BWhi, BWhh] * num_layers
// + [FBhi, FBhh, BBhi, BBhh] * num_layers, we will reset the parameter to
// ([FWhi, FWhh, FBhi, FBhh] + [BWhi, BWhh, BBhi, BBhh]) * num_layers
@@ -113,11 +113,11 @@ void reset_parameter_vector(const std::vector<TensorType>& raw_params_vec,
}

template <typename T>
-void dropout_helper(const CPUContext& dev_ctx,
-DenseTensor* x,
-DenseTensor* y,
-const DenseTensor* mask,
-float dropout_prob) {
+void DropoutHelper(const CPUContext& dev_ctx,
+DenseTensor* x,
+DenseTensor* y,
+const DenseTensor* mask,
+float dropout_prob) {
auto& place = *dev_ctx.eigen_device();
auto dropout_mask = EigenVector<uint8_t>::Flatten(*mask);
auto in = EigenVector<T>::Flatten(*x);
@@ -131,14 +131,14 @@ void dropout_helper(const CPUContext& dev_ctx,
}

template <typename T>
-void dropout_cpu_function_inplace(const CPUContext& dev_ctx,
-DenseTensor* x,
-DenseTensor* y,
-DenseTensor* mask,
-const float& dropout_prob,
-const int& seed_number,
-bool is_test,
-bool* is_has_reset) {
+void DropoutCpuFunctionInplace(const CPUContext& dev_ctx,
+DenseTensor* x,
+DenseTensor* y,
+DenseTensor* mask,
+const float& dropout_prob,
+const int& seed_number,
+bool is_test,
+bool* is_has_reset) {
if (is_test) {
return;
}
@@ -161,7 +161,7 @@ void dropout_cpu_function_inplace(const CPUContext& dev_ctx,
}
*is_has_reset = true;
}
-dropout_helper<T>(dev_ctx, x, y, mask, dropout_prob);
+DropoutHelper<T>(dev_ctx, x, y, mask, dropout_prob);
}

template <typename Context, typename TensorType>
@@ -302,7 +302,7 @@ void RnnFunc(const Context& dev_ctx,

std::vector<std::vector<DenseTensor>> parameter_lists;
parameter_lists.reserve(num_layers);
-reset_parameter_vector(
+ResetParameterVector(
weight_list, num_layers, gate_num, is_bidirec, &parameter_lists);

DenseTensor gate_data, cell_data, cell_act_data, hidden_data;
@@ -373,14 +373,14 @@ void RnnFunc(const Context& dev_ctx,
prev_hidden_data = hidden_data.Slice(i - 1, i);
input_holder->Resize(output->dims());
if (dropout_prob != 0) {
-dropout_cpu_function_inplace<T>(dev_ctx,
-&prev_hidden_data,
-input_holder,
-dropout_mask,
-dropout_prob,
-seed,
-is_test,
-&has_dropout_reset);
+DropoutCpuFunctionInplace<T>(dev_ctx,
+&prev_hidden_data,
+input_holder,
+dropout_mask,
+dropout_prob,
+seed,
+is_test,
+&has_dropout_reset);
} else {
input_holder = &prev_hidden_data;
input_holder->Resize(output->dims());
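
The comment inside the ResetParameterVector hunk above describes the regrouping this helper performs: the raw weight list arrives as [FWhi, FWhh, BWhi, BWhh] * num_layers followed by [FBhi, FBhh, BBhi, BBhh] * num_layers, and it is rearranged into ([FWhi, FWhh, FBhi, FBhh] + [BWhi, BWhh, BBhi, BBhh]) * num_layers. The following is only an illustrative sketch of that index arithmetic, not the Paddle implementation: Regroup is a made-up name, strings stand in for tensors, and the weights-then-biases split is taken from the comment's layout.

// Regroups a flat parameter list from the raw layout described in the
// comment into per-layer, per-direction groups. Illustrative only.
#include <iostream>
#include <string>
#include <vector>

std::vector<std::vector<std::string>> Regroup(
    const std::vector<std::string>& raw, int num_layers, bool is_bidirec) {
  const int dirs = is_bidirec ? 2 : 1;
  const size_t bias_start = raw.size() / 2;  // weights first, then biases
  std::vector<std::vector<std::string>> per_layer(num_layers);
  for (int layer = 0; layer < num_layers; ++layer) {
    for (int dir = 0; dir < dirs; ++dir) {
      const size_t w = (layer * dirs + dir) * 2;               // Whi, Whh
      const size_t b = bias_start + (layer * dirs + dir) * 2;  // Bhi, Bhh
      per_layer[layer].insert(per_layer[layer].end(),
                              {raw[w], raw[w + 1], raw[b], raw[b + 1]});
    }
  }
  return per_layer;
}

int main() {
  const std::vector<std::string> raw = {"FWhi", "FWhh", "BWhi", "BWhh",
                                        "FBhi", "FBhh", "BBhi", "BBhh"};
  for (const auto& group : Regroup(raw, /*num_layers=*/1, /*is_bidirec=*/true)) {
    for (const auto& name : group) std::cout << name << ' ';
    std::cout << '\n';  // prints: FWhi FWhh FBhi FBhh BWhi BWhh BBhi BBhh
  }
}
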
26 changes: 13 additions & 13 deletions paddle/phi/kernels/cpu/rnn_grad_kernel.cc
@@ -396,7 +396,7 @@ struct GradLayer {
int mask_min_length = time_step;
if (has_sequence_length) {
mask_matrix.Resize(phi::make_ddim({time_step, input->dims()[1]}));
-create_mask_matrix<T>(
+CreateMaskMatrix<T>(
dev_ctx, sequence_length, &mask_matrix, is_reverse, &mask_min_length);
mask_tensor_list = Unbind(mask_matrix);
}
@@ -958,7 +958,7 @@ void dropout_cpu_grad_function_inplace(const CPUContext& dev_ctx,
DenseTensor* grad_x,
const DenseTensor* mask,
float dropout_prob) {
-dropout_helper<T>(dev_ctx, grad_x, grad_x, mask, dropout_prob);
+DropoutHelper<T>(dev_ctx, grad_x, grad_x, mask, dropout_prob);
}

template <typename GradCellType,
@@ -1032,19 +1032,19 @@ void RnnGradFunc(const CPUContext& dev_ctx,
// reset the parameter to sorted order and allocate the memory
std::vector<std::vector<DenseTensor>> parameter_lists;
parameter_lists.reserve(num_layers);
-reset_parameter_vector(
+ResetParameterVector(
weight_list, num_layers, gate_num, is_bidirec, &parameter_lists);

for (unsigned int i = 0; i < weight_grad_list.size(); ++i) {
dev_ctx.Alloc<T>(weight_grad_list[i]);
}
std::vector<std::vector<DenseTensor>> parameter_lists_grad;
parameter_lists_grad.reserve(num_layers);
-reset_parameter_vector(weight_grad_list,
-num_layers,
-gate_num,
-is_bidirec,
-&parameter_lists_grad);
+ResetParameterVector(weight_grad_list,
+num_layers,
+gate_num,
+is_bidirec,
+&parameter_lists_grad);

// resolve the state of reverse_state
DenseTensor gate_tensor;
@@ -1148,11 +1148,11 @@ void RnnGradFunc(const CPUContext& dev_ctx,
layer_input.Resize(hidden_tensor_unbind[i - 1].dims());
dev_ctx.Alloc<T>(&layer_input);
}
-dropout_helper<T>(dev_ctx,
-&hidden_tensor_unbind[i - 1],
-&layer_input,
-&dropout_state,
-dropout_prob);
+DropoutHelper<T>(dev_ctx,
+&hidden_tensor_unbind[i - 1],
+&layer_input,
+&dropout_state,
+dropout_prob);
} else {
layer_input.ShareDataWith(x);
}
4 changes: 2 additions & 2 deletions paddle/phi/kernels/cpu/rnn_kernel.cc
@@ -357,7 +357,7 @@ struct Layer {
if (has_sequence_length) {
mask_matrix.Resize(phi::make_ddim({time_step, input->dims()[1]}));

-create_mask_matrix<T>(
+CreateMaskMatrix<T>(
dev_ctx, sequence_length, &mask_matrix, is_reverse, &mask_min_length);
mask_tensor_list = Unbind(mask_matrix);
}
@@ -556,7 +556,7 @@ struct Layer {
int mask_min_length = time_step;
if (has_sequence_length) {
mask_matrix.Resize(phi::make_ddim({time_step, input->dims()[1]}));
-create_mask_matrix<T>(
+CreateMaskMatrix<T>(
dev_ctx, sequence_length, &mask_matrix, is_reverse, &mask_min_length);
mask_tensor_list = Unbind(mask_matrix);
}
47 changes: 23 additions & 24 deletions paddle/phi/kernels/gpu/rnn_functor.h
@@ -283,7 +283,7 @@ class RNNDescriptors {
};

template <typename T, typename Type>
-bool is_continuous(const Type &weight_list) {
+bool IsContinuous(const Type &weight_list) {
bool continuous = true;
for (size_t i = 0; i < weight_list.size() - 1; ++i) {
auto *in_data = weight_list[i]->template data<T>();
@@ -296,10 +296,10 @@ bool is_continuous(const Type &weight_list) {
}

template <typename T>
-void weight_to_tensor(const Place &place,
-gpuStream_t stream,
-const std::vector<const DenseTensor *> &weight_list,
-DenseTensor *weight) {
+void WeightToTensor(const Place &place,
+gpuStream_t stream,
+const std::vector<const DenseTensor *> &weight_list,
+DenseTensor *weight) {
auto weight_data = weight->data<T>();
int weight_offset = 0;
for (size_t i = 0; i < weight_list.size(); ++i) {
@@ -318,11 +318,11 @@ void weight_to_tensor(const Place &place,

#ifdef PADDLE_WITH_HIP
template <typename T>
-void weight_list_to_tensor(const Place &place,
-gpuStream_t stream,
-const std::vector<DenseTensor> &tensor_list,
-DenseTensor *weight_whole,
-const size_t offset = 0UL) {
+void WeightListToTensor(const Place &place,
+gpuStream_t stream,
+const std::vector<DenseTensor> &tensor_list,
+DenseTensor *weight_whole,
+const size_t offset = 0UL) {
size_t weight_offset = offset;
auto weight_data = weight_whole->data<T>();

@@ -340,12 +340,12 @@ void weight_list_to_tensor(const Place &place,
}

template <typename T>
-void weight_to_permuted_tensor(const Place &place,
-gpuStream_t stream,
-std::vector<const DenseTensor *> *weight_list,
-DenseTensor *weight_whole,
-const gpuRNNMode_t rnn_mode,
-const bool is_bidirec) {
+void WeightToPermutedTensor(const Place &place,
+gpuStream_t stream,
+std::vector<const DenseTensor *> *weight_list,
+DenseTensor *weight_whole,
+const gpuRNNMode_t rnn_mode,
+const bool is_bidirec) {
if (is_bidirec) {
for (size_t i = 0; i < weight_list->size(); i += 4) {
auto tmp = (*weight_list)[i + 1];
@@ -357,22 +357,21 @@ void weight_to_permuted_tensor(const Place &place,
for (size_t i = 0; i < weight_list->size(); ++i) {
if (rnn_mode == miopenLSTM) {
std::vector<DenseTensor> split_tensor = (*weight_list)[i]->Chunk(4, 0);
-weight_list_to_tensor<T>(
+WeightListToTensor<T>(
place,
stream,
{split_tensor[0], split_tensor[1], split_tensor[3], split_tensor[2]},
weight_whole,
weight_offset);
} else if (rnn_mode == miopenGRU) {
std::vector<DenseTensor> split_tensor = (*weight_list)[i]->Chunk(3, 0);
-weight_list_to_tensor<T>(
-place,
-stream,
-{split_tensor[1], split_tensor[0], split_tensor[2]},
-weight_whole,
-weight_offset);
+WeightListToTensor<T>(place,
+stream,
+{split_tensor[1], split_tensor[0], split_tensor[2]},
+weight_whole,
+weight_offset);
} else {
-weight_list_to_tensor<T>(
+WeightListToTensor<T>(
place, stream, {*(*weight_list)[i]}, weight_whole, weight_offset);
}
weight_offset += (*weight_list)[i]->numel();
9 changes: 5 additions & 4 deletions paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc
@@ -81,7 +81,7 @@ void RnnGradKernel(const Context &dev_ctx,
0,
[](int64_t num, const DenseTensor *t) { return num + t->numel(); });
bool continuous =
-is_continuous<T, std::vector<const DenseTensor *>>(weight_list);
+IsContinuous<T, std::vector<const DenseTensor *>>(weight_list);
auto stream = dev_ctx.stream();
DenseTensor weight_whole;
T *weight_data = nullptr;
@@ -96,10 +96,10 @@ void RnnGradKernel(const Context &dev_ctx,
dev_ctx.template Alloc<T>(&weight_whole);
#ifdef PADDLE_WITH_HIP
// MIOPEN need to permute weight for miopenLSTM or miopenGRU
-weight_to_permuted_tensor<T>(
-place, stream, &weight_list, &weight_whole, rnn_mode, is_bidirec);
+std::vector<const DenseTensor *> weight_list_tmp = weight_list;
+WeightToPermutedTensor<T>(
+place, stream, &weight_list_tmp, &weight_whole, rnn_mode, is_bidirec);
#else
-weight_to_tensor<T>(place, stream, weight_list, &weight_whole);
+WeightToTensor<T>(place, stream, weight_list, &weight_whole);
#endif
weight_data = weight_whole.data<T>();
} else {
9 changes: 5 additions & 4 deletions paddle/phi/kernels/gpu/rnn_kernel.cu.cc
@@ -234,7 +234,7 @@ void RnnKernel(const Context &dev_ctx,
0,
[](int64_t num, const DenseTensor *t) { return num + t->numel(); });
bool continuous =
-is_continuous<T, std::vector<const DenseTensor *>>(weight_list);
+IsContinuous<T, std::vector<const DenseTensor *>>(weight_list);
#ifdef PADDLE_WITH_HIP
// Need to permute weight, set continuous to false
continuous = false;
@@ -248,10 +248,10 @@ void RnnKernel(const Context &dev_ctx,
dev_ctx.template Alloc<T>(&weight_whole);
#ifdef PADDLE_WITH_HIP
// MIOPEN need to permute weight for miopenLSTM or miopenGRU
-weight_to_permuted_tensor<T>(
-place, stream, &weight_list, &weight_whole, rnn_mode, is_bidirec);
+std::vector<const DenseTensor *> weight_list_tmp = weight_list;
+WeightToPermutedTensor<T>(
+place, stream, &weight_list_tmp, &weight_whole, rnn_mode, is_bidirec);
#else
-weight_to_tensor<T>(place, stream, weight_list, &weight_whole);
+WeightToTensor<T>(place, stream, weight_list, &weight_whole);
#endif
w_data = weight_whole.data<T>();
#ifndef PADDLE_WITH_HIP
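
The HIP-only change in the two GPU kernels above is a const-correctness fix: WeightToPermutedTensor takes a pointer to a mutable vector because it reorders the entries in place for the bidirectional case, while the kernels appear to receive weight_list by const reference (the parameter declaration is outside this diff), so &weight_list cannot be passed directly; the fix first copies the list into a local weight_list_tmp. Below is a minimal, self-contained sketch of that pattern under those assumptions, with stand-in names (Tensor, PermuteWeights, Kernel) rather than the actual Paddle types.

#include <utility>
#include <vector>

struct Tensor {};  // stand-in for DenseTensor

// Reorders the pointer vector in place, so it needs a pointer to a
// non-const vector -- the same shape as WeightToPermutedTensor.
void PermuteWeights(std::vector<const Tensor*>* weights) {
  for (size_t i = 0; i + 1 < weights->size(); i += 2) {
    std::swap((*weights)[i], (*weights)[i + 1]);
  }
}

// The caller holds the list by const reference, so taking its address
// yields a pointer-to-const that PermuteWeights cannot accept.
void Kernel(const std::vector<const Tensor*>& weight_list) {
  // PermuteWeights(&weight_list);  // would not compile
  std::vector<const Tensor*> weight_list_tmp = weight_list;  // mutable copy
  PermuteWeights(&weight_list_tmp);
}

int main() {
  Tensor a, b;
  std::vector<const Tensor*> weights = {&a, &b};
  Kernel(weights);
}

Copying the vector only duplicates pointers, so the extra cost of weight_list_tmp is negligible next to the weight concatenation that follows.
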
