Commit

fix HIP bug
zyfncg committed Mar 29, 2022
1 parent 1fd5e60 commit f539ed8
Showing 6 changed files with 81 additions and 80 deletions.
66 changes: 33 additions & 33 deletions paddle/phi/kernels/cpu/rnn_functor.h
@@ -43,11 +43,11 @@ inline void SwapPoniter(DenseTensor** a, DenseTensor** b) {
}

template <typename T>
-void create_mask_matrix(const CPUContext& dev_ctx,
-const DenseTensor* sequence_length,
-DenseTensor* mask_matrix,
-const bool& is_reverse,
-int* min_seq_len) {
+void CreateMaskMatrix(const CPUContext& dev_ctx,
+const DenseTensor* sequence_length,
+DenseTensor* mask_matrix,
+const bool& is_reverse,
+int* min_seq_len) {
const auto& seq_len_vec =
paddle::operators::GetDataFromTensor<int>(sequence_length);
const int table_width = mask_matrix->dims()[0];
@@ -80,11 +80,11 @@ void create_mask_matrix(const CPUContext& dev_ctx,
}

template <typename TensorType>
-void reset_parameter_vector(const std::vector<TensorType>& raw_params_vec,
-int num_layers,
-int gate_num,
-bool is_bidirec,
-std::vector<std::vector<DenseTensor>>* params_vec) {
+void ResetParameterVector(const std::vector<TensorType>& raw_params_vec,
+int num_layers,
+int gate_num,
+bool is_bidirec,
+std::vector<std::vector<DenseTensor>>* params_vec) {
// the parameter raw seuquence is [FWhi, FWhh, BWhi, BWhh] * num_layers
// + [FBhi, FBhh, BBhi, BBhh] * num_layers, we will reset the parameter to
// ([FWhi, FWhh, FBhi, FBhh] + [BWhi, BWhh, BBhi, BBhh]) * num_layers
@@ -113,11 +113,11 @@ void reset_parameter_vector(const std::vector<TensorType>& raw_params_vec,
}

template <typename T>
-void dropout_helper(const CPUContext& dev_ctx,
-DenseTensor* x,
-DenseTensor* y,
-const DenseTensor* mask,
-float dropout_prob) {
+void DropoutHelper(const CPUContext& dev_ctx,
+DenseTensor* x,
+DenseTensor* y,
+const DenseTensor* mask,
+float dropout_prob) {
auto& place = *dev_ctx.eigen_device();
auto dropout_mask = EigenVector<uint8_t>::Flatten(*mask);
auto in = EigenVector<T>::Flatten(*x);
@@ -131,14 +131,14 @@ void dropout_helper(const CPUContext& dev_ctx,
}

template <typename T>
-void dropout_cpu_function_inplace(const CPUContext& dev_ctx,
-DenseTensor* x,
-DenseTensor* y,
-DenseTensor* mask,
-const float& dropout_prob,
-const int& seed_number,
-bool is_test,
-bool* is_has_reset) {
+void DropoutCpuFunctionInplace(const CPUContext& dev_ctx,
+DenseTensor* x,
+DenseTensor* y,
+DenseTensor* mask,
+const float& dropout_prob,
+const int& seed_number,
+bool is_test,
+bool* is_has_reset) {
if (is_test) {
return;
}
@@ -161,7 +161,7 @@ void dropout_cpu_function_inplace(const CPUContext& dev_ctx,
}
*is_has_reset = true;
}
-dropout_helper<T>(dev_ctx, x, y, mask, dropout_prob);
+DropoutHelper<T>(dev_ctx, x, y, mask, dropout_prob);
}

template <typename Context, typename TensorType>
@@ -302,7 +302,7 @@ void RnnFunc(const Context& dev_ctx,

std::vector<std::vector<DenseTensor>> parameter_lists;
parameter_lists.reserve(num_layers);
-reset_parameter_vector(
+ResetParameterVector(
weight_list, num_layers, gate_num, is_bidirec, &parameter_lists);

DenseTensor gate_data, cell_data, cell_act_data, hidden_data;
@@ -373,14 +373,14 @@ void RnnFunc(const Context& dev_ctx,
prev_hidden_data = hidden_data.Slice(i - 1, i);
input_holder->Resize(output->dims());
if (dropout_prob != 0) {
-dropout_cpu_function_inplace<T>(dev_ctx,
-&prev_hidden_data,
-input_holder,
-dropout_mask,
-dropout_prob,
-seed,
-is_test,
-&has_dropout_reset);
+DropoutCpuFunctionInplace<T>(dev_ctx,
+&prev_hidden_data,
+input_holder,
+dropout_mask,
+dropout_prob,
+seed,
+is_test,
+&has_dropout_reset);
} else {
input_holder = &prev_hidden_data;
input_holder->Resize(output->dims());
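
The comment inside the ResetParameterVector hunk above describes the regrouping this helper performs: the raw weight list arrives as [FWhi, FWhh, BWhi, BWhh] * num_layers followed by [FBhi, FBhh, BBhi, BBhh] * num_layers, and it is rearranged into ([FWhi, FWhh, FBhi, FBhh] + [BWhi, BWhh, BBhi, BBhh]) * num_layers. The following is only an illustrative sketch of that index arithmetic, not the Paddle implementation: Regroup is a made-up name, strings stand in for tensors, and the weights-then-biases split is taken from the comment's layout.

// Regroups a flat parameter list from the raw layout described in the
// comment into per-layer, per-direction groups. Illustrative only.
#include <iostream>
#include <string>
#include <vector>

std::vector<std::vector<std::string>> Regroup(
    const std::vector<std::string>& raw, int num_layers, bool is_bidirec) {
  const int dirs = is_bidirec ? 2 : 1;
  const size_t bias_start = raw.size() / 2;  // weights first, then biases
  std::vector<std::vector<std::string>> per_layer(num_layers);
  for (int layer = 0; layer < num_layers; ++layer) {
    for (int dir = 0; dir < dirs; ++dir) {
      const size_t w = (layer * dirs + dir) * 2;               // Whi, Whh
      const size_t b = bias_start + (layer * dirs + dir) * 2;  // Bhi, Bhh
      per_layer[layer].insert(per_layer[layer].end(),
                              {raw[w], raw[w + 1], raw[b], raw[b + 1]});
    }
  }
  return per_layer;
}

int main() {
  const std::vector<std::string> raw = {"FWhi", "FWhh", "BWhi", "BWhh",
                                        "FBhi", "FBhh", "BBhi", "BBhh"};
  for (const auto& group : Regroup(raw, /*num_layers=*/1, /*is_bidirec=*/true)) {
    for (const auto& name : group) std::cout << name << ' ';
    std::cout << '\n';  // prints: FWhi FWhh FBhi FBhh BWhi BWhh BBhi BBhh
  }
}
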
26 changes: 13 additions & 13 deletions paddle/phi/kernels/cpu/rnn_grad_kernel.cc
@@ -396,7 +396,7 @@ struct GradLayer {
int mask_min_length = time_step;
if (has_sequence_length) {
mask_matrix.Resize(phi::make_ddim({time_step, input->dims()[1]}));
-create_mask_matrix<T>(
+CreateMaskMatrix<T>(
dev_ctx, sequence_length, &mask_matrix, is_reverse, &mask_min_length);
mask_tensor_list = Unbind(mask_matrix);
}
@@ -958,7 +958,7 @@ void dropout_cpu_grad_function_inplace(const CPUContext& dev_ctx,
DenseTensor* grad_x,
const DenseTensor* mask,
float dropout_prob) {
-dropout_helper<T>(dev_ctx, grad_x, grad_x, mask, dropout_prob);
+DropoutHelper<T>(dev_ctx, grad_x, grad_x, mask, dropout_prob);
}

template <typename GradCellType,
@@ -1032,19 +1032,19 @@ void RnnGradFunc(const CPUContext& dev_ctx,
// reset the parameter to sorted order and allocate the memory
std::vector<std::vector<DenseTensor>> parameter_lists;
parameter_lists.reserve(num_layers);
-reset_parameter_vector(
+ResetParameterVector(
weight_list, num_layers, gate_num, is_bidirec, &parameter_lists);

for (unsigned int i = 0; i < weight_grad_list.size(); ++i) {
dev_ctx.Alloc<T>(weight_grad_list[i]);
}
std::vector<std::vector<DenseTensor>> parameter_lists_grad;
parameter_lists_grad.reserve(num_layers);
-reset_parameter_vector(weight_grad_list,
-num_layers,
-gate_num,
-is_bidirec,
-&parameter_lists_grad);
+ResetParameterVector(weight_grad_list,
+num_layers,
+gate_num,
+is_bidirec,
+&parameter_lists_grad);

// resolve the state of reverse_state
DenseTensor gate_tensor;
@@ -1148,11 +1148,11 @@ void RnnGradFunc(const CPUContext& dev_ctx,
layer_input.Resize(hidden_tensor_unbind[i - 1].dims());
dev_ctx.Alloc<T>(&layer_input);
}
-dropout_helper<T>(dev_ctx,
-&hidden_tensor_unbind[i - 1],
-&layer_input,
-&dropout_state,
-dropout_prob);
+DropoutHelper<T>(dev_ctx,
+&hidden_tensor_unbind[i - 1],
+&layer_input,
+&dropout_state,
+dropout_prob);
} else {
layer_input.ShareDataWith(x);
}
4 changes: 2 additions & 2 deletions paddle/phi/kernels/cpu/rnn_kernel.cc
@@ -357,7 +357,7 @@ struct Layer {
if (has_sequence_length) {
mask_matrix.Resize(phi::make_ddim({time_step, input->dims()[1]}));

-create_mask_matrix<T>(
+CreateMaskMatrix<T>(
dev_ctx, sequence_length, &mask_matrix, is_reverse, &mask_min_length);
mask_tensor_list = Unbind(mask_matrix);
}
@@ -556,7 +556,7 @@ struct Layer {
int mask_min_length = time_step;
if (has_sequence_length) {
mask_matrix.Resize(phi::make_ddim({time_step, input->dims()[1]}));
-create_mask_matrix<T>(
+CreateMaskMatrix<T>(
dev_ctx, sequence_length, &mask_matrix, is_reverse, &mask_min_length);
mask_tensor_list = Unbind(mask_matrix);
}
47 changes: 23 additions & 24 deletions paddle/phi/kernels/gpu/rnn_functor.h
@@ -283,7 +283,7 @@ class RNNDescriptors {
};

template <typename T, typename Type>
-bool is_continuous(const Type &weight_list) {
+bool IsContinuous(const Type &weight_list) {
bool continuous = true;
for (size_t i = 0; i < weight_list.size() - 1; ++i) {
auto *in_data = weight_list[i]->template data<T>();
@@ -296,10 +296,10 @@ bool is_continuous(const Type &weight_list) {
}

template <typename T>
-void weight_to_tensor(const Place &place,
-gpuStream_t stream,
-const std::vector<const DenseTensor *> &weight_list,
-DenseTensor *weight) {
+void WeightToTensor(const Place &place,
+gpuStream_t stream,
+const std::vector<const DenseTensor *> &weight_list,
+DenseTensor *weight) {
auto weight_data = weight->data<T>();
int weight_offset = 0;
for (size_t i = 0; i < weight_list.size(); ++i) {
@@ -318,11 +318,11 @@ void weight_to_tensor(const Place &place,

#ifdef PADDLE_WITH_HIP
template <typename T>
-void weight_list_to_tensor(const Place &place,
-gpuStream_t stream,
-const std::vector<DenseTensor> &tensor_list,
-DenseTensor *weight_whole,
-const size_t offset = 0UL) {
+void WeightListToTensor(const Place &place,
+gpuStream_t stream,
+const std::vector<DenseTensor> &tensor_list,
+DenseTensor *weight_whole,
+const size_t offset = 0UL) {
size_t weight_offset = offset;
auto weight_data = weight_whole->data<T>();

@@ -340,12 +340,12 @@ void weight_list_to_tensor(const Place &place,
}

template <typename T>
-void weight_to_permuted_tensor(const Place &place,
-gpuStream_t stream,
-std::vector<const DenseTensor *> *weight_list,
-DenseTensor *weight_whole,
-const gpuRNNMode_t rnn_mode,
-const bool is_bidirec) {
+void WeightToPermutedTensor(const Place &place,
+gpuStream_t stream,
+std::vector<const DenseTensor *> *weight_list,
+DenseTensor *weight_whole,
+const gpuRNNMode_t rnn_mode,
+const bool is_bidirec) {
if (is_bidirec) {
for (size_t i = 0; i < weight_list->size(); i += 4) {
auto tmp = (*weight_list)[i + 1];
@@ -357,22 +357,21 @@ void weight_to_permuted_tensor(const Place &place,
for (size_t i = 0; i < weight_list->size(); ++i) {
if (rnn_mode == miopenLSTM) {
std::vector<DenseTensor> split_tensor = (*weight_list)[i]->Chunk(4, 0);
-weight_list_to_tensor<T>(
+WeightListToTensor<T>(
place,
stream,
{split_tensor[0], split_tensor[1], split_tensor[3], split_tensor[2]},
weight_whole,
weight_offset);
} else if (rnn_mode == miopenGRU) {
std::vector<DenseTensor> split_tensor = (*weight_list)[i]->Chunk(3, 0);
-weight_list_to_tensor<T>(
-place,
-stream,
-{split_tensor[1], split_tensor[0], split_tensor[2]},
-weight_whole,
-weight_offset);
+WeightListToTensor<T>(place,
+stream,
+{split_tensor[1], split_tensor[0], split_tensor[2]},
+weight_whole,
+weight_offset);
} else {
-weight_list_to_tensor<T>(
+WeightListToTensor<T>(
place, stream, {*(*weight_list)[i]}, weight_whole, weight_offset);
}
weight_offset += (*weight_list)[i]->numel();
9 changes: 5 additions & 4 deletions paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc
@@ -81,7 +81,7 @@ void RnnGradKernel(const Context &dev_ctx,
0,
[](int64_t num, const DenseTensor *t) { return num + t->numel(); });
bool continuous =
-is_continuous<T, std::vector<const DenseTensor *>>(weight_list);
+IsContinuous<T, std::vector<const DenseTensor *>>(weight_list);
auto stream = dev_ctx.stream();
DenseTensor weight_whole;
T *weight_data = nullptr;
@@ -96,10 +96,10 @@ void RnnGradKernel(const Context &dev_ctx,
dev_ctx.template Alloc<T>(&weight_whole);
#ifdef PADDLE_WITH_HIP
// MIOPEN need to permute weight for miopenLSTM or miopenGRU
-weight_to_permuted_tensor<T>(
-place, stream, &weight_list, &weight_whole, rnn_mode, is_bidirec);
+std::vector<const DenseTensor *> weight_list_tmp = weight_list;
+WeightToPermutedTensor<T>(
+place, stream, &weight_list_tmp, &weight_whole, rnn_mode, is_bidirec);
#else
-weight_to_tensor<T>(place, stream, weight_list, &weight_whole);
+WeightToTensor<T>(place, stream, weight_list, &weight_whole);
#endif
weight_data = weight_whole.data<T>();
} else {
9 changes: 5 additions & 4 deletions paddle/phi/kernels/gpu/rnn_kernel.cu.cc
@@ -234,7 +234,7 @@ void RnnKernel(const Context &dev_ctx,
0,
[](int64_t num, const DenseTensor *t) { return num + t->numel(); });
bool continuous =
-is_continuous<T, std::vector<const DenseTensor *>>(weight_list);
+IsContinuous<T, std::vector<const DenseTensor *>>(weight_list);
#ifdef PADDLE_WITH_HIP
// Need to permute weight, set continuous to false
continuous = false;
@@ -248,10 +248,10 @@ void RnnKernel(const Context &dev_ctx,
dev_ctx.template Alloc<T>(&weight_whole);
#ifdef PADDLE_WITH_HIP
// MIOPEN need to permute weight for miopenLSTM or miopenGRU
-weight_to_permuted_tensor<T>(
-place, stream, &weight_list, &weight_whole, rnn_mode, is_bidirec);
+std::vector<const DenseTensor *> weight_list_tmp = weight_list;
+WeightToPermutedTensor<T>(
+place, stream, &weight_list_tmp, &weight_whole, rnn_mode, is_bidirec);
#else
-weight_to_tensor<T>(place, stream, weight_list, &weight_whole);
+WeightToTensor<T>(place, stream, weight_list, &weight_whole);
#endif
w_data = weight_whole.data<T>();
#ifndef PADDLE_WITH_HIP
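
The HIP-only change in the two GPU kernels above is a const-correctness fix: WeightToPermutedTensor takes a pointer to a mutable vector because it reorders the entries in place for the bidirectional case, while the kernels appear to receive weight_list by const reference (the parameter declaration is outside this diff), so &weight_list cannot be passed directly; the fix first copies the list into a local weight_list_tmp. Below is a minimal, self-contained sketch of that pattern under those assumptions, with stand-in names (Tensor, PermuteWeights, Kernel) rather than the actual Paddle types.

#include <utility>
#include <vector>

struct Tensor {};  // stand-in for DenseTensor

// Reorders the pointer vector in place, so it needs a pointer to a
// non-const vector -- the same shape as WeightToPermutedTensor.
void PermuteWeights(std::vector<const Tensor*>* weights) {
  for (size_t i = 0; i + 1 < weights->size(); i += 2) {
    std::swap((*weights)[i], (*weights)[i + 1]);
  }
}

// The caller holds the list by const reference, so taking its address
// yields a pointer-to-const that PermuteWeights cannot accept.
void Kernel(const std::vector<const Tensor*>& weight_list) {
  // PermuteWeights(&weight_list);  // would not compile
  std::vector<const Tensor*> weight_list_tmp = weight_list;  // mutable copy
  PermuteWeights(&weight_list_tmp);
}

int main() {
  Tensor a, b;
  std::vector<const Tensor*> weights = {&a, &b};
  Kernel(weights);
}

Copying the vector only duplicates pointers, so the extra cost of weight_list_tmp is negligible next to the weight concatenation that follows.
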
