Enhance GPU kernel of sequence erase op #7603
Conversation
-__global__ void LabelErasedIdx(const T* in_dat, const int in_len,
-                               const T* tokens, const int tokens_len,
-                               int* num_erased) {
+__global__ void LabelErasedIdx(const T* in_dat, const int64_t in_len,
Why does in_len use int64_t while tokens_len is size_t?
They have different data types.
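For context, a minimal sketch of where the two lengths originate (assembled from the call-site diff further down; the exact surrounding code is an assumption):

auto tokens = ctx.Attr<std::vector<int>>("tokens");
size_t tokens_len = tokens.size();  // std::vector::size() returns size_t
int64_t in_len = in->numel();       // framework::Tensor::numel() returns int64_t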
   int index = blockIdx.x * blockDim.x + threadIdx.x;
   if (index < in_len) {
     int erased = 0;
-    for (int i = 0; i < tokens_len; ++i) {
+    for (size_t i = 0; i < tokens_len; ++i) {
       if (in_dat[index] == tokens[i]) {
         erased = 1;
Add a break here?
Done
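For reference, a sketch of the inner loop with the suggested break applied (surrounding kernel body as in the diff above):

for (size_t i = 0; i < tokens_len; ++i) {
  if (in_dat[index] == tokens[i]) {
    erased = 1;
    break;  // element already marked; no need to scan the remaining tokens
  }
}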
   int* dev_tokens_ptr = thrust::raw_pointer_cast(dev_tokens.data());

   // Count number of elements to be erased
   thrust::device_vector<size_t> num_erased(in_len + 1);
We can set num_erased[0] = 0 here to avoid checking whether index == 0 in every thread.
Done
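A minimal sketch of the suggestion (the index + 1 write pattern in the kernel is an assumption about the surrounding code):

// Host side: the extra leading slot is zeroed once at construction,
// so no thread has to special-case index == 0.
thrust::device_vector<size_t> num_erased(in_len + 1, 0);
// Device side: each thread then marks its own slot at index + 1.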
 }

 template <typename T>
 std::vector<T> get_std_vector(thrust::device_vector<T>& dev_vec) {
Note that Vector in LoD is guaranteed to be thrust::host_vector in .cu files. Is it necessary to convert the device_vector to std::vector?
Done
       out_dat[index - num_erased[index]] = in_dat[index];
     }
   }
 }

 template <typename T, typename Vector>
 thrust::device_vector<T> set_device_vector(Vector& vector) {
You can try something like this:
device_vector<int> D(vector.begin(), vector.end());
It works
   int* dev_in_lod_ptr = thrust::raw_pointer_cast(dev_in_lod.data());
   int* dev_out_lod_ptr = thrust::raw_pointer_cast(dev_out_lod.data());
   thrust::device_vector<size_t> dev_in_lod =
       set_device_vector<size_t, paddle::framework::Vector<size_t>>(lod0);
thrust::device_vector<size_t> dev_in_lod(lod0.begin(), lod0.end());
This should work.
Done
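For completeness, a hedged sketch of the whole round trip with thrust (dev_out_lod and out_lod0 are illustrative names; the kernel launch is elided):

// Host LoD -> device, using the range constructor instead of set_device_vector
thrust::device_vector<size_t> dev_in_lod(lod0.begin(), lod0.end());
thrust::device_vector<size_t> dev_out_lod(dev_in_lod.size());
// ... launch the kernel that shrinks the LoD offsets ...
// Device -> host, using thrust::copy instead of get_std_vector
std::vector<size_t> out_lod0(dev_out_lod.size());
thrust::copy(dev_out_lod.begin(), dev_out_lod.end(), out_lod0.begin());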
@@ -72,53 +91,46 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
     PADDLE_ENFORCE_EQ(lod[0].back(), (size_t)in->numel(),
                       "The actual size mismatches with the LoD information.");
-    auto tokens = ctx.Attr<std::vector<T>>("tokens");
-    auto tokens_len = tokens.size();
+    auto tokens = ctx.Attr<std::vector<int>>("tokens");
     auto in_len = in->numel();
     auto in_dat = in->data<T>();
Additionally, we should register an int64_t kernel.
Done
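A sketch of what the registration might look like, assuming the REGISTER_OP_CUDA_KERNEL macro used elsewhere in the codebase (exact spelling here is an assumption):

REGISTER_OP_CUDA_KERNEL(sequence_erase,
                        paddle::operators::SequenceEraseOpCUDAKernel<int32_t>,
                        paddle::operators::SequenceEraseOpCUDAKernel<int64_t>);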
class TestSequenceEraseOpEmpty(OpTest):
    def setUp(self):
        self.op_type = "sequence_erase"
        in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
Add a test for int64_t input.
Done
Updated. Thx
LGTM
Resolves #7430