From 4806fa8446234c5c713219058a8ab07c32aeb2a4 Mon Sep 17 00:00:00 2001
From: fengjiayi <fengjiayi@baidu.com>
Date: Sun, 29 Oct 2017 17:16:12 -0700
Subject: [PATCH 1/3] Fix top_k_op indices type: use int64_t, not T

---
 paddle/operators/top_k_op.h                       | 4 ++--
 python/paddle/v2/framework/tests/test_top_k_op.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/paddle/operators/top_k_op.h b/paddle/operators/top_k_op.h
index 4b248faa120bc..494ac699cb463 100644
--- a/paddle/operators/top_k_op.h
+++ b/paddle/operators/top_k_op.h
@@ -40,7 +40,7 @@ class TopkKernel : public framework::OpKernel<T> {
     const size_t k = static_cast<int>(ctx.Attr<int>("k"));
 
     T* output_data = output->mutable_data<T>(ctx.GetPlace());
-    T* indices_data = indices->mutable_data<T>(ctx.GetPlace());
+    int64_t* indices_data = indices->mutable_data<int64_t>(ctx.GetPlace());
 
     auto eg_input = EigenMatrix<T>::From(*input);
 
@@ -66,7 +66,7 @@ class TopkKernel : public framework::OpKernel<T> {
           });
       for (size_t j = 0; j < k; j++) {
         output_data[i * k + j] = vec[j].first;
-        indices_data[i * k + j] = vec[j].second;
+        indices_data[i * k + j] = int64(vec[j].second);
       }
     }
   }
diff --git a/python/paddle/v2/framework/tests/test_top_k_op.py b/python/paddle/v2/framework/tests/test_top_k_op.py
index 694f37d612d4c..6e8fbefa6eafa 100644
--- a/python/paddle/v2/framework/tests/test_top_k_op.py
+++ b/python/paddle/v2/framework/tests/test_top_k_op.py
@@ -9,7 +9,7 @@ def setUp(self):
         k = 1
         input = np.random.random((32, 84)).astype("float32")
         output = np.ndarray((32, k))
-        indices = np.ndarray((32, k))
+        indices = np.ndarray((32, k)).astype("int64")
 
         self.inputs = {'X': input}
         self.attrs = {'k': k}
@@ -32,7 +32,7 @@ def setUp(self):
         input = np.random.random((32, 2, 84)).astype("float32")
         input_flat_2d = input.reshape(64, 84)
         output = np.ndarray((64, k))
-        indices = np.ndarray((64, k)).astype("int")
+        indices = np.ndarray((64, k)).astype("int64")
 
         # FIXME: should use 'X': input for a 3d input
         self.inputs = {'X': input_flat_2d}

From 37ae86bc32643c59ee006c1d99d99e63eae623bf Mon Sep 17 00:00:00 2001
From: fengjiayi <fengjiayi@baidu.com>
Date: Sun, 29 Oct 2017 17:23:11 -0700
Subject: [PATCH 2/3] Fix top_k_op index cast: int64 -> int64_t

---
 paddle/operators/top_k_op.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/operators/top_k_op.h b/paddle/operators/top_k_op.h
index 494ac699cb463..bc8563717a21b 100644
--- a/paddle/operators/top_k_op.h
+++ b/paddle/operators/top_k_op.h
@@ -66,7 +66,7 @@ class TopkKernel : public framework::OpKernel<T> {
           });
       for (size_t j = 0; j < k; j++) {
         output_data[i * k + j] = vec[j].first;
-        indices_data[i * k + j] = int64(vec[j].second);
+        indices_data[i * k + j] = int64_t(vec[j].second);
       }
     }
   }

From 74780b1afda946abc099bb62faea5a118bfad09a Mon Sep 17 00:00:00 2001
From: fengjiayi <fengjiayi@baidu.com>
Date: Mon, 30 Oct 2017 11:46:42 -0700
Subject: [PATCH 3/3] Fix top_k_op GPU code data type

---
 paddle/operators/top_k_op.cu | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/paddle/operators/top_k_op.cu b/paddle/operators/top_k_op.cu
index 7be6932f1e301..7851c71bbe9fe 100644
--- a/paddle/operators/top_k_op.cu
+++ b/paddle/operators/top_k_op.cu
@@ -23,9 +23,9 @@ using Tensor = framework::Tensor;
 template <typename T>
 struct Pair {
   __device__ __forceinline__ Pair() {}
-  __device__ __forceinline__ Pair(T value, int id) : v(value), id(id) {}
+  __device__ __forceinline__ Pair(T value, int64_t id) : v(value), id(id) {}
 
-  __device__ __forceinline__ void set(T value, int id) {
+  __device__ __forceinline__ void set(T value, int64_t id) {
     v = value;
     id = id;
   }
@@ -48,7 +48,7 @@ struct Pair {
   }
 
   T v;
-  int id;
+  int64_t id;
 };
 
 template <typename T>
@@ -197,7 +197,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int& beam,
 template <typename T, int MaxLength, int BlockSize>
 __device__ __forceinline__ void BlockReduce(Pair<T>* sh_topk, int* maxid,
                                             Pair<T> topk[], T** topVal,
-                                            int** topIds, int& beam, int& k,
+                                            int64_t** topIds, int& beam, int& k,
                                             const int tid, const int warp) {
   while (true) {
     __syncthreads();
@@ -249,7 +249,7 @@ __device__ __forceinline__ void BlockReduce(Pair<T>* sh_topk, int* maxid,
  * 4. go to the first setp, until get the topk value.
  */
 template <typename T, int MaxLength, int BlockSize>
-__global__ void KeMatrixTopK(T* output, int output_stride, int* indices,
+__global__ void KeMatrixTopK(T* output, int output_stride, int64_t* indices,
                              const T* src, int lds, int dim, int k) {
   __shared__ Pair<T> sh_topk[BlockSize];
   __shared__ int maxid[BlockSize / 2];
@@ -293,7 +293,7 @@ class TopkOpCUDAKernel : public framework::OpKernel<T> {
 
     T* output_data = output->mutable_data<T>(ctx.GetPlace());
     // FIXME(typhoonzero): data is always converted to type T?
-    int* indices_data = indices->mutable_data<int>(ctx.GetPlace());
+    int64_t* indices_data = indices->mutable_data<int64_t>(ctx.GetPlace());
 
     size_t input_height = input->dims()[0];
     size_t input_width = input->dims()[1];