
add the transpose op #3920

Merged
merged 14 commits into PaddlePaddle:develop from NHZlX:op_transpose
Sep 21, 2017

Conversation

NHZlX
Contributor

@NHZlX NHZlX commented Sep 6, 2017

fix #4163

int* host_buffer_data = host_buffer.mutable_data<int>(buffer_dims, cpu_place);

auto offset_buffer =
    memory::Alloc(context.GetPlace(), ndims * 3 * sizeof(int));
Member

This interface is used incorrectly: context.GetPlace() returns a Place, which is a boost::variant<CPUPlace, GPUPlace>, while Alloc needs a concrete GPUPlace or CPUPlace.

Contributor Author

Done

}

memory::Copy(gpu_place, offset_buffer, cpu_place, host_buffer_data,
             ndims * 3 * sizeof(int));
Member

When doing the memory copy, the corresponding CUDA stream needs to be specified:

template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
          cudaStream_t stream);

The CUDA stream can be obtained from the context.

Contributor Author

Done

Contributor

@Xreki Xreki left a comment

  • Merge the latest develop first
  • Need more unit test cases. In particular, we need a unit test with large Tensor to test the CUDA kernel.


#include "paddle/operators/transpose_op.h"
#include <vector>
#include "paddle/framework/ddim.h"
Contributor

Lines 16 and 17 can be removed.


PADDLE_ENFORCE_EQ(
    in_dim_size, axis_size,
    "the input tensor dimensions should be equal to the axis size");
Contributor

Print the values of in_dim_size and axis_size in the error message.
input tensor dimensions -> input tensor's dimension
axis size -> axis's size

std::vector<int> axis_sorted(axis);
std::sort(axis_sorted.begin(), axis_sorted.end());
for (size_t i = 0; i < axis_sorted.size(); i++) {
PADDLE_ENFORCE_EQ(axis_sorted[i], (int)i,
Contributor

Use static_cast<int>

for (size_t i = 0; i < axis_sorted.size(); i++) {
PADDLE_ENFORCE_EQ(axis_sorted[i], (int)i,
"the sorted axis should be [0, 1, ... dims - 1], "
"the dims equals to the input tensor dimensions");
Contributor

where the dims is the axis's size.

framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of transpose op");
AddOutput("Out", "The output of transpose op");

}

template <typename Place, typename T, int Dims>
void DoTranspose(const framework::ExecutionContext& context,
Contributor

Since this function calls Eigen to do the transpose, how about renaming it to EigenCpuTranspose?

Contributor Author

sounds great

out->mutable_data<T>(context.GetPlace());

auto axis_temp = context.Attr<std::vector<int>>("axis");
std::vector<int> axis(axis_temp);
Contributor

How about : axis_temp -> axis and axis -> axis_grad

Contributor Author

there is no grad for axis

public:
void Compute(const framework::ExecutionContext& context) const override {
auto* in = context.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* out = context.Output<framework::Tensor>(framework::GradVarName("X"));
Contributor

in -> in_grad, out -> out_grad

case 5:
DoTranspose<Place, T, 5>(context, *in, *out, axis);
break;
default:
Contributor

When ndims is 1, calling NaiveCpuTranspose?

Contributor Author

When ndims > 5, NaiveCpuTranspose is called.
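For illustration, the per-element index remapping that such a naive transpose performs can be sketched in plain Python (hypothetical helper names, not the Paddle implementation): each element at an input flat offset is moved to the flat offset of its permuted coordinates.

```python
def strides_of(shape):
    # Row-major strides: stride[i] is the product of the dims to the right of i.
    strides = [1] * len(shape)
    for i in range(len(shape) - 2, -1, -1):
        strides[i] = strides[i + 1] * shape[i + 1]
    return strides

def naive_transpose(data, shape, axis):
    out_shape = [shape[a] for a in axis]
    in_strides = strides_of(shape)
    out_strides = strides_of(out_shape)
    out = [None] * len(data)
    for flat in range(len(data)):
        # Decompose the input flat index into multi-dimensional coordinates.
        rem, coords = flat, []
        for s in in_strides:
            coords.append(rem // s)
            rem %= s
        # Recompose the output flat index from the permuted coordinates.
        out_flat = sum(coords[axis[d]] * out_strides[d] for d in range(len(axis)))
        out[out_flat] = data[flat]
    return out, out_shape

# Transposing a 2x3 row-major buffer with axis (1, 0):
out, out_shape = naive_transpose(list(range(6)), [2, 3], [1, 0])
```

This scheme works for any rank, which is why the fallback has no upper dimension limit, only the switch over Eigen ranks does.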

namespace operators {

template <typename T>
__global__ void transpose_kernel(int nthreads, const T* in_data, T* out_data,
Contributor

I think this CUDA function can be renamed to NaiveCUDATranspose, and we may need some optimized implementation in the future.


protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
auto in_dim = ctx.Input<Tensor>("X")->dims();
Contributor

The checks in InferShape need to be comprehensive; check the inputs with PADDLE_ENFORCE_NOT_NULL, for example: https://github.com/PaddlePaddle/Paddle/pull/4086/files#diff-1fcd5ee1c1e63ed40789a0e60fdb1bf6R29

for (size_t i = 0; i < axis.size(); i++) {
out_dim[i] = in_dim[axis[i]];
}
ctx.Output<Tensor>("Out")->Resize(out_dim);
Contributor

InferShape now needs to use LoDTensor for the output: Output<framework::LoDTensor>

Contributor Author

Done

PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
auto x_dims = ctx.Input<Tensor>("X")->dims();
auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
Contributor

Output< framework::LoDTensor>

Contributor Author

Done

namespace operators {

template <typename T>
__global__ void transpose_kernel(int nthreads, const T* in_data, T* out_data,
Contributor

This kernel is too inefficient; consider Eigen::shuffle instead:

https://github.com/RLovelett/eigen/tree/master/unsupported/Eigen/CXX11/src/Tensor#-shuffleconst-shuffle-shuffle

That way both CPU and GPU are supported.

Contributor Author

I have deleted this kernel and now use the Eigen interface.

break;
case 5:
DoTranspose<Place, T, 5>(context, *in, *out, axis);
break;
Contributor

Rank up to 5 already covers most cases. I think NaiveCpuTranspose can be removed and Eigen::shuffle used directly; doesn't it support GPU as well? For rank > 5:

PADDLE_THROW("Tensors with rank at most 6 are supported");

Contributor Author

OK. If tensors with more than 5 dimensions are not supported, that works and saves a lot of code, including NaiveCpuTranspose and the GPU kernel code.

import numpy as np
from gradient_checker import GradientChecker
from op_test_util import OpTestMeta
from paddle.v2.framework.op import Operator
Contributor

Use the new unit-test framework.

Contributor Author

Done


self.check_grad(op, inputs, set(["X"]), "Out", max_relative_error=0.5)


Contributor

Test more cases: 2-D, 3-D, 4-D, and 5-D.

Contributor Author

Done
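As a plain-Python illustration (not the Paddle test framework) of sweeping 2-D through 5-D cases: store each element in a dict keyed by its coordinates, and check that a transpose merely permutes the key.

```python
from itertools import product

def transpose(t, axis):
    # A transpose over a coordinate-keyed dict just permutes each key.
    return {tuple(k[a] for a in axis): v for k, v in t.items()}

cases = [
    ((3, 4), (1, 0)),
    ((2, 3, 4), (1, 2, 0)),
    ((2, 3, 4, 5), (0, 2, 3, 1)),
    ((2, 3, 4, 5, 6), (4, 2, 3, 1, 0)),
]
for shape, axis in cases:
    t = {c: i for i, c in enumerate(product(*(range(n) for n in shape)))}
    out = transpose(t, axis)
    # Every element must land at its permuted coordinates.
    for c, v in t.items():
        assert out[tuple(c[a] for a in axis)] == v
```

The shape/axis pairs mirror the ranks the review asks to cover; the real test would feed the same pairs to the operator and compare against a reference.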

Contributor

@Xreki Xreki left a comment

I think it is mostly OK overall. The main thing left is to polish the comments on the inputs, outputs, and the Op itself, since documentation will be generated from them later.

auto axis = context.Attr<std::vector<int>>("axis");
int ndims = axis.size();
switch (ndims) {
case 1:
Contributor

Does this op support 1-D input? If it does, this branch should be a copy operation; if it does not, InferShape should check the rank with PADDLE_ENFORCE.

Contributor Author

This was a bug; it has been fixed.

protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Input"),
"Input(Input) should not be null");
Contributor

The output also needs to be checked.

: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
"Input",
"(Tensor)The input tensor, tensors with rank at most 7 are supported");
Contributor

Only up to 6-D is supported, right?

Contributor Author

Right, tensors with more than 6 dimensions are not supported.

ctx.Input<Tensor>(framework::GradVarName("Output"))->dims();
auto output_dims = ctx.Input<Tensor>("Output")->dims();

PADDLE_ENFORCE(output_grad_dims == output_dims,
Contributor

PADDLE_ENFORCE_EQ can be used here.


switch (ndims) {
case 1:
break;
Contributor

Same as above.

Contributor Author

Done

PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Input"),
"Input(Input) should not be null");
auto input_dim = ctx.Input<Tensor>("Input")->dims();
auto axis = ctx.Attr<std::vector<int>>("axis");
Collaborator

Writing std::vector<int> is not complex. Don't overuse auto; it may confuse readers about a variable's type.

Contributor Author

Done

"Input(Input) should not be null");
auto input_dim = ctx.Input<Tensor>("Input")->dims();
auto axis = ctx.Attr<std::vector<int>>("axis");
size_t input_dim_size = input_dim.size();
Collaborator

The size of a tensor's dimensions is called rank.

Contributor Author

Done

size_t axis_size = axis.size();

PADDLE_ENFORCE_EQ(input_dim_size, axis_size,
"the input tensor's dimension(%d) "
Collaborator

"the input tensor's rank(%d) "

PADDLE_ENFORCE_EQ(axis_sorted[i], static_cast<int>(i),
"the sorted axis should be [0, 1, ... dims - 1], "
"where the dims is the axis's size");
}
Collaborator

I think traversing axis and recording how many times each value occurs is shorter and faster than the current implementation:

std::vector<int> count(axis_size, 0);
for (size_t i = 0; i < axis.size(); i++) {
  PADDLE_ENFORCE(axis[i] < axis_size && ++count[axis[i]] == 1,
         "Attribute axis should be a permutation of [0, 1, ... dims - 1], "
         "where the dims is the axis's size");
}

Contributor Author

Done
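The counting-based check suggested above can be sketched in Python (a stand-in for the PADDLE_ENFORCE version, not actual Paddle code): each value in axis must lie in range and occur exactly once, which is checked in a single pass.

```python
def check_axis(axis):
    # Each value must be in [0, len(axis)) and occur exactly once.
    count = [0] * len(axis)
    for a in axis:
        if not (0 <= a < len(axis)) or count[a] != 0:
            raise ValueError(
                "Attribute axis should be a permutation of [0, 1, ... dims - 1]")
        count[a] += 1

check_axis([2, 0, 1])  # a valid permutation passes silently
```

Unlike the sort-based version, this is O(n) and needs no extra sorted copy of axis.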

AddAttr<std::vector<int>>(
"axis",
"(vector<int>)a list of values, and the size of the list should be "
"the same with the input tensor dimensions, the tensor will "
Collaborator

"the input tensor's rank"

ctx.Output<framework::LoDTensor>(framework::GradVarName("Input"));

auto output_grad_dims =
ctx.Input<Tensor>(framework::GradVarName("Output"))->dims();
Collaborator

Assert output_grad_dims is not nullptr.

PADDLE_ENFORCE(output_grad_dims == output_dims,
"Output@GRAD dims must equal to Input(Input) dims");

input_grad->Resize(input_dims);
Collaborator

input_grad can be nullptr, which means it is useless for backward, and we don't need to resize and compute it.

context.Input<framework::Tensor>(framework::GradVarName("Output"));
auto* input_grad =
context.Output<framework::Tensor>(framework::GradVarName("Input"));
input_grad->mutable_data<T>(context.GetPlace());
Collaborator

Check input_grad first. If it is nullptr, we don't need to do the following computation.

An example: https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cos_sim_op.h#L104

Contributor Author

Done

std::vector<int> axis(axis_temp);

for (size_t i = 0; i < axis.size(); i++) {
axis[axis_temp[i]] = i;
Collaborator

How about rename axis_temp to axis and rename current axis to reversed_axis?

Contributor Author

good choice
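The inversion in the snippet above (reversed_axis[axis[i]] = i) produces the inverse permutation, which is what the gradient kernel relies on: transposing by it undoes the forward transpose. A quick Python check of that property:

```python
axis = [2, 0, 1]                   # forward permutation
reversed_axis = [0] * len(axis)
for i in range(len(axis)):
    reversed_axis[axis[i]] = i     # same assignment as the C++ loop

# Composing the permutation with its inverse gives the identity.
assert [axis[reversed_axis[i]] for i in range(len(axis))] == [0, 1, 2]
assert [reversed_axis[axis[i]] for i in range(len(axis))] == [0, 1, 2]
```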

class TestCase4(TestTransposeOp):
def initTestCase(self):
self.shape = (2, 3, 4, 5, 6, 1)
self.axis = (4, 2, 3, 1, 0, 5)
Collaborator

@JiayiFeng JiayiFeng Sep 19, 2017

Here should be another test about whether our Op can throw an exception correctly. However, our framework can't support such a test right now. So I leave a comment here to remind us there is something TODO. I have created an issue about this: #4173
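A sketch of the kind of negative test meant here, written with plain unittest against a hypothetical stand-in validator (validate_axis is not a Paddle function; the framework hook for expecting operator exceptions did not exist yet, see #4173):

```python
import unittest

def validate_axis(shape, axis):
    # Stand-in for the operator's InferShape check (hypothetical helper).
    if sorted(axis) != list(range(len(shape))):
        raise ValueError("axis must be a permutation of [0, rank)")

class TestInvalidAxis(unittest.TestCase):
    def test_bad_axis_raises(self):
        with self.assertRaises(ValueError):
            validate_axis((2, 3, 4), (0, 1))     # wrong length
        with self.assertRaises(ValueError):
            validate_axis((2, 3, 4), (0, 1, 1))  # duplicate axis

suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestInvalidAxis)
result = unittest.TextTestRunner(verbosity=0).run(suite)
```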

JiayiFeng
JiayiFeng previously approved these changes Sep 19, 2017
Collaborator

@JiayiFeng JiayiFeng left a comment

LGTM. Thank you for your work!

AddInput(
"Input",
"(Tensor)The input tensor, tensors with rank at most 6 are supported");
AddOutput("Output", "(Tensor)The output tensor");
Collaborator

@JiayiFeng JiayiFeng Sep 19, 2017

The names of a single in/out Op's input and output should be X and Out respectively.

See the document: https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/name_convention.md

@NHZlX NHZlX merged commit c003895 into PaddlePaddle:develop Sep 21, 2017
@NHZlX NHZlX deleted the op_transpose branch September 21, 2017 02:44
heavengate pushed a commit to heavengate/Paddle that referenced this pull request Aug 16, 2021
* add faq

* Update README_cn.md

* Update FAQ-README.md

* Update FAQ第一期.md

* Rename FAQ-README.md to README.md

* Update README_cn.md

* Update FAQ第一期.md

* delete 2 files

* Delete .DS_Store

Successfully merging this pull request may close these issues.

Transpose Operator
5 participants