Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#44 from Superjomn/refine/buffer
Browse files Browse the repository at this point in the history
add Bind to Tensor and Buffer
  • Loading branch information
Superjomn authored Feb 27, 2020
2 parents 1033f4f + 1e0e8a8 commit d9a1874
Show file tree
Hide file tree
Showing 12 changed files with 139 additions and 24 deletions.
21 changes: 21 additions & 0 deletions cinn/ir/buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ namespace cinn {
namespace ir {

//! Read-only access to the underlying _Buffer_ node.
const _Buffer_ *Buffer::operator->() const { return IrNodeRef::As<_Buffer_>(); }
//! Mutable access to the underlying _Buffer_ node.
_Buffer_ *Buffer::operator->() { return IrNodeRef::As<_Buffer_>(); }

Buffer _Buffer_::Make(Var data,
Type dtype,
Expand All @@ -29,8 +30,28 @@ Buffer _Buffer_::Make(Var data,
return Buffer(node);
}

//! Create a buffer node carrying only a name and a shape; the remaining
//! fields (dtype, data var, strides, ...) are left at their defaults.
Buffer _Buffer_::Make(const std::string &name, const std::vector<Expr> &shape) {
  auto *buffer_node = common::make_shared<_Buffer_>();
  buffer_node->name = name;
  buffer_node->shape = shape;
  return Buffer(buffer_node);
}

//! Create an empty buffer node with every field left at its default.
Buffer _Buffer_::Make() { return Buffer(common::make_shared<_Buffer_>()); }

//! Dispatch this node to an IR visitor.
void _Buffer_::Accept(IRVisitor *v) const { v->Visit(this); }
//! Runtime node-type tag, used by IrNodeRef::As<> style casts.
IrNodeTy _Buffer_::node_type() const { return _node_type_; }

//! Bind to a tensor handle by forwarding to the node-level overload.
void _Buffer_::BindTo(const Tensor &tensor) {
  auto *tensor_node = tensor.As<_Tensor_>();
  BindTo(tensor_node);
}

void _Buffer_::BindTo(const _Tensor_ *tensor) {
if (name.empty()) name = tensor->name;
if (!data.defined()) data = _Var_::Make(name, tensor->type()).As<ir::_Var_>();
bound_tensors_names_.insert(tensor->name);
}

} // namespace ir
} // namespace cinn
19 changes: 17 additions & 2 deletions cinn/ir/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ namespace cinn {
namespace ir {

class _Buffer_;
class Tensor;
class _Tensor_;

//! The memory access mode.
enum class AccessMask : int {
Expand All @@ -18,16 +20,18 @@ enum class AccessMask : int {
};

/**
* Buffer is a symbolic multi-dimensional data structure.
* Buffer is a symbolic multi-dimensional data structure, it is a node in IR.
* It is a composition of primitive symbolic types, used to specify the memory layout of the Tensor used in the program
* input.
* input. User can create a buffer and bind to multiple Tensors to specify that the tensors are not inlined and persist
* data to this buffer.
*/
class Buffer : public IrNodeRef {
 public:
  Buffer() = default;
  explicit Buffer(IrNode* n) : IrNodeRef(n) {}

  //! Read-only access to the underlying _Buffer_ node.
  const _Buffer_* operator->() const;
  //! Mutable access to the underlying _Buffer_ node.
  _Buffer_* operator->();
};

class _Buffer_ : public ExprNode<_Buffer_> {
Expand Down Expand Up @@ -63,10 +67,21 @@ class _Buffer_ : public ExprNode<_Buffer_> {
int data_alignment,
int offset_factor);

static Buffer Make(const std::string& name, const std::vector<Expr>& shape = {});

//! Make an empty buffer.
static Buffer Make();

void BindTo(const Tensor& tensor);
void BindTo(const _Tensor_* tensor);

void Accept(IRVisitor* v) const override;
IrNodeTy node_type() const override;

static const IrNodeTy _node_type_ = IrNodeTy::_Buffer_;

private:
std::set<std::string> bound_tensors_names_;
};

} // namespace ir
Expand Down
12 changes: 11 additions & 1 deletion cinn/lang/buffer.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
#include "cinn/lang/buffer.h"

#include "cinn/ir/buffer.h"

namespace cinn {
namespace lang {} // namespace lang
namespace lang {

using ir::_Buffer_;

//! Create a lang::Buffer backed by a named ir::_Buffer_ node.
Buffer::Buffer(const std::string &name) : buffer_(_Buffer_::Make(name)) {}

//! Create a lang::Buffer backed by an anonymous, empty ir::_Buffer_ node.
Buffer::Buffer() : buffer_(_Buffer_::Make()) {}

} // namespace lang
} // namespace cinn
23 changes: 21 additions & 2 deletions cinn/lang/buffer.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,26 @@
#pragma once

#include "cinn/ir/node.h"
#include "cinn/ir/buffer.h"

namespace cinn {
namespace lang {} // namespace lang
namespace lang {

/**
 * This is a DSL wrapper for ir::Buffer, giving user code a lightweight
 * handle that can be bound to Tensors (see ir::_Tensor_::Bind).
 */
class Buffer {
 public:
  //! Create a buffer backed by an anonymous ir::_Buffer_ node.
  Buffer();
  //! Create a buffer backed by a named ir::_Buffer_ node.
  Buffer(const std::string& name);

  //! Mutable access to the underlying ir::_Buffer_ node.
  ir::_Buffer_* operator->() { return buffer_->As<ir::_Buffer_>(); }
  //! Read-only access to the underlying ir::_Buffer_ node.
  const ir::_Buffer_* operator->() const { return buffer_->As<ir::_Buffer_>(); }

  //! Get the held ir::Buffer handle.
  ir::Buffer buffer() const { return buffer_; }

 private:
  ir::Buffer buffer_;
};

} // namespace lang
} // namespace cinn
5 changes: 3 additions & 2 deletions cinn/lang/lower.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ std::vector<LoweredFunc> Lower(const std::string& name, const std::vector<Tensor
std::map<std::string, Tensor> tensor_dic;
for (auto& tensor : args) tensor_dic.emplace(tensor->name, tensor);
for (auto& stage : stages) stage_dic.emplace(stage->id(), stage);
CHECK_EQ(tensor_dic.size(), stage_dic.size());
CHECK_EQ(args.size(), stage_dic.size()) << "tensor should duplicate name";
// The placeholder Tensors are ignored in stages.
CHECK_GE(tensor_dic.size(), stage_dic.size());
CHECK_GE(args.size(), stage_dic.size()) << "tensor should duplicate name";

std::set<std::string> args_names;
for (auto& arg : args) {
Expand Down
8 changes: 7 additions & 1 deletion cinn/lang/lower_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <gtest/gtest.h>

#include "cinn/lang/buffer.h"
#include "cinn/lang/compute.h"
#include "cinn/lang/placeholder.h"
#include "cinn/utils/string.h"
Expand All @@ -15,9 +16,13 @@ TEST(lower, basic) {

Placeholder<float> A("A", {Expr(M), Expr(N)});

Buffer B_buf;

auto B = Compute(
{M, N}, [=](Var i, Var j) -> Expr { return A(i, j) + 1.f; }, "B");

B->Bind(B_buf);

auto lower_funcs = Lower("cal_B", {A, B});

LOG(INFO) << "lower_size " << lower_funcs.size();
Expand All @@ -32,7 +37,6 @@ TEST(lower, basic) {
{
poly_for (0, (c3 <= 14), 1)
{
A(c1, c3)
B[((c1 * 15) + c3)] = (A(c1, c3) + 1)
}
}
Expand All @@ -49,8 +53,10 @@ TEST(lower, more_complex) {
Placeholder<float> A("A", {Expr(M), Expr(N)});
Placeholder<float> B("B", {Expr(N), Expr(K)});

Buffer C_buf;
auto C = Compute(
{M, N, K}, [=](Var i, Var j, Var k) -> Expr { return A(i, j) * B(j, k); }, "C");
C->Bind(C_buf);

auto lower_funcs = Lower("cal_C", {A, B, C});

Expand Down
12 changes: 8 additions & 4 deletions cinn/lang/tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ Tensor _Tensor_::Make(const std::string &name, const std::vector<Expr> &shape, F
n->operaion = fn;
n->InitStage();
n->InitAxis();
n->SetDefaultBindedBuffer();
return Tensor(n);
}

Expand All @@ -55,7 +54,7 @@ Tensor _Tensor_::Make(const std::string &name,
CHECK(!name.empty()) << "Tensor name is set empty";

auto op = ComputeOp::Make(name, tag, attrs, axis, body, shape);
auto *compute_op = const_cast<ComputeOp *>(op->As<ComputeOp>());
auto *compute_op = op->As<ComputeOp>();

CHECK_EQ(axis.size(), shape.size()) << "axis not match the dimension in shape";
compute_op->axis = axis;
Expand All @@ -66,7 +65,6 @@ Tensor _Tensor_::Make(const std::string &name,
n->shape = shape;
n->set_type(dtype);
n->InitStage();
n->SetDefaultBindedBuffer();
return Tensor(n);
}

Expand Down Expand Up @@ -168,6 +166,7 @@ _Tensor_::~_Tensor_() {
}

//! Dereference to the underlying _Operation_ node (read-only / mutable).
const _Operation_ *Operation::operator->() const { return static_cast<_Operation_ *>(get()); }
_Operation_ *Operation::operator->() { return static_cast<_Operation_ *>(get()); }

Expr _Tensor_::body() const {
if (is_placeholder_node()) return Expr();
Expand All @@ -179,7 +178,12 @@ Expr _Tensor_::tensor_store_expanded_body() const {
CHECK(!is_placeholder_node()) << "placeholder should not expand store";
std::vector<Expr> axis_;
for (auto &a : axis) axis_.push_back(Expr(a));
return ir::Store::Make(buffer_var, body(), detail::ExpandTo1DIndice(shape, axis_));
return ir::Store::Make(buffer->data, body(), detail::ExpandTo1DIndice(shape, axis_));
}

//! Bind to a buffer: register this tensor on the buffer's node, and keep a
//! handle to the buffer so the tensor is treated as non-inlined.
void _Tensor_::Bind(lang::Buffer &buffer) {
  auto buffer_handle = buffer.buffer();
  buffer->BindTo(this);
  this->buffer = buffer_handle;
}

} // namespace ir
Expand Down
16 changes: 10 additions & 6 deletions cinn/lang/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "cinn/common/graph_utils.h"
#include "cinn/ir/function_base.h"
#include "cinn/ir/ir.h"
#include "cinn/lang/buffer.h"

namespace cinn {

Expand Down Expand Up @@ -104,8 +105,8 @@ class _Tensor_ : public ExprNode<_Tensor_> {
std::string name;
//! Polyhedral element for analysis and schedule.
poly::Stage* stage{};
//! The binded buffer, for each tensor if it is not inline.
Var buffer_var;
//! The bound buffer, for each tensor if it is not inline.
Buffer buffer;

//! Generate a tensor from a computation.
static Tensor Make(const std::string& name,
Expand All @@ -119,6 +120,12 @@ class _Tensor_ : public ExprNode<_Tensor_> {
//! Generate a tensor from a function.
static Tensor Make(const std::string& name, const std::vector<Expr>& shape, FunctionRef fn);

//! Tell whether this tensor is inline.
bool inlined() const { return (!buffer.defined()) && (!is_placeholder_node()); }

//! Bind to a buffer, will persist data to the buffer in runtime.
void Bind(lang::Buffer& buffer);

//! Tell the operation type.
// @{
bool is_compute_node() const;
Expand Down Expand Up @@ -150,10 +157,6 @@ class _Tensor_ : public ExprNode<_Tensor_> {
//! Initialize the axis field after the shape field is assigned.
void InitAxis();

//! Bind the tensor to a buffer by default.
//! NOTE it should called by all the Make.
void SetDefaultBindedBuffer() { buffer_var = ir::_Var_::Make(name, type()).As<_Var_>(); }

isl::set GenerateIslDomain();
};

Expand All @@ -164,6 +167,7 @@ class Operation : public FunctionRef {
explicit Operation(IrNode* n) : FunctionRef(n) {}

inline const _Operation_* operator->() const;
inline _Operation_* operator->();

//! Get the i-th output of the operation.
// Tensor output(size_t i) const;
Expand Down
9 changes: 7 additions & 2 deletions cinn/poly/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,13 @@ std::unique_ptr<common::Graph> CreateGraph(const std::vector<Stage*>& stages) {
auto depend_statement_names = stage->input_statements();
VLOG(3) << stage->id() << " depend " << utils::Join(depend_statement_names, ", ");
for (auto& depend_statement : depend_statement_names) {
auto& input_node = id2stage.at(depend_statement);
input_node->LinkTo(id2stage.at(stage->id()).get());
auto input_it = id2stage.find(depend_statement);
// We removed some node in the original stages(such as placeholders), so that there might be missing of some input
// nodes, just ignore the dependence.
if (input_it != std::end(id2stage)) {
auto& input_node = id2stage.at(depend_statement);
input_node->LinkTo(id2stage.at(stage->id()).get());
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions cinn/poly/schedule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ std::unique_ptr<Schedule> CreateSchedule(const std::vector<Stage *> &stages) {
return std::unique_ptr<Schedule>(new Schedule(graph.get()));
}

std::vector<Stage *> GatherStagesInTensors(const std::vector<ir::Tensor> &xs) {
std::vector<Stage *> GatherStagesInTensors(const std::vector<ir::Tensor> &xs, bool with_placeholder) {
// get the stages from a tensor.
std::vector<Stage *> stages;
std::deque<ir::Tensor> queue;
Expand All @@ -236,7 +236,7 @@ std::vector<Stage *> GatherStagesInTensors(const std::vector<ir::Tensor> &xs) {
queue.pop_front();
if (visited.count(Expr(top))) continue;
visited.insert(Expr(top));
stages.push_back(top->stage);
if (!top->is_placeholder_node()) stages.push_back(top->stage);

auto tensor_exprs = ir::CollectIRNodes(Expr(top), [](const Expr *expr) { return expr->As<ir::_Tensor_>(); });
for (auto &expr : tensor_exprs) {
Expand Down
3 changes: 2 additions & 1 deletion cinn/poly/schedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,10 @@ std::unique_ptr<Schedule> CreateSchedule(const std::vector<Stage *> &stages);
/**
* Gather the stages in the input tensors and their dependencies
* @param xs The input tensors.
* @param with_placeholder Whether to include placeholders(default false).
* @returns The stages in topological order follow the connection to `xs`.
*/
std::vector<Stage *> GatherStagesInTensors(const std::vector<ir::Tensor> &xs);
std::vector<Stage *> GatherStagesInTensors(const std::vector<ir::Tensor> &xs, bool with_placeholder = false);

/**
* PolyScheduler - Perform schedule on polyhedral model.
Expand Down
31 changes: 30 additions & 1 deletion docs/design.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ The Scheduler take the stages as input, and do the previous mentioned graph part
Each schedule element owns an (ISL)iteration domain and a (ISL)schedule, and one can pass it to a ast_gen and generate code.
### Lower output Tensors to LoweredFunctions
### Lower output Tensors to LoweredFuncs
First, given the output tensors, the `Lower` function will collect all the depended inputs, and lower them to a function.
Expand All @@ -151,3 +151,32 @@ The lower interface is
```c++
std::vector<LoweredFunction> Lower(vector<Tensor>& args, DeviceAPI device);
```

### Buffer
Buffer represents the actual memory in host or devices.

The `Buffer` node in IR represents a buffer, it can be used by binding to a Tensor.

A Tensor will be non-inlined only if it is bound to some buffer.

NOTE A buffer can be reused by multiple tensors (TODO: the write-read correctness should be considered).

```c++
Buffer buffer0;

Tensor x = Compute(...);
// x will write the result to buffer0
x->Bind(buffer0);

Tensor y = Compute(..., [](Var i) {
  return x(i) * 2; // here it will read the buffer instead, x is just an alias.
});
```
The size of the buffer will be inferred from the shape and data type of the tensor.
By default, it can be resized to the proper shape by being bound to multiple tensors.
#### Buffer in CodeGen
All the buffers will be maintained in global scope, and alloc or dealloc in local scopes.
The benefit is that a buffer is easy to share across multiple statements.

0 comments on commit d9a1874

Please sign in to comment.