Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dyn_cast_interface
zhangbopd committed Sep 26, 2023
2 parents 307b372 + 7282acf commit cb8456a
Showing 1,184 changed files with 32,060 additions and 16,110 deletions.
4 changes: 2 additions & 2 deletions .clang-tidy
@@ -4,7 +4,7 @@ bugprone-argument-comment,
-bugprone-assert-side-effect,
-bugprone-bad-signal-to-kill-thread,
-bugprone-bool-pointer-implicit-conversion,
--bugprone-branch-clone,
+bugprone-branch-clone,
bugprone-copy-constructor-init,
-bugprone-dangling-handle,
-bugprone-dynamic-static-initializers,
@@ -75,7 +75,7 @@ clang-analyzer-cplusplus.InnerPointer,
-clang-analyzer-cplusplus.SelfAssignment,
-clang-analyzer-cplusplus.SmartPtr,
-clang-analyzer-cplusplus.VirtualCallModeling,
--clang-analyzer-deadcode.DeadStores,
+clang-analyzer-deadcode.DeadStores,
-clang-analyzer-fuchsia.HandleChecker,
-clang-analyzer-nullability.NullPassedToNonnull,
-clang-analyzer-nullability.NullReturnedFromNonnull,
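For context, the two hunks above enable checks that were previously switched off. The snippets below are illustrative only (hypothetical code, not from the Paddle tree) and show the patterns each newly enabled check diagnoses:

// bugprone-branch-clone: both branches are token-for-token identical.
int Sign(int x) {
  if (x >= 0) {
    return 1;
  } else {
    return 1;  // clone of the then-branch; likely meant: return -1;
  }
}

// clang-analyzer-deadcode.DeadStores: a stored value is never read.
int DeadStore() {
  int v = 42;  // dead store: overwritten before any read
  v = 7;
  return v;
}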
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -94,9 +94,9 @@ repos:
description: Check C++ code style using cpplint.py.
entry: bash ./tools/codestyle/cpplint_pre_commit.hook
language: system
-files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
+files: \.(cc|cxx|cpp|cu|h|hpp|hxx)$
args:
-- --extensions=c,cc,cxx,cpp,cu,cuh,h,hpp,hxx,kps
+- --extensions=cc,cxx,cpp,cu,cuh,h,hpp,hxx,kps
- --filter=-readability/fn_size,-build/include_what_you_use,-build/c++11,-whitespace/parens
- --quiet
# Exclude third-party libraries
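Net effect of the hunk above: plain C sources are dropped from the cpplint pre-commit hook. Files ending in .c no longer match the files pattern, and c is removed from --extensions, so the C++ style rules now apply only to C++/CUDA sources and headers.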
2 changes: 1 addition & 1 deletion cmake/external/gloo.cmake
@@ -37,7 +37,7 @@ if(WITH_GPU)
file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/gloo/device.cc.patch
native_dst)
set(GLOO_PATCH_COMMAND
-git checkout -- . && git checkout ${GLOO_TAG} &&patch -Nd
+git checkout -- . && git checkout ${GLOO_TAG} && patch -Nd
${SOURCE_DIR}/gloo/transport/tcp < ${native_dst})
endif()
endif()
8 changes: 6 additions & 2 deletions cmake/hip.cmake
@@ -85,8 +85,11 @@ find_package_and_include(rocsparse)
find_package_and_include(rocfft)

# set CXX flags for HIP
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__")
+set(CMAKE_C_FLAGS
+    "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__ -DROCM_NO_WRAPPER_HEADER_WARNING")
+set(CMAKE_CXX_FLAGS
+    "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__ -DROCM_NO_WRAPPER_HEADER_WARNING"
+)
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP")
set(THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP)
@@ -96,6 +99,7 @@ list(APPEND HIP_CXX_FLAGS -fPIC)
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1)
# Note(qili93): HIP has compile conflicts of float16.h as platform::float16 overload std::is_floating_point and std::is_integer
list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1)
+list(APPEND HIP_CXX_FLAGS -DROCM_NO_WRAPPER_HEADER_WARNING)
list(APPEND HIP_CXX_FLAGS -Wno-macro-redefined)
list(APPEND HIP_CXX_FLAGS -Wno-inconsistent-missing-override)
list(APPEND HIP_CXX_FLAGS -Wno-exceptions)
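Both the host C/C++ flags and HIP_CXX_FLAGS now define ROCM_NO_WRAPPER_HEADER_WARNING. Judging by the macro name (an assumption, not verified against the ROCm sources), this suppresses the deprecation warnings that ROCm's backward-compatibility wrapper headers emit during compilation.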
112 changes: 96 additions & 16 deletions paddle/cinn/ast_gen_ius/ast_gen.cc
@@ -18,34 +18,114 @@
#include "paddle/cinn/ir/operation.h"
#include "paddle/cinn/ir/tensor.h"
#include "paddle/cinn/ir/utils/ir_printer.h"
#include "paddle/cinn/lang/compute.h"

namespace cinn {
namespace ast_gen_ius {

-ir::Expr AstGen::Build(const ir::Tensor& tensor) {
+ir::Expr ConvertReduceBody(ir::Expr body,
+                           ir::Tensor tensor,
+                           const std::vector<Expr>& axis_exprs) {
+  ir::Reduce* reduce_node = body.As<ir::Reduce>();
+  if (!reduce_node) {
+    return ir::Store::Make(tensor, body, axis_exprs);
+  }
+
+  switch (reduce_node->reduce_type) {
+    case ir::Reduce::kSum:
+      return ir::Store::Make(
+          tensor, tensor(axis_exprs) + reduce_node->body, axis_exprs);
+    case ir::Reduce::kMul:
+      return ir::Store::Make(
+          tensor, tensor(axis_exprs) * reduce_node->body, axis_exprs);
+    case ir::Reduce::kMax:
+      return ir::Store::Make(
+          tensor,
+          ir::Max::Make(tensor(axis_exprs), reduce_node->body),
+          axis_exprs);
+    case ir::Reduce::kMin:
+      return ir::Store::Make(
+          tensor,
+          ir::Min::Make(tensor(axis_exprs), reduce_node->body),
+          axis_exprs);
+    case ir::Reduce::kAll:
+      return ir::Store::Make(
+          tensor, tensor(axis_exprs) && reduce_node->body, axis_exprs);
+    case ir::Reduce::kAny:
+      return ir::Store::Make(
+          tensor, tensor(axis_exprs) || reduce_node->body, axis_exprs);
+    default:
+      CINN_NOT_IMPLEMENTED
+  }
+}
+
+ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
const std::vector<ir::Var>& axis = tensor->axis();
const std::vector<ir::Expr>& shape = tensor->shape;
size_t axis_len = axis.size();
-  CHECK_EQ(shape.size(), axis_len)
-      << "Internal Error: Tensor has different shape and axis length in AstGen";
+  CHECK_EQ(shape.size(), axis_len) << "Internal Error: Tensor has different "
+                                      "shape and axis length in AstGen";
std::vector<ir::Expr> axis_exprs;
for (const auto& a : axis) {
axis_exprs.push_back(a);
}
-  ir::Expr body = ir::Store::Make(tensor, tensor->body(), axis_exprs);
-
-  for (int i = static_cast<int>(axis_len) - 1; i >= 0; --i) {
-    ir::Var loop_var = axis[i];
-    ir::Expr loop_extent = shape[i];
-    body = ir::For::Make(loop_var,
-                         Expr(0),
-                         loop_extent,
-                         ir::ForType::Serial,
-                         ir::DeviceAPI::Host,
-                         ir::Block::Make({body}));
-  }
-
+  if (tensor->is_reduce_tensor()) {
+    // Make an init Tensor for domain without reduce axis
+    Expr init_value = tensor->GetReduceInitVal();
+    // TODO(zhhsplendid): Clean the handcoded "__reduce_init" string
+    std::string reduce_init_name = tensor->name + "__reduce_init";
+    const std::vector<Expr>& domain = tensor->domain_without_reduce_axis();
+    ir::Tensor init_tensor = lang::Compute(
+        domain,
+        [=](const std::vector<Expr>& axis) { return init_value; },
+        reduce_init_name);
+    tensor_group->Insert(init_tensor);
+    tensor_group->MarkShareMemBuffer(tensor, init_tensor);
+    tensor_group->CtrlDepend(tensor, init_tensor);
+    Expr init_body = ir::Store::Make(init_tensor, init_value, axis_exprs);
+
+    // For the remaining reduce axis, make reduce body
+    const std::vector<ir::Var>& reduce_axis = tensor->reduce_axis;
+    ir::Expr reduce_body =
+        ConvertReduceBody(tensor->body(), tensor, axis_exprs);
+    for (int i = static_cast<int>(reduce_axis.size()) - 1; i >= 0; --i) {
+      reduce_body = ir::For::Make(reduce_axis[i],
+                                  reduce_axis[i]->lower_bound,
+                                  reduce_axis[i]->upper_bound,
+                                  ir::ForType::Serial,
+                                  ir::DeviceAPI::Host,
+                                  ir::Block::Make({reduce_body}));
+    }
+
+    // Put the two parts together
+    ir::Expr body = ir::Block::Make({init_body, reduce_body});
+    for (int i = static_cast<int>(axis_len) - 1; i >= 0; --i) {
+      ir::Var loop_var = axis[i];
+      ir::Expr loop_extent = shape[i];
+      body = ir::For::Make(
+          loop_var,
+          Expr(0),
+          loop_extent,
+          ir::ForType::Serial,
+          ir::DeviceAPI::Host,
+          i == static_cast<int>(axis_len) - 1 ? body : ir::Block::Make({body}));
+    }
+    return body;
+  } else {
+    ir::Expr body = ir::Store::Make(tensor, tensor->body(), axis_exprs);
+    for (int i = static_cast<int>(axis_len) - 1; i >= 0; --i) {
+      ir::Var loop_var = axis[i];
+      ir::Expr loop_extent = shape[i];
+      body = ir::For::Make(loop_var,
+                           Expr(0),
+                           loop_extent,
+                           ir::ForType::Serial,
+                           ir::DeviceAPI::Host,
+                           ir::Block::Make({body}));
+    }
+    return body;
+  }
-  return body;
}

} // namespace ast_gen_ius
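Taken together: ConvertReduceBody rewrites a tensor's ir::Reduce body into an accumulating ir::Store, and the new reduce branch of AstGen::Build wraps it in an init store over the non-reduce domain plus one serial loop per reduce axis. As a rough sketch, a sum reduction B[i] = sum_k A[i][k] would lower approximately to the pseudocode below (illustrative names and bounds, not actual generated IR):

// B__reduce_init shares B's buffer (MarkShareMemBuffer), so the init
// store zeroes B[i] before the accumulation runs.
for (int i = 0; i < I; ++i) {      // loop over the non-reduce axis
  B__reduce_init[i] = 0;           // init_body, value from GetReduceInitVal()
  for (int k = 0; k < K; ++k) {    // bounds from reduce_axis lower/upper_bound
    B[i] = B[i] + A[i][k];         // kSum case of ConvertReduceBody
  }
}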
2 changes: 1 addition & 1 deletion paddle/cinn/ast_gen_ius/ast_gen.h
@@ -23,7 +23,7 @@ namespace ast_gen_ius {

class AstGen {
public:
-  static ir::Expr Build(const ir::Tensor& tensor);
+  static ir::Expr Build(const ir::Tensor& tensor, TensorGroup* tensor_group);
};

} // namespace ast_gen_ius
4 changes: 3 additions & 1 deletion paddle/cinn/ast_gen_ius/ast_gen_test.cc
@@ -16,6 +16,7 @@
#include <vector>

#include "paddle/cinn/ast_gen_ius/ast_gen.h"
#include "paddle/cinn/ast_gen_ius/tensor_group.h"
#include "paddle/cinn/ir/ir.h"
#include "paddle/cinn/ir/ir_base.h"
#include "paddle/cinn/ir/tensor.h"
@@ -36,7 +37,8 @@ TEST(AstGen, Build) {
shape,
[&](const std::vector<Expr>& indice) { return lang::Relu(A(indice), 0); },
"relu_test");
-  Expr out = AstGen::Build(B);
+  TensorGroup tensor_group({B});
+  Expr out = AstGen::Build(B, &tensor_group);
LOG(INFO) << out;
}

29 changes: 11 additions & 18 deletions paddle/cinn/ast_gen_ius/tensor_group.cc
@@ -30,7 +30,7 @@ TensorGroup::TensorGroup(const std::vector<ir::Tensor>& tensors) {

for (auto& tensor : tensors) {
output_tensor_names_.insert(tensor->name);
-    std::set<ir::Expr> used_tensors = ir::CollectIRNodes(
+    std::set<ir::Expr> used_tensors = ir::ir_utils::CollectIRNodes(
tensor->body(), [](const Expr* x) { return x->as_tensor(); });
for (const Expr& x : used_tensors) {
const ir::Tensor to_dep = x.as_tensor_ref();
@@ -75,10 +75,12 @@ std::vector<ir::Tensor> TensorGroup::GetGenFuncTopoOrder(
}

std::vector<ir::Tensor> ret;
-  std::vector<std::string> stack;

+  // Use a set instead of a vector/stack to get a fixed alphabetical topo order
+  std::set<std::string> node_set;
for (const auto& name_tensor : name_to_tensor_) {
if (!in_degree.count(name_tensor.first)) {
-      stack.emplace_back(name_tensor.first);
+      node_set.insert(name_tensor.first);
}
}

@@ -90,9 +92,9 @@
input_arg_names.erase(name);
}

-  while (!stack.empty()) {
-    const std::string& cur = stack.back();
-    stack.pop_back();
+  while (!node_set.empty()) {
+    const std::string cur = *(node_set.begin());
+    node_set.erase(node_set.begin());

if (!input_arg_names.count(cur)) {
ret.push_back(name_to_tensor_[cur]);
@@ -103,23 +105,14 @@
if (dep_tensor_names.count(cur)) {
--in_degree[dep_pair.first];
if (in_degree[dep_pair.first] == 0) {
-          stack.emplace_back(dep_pair.first);
+          node_set.insert(dep_pair.first);
}
}
}
}
return ret;
}

-bool TensorGroup::HasMarkedReduceInit(const std::string& tensor_name) const {
-  return tensor_name_needs_reduce_init_.count(tensor_name);
-}
-
-ir::Tensor TensorGroup::MarkReduceInit(const std::string& tensor_name) {
-  // TODO(zhhsplendid): add check
-  tensor_name_needs_reduce_init_.insert(tensor_name);
-}

void TensorGroup::CtrlDepend(const ir::Tensor& tensor,
const ir::Tensor& to_dep) {
ctrl_dep_[tensor->name].insert(to_dep->name);
@@ -156,8 +149,8 @@ std::string TensorGroup::GetShareMemRootName(const std::string& tensor_name) {
return share_memory_tensor_[tensor_name];
}

-void TensorGroup::ShareMemoryBuffer(const ir::Tensor& tensor,
-                                    const ir::Tensor& to_share) {
+void TensorGroup::MarkShareMemBuffer(const ir::Tensor& tensor,
+                                     const ir::Tensor& to_share) {
share_memory_tensor_[GetShareMemRootName(to_share->name)] =
GetShareMemRootName(tensor->name);
}
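The switch from a vector used as a stack to a std::set in GetGenFuncTopoOrder makes the emitted topological order deterministic: erasing *begin() always picks the alphabetically smallest ready node. A minimal self-contained sketch of the same idea (assumed names, not Paddle's API; deps maps each node to the set of nodes it depends on):

#include <map>
#include <set>
#include <string>
#include <vector>

std::vector<std::string> TopoOrder(
    const std::map<std::string, std::set<std::string>>& deps) {
  // In-degree = number of unresolved dependencies per node.
  std::map<std::string, int> in_degree;
  for (const auto& [node, ins] : deps) {
    in_degree.emplace(node, 0);
    for (const auto& in : ins) in_degree.emplace(in, 0);
  }
  for (const auto& [node, ins] : deps) {
    in_degree[node] += static_cast<int>(ins.size());
  }

  // Ordered frontier: ties are broken alphabetically, not by push order.
  std::set<std::string> ready;
  for (const auto& [node, deg] : in_degree) {
    if (deg == 0) ready.insert(node);
  }

  std::vector<std::string> order;
  while (!ready.empty()) {
    const std::string cur = *ready.begin();
    ready.erase(ready.begin());
    order.push_back(cur);
    // Release every node that was waiting on cur.
    for (const auto& [node, ins] : deps) {
      if (ins.count(cur) && --in_degree[node] == 0) {
        ready.insert(node);
      }
    }
  }
  return order;
}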
(Only the first few of the 1,184 changed files are shown above.)
