Commit

resolve conflict

huangjiyi committed Aug 25, 2023
1 parent c81c5ef commit 6875a7a
Showing 470 changed files with 19,904 additions and 13,284 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -97,3 +97,4 @@ python/paddle/incubate/fleet/parameter_server/pslib/ps_pb2.py
paddle/phi/kernels/fusion/cutlass/conv2d/generated/*
python/paddle/fluid/incubate/fleet/parameter_server/pslib/ps_pb2.py
paddle/fluid/ir_adaptor/translator/op_compat_info.cc
paddle/fluid/pybind/static_op_function.*
1 change: 1 addition & 0 deletions cmake/external/openblas.cmake
@@ -18,6 +18,7 @@ set(CBLAS_PREFIX_DIR ${THIRD_PARTY_PATH}/openblas)
set(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
set(CBLAS_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/openblas)
set(CBLAS_TAG v0.3.7)
set(CMAKE_VERBOSE_MAKEFILE 1)

# Why use v0.3.18? The IDG business line encountered a random openblas error,
# which was resolved by upgrading openblas.
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
@@ -24,7 +24,7 @@ set(XPU_XFT_LIB_NAME "libxft.so")
set(XPU_XPTI_LIB_NAME "libxpti.so")

if(NOT DEFINED XPU_BASE_DATE)
set(XPU_BASE_DATE "20230810")
set(XPU_BASE_DATE "20230823")
endif()
set(XPU_XCCL_BASE_VERSION "1.0.53.6")
if(NOT DEFINED XPU_XFT_BASE_VERSION)
26 changes: 25 additions & 1 deletion cmake/generic.cmake
@@ -118,6 +118,19 @@ function(find_fluid_modules TARGET_NAME)
endif()
endfunction()

# NOTE(Aurelius84): NOT_INFER_MODULES is used to tag modules that should
# not be considered as DEPS for inference libs.
set_property(GLOBAL PROPERTY NOT_INFER_MODULES "")

function(ignore_infer_modules TARGET_NAME)
get_property(not_infer_modules GLOBAL PROPERTY NOT_INFER_MODULES)
list(FIND not_infer_modules ${TARGET_NAME} is_found)
if(is_found EQUAL -1) # NOT FOUND
set(not_infer_modules ${not_infer_modules} ${TARGET_NAME})
set_property(GLOBAL PROPERTY NOT_INFER_MODULES "${not_infer_modules}")
endif()
endfunction()

set_property(GLOBAL PROPERTY PHI_MODULES "")
# Finding all phi modules is used for the paddle static library
# when building inference libs.
@@ -335,7 +348,15 @@ function(check_coverage_opt TARGET_NAME SRCS)
endfunction()

function(cc_library TARGET_NAME)
set(options STATIC static SHARED shared INTERFACE interface)
set(options
STATIC
static
SHARED
shared
INTERFACE
interface
NOT_FOR_INFER
not_for_infer)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}"
@@ -347,6 +368,9 @@ function(cc_library TARGET_NAME)
CACHE STRING "output library name for target ${TARGET_NAME}")
endif()
if(cc_library_SRCS)
if(cc_library_NOT_FOR_INFER OR cc_library_not_for_infer)
ignore_infer_modules(${TARGET_NAME})
endif()
if(cc_library_SHARED OR cc_library_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
elseif(cc_library_INTERFACE OR cc_library_interface)
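With these pieces in place, a library can opt out of the inference DEPS via the new flag; either spelling (NOT_FOR_INFER or not_for_infer) is accepted, since cc_library checks both before calling ignore_infer_modules(). A minimal usage sketch (the target and source names are illustrative, not part of this commit):

# Hypothetical usage: tag a debug-only helper so it is excluded
# from the inference library DEPS via ignore_infer_modules().
cc_library(
  debug_dumper
  SRCS debug_dumper.cc
  DEPS glog
  NOT_FOR_INFER)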
42 changes: 42 additions & 0 deletions paddle/cinn/backends/compiler.cc
@@ -41,6 +41,48 @@ using ir::Module;

static constexpr int DebugLogMaxLen = 30000;

void CompilationInfoDumper::DumpLoweredFuncByGroupIndex(
const ir::LoweredFunc& lowered_func, const int gidx) {
if (FLAGS_cinn_dump_group_lowered_func.empty() ||
lowered_func.get() == nullptr) {
return;
}
std::stringstream content;
content << lowered_func;
Dump(FLAGS_cinn_dump_group_lowered_func,
gidx,
"lowered_function.txt",
content.str());
}

void CompilationInfoDumper::DumpSourceCodeByGroupIndex(
const std::string& source_code, const int gidx) {
if (FLAGS_cinn_dump_group_source_code.empty()) {
return;
}
Dump(FLAGS_cinn_dump_group_source_code, gidx, "source_code.cu", source_code);
}

void CompilationInfoDumper::DumpPtxCodeByGroupIndex(
const std::string& source_ptx, const int gidx) {
if (FLAGS_cinn_dump_group_ptx.empty()) {
return;
}
Dump(FLAGS_cinn_dump_group_ptx, gidx, "source_ptx.ptx", source_ptx);
}

void CompilationInfoDumper::DumpInstructionByGroupIndex(
const std::unique_ptr<cinn::hlir::framework::Instruction>& instr,
const int gidx) {
if (FLAGS_cinn_dump_group_instruction.empty() || instr.get() == nullptr) {
return;
}
Dump(FLAGS_cinn_dump_group_instruction,
gidx,
"instruction.txt",
instr->DumpInstruction());
}

void CompilationInfoDumper::DumpLoweredFunc() {
if (FLAGS_cinn_dump_group_lowered_func.empty()) {
return;
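The shared Dump helper called by these entry points is truncated in this diff. A minimal sketch of the idea, assuming a <base_path>/fusion_group_<idx>/<file_name> on-disk layout (the directory naming and file handling are assumptions, not taken from the commit):

// Sketch only: the real Dump body is not shown in this diff, and the
// fusion_group_<idx> directory layout is an assumed convention.
#include <filesystem>
#include <fstream>
#include <string>

static void Dump(const std::string& base_path,
                 const int idx,
                 const std::string& file_name,
                 const std::string& content) {
  const std::string dir = base_path + "/fusion_group_" + std::to_string(idx);
  std::filesystem::create_directories(dir);  // ensure the per-group dir exists
  std::ofstream of(dir + "/" + file_name);
  if (of.is_open()) {
    of << content;  // lowered func / source code / ptx / instruction text
  }
}

Making Dump static (see the compiler.h change below) is what lets these per-group entry points run from ParallelCompiler tasks without a CompilationInfoDumper instance.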
18 changes: 14 additions & 4 deletions paddle/cinn/backends/compiler.h
@@ -51,15 +51,25 @@ class CompilationInfoDumper {
DumpInstruction();
}

static void DumpLoweredFuncByGroupIndex(const ir::LoweredFunc& lowered_func,
const int gidx);
static void DumpSourceCodeByGroupIndex(const std::string& source_code,
const int gidx);
static void DumpPtxCodeByGroupIndex(const std::string& source_ptx,
const int gidx);
static void DumpInstructionByGroupIndex(
const std::unique_ptr<cinn::hlir::framework::Instruction>& instr,
const int gidx);

private:
void DumpLoweredFunc();
void DumpSourceCode();
void DumpPtxCode();
void DumpInstruction();
void Dump(const std::string& base_path,
const int idx,
const std::string& file_name,
const std::string& content);
static void Dump(const std::string& base_path,
const int idx,
const std::string& file_name,
const std::string& content);

const hlir::framework::CompilationResult& info_;
};
1 change: 1 addition & 0 deletions paddle/cinn/hlir/dialect/.gitignore
@@ -0,0 +1 @@
generated/**
30 changes: 22 additions & 8 deletions paddle/cinn/hlir/dialect/CMakeLists.txt
@@ -5,16 +5,23 @@ if(NOT CINN_ONLY)
set(CINN_DIALECT_BINARY_DIR "${PADDLE_BINARY_DIR}/paddle/cinn/hlir/dialect")

# Generate cinn_dialect files defining op using op_gen_file
set(cinn_op_gen_parsed_yaml_file
${PADDLE_SOURCE_DIR}/paddle/fluid/operators/generator/parse_op.py)

set(cinn_op_gen_file
${PADDLE_SOURCE_DIR}/paddle/fluid/ir/dialect/op_generator/op_gen.py)

set(cinn_op_compat_yaml_file
${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/op_compat.yaml)

set(cinn_op_forward_yaml_file1
${PADDLE_SOURCE_DIR}/paddle/cinn/hlir/dialect/cinn_ops.parsed.yaml)
set(cinn_op_yaml_file
${PADDLE_SOURCE_DIR}/paddle/cinn/hlir/dialect/cinn_ops.yaml)

set(parsed_op_dir ${PADDLE_SOURCE_DIR}/paddle/cinn/hlir/dialect/generated)

set(cinn_op_parsed_yaml_file ${parsed_op_dir}/cinn_ops.parsed.yaml)

set(cinn_op_yaml_files ${cinn_op_forward_yaml_file1})
set(cinn_op_parsed_yaml_files ${cinn_op_parsed_yaml_file})

set(cinn_op_namespace cinn,dialect)
set(cinn_dialect_name cinn)
@@ -23,19 +30,26 @@ if(NOT CINN_ONLY)
set(cinn_op_header_file_tmp ${cinn_op_header_file}.tmp)
set(cinn_op_source_file_tmp ${cinn_op_source_file}.tmp)

add_custom_command(
OUTPUT ${cinn_op_parsed_yaml_file}
COMMAND ${CMAKE_COMMAND} -E make_directory ${parsed_op_dir}
COMMAND ${PYTHON_EXECUTABLE} ${cinn_op_gen_parsed_yaml_file} --op_yaml_path
${cinn_op_yaml_file} --output_path ${cinn_op_parsed_yaml_file}
VERBATIM)

add_custom_command(
OUTPUT ${cinn_op_header_file} ${cinn_op_source_file}
COMMAND
${PYTHON_EXECUTABLE} ${cinn_op_gen_file} --op_yaml_files
${cinn_op_yaml_files} --op_compat_yaml_file ${cinn_op_compat_yaml_file}
--namespaces ${cinn_op_namespace} --dialect_name ${cinn_dialect_name}
--op_def_h_file ${cinn_op_header_file_tmp} --op_def_cc_file
${cinn_op_source_file_tmp}
${cinn_op_parsed_yaml_files} --op_compat_yaml_file
${cinn_op_compat_yaml_file} --namespaces ${cinn_op_namespace}
--dialect_name ${cinn_dialect_name} --op_def_h_file
${cinn_op_header_file_tmp} --op_def_cc_file ${cinn_op_source_file_tmp}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${cinn_op_header_file_tmp}
${cinn_op_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${cinn_op_source_file_tmp}
${cinn_op_source_file}
DEPENDS ${cinn_op_gen_file} ${cinn_op_forward_yaml_file1}
DEPENDS ${cinn_op_gen_file} ${cinn_op_parsed_yaml_file}
${cinn_op_compat_yaml_file}
VERBATIM)

78 changes: 0 additions & 78 deletions paddle/cinn/hlir/dialect/cinn_ops.parsed.yaml

This file was deleted.

8 changes: 8 additions & 0 deletions paddle/cinn/hlir/dialect/cinn_ops.yaml
@@ -0,0 +1,8 @@
- op : add
args : (Tensor x, Tensor y)
output : Tensor(out)
infer_meta :
func : ElementwiseInferMeta
kernel :
func : add
inplace : (x -> out)
3 changes: 0 additions & 3 deletions paddle/cinn/hlir/framework/graph_compiler.cc
@@ -64,9 +64,6 @@ CompilationResult GraphCompiler::Build(CompilationContext* context) {
parallel_compiler_ = std::make_shared<ParallelCompiler>(context);
CompilationResult result = (*parallel_compiler_.get())();

// Dump compilation result
backends::CompilationInfoDumper dumper(result);

if (context->stage != CompilationStage::DEFAULT) {
return result;
}
27 changes: 15 additions & 12 deletions paddle/cinn/hlir/framework/parallel_compiler.cc
@@ -58,7 +58,7 @@ void ParallelCompiler::SplitTask() {
context_->graph->fusion_groups.size() ==
context_->lowered_funcs.size());
for (int i = 0; i < context_->graph->fusion_groups.size(); ++i) {
tasks_.emplace_back(this, context_, i);
tasks_.emplace_back(i, this, context_);
}
}

@@ -114,20 +114,17 @@ void ParallelCompiler::Task::Lowering() {
if (!context->lowered_funcs.empty()) {
CHECK_EQ(context->lowered_funcs.size(),
context->graph->fusion_groups.size());
}
auto& dtype_dict =
context->graph->GetMutableAttrs<absl::flat_hash_map<std::string, Type>>(
"inferdtype");
auto& shape_dict =
context->graph
->GetMutableAttrs<absl::flat_hash_map<std::string, shape_t>>(
"infershape");

OpLowerer op_lowerer(dtype_dict, shape_dict, context->target);
if (!context->lowered_funcs.empty()) {
pcompiler->result_.lowered_funcs[group_id] =
context->lowered_funcs[group_id];
} else {
auto& dtype_dict =
context->graph->GetMutableAttrs<absl::flat_hash_map<std::string, Type>>(
"inferdtype");
auto& shape_dict =
context->graph
->GetMutableAttrs<absl::flat_hash_map<std::string, shape_t>>(
"infershape");
OpLowerer op_lowerer(dtype_dict, shape_dict, context->target);
auto& group = context->graph->fusion_groups[group_id];
VLOG(4) << "Start Lowering Group " << group_id << " at "
<< std::this_thread::get_id() << " :\n"
@@ -138,6 +135,8 @@ void ParallelCompiler::Task::Lowering() {
CHECK_EQ(lowered_group.size(), 1) << "Lowered function count is not equal to 1!";
pcompiler->result_.lowered_funcs[group_id] = std::move(lowered_group);
}
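  // Whether the function was reused from the context or freshly lowered,
  // dump it for this group when FLAGS_cinn_dump_group_lowered_func is set.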
backends::CompilationInfoDumper::DumpLoweredFuncByGroupIndex(
pcompiler->result_.lowered_funcs[group_id].front(), group_id);
}

void ParallelCompiler::Task::CodegenAndJit() {
@@ -168,6 +167,8 @@ }
}
CHECK(!cuda_c.empty()) << "Compile CUDA C code failed from device module:\n"
<< dmodule;
backends::CompilationInfoDumper::DumpSourceCodeByGroupIndex(cuda_c,
group_id);
pcompiler->result_.source_codes[group_id] = cuda_c;

cinn::backends::SourceCodePrint::GetInstance()->write(cuda_c);
Expand All @@ -176,6 +177,7 @@ void ParallelCompiler::Task::CodegenAndJit() {
backends::nvrtc::Compiler compiler;
auto ptx = compiler(cuda_c);
CHECK(!ptx.empty()) << "Compile PTX failed from source code:\n" << cuda_c;
backends::CompilationInfoDumper::DumpPtxCodeByGroupIndex(ptx, group_id);
pcompiler->result_.source_ptxs[group_id] = ptx;
// load cumodule
cumodule = std::make_unique<CUDAModule>(ptx,
@@ -217,6 +219,7 @@ void ParallelCompiler::Task::BuildInstruction() {
instr->SetLoweredFunc(reinterpret_cast<void*>(fn_ptr), group->GetFuncName());

instr->Finalize();
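  // Dump the finalized instruction for this group when
  // FLAGS_cinn_dump_group_instruction is set.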
backends::CompilationInfoDumper::DumpInstructionByGroupIndex(instr, group_id);
pcompiler->result_.instructions[group_id] = std::move(instr);
}

4 changes: 2 additions & 2 deletions paddle/cinn/hlir/framework/parallel_compiler.h
@@ -33,8 +33,8 @@ namespace framework {
class ParallelCompiler {
public:
struct Task {
Task(ParallelCompiler* compiler, CompilationContext* context, int group_id)
: pcompiler(compiler), context(context), group_id(group_id) {}
Task(int group_id, ParallelCompiler* compiler, CompilationContext* context)
: group_id(group_id), pcompiler(compiler), context(context) {}
void Lowering();
void CodegenAndJit();
void BuildInstruction();