Commit 7b2318a
[NewIR] Split NewIRCompiler into .h/.cc and decouple compilation from cinncore (PaddlePaddle#55733)

* [NewIR] Split NewIRCompiler into .h/.cc and decouple compilation from cinncore

* fix cmake

* fix CINN_ONLY
Aurelius84 authored and wyf committed Aug 30, 2023
1 parent 3f044b6 commit 7b2318a
Showing 9 changed files with 596 additions and 515 deletions.
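
For orientation, a minimal sketch of the header/source split this commit performs; the class name is real, but the members and includes below are hypothetical, not the actual interface:

// new_ir_compiler.h -- declarations only, so that including it does not
// pull in cinncore (hypothetical sketch)
#pragma once
#include <memory>

namespace cinn {
namespace hlir {
namespace framework {

class Program;  // forward declaration instead of a heavy include

class NewIRCompiler {
 public:
  std::unique_ptr<Program> Build();  // implementation lives in the .cc
};

}  // namespace framework
}  // namespace hlir
}  // namespace cinn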
8 changes: 8 additions & 0 deletions paddle/cinn/hlir/framework/CMakeLists.txt
@@ -9,6 +9,7 @@ gather_srcs(
buffer.cc
memory.cc
instruction.cc
program.cc
parallel_compiler.cc
graph_compiler.cc
graph.cc
@@ -20,6 +21,13 @@ gather_srcs(
accuracy_checker.cc
visualize_helper.cc)

# TODO(Aurelius84): new_ir_compiler depends on pd_dialect and cannot
# be found under CINN_ONLY mode
if(NOT CINN_ONLY)
cinn_cc_library(new_ir_compiler SRCS new_ir_compiler.cc DEPS cinncore
pd_dialect)
endif()
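# Note: with this guard, the new_ir_compiler target is only registered in
# full Paddle builds; a CINN_ONLY build skips it instead of failing on the
# unavailable pd_dialect dependency.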

if(WITH_CUDA)
cinn_nv_test(test_hlir_framework_buffer SRCS buffer_test.cc DEPS cinncore)
cinn_cc_test(test_hlir_framework_accuracy_checker SRCS
214 changes: 0 additions & 214 deletions paddle/cinn/hlir/framework/graph_compiler.cc
@@ -37,220 +37,6 @@ namespace framework {
using cinn::common::bfloat16;
using cinn::common::float16;

// Store params from node to instruction
void AddAttrs(const absl::flat_hash_map<std::string, AttrType>& attrs_store,
const std::vector<std::string>& attrs_name,
Instruction* instr) {
for (auto& attr : attrs_name) {
if (attrs_store.find(attr) != attrs_store.end()) {
switch (attrs_store.at(attr).index()) {
case 2:
instr->attrs.push_back(absl::get<int>(attrs_store.at(attr)));
break;
case 3:
instr->str_attrs.push_back(
absl::get<std::string>(attrs_store.at(attr)));
break;
case 5:
auto temp = absl::get<std::vector<int>>(attrs_store.at(attr));
instr->attrs.insert(instr->attrs.end(), temp.begin(), temp.end());
break;
}
} else {
LOG(ERROR) << "Param " << attr << " missed! Please check.";
}
}
}
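
// Note on the switch in AddAttrs above: it dispatches on the AttrType
// variant's alternative index; from the absl::get calls, index 2 holds int,
// index 3 holds std::string, and index 5 holds std::vector<int> (the full
// alternative ordering is an assumption, not shown in this file).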

Program::Program(const std::shared_ptr<Scope>& scope,
std::vector<std::unique_ptr<Instruction>>&& instrs)
: scope_(scope) {
for (auto& ins : instrs) {
if (ins->pre_run) {
prerun_instrs_.push_back(std::move(ins));
} else {
instrs_.push_back(std::move(ins));
}
}
}

void Program::PreRun(
const std::map<std::string, cinn_pod_value_t>* name2podargs) {
for (auto& ins : prerun_instrs_) {
ins->Run(name2podargs);
}
for (auto& ins : instrs_) {
if (ins->size() == 4) {
ins->PreRun(name2podargs);
}
}
}

void Program::Export(const std::vector<std::string>& persistent_vars,
const std::string& filename) {
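  // Helper lambdas for the container format below: writeplaceholder
  // reserves s * n zero bytes and returns their file offset;
  // setplaceholder backpatches n items of size s at offset p;
  // tellplaceholder backpatches the current offset into a 4-byte
  // placeholder; padding aligns the write cursor with fill bytes.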
auto writeplaceholder = [=](int s, int n, FILE* f) -> int {
int pos = ftell(f);
for (int i = 0; i < s * n; i++) {
fwrite("\0", 1, 1, f);
}
return pos;
};
auto setplaceholder = [=](int p, void* b, int s, int n, FILE* f) {
int cur = ftell(f);
fseek(f, p, SEEK_SET);
fwrite(b, s, n, f);
fseek(f, cur, SEEK_SET);
};
auto tellplaceholder = [=](int p, FILE* f) {
int cur = ftell(f);
setplaceholder(p, &cur, 4, 1, f);
};
auto padding = [=](int alignment, uint8_t value, FILE* f) {
int cur = ftell(f);
int padding = (alignment - (cur % alignment)) % alignment;
for (int i = 0; i < padding; i++) {
fwrite(&value, 1, 1, f);
}
};
auto varnames = scope_->var_names();
std::unordered_map<std::string, int> varindex;
for (int i = 0; i < varnames.size(); i++) {
varindex[(std::string)varnames[i]] = i;
}

FILE* f = fopen(filename.c_str(), "w+");

fwrite("CINN", 4, 1, f);
int major_v = 0;
int minor_v = 0;
fwrite(&major_v, 4, 1, f);
fwrite(&minor_v, 4, 1, f);
int unused_v = 0;
fwrite(&unused_v, 4, 1, f);

// varname list
int varnamesec = writeplaceholder(4, 1, f);
int namesnum = varnames.size();
fwrite(&namesnum, 4, 1, f);
int nameoffset = writeplaceholder(4, namesnum, f);
for (int i = 0; i < namesnum; i++) {
int namelen = varnames[i].size();
fwrite(&namelen, 4, 1, f);
tellplaceholder(nameoffset + i * 4, f);
fwrite(varnames[i].data(), namelen, 1, f);
fwrite("\0", 1, 1, f);
}
padding(16, 0, f);
tellplaceholder(varnamesec, f);
// pod_values
int buffersec = writeplaceholder(4, 1, f);
int bufoffset = writeplaceholder(4, 1, f);
padding(alignof(cinn_buffer_t), 0, f);
tellplaceholder(bufoffset, f);
std::vector<std::pair<cinn_buffer_t*, int>> pvars;
for (auto& varname : varnames) {
std::string name = (std::string)varname;
auto t = scope_->GetTensor(name);
cinn_buffer_t buffer = *t->buffer();
buffer.memory = reinterpret_cast<uint8_t*>(0);
if (std::find(persistent_vars.begin(), persistent_vars.end(), name) !=
persistent_vars.end()) {
pvars.emplace_back(t->buffer(),
ftell(f) + offsetof(cinn_buffer_t, memory));
}
fwrite(&buffer, sizeof(cinn_buffer_t), 1, f);
}
padding(16, 0, f);
tellplaceholder(buffersec, f);
// persistent_buffers
int pbuffer = writeplaceholder(4, 1, f);
for (auto& p : pvars) {
if (p.first->align) {
padding(p.first->align, 0, f);
}
tellplaceholder(p.second, f);
fwrite(p.first->memory, p.first->memory_size, 1, f);
}
padding(16, 0, f);
tellplaceholder(pbuffer, f);
// instructions
int instsec = writeplaceholder(4, 1, f);
int insnum = 0;
for (auto& ins : instrs_) {
ins->Run(nullptr, true);
insnum += ins->GetFnNames().size();
}
fwrite(&insnum, 4, 1, f);
int instplaceholder = writeplaceholder(4 * 3, insnum, f);
int findex = 0;
for (auto& ins : instrs_) {
auto in_args = ins->GetInArgs();
auto out_args = ins->GetOutArgs();
auto fn_names = ins->GetFnNames();
for (int i = 0; i < fn_names.size(); i++, findex++) {
std::vector<std::string> all_args(in_args[i].begin(), in_args[i].end());
all_args.insert(
std::end(all_args), out_args[i].begin(), out_args[i].end());
auto fname = fn_names[i];
int fnamesize = fname.size();
fwrite(&fnamesize, 4, 1, f);
tellplaceholder(instplaceholder + findex * 12, f);
fwrite(fname.c_str(), fname.size(), 1, f);
fwrite("\0", 1, 1, f);
int argsize = all_args.size();
setplaceholder(instplaceholder + findex * 12 + 4, &argsize, 4, 1, f);
padding(alignof(cinn_pod_value_t), 0, f);
tellplaceholder(instplaceholder + findex * 12 + 8, f);
for (auto& arg : all_args) {
uintptr_t bufindex = varindex[arg];
cinn_pod_value_t v(reinterpret_cast<cinn_buffer_t*>(bufindex));
fwrite(&v, sizeof(cinn_pod_value_t), 1, f);
}
}
}
padding(16, 0, f);
tellplaceholder(instsec, f);
fclose(f);
}

void Program::Execute(
const std::map<std::string, cinn_pod_value_t>* name2podargs,
void* stream,
bool use_cache) {
for (auto& ins : instrs_) {
ins->Run(name2podargs, false, stream, use_cache);
}
#ifdef CINN_WITH_CUDA
VLOG(4) << "-- The value of the used stream: " << stream;
if (instrs_[0]->target_.arch == Target::Arch::NVGPU && stream == nullptr) {
CUDA_CALL(cudaDeviceSynchronize());
}
#endif
}
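
// Note on Program::Execute above: with the default stream == nullptr on an
// NVGPU target, it blocks on cudaDeviceSynchronize before returning; with a
// caller-supplied stream, synchronization is left to the caller.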

void Program::ExecuteTest(int repeat_) {
cinn::utils::Timer timer1;
for (int i = 0; i < 100; i++) {
for (auto& ins : instrs_) {
ins->Run();
}
}
timer1.Start();
for (int i = 0; i < repeat_; i++) {
for (auto& ins : instrs_) {
ins->Run();
}
}
#ifdef CINN_WITH_CUDA
if (instrs_[0]->target_.arch == Target::Arch::NVGPU) {
CUDA_CALL(cudaDeviceSynchronize());
}
#endif
double test_op_time = timer1.Stop() / repeat_;
VLOG(3) << "Repeat times: [" << repeat_ << "], average op time: ["
<< test_op_time << "] ms";
}
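
// Note on ExecuteTest above: it performs 100 untimed warm-up passes before
// timing repeat_ iterations, so the reported average excludes first-run
// overhead.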

std::unique_ptr<Program> GraphCompiler::Build(const std::string& code) {
utils::RecordEvent("GraphCompiler::Build", utils::EventType::kGraph);
GraphCompiler::CompileOptions options;
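
Program::Export above writes a sectioned container: a 4-byte "CINN" magic, three 4-byte native-endian integers (major, minor, and an unused word), then length-prefixed sections for variable names, buffers, persistent buffers, and instructions, whose offsets are backpatched through the placeholder helpers. A minimal sketch of validating just the fixed 16-byte header, assuming a file produced by that function:

#include <cstdio>
#include <cstring>

// Read back the fixed header written by Program::Export.
bool ReadCinnHeader(const char* filename, int* major, int* minor) {
  std::FILE* f = std::fopen(filename, "rb");
  if (!f) return false;
  char magic[4];
  int unused = 0;
  bool ok = std::fread(magic, 1, 4, f) == 4 &&
            std::memcmp(magic, "CINN", 4) == 0 &&
            std::fread(major, 4, 1, f) == 1 &&
            std::fread(minor, 4, 1, f) == 1 &&
            std::fread(&unused, 4, 1, f) == 1;
  std::fclose(f);
  return ok;
}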
51 changes: 1 addition & 50 deletions paddle/cinn/hlir/framework/graph_compiler.h
@@ -31,6 +31,7 @@
#include "paddle/cinn/hlir/framework/instruction.h"
#include "paddle/cinn/hlir/framework/op_strategy.h"
#include "paddle/cinn/hlir/framework/parallel_compiler.h"
#include "paddle/cinn/hlir/framework/program.h"
#include "paddle/cinn/hlir/framework/scope.h"
#include "paddle/cinn/ir/lowered_func.h"
#include "paddle/cinn/lang/packed_func.h"
@@ -40,56 +41,6 @@ namespace cinn {
namespace hlir {
namespace framework {

/**
* The Program is the runtime instance for running a computation.
*/
class Program {
public:
/**
* Constructor.
* @param scope The scope containing all the runtime variables.
* @param instrs The instructions belonging to this program.
*/
Program(const std::shared_ptr<Scope>& scope,
std::vector<std::unique_ptr<Instruction>>&& instrs);

void PreRun(
const std::map<std::string, cinn_pod_value_t>* name2podargs = nullptr);

void Export(const std::vector<std::string>& persistent_vars,
const std::string& filename);

/**
* Execute the program -- that is running all the instructions inside it.
*/
void Execute(
const std::map<std::string, cinn_pod_value_t>* name2podargs = nullptr,
void* stream = nullptr,
bool use_cache = true);

void ExecuteTest(int repeat_);

/**
* Get the number of instructions.
*/
size_t size() const { return instrs_.size(); }

const std::vector<std::unique_ptr<Instruction>>& GetPreRunInstructions() {
return prerun_instrs_;
}
const std::vector<std::unique_ptr<Instruction>>& GetRunInstructions() {
return instrs_;
}

private:
// We need to hold scope to assure tensors alive used in instructions.
std::shared_ptr<Scope> scope_;
// prerun instructions
std::vector<std::unique_ptr<Instruction>> prerun_instrs_;
// only runtime instructions
std::vector<std::unique_ptr<Instruction>> instrs_;
};

/**
 * GraphCompiler compiles a graph and generates the runtime Program.
*/
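
For reference, a schematic driver for the Program class whose declaration moves out of this header; it uses only methods from the declaration above, and the include path assumes the program.h introduced by this commit (obtaining the Program from GraphCompiler::Build is elided):

#include "glog/logging.h"
#include "paddle/cinn/hlir/framework/program.h"

// Schematic: run a Program produced by GraphCompiler::Build.
void RunProgram(cinn::hlir::framework::Program* program) {
  program->PreRun();   // executes instructions flagged as pre_run
  program->Execute();  // defaults: name2podargs = nullptr, stream = nullptr,
                       // use_cache = true
  LOG(INFO) << "Ran " << program->size() << " instructions.";
}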
(Diffs for the remaining changed files are not shown.)
