From f4ea81053b1e3de24ecfe23dd11dd94048d6bb8b Mon Sep 17 00:00:00 2001 From: Marius Wachtler Date: Tue, 16 Aug 2016 10:53:45 +0100 Subject: [PATCH] bjit: free codeblocks after tiering up to the LLVM jit we now free the code blocks after a recompile in the LLVM tier (except for OSR frames) because it is likely that we will not use the code anymore. - we have to make sure we are not currently executing any code we will delete; that's why I added bjit_num_inside - there were some cases where we forgot to deregister stuff - when profiling we don't actually unmap the code in order to not break profiling --- src/asm_writing/icinfo.cpp | 8 ++++-- src/asm_writing/icinfo.h | 3 +++ src/codegen/ast_interpreter.cpp | 6 ++++- src/codegen/baseline_jit.cpp | 48 ++++++++++++++++++++++++++------- src/codegen/baseline_jit.h | 17 ++++++++---- src/codegen/codegen.h | 1 + src/codegen/irgen/hooks.cpp | 24 +++++++++++++++++ src/core/cfg.h | 2 +- src/core/types.h | 4 +++ 9 files changed, 94 insertions(+), 19 deletions(-) diff --git a/src/asm_writing/icinfo.cpp b/src/asm_writing/icinfo.cpp index 841866168..d34d2bfac 100644 --- a/src/asm_writing/icinfo.cpp +++ b/src/asm_writing/icinfo.cpp @@ -326,6 +326,7 @@ ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) { } static llvm::DenseMap ics_by_return_addr; +static llvm::DenseMap ics_by_ast_node; ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int size, llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs, assembler::GenericRegister return_register, @@ -342,6 +343,7 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, S times_rewritten(0), allocatable_registers(allocatable_registers), ic_global_decref_locations(std::move(ic_global_decref_locations)), + node(NULL), start_addr(start_addr), slowpath_rtn_addr(slowpath_rtn_addr), continue_addr(continue_addr) { @@ -353,6 +355,8 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* 
continue_addr, S ICInfo::~ICInfo() { // if this ICInfo got created with registerCompiledPatchpoint we have to unregister this ics_by_return_addr.erase(slowpath_rtn_addr); + if (node) + ics_by_ast_node.erase(node); deregisterGCTrackedICInfo(this); for (auto& slot : slots) { @@ -469,8 +473,6 @@ bool ICInfo::isMegamorphic() { return times_rewritten >= IC_MEGAMORPHIC_THRESHOLD; } -static llvm::DenseMap ics_by_ast_node; - ICInfo* ICInfo::getICInfoForNode(AST* node) { auto&& it = ics_by_ast_node.find(node); if (it != ics_by_ast_node.end()) @@ -478,6 +480,8 @@ ICInfo* ICInfo::getICInfoForNode(AST* node) { return NULL; } void ICInfo::associateNodeWithICInfo(AST* node) { + assert(!this->node); + this->node = node; ics_by_ast_node[node] = this; } void ICInfo::appendDecrefInfosTo(std::vector& dest_decref_infos) { diff --git a/src/asm_writing/icinfo.h b/src/asm_writing/icinfo.h index 628f7d046..93d5e6732 100644 --- a/src/asm_writing/icinfo.h +++ b/src/asm_writing/icinfo.h @@ -102,6 +102,9 @@ class ICInfo { // global ones. 
std::vector ic_global_decref_locations; + // associated AST node for this IC + AST* node; + // for ICSlotRewrite: ICSlotInfo* pickEntryForRewrite(const char* debug_name); diff --git a/src/codegen/ast_interpreter.cpp b/src/codegen/ast_interpreter.cpp index b5ec34782..3767b505a 100644 --- a/src/codegen/ast_interpreter.cpp +++ b/src/codegen/ast_interpreter.cpp @@ -306,7 +306,7 @@ void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset, llvm::DenseSe code_block = code_blocks[code_blocks.size() - 1].get(); if (!code_block || code_block->shouldCreateNewBlock()) { - code_blocks.push_back(llvm::make_unique(source_info->getName()->s())); + code_blocks.push_back(llvm::make_unique(getMD(), source_info->getName()->s())); code_block = code_blocks[code_blocks.size() - 1].get(); exit_offset = 0; } @@ -354,12 +354,16 @@ void ASTInterpreter::finishJITing(CFGBlock* continue_block) { } Box* ASTInterpreter::execJITedBlock(CFGBlock* b) { + auto& num_inside = getMD()->bjit_num_inside; try { UNAVOIDABLE_STAT_TIMER(t0, "us_timer_in_baseline_jitted_code"); + ++num_inside; std::pair rtn = b->entry_code(this, b, vregs); + --num_inside; next_block = rtn.first; return rtn.second; } catch (ExcInfo e) { + --num_inside; AST_stmt* stmt = getCurrentStatement(); if (stmt->type != AST_TYPE::Invoke) throw e; diff --git a/src/codegen/baseline_jit.cpp b/src/codegen/baseline_jit.cpp index 1bdd7e3bc..1d0ac228b 100644 --- a/src/codegen/baseline_jit.cpp +++ b/src/codegen/baseline_jit.cpp @@ -73,14 +73,19 @@ JitCodeBlock::MemoryManager::MemoryManager() { } JitCodeBlock::MemoryManager::~MemoryManager() { - munmap(addr, JitCodeBlock::memory_size); + // unfortunately we can't free the memory when profiling otherwise we would reuse the same addresses which makes + // profiling impossible + if (!PROFILE) + munmap(addr, JitCodeBlock::memory_size); addr = NULL; - - RELEASE_ASSERT(0, "we have to unregister this block from g.func_addr_registry"); } -JitCodeBlock::JitCodeBlock(llvm::StringRef name) - : 
entry_offset(0), a(memory.get() + sizeof(eh_info), code_size), is_currently_writing(false), asm_failed(false) { +JitCodeBlock::JitCodeBlock(FunctionMetadata* md, llvm::StringRef name) + : md(md), + entry_offset(0), + a(memory.get() + sizeof(eh_info), code_size), + is_currently_writing(false), + asm_failed(false) { static StatCounter num_jit_code_blocks("num_baselinejit_code_blocks"); num_jit_code_blocks.log(); static StatCounter num_jit_total_bytes("num_baselinejit_total_bytes"); @@ -116,6 +121,17 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name) g.func_addr_registry.registerFunction(unique_name, code, code_size, NULL); } +JitCodeBlock::~JitCodeBlock() { + // we should not deregister the function in profiling mode because otherwise the profiler can't show it + if (!PROFILE) + g.func_addr_registry.deregisterFunction(a.getStartAddr()); + register_eh_info.deregisterFrame(); + + for (auto&& block : md->source->cfg->blocks) { + block_patch_locations.erase(block); + } +} + std::unique_ptr JitCodeBlock::newFragment(CFGBlock* block, int patch_jump_offset, llvm::DenseSet known_non_null_vregs) { if (is_currently_writing || blocks_aborted.count(block)) @@ -137,7 +153,7 @@ std::unique_ptr JitCodeBlock::newFragment(CFGBlock* block, in std::unique_ptr rewrite = ic_info->startRewrite(""); return std::unique_ptr( - new JitFragmentWriter(block, std::move(ic_info), std::move(rewrite), fragment_offset, patch_jump_offset, + new JitFragmentWriter(md, block, std::move(ic_info), std::move(rewrite), fragment_offset, patch_jump_offset, a.getStartAddr(), *this, std::move(known_non_null_vregs))); } @@ -147,7 +163,7 @@ void JitCodeBlock::fragmentAbort(bool not_enough_space) { } void JitCodeBlock::fragmentFinished(int bytes_written, int num_bytes_overlapping, void* next_fragment_start, - ICInfo& ic_info) { + std::vector>&& pp_ic_infos, ICInfo& ic_info) { assert(next_fragment_start == bytes_written + a.curInstPointer() - num_bytes_overlapping); 
a.setCurInstPointer((uint8_t*)next_fragment_start); @@ -155,14 +171,17 @@ void JitCodeBlock::fragmentFinished(int bytes_written, int num_bytes_overlapping is_currently_writing = false; ic_info.appendDecrefInfosTo(decref_infos); + std::move(std::begin(pp_ic_infos), std::end(pp_ic_infos), std::back_inserter(this->pp_ic_infos)); + pp_ic_infos.clear(); } -JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr ic_info, +JitFragmentWriter::JitFragmentWriter(FunctionMetadata* md, CFGBlock* block, std::unique_ptr ic_info, std::unique_ptr rewrite, int code_offset, int num_bytes_overlapping, void* entry_code, JitCodeBlock& code_block, llvm::DenseSet known_non_null_vregs) : ICInfoManager(std::move(ic_info)), Rewriter(std::move(rewrite), 0, {}, /* needs_invalidation_support = */ false), + md(md), block(block), code_offset(code_offset), exit_info(), @@ -185,6 +204,13 @@ JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr ic addAction([=]() { vregs_array->bumpUse(); }, vregs_array, ActionType::NORMAL); if (LOG_BJIT_ASSEMBLY) comment("BJIT: JitFragmentWriter() end"); + + // this makes sure that we can't delete the code blocks while we are inside JitFragmentWriter + ++md->bjit_num_inside; +} + +JitFragmentWriter::~JitFragmentWriter() { + --md->bjit_num_inside; } RewriterVar* JitFragmentWriter::getInterp() { @@ -823,6 +849,7 @@ std::pair> JitFragmentWriter::finishCompilation() { block_patch_locations[side_exit_patch_location.first].push_back(patch_location); } + std::vector> ic_infos; for (auto&& pp_info : pp_infos) { SpillMap _spill_map; uint8_t* start_addr = pp_info.start_addr; @@ -837,7 +864,7 @@ std::pair> JitFragmentWriter::finishCompilation() { start_addr, slowpath_start, initialization_info.continue_addr, slowpath_rtn_addr, pp_info.ic.get(), pp_info.stack_info, LiveOutSet(), std::move(pp_info.decref_infos)); pp->associateNodeWithICInfo(pp_info.node); - pp.release(); + ic_infos.push_back(std::move(pp)); } #ifndef NDEBUG @@ -852,7 +879,8 @@ 
std::pair> JitFragmentWriter::finishCompilation() { ASSERT(assembler->curInstPointer() == (uint8_t*)exit_info.exit_start + exit_info.num_bytes, "Error! wrote more bytes out after the 'retq' that we thought was going to be the end of the assembly. " "We will end up overwriting those instructions."); - code_block.fragmentFinished(assembler->bytesWritten(), num_bytes_overlapping, next_fragment_start, *ic_info); + code_block.fragmentFinished(assembler->bytesWritten(), num_bytes_overlapping, next_fragment_start, + std::move(ic_infos), *ic_info); #if MOVING_GC // If JitFragmentWriter is destroyed, we don't necessarily want the ICInfo to be destroyed also, diff --git a/src/codegen/baseline_jit.h b/src/codegen/baseline_jit.h index 09eee8346..8abaa88ca 100644 --- a/src/codegen/baseline_jit.h +++ b/src/codegen/baseline_jit.h @@ -168,6 +168,7 @@ class JitCodeBlock { uint8_t* get() { return addr; } }; + FunctionMetadata* md; // the memory block contains the EH frame directly followed by the generated machine code. MemoryManager memory; int entry_offset; @@ -178,15 +179,19 @@ class JitCodeBlock { // this allows us to deregister them when we release the code std::vector decref_infos; RegisterEHFrame register_eh_info; + std::vector> pp_ic_infos; + public: - JitCodeBlock(llvm::StringRef name); + JitCodeBlock(FunctionMetadata* md, llvm::StringRef name); + ~JitCodeBlock(); std::unique_ptr newFragment(CFGBlock* block, int patch_jump_offset, llvm::DenseSet known_non_null_vregs); bool shouldCreateNewBlock() const { return asm_failed || a.bytesLeft() < 128; } void fragmentAbort(bool not_enough_space); - void fragmentFinished(int bytes_witten, int num_bytes_overlapping, void* next_fragment_start, ICInfo& ic_info); + void fragmentFinished(int bytes_witten, int num_bytes_overlapping, void* next_fragment_start, + std::vector>&& pp_ic_infos, ICInfo& ic_info); }; // Hold the ICInfo of the JitFragmentWriter in a separate class from which JitFragmentWriter derives. 
@@ -209,6 +214,7 @@ class JitFragmentWriter : ICInfoManager, public Rewriter { static constexpr int min_patch_size = 13; + FunctionMetadata* md; CFGBlock* block; int code_offset; // offset inside the JitCodeBlock to the start of this block @@ -250,9 +256,10 @@ class JitFragmentWriter : ICInfoManager, public Rewriter { llvm::SmallVector pp_infos; public: - JitFragmentWriter(CFGBlock* block, std::unique_ptr ic_info, std::unique_ptr rewrite, - int code_offset, int num_bytes_overlapping, void* entry_code, JitCodeBlock& code_block, - llvm::DenseSet known_non_null_vregs); + JitFragmentWriter(FunctionMetadata* md, CFGBlock* block, std::unique_ptr ic_info, + std::unique_ptr rewrite, int code_offset, int num_bytes_overlapping, + void* entry_code, JitCodeBlock& code_block, llvm::DenseSet known_non_null_vregs); + ~JitFragmentWriter(); RewriterVar* getInterp(); RewriterVar* imm(uint64_t val); diff --git a/src/codegen/codegen.h b/src/codegen/codegen.h index 3d3efb089..1ec58fa4e 100644 --- a/src/codegen/codegen.h +++ b/src/codegen/codegen.h @@ -50,6 +50,7 @@ class FunctionAddressRegistry { std::string getFuncNameAtAddress(void* addr, bool demangle, bool* out_success = NULL); llvm::Function* getLLVMFuncAtAddress(void* addr); void registerFunction(const std::string& name, void* addr, int length, llvm::Function* llvm_func); + void deregisterFunction(void* addr) { functions.erase(addr); } void dumpPerfMap(); }; diff --git a/src/codegen/irgen/hooks.cpp b/src/codegen/irgen/hooks.cpp index 014979b03..060506983 100644 --- a/src/codegen/irgen/hooks.cpp +++ b/src/codegen/irgen/hooks.cpp @@ -331,6 +331,10 @@ CompiledFunction* compileFunction(FunctionMetadata* f, FunctionSpecialization* s RELEASE_ASSERT(0, "%d", effort); } + // free the bjit code if this is not a OSR compilation + if (!entry_descriptor) + f->tryDeallocatingTheBJitCode(); + return cf; } @@ -841,4 +845,24 @@ void FunctionMetadata::addVersion(void* f, ConcreteCompilerType* rtn_type, FunctionSpecialization* spec = new 
FunctionSpecialization(processType(rtn_type), arg_types); addVersion(new CompiledFunction(this, spec, f, EffortLevel::MAXIMAL, exception_style, NULL)); } + +bool FunctionMetadata::tryDeallocatingTheBJitCode() { + // we can only delete the code object if we are not executing it currently + assert(bjit_num_inside >= 0); + if (bjit_num_inside != 0) { + // TODO: we could check later on again + static StatCounter num_baselinejit_blocks_failed_to_free("num_baselinejit_code_blocks_cant_free"); + num_baselinejit_blocks_failed_to_free.log(code_blocks.size()); + return false; + } + + static StatCounter num_baselinejit_blocks_freed("num_baselinejit_code_blocks_freed"); + num_baselinejit_blocks_freed.log(code_blocks.size()); + code_blocks.clear(); + for (CFGBlock* block : source->cfg->blocks) { + block->code = NULL; + block->entry_code = NULL; + } + return true; +} } diff --git a/src/core/cfg.h b/src/core/cfg.h index 8ab8f85c0..a398c2bac 100644 --- a/src/core/cfg.h +++ b/src/core/cfg.h @@ -67,7 +67,7 @@ template class DefaultedInt { class CFGBlock { public: - CFG* cfg; + CFG* const cfg; // Baseline JIT helper fields: // contains address to the start of the code of this basic block diff --git a/src/core/types.h b/src/core/types.h index cad5d1046..cf381856f 100644 --- a/src/core/types.h +++ b/src/core/types.h @@ -477,6 +477,7 @@ class FunctionMetadata { // For use by the interpreter/baseline jit: int times_interpreted; + long bjit_num_inside = 0; std::vector> code_blocks; ICInvalidator dependent_interp_callsites; @@ -530,6 +531,9 @@ class FunctionMetadata { ExceptionStyle exception_style = CXX) { return create(f, rtn_type, nargs, false, false, param_names, exception_style); } + + // tries to free the bjit allocated code. returns true on success + bool tryDeallocatingTheBJitCode(); };