Skip to content

Commit

Permalink
new frame introspection using vregs for non compiler generated names
Browse files Browse the repository at this point in the history
This splits up the handling of deopts and normal frame introspection (e.g. for a traceback).
We have to add to nearly all call sites frame introspection which makes it very important that it does not introduce much overhead over a normal call instruction.
By always storing the user visible variables into a vregs array (layout the same as in the interpreter/bjit) we can make introspection cheaper.
Frame introspection only needs to access user facing variables therefore we don't have to generate extra bytes for spilling variables which get clobbered in the callee because all values we need to access are inside the vregs array.
This let's use remove the 95byte overhead and reduces the stackmap size.
It adds a slight cost of maintaining the vregs array but we were already doing some of this work before with our manual spilling with the additional benefit of faster frame introspection.

The deopts case stays pretty much the same with the exception that we don't add the user visible vars to the stackmap because they are already in the vreg.
We could reduce the overhead by implementing a special "deopt()" function in asm which stores and restores all variables thereby we would not have to manualy spill the registers when filling the deopt IC.
Alternatively we could handle it inside llvm by either switching to a stackmap intrinsic which already supports this case or adding it it does not exist...
But I think it's not worth it because deopts should be uncommen...
  • Loading branch information
undingen committed Nov 18, 2015
1 parent b1d9ab9 commit 029b567
Show file tree
Hide file tree
Showing 12 changed files with 169 additions and 96 deletions.
12 changes: 8 additions & 4 deletions src/codegen/ast_interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ ASTInterpreter::ASTInterpreter(FunctionMetadata* md, Box** vregs)
should_jit(false) {

scope_info = source_info->getScopeInfo();
frame_info.vregs = vregs;

assert(scope_info);
}
Expand Down Expand Up @@ -1965,12 +1966,15 @@ FrameInfo* getFrameInfoForInterpretedFrame(void* frame_ptr) {
return interpreter->getFrameInfo();
}

Box** getVRegsForInterpretedFrame(void* frame_ptr) {
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
return interpreter->getVRegs();
}

BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg, bool only_user_visible) {
BoxedDict* rtn = new BoxedDict();
for (auto& l : cfg->sym_vreg_map) {
if (only_user_visible && (l.first.s()[0] == '!' || l.first.s()[0] == '#'))
continue;

for (auto& l : only_user_visible ? cfg->sym_vreg_map_user_visible : cfg->sym_vreg_map) {
Box* val = vregs[l.second];
if (val) {
assert(gc::isValidGCObject(val));
Expand Down
1 change: 1 addition & 0 deletions src/codegen/ast_interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ struct FrameInfo;
FrameInfo* getFrameInfoForInterpretedFrame(void* frame_ptr);
BoxedClosure* passedClosureForInterpretedFrame(void* frame_ptr);

Box** getVRegsForInterpretedFrame(void* frame_ptr);
BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg, bool only_user_visible);
BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible);

Expand Down
2 changes: 1 addition & 1 deletion src/codegen/baseline_jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm::

// make space for patchpoint
uint8_t* pp_start = rewrite->getSlotStart() + assembler->bytesWritten();
constexpr int call_size = 16;
constexpr int call_size = 13;
assembler->skipBytes(pp_size + call_size);
uint8_t* pp_end = rewrite->getSlotStart() + assembler->bytesWritten();
assert(assembler->hasFailed() || (pp_start + pp_size + call_size == pp_end));
Expand Down
2 changes: 2 additions & 0 deletions src/codegen/irgen.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ class IREmitter {
virtual void checkAndPropagateCapiException(const UnwindInfo& unw_info, llvm::Value* returned_val,
llvm::Value* exc_val, bool double_check = false) = 0;

virtual llvm::Value* createDeopt(AST_stmt* current_stmt, AST_expr* node, llvm::Value* node_value) = 0;

virtual Box* getIntConstant(int64_t n) = 0;
virtual Box* getFloatConstant(double d) = 0;
};
Expand Down
85 changes: 78 additions & 7 deletions src/codegen/irgen/irgenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ IRGenState::IRGenState(FunctionMetadata* md, CompiledFunction* cf, SourceInfo* s
frame_info(NULL),
frame_info_arg(NULL),
globals(NULL),
vregs(NULL),
scratch_size(0) {
assert(cf->func);
assert(!cf->md); // in this case don't need to pass in sourceinfo
Expand Down Expand Up @@ -143,7 +144,7 @@ static llvm::Value* getExcinfoGep(llvm::IRBuilder<true>& builder, llvm::Value* v
return builder.CreateConstInBoundsGEP2_32(v, 0, 0);
}

static llvm::Value* getFrameObjGep(llvm::IRBuilder<true>& builder, llvm::Value* v) {
template <typename Builder> static llvm::Value* getFrameObjGep(Builder& builder, llvm::Value* v) {
static_assert(offsetof(FrameInfo, exc) == 0, "");
static_assert(sizeof(ExcInfo) == 24, "");
static_assert(sizeof(Box*) == 8, "");
Expand Down Expand Up @@ -180,10 +181,6 @@ llvm::Value* IRGenState::getFrameInfoVar() {
if (entry_block.begin() != entry_block.end())
builder.SetInsertPoint(&entry_block, entry_block.getFirstInsertionPt());


llvm::AllocaInst* al = builder.CreateAlloca(g.llvm_frame_info_type, NULL, "frame_info");
assert(al->isStaticAlloca());

if (entry_block.getTerminator())
builder.SetInsertPoint(entry_block.getTerminator());
else
Expand All @@ -194,13 +191,26 @@ llvm::Value* IRGenState::getFrameInfoVar() {

this->frame_info = frame_info_arg;

// set vregs
vregs = builder.CreateLoad(builder.CreateConstInBoundsGEP2_32(frame_info_arg, 0, 3));

if (getScopeInfo()->usesNameLookup()) {
// load frame_info.boxedLocals
this->boxed_locals = builder.CreateLoad(getBoxedLocalsGep(builder, this->frame_info));
}
} else {
// The "normal" case

assert(!vregs);
getMD()->calculateNumVRegs();
int num_user_visible_vregs = getMD()->source->cfg->sym_vreg_map_user_visible.size();
vregs = builder.CreateAlloca(g.llvm_value_type_ptr, getConstantInt(num_user_visible_vregs), "vregs");
builder.CreateMemSet(vregs, getConstantInt(0, g.i8), getConstantInt(num_user_visible_vregs * sizeof(Box*)),
1);

llvm::AllocaInst* al = builder.CreateAlloca(g.llvm_frame_info_type, NULL, "frame_info");
assert(al->isStaticAlloca());

// frame_info.exc.type = NULL
llvm::Constant* null_value = getNullPtr(g.llvm_value_type_ptr);
llvm::Value* exc_info = getExcinfoGep(builder, al);
Expand All @@ -223,6 +233,9 @@ llvm::Value* IRGenState::getFrameInfoVar() {
= llvm::cast<llvm::StructType>(g.llvm_frame_info_type)->getElementType(2);
builder.CreateStore(getNullPtr(llvm_frame_obj_type_ptr), getFrameObjGep(builder, al));

// set vregs
builder.CreateStore(vregs, builder.CreateConstInBoundsGEP2_32(al, 0, 3));

this->frame_info = al;
}
}
Expand All @@ -237,6 +250,15 @@ llvm::Value* IRGenState::getBoxedLocalsVar() {
return this->boxed_locals;
}

llvm::Value* IRGenState::getVRegsVar() {
if (!vregs) {
// calling this set's also the vregs member
getFrameInfoVar();
assert(vregs);
}
return vregs;
}

ScopeInfo* IRGenState::getScopeInfo() {
return getSourceInfo()->getScopeInfo();
}
Expand Down Expand Up @@ -477,6 +499,14 @@ class IREmitterImpl : public IREmitter {
return rtn.getInstruction();
}

llvm::Value* createDeopt(AST_stmt* current_stmt, AST_expr* node, llvm::Value* node_value) override {
ICSetupInfo* pp = createDeoptIC();
llvm::Value* v
= createIC(pp, (void*)pyston::deopt, { embedRelocatablePtr(node, g.llvm_astexpr_type_ptr), node_value },
UnwindInfo(current_stmt, NULL));
return getBuilder()->CreateIntToPtr(v, g.llvm_value_type_ptr);
}

void checkAndPropagateCapiException(const UnwindInfo& unw_info, llvm::Value* returned_val, llvm::Value* exc_val,
bool double_check = false) override {
assert(!double_check); // need to call PyErr_Occurred
Expand Down Expand Up @@ -529,6 +559,24 @@ const std::string PASSED_GENERATOR_NAME = "#passed_generator";
const std::string FRAME_INFO_PTR_NAME = "#frame_info_ptr";
const std::string PASSED_GLOBALS_NAME = "#passed_globals";

bool isNameRequiredInsideFrameStackmap(InternedString name, bool is_deopt) {
// We never have to include non compiler generated vars because the user visible variables are stored inside the
// vregs array.
if (!name.isCompilerCreatedName())
return false;

// Deoptimization points must include all compiler created vars.
if (is_deopt)
return true;

// For normal patchpoints we can ignore most compiler created names except the following ones which always have to
// be included if available.
// TODO: we should proberly just store them inside the vregs array or the frame info
llvm::StringRef s = name.s();
return s == CREATED_CLOSURE_NAME || s == PASSED_CLOSURE_NAME || s == PASSED_GENERATOR_NAME
|| s == FRAME_INFO_PTR_NAME || s == PASSED_GLOBALS_NAME;
}

bool isIsDefinedName(llvm::StringRef name) {
return startswith(name, "!is_defined_");
}
Expand Down Expand Up @@ -625,8 +673,8 @@ class IRGeneratorImpl : public IRGenerator {

curblock = deopt_bb;
emitter.getBuilder()->SetInsertPoint(curblock);
llvm::Value* v = emitter.createCall2(UnwindInfo(current_statement, NULL), g.funcs.deopt,
embedRelocatablePtr(node, g.llvm_astexpr_type_ptr), node_value);

llvm::Value* v = emitter.createDeopt(current_statement, (AST_expr*)node, node_value);
emitter.getBuilder()->CreateRet(v);

curblock = success_bb;
Expand Down Expand Up @@ -1701,6 +1749,20 @@ class IRGeneratorImpl : public IRGenerator {
return rtn;
}

void _setVRegIfNeeded(InternedString name, llvm::Value* val) {
irstate->getMD()->calculateNumVRegs();
assert(irstate->getSourceInfo()->cfg->sym_vreg_map.count(name));
int vreg = irstate->getSourceInfo()->cfg->sym_vreg_map[name];
assert(vreg >= 0);

if (vreg < irstate->getSourceInfo()->cfg->sym_vreg_map_user_visible.size()) {
// looks like this store don't have to be volatile because llvm knows that the vregs are visible thru the
// FrameInfo which escapes.
auto* gep = emitter.getBuilder()->CreateConstInBoundsGEP1_64(irstate->getVRegsVar(), vreg);
emitter.getBuilder()->CreateStore(val, gep);
}
}

// only updates symbol_table if we're *not* setting a global
void _doSet(InternedString name, CompilerVariable* val, const UnwindInfo& unw_info) {
assert(name.s() != "None");
Expand Down Expand Up @@ -1755,6 +1817,9 @@ class IRGeneratorImpl : public IRGenerator {
llvm::Value* gep = getClosureElementGep(emitter, closureValue, offset);
emitter.getBuilder()->CreateStore(val->makeConverted(emitter, UNKNOWN)->getValue(), gep);
}

auto* llvm_val = val->makeConverted(emitter, UNKNOWN)->getValue();
_setVRegIfNeeded(name, llvm_val);
}
}

Expand Down Expand Up @@ -1947,6 +2012,8 @@ class IRGeneratorImpl : public IRGenerator {
// SyntaxError: can not delete variable 'x' referenced in nested scope
assert(vst == ScopeInfo::VarScopeType::FAST);

_setVRegIfNeeded(target->id, getNullPtr(g.llvm_value_type_ptr));

if (symbol_table.count(target->id) == 0) {
llvm::CallSite call
= emitter.createCall(unw_info, g.funcs.assertNameDefined,
Expand Down Expand Up @@ -2553,7 +2620,11 @@ class IRGeneratorImpl : public IRGenerator {
std::vector<Entry> sorted_symbol_table(symbol_table.begin(), symbol_table.end());
std::sort(sorted_symbol_table.begin(), sorted_symbol_table.end(),
[](const Entry& lhs, const Entry& rhs) { return lhs.first < rhs.first; });
bool is_deopt = pp->isDeopt();
for (const auto& p : sorted_symbol_table) {
if (!isNameRequiredInsideFrameStackmap(p.first, is_deopt))
continue;

CompilerVariable* v = p.second;
v->serializeToFrame(stackmap_args);
pp->addFrameVar(p.first.s(), v->getType());
Expand Down
2 changes: 2 additions & 0 deletions src/codegen/irgen/irgenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class IRGenState {
llvm::Value* boxed_locals;
llvm::Value* frame_info_arg;
llvm::Value* globals;
llvm::Value* vregs;
int scratch_size;

public:
Expand All @@ -93,6 +94,7 @@ class IRGenState {
llvm::Value* getScratchSpace(int min_bytes);
llvm::Value* getFrameInfoVar();
llvm::Value* getBoxedLocalsVar();
llvm::Value* getVRegsVar();

ConcreteCompilerType* getReturnType() { return cf->getReturnType(); }

Expand Down
10 changes: 9 additions & 1 deletion src/codegen/patchpoints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ void PatchpointInfo::addFrameVar(llvm::StringRef name, CompilerType* type) {
}

int ICSetupInfo::totalSize() const {
if (isDeopt())
return DEOPT_CALL_ONLY_SIZE;

int call_size = CALL_ONLY_SIZE;
if (getCallingConvention() != llvm::CallingConv::C) {
// 14 bytes per reg that needs to be spilled
Expand Down Expand Up @@ -198,7 +201,8 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
if (spilled)
nspills++;
}
ASSERT(nspills <= MAX_FRAME_SPILLS, "did %d spills but expected only %d!", nspills, MAX_FRAME_SPILLS);
ASSERT(nspills <= pp->numFrameSpillsSupported(), "did %d spills but expected only %d!", nspills,
pp->numFrameSpillsSupported());

assert(scratch_size % sizeof(void*) == 0);
assert(scratch_rbp_offset % sizeof(void*) == 0);
Expand Down Expand Up @@ -351,4 +355,8 @@ ICSetupInfo* createHasnextIC(TypeRecorder* type_recorder) {
return ICSetupInfo::initialize(true, 2, 64, ICSetupInfo::Hasnext, type_recorder);
}

ICSetupInfo* createDeoptIC() {
return ICSetupInfo::initialize(true, 1, 0, ICSetupInfo::Deopt, NULL);
}

} // namespace pyston
9 changes: 8 additions & 1 deletion src/codegen/patchpoints.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ class TypeRecorder;

static const int MAX_FRAME_SPILLS = 9; // TODO this shouldn't have to be larger than the set of non-callee-save args (9)
// except that will we currently spill the same reg multiple times
static const int CALL_ONLY_SIZE
static const int CALL_ONLY_SIZE = 13 + 1; // 13 for the call, + 1 if we want to nop/trap

static const int DEOPT_CALL_ONLY_SIZE
= 13 + (MAX_FRAME_SPILLS * 9)
+ 1; // 13 for the call, 9 bytes per spill (7 for GP, 9 for XMM), + 1 if we want to nop/trap

Expand All @@ -53,6 +55,7 @@ class ICSetupInfo {
Binexp,
Nonzero,
Hasnext,
Deopt,
};

private:
Expand All @@ -72,6 +75,7 @@ class ICSetupInfo {

int totalSize() const;
bool hasReturnValue() const { return has_return_value; }
bool isDeopt() const { return type == Deopt; }

llvm::CallingConv::ID getCallingConvention() const {
// FIXME: we currently have some issues with using PreserveAll (the rewriter currently
Expand Down Expand Up @@ -124,6 +128,8 @@ struct PatchpointInfo {

int scratchStackmapArg() { return 0; }
int scratchSize() { return 80 + MAX_FRAME_SPILLS * sizeof(void*); }
bool isDeopt() const { return icinfo ? icinfo->isDeopt() : false; }
int numFrameSpillsSupported() const { return isDeopt() ? MAX_FRAME_SPILLS : 0; }

void addFrameVar(llvm::StringRef name, CompilerType* type);
void setNumFrameArgs(int num_frame_args) {
Expand Down Expand Up @@ -164,6 +170,7 @@ ICSetupInfo* createDelitemIC(TypeRecorder* type_recorder);
ICSetupInfo* createBinexpIC(TypeRecorder* type_recorder, ICInfo* bjit_ic_info);
ICSetupInfo* createNonzeroIC(TypeRecorder* type_recorder);
ICSetupInfo* createHasnextIC(TypeRecorder* type_recorder);
ICSetupInfo* createDeoptIC();

} // namespace pyston

Expand Down
Loading

0 comments on commit 029b567

Please sign in to comment.