Skip to content

Commit

Permalink
new frame introspection using vregs for non compiler generated names
Browse files Browse the repository at this point in the history
This splits up the handling of deopts and normal frame introspection (e.g. for a traceback).
We have to add to nearly all call sites frame introspection which makes it very important that it does not introduce much overhead over a normal call instruction.
By always storing the user visible variables into a vregs array (layout the same as in the interpreter/bjit) we can make introspection cheaper.
Frame introspection only needs to access user facing variables therefore we don't have to generate extra bytes for spilling variables which get clobbered in the callee because all values we need to access are inside the vregs array.
This let's use remove the 95byte overhead and reduces the stackmap size.
It adds a slight cost of maintaining the vregs array but we were already doing some of this work before with our manual spilling with the additional benefit of faster frame introspection.

The deopts case stays pretty much the same with the exception that we don't add the user visible vars to the stackmap because they are already in the vreg.
We could reduce the overhead by implementing a special "deopt()" function in asm which stores and restores all variables thereby we would not have to manualy spill the registers when filling the deopt IC.
Alternatively we could handle it inside llvm by either switching to a stackmap intrinsic which already supports this case or adding it it does not exist...
But I think it's not worth it because deopts should be uncommen...
  • Loading branch information
undingen committed Nov 20, 2015
1 parent 1a07ff8 commit e218661
Show file tree
Hide file tree
Showing 13 changed files with 206 additions and 155 deletions.
53 changes: 25 additions & 28 deletions src/codegen/ast_interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class ASTInterpreter {

Box** vregs;
ExcInfo last_exception;
BoxedClosure* passed_closure, *created_closure;
BoxedClosure* created_closure;
BoxedGenerator* generator;
unsigned edgecount;
FrameInfo frame_info;
Expand Down Expand Up @@ -174,7 +174,7 @@ class ASTInterpreter {

FunctionMetadata* getMD() { return md; }
FrameInfo* getFrameInfo() { return &frame_info; }
BoxedClosure* getPassedClosure() { return passed_closure; }
BoxedClosure* getPassedClosure() { return frame_info.passed_closure; }
Box** getVRegs() { return vregs; }
const ScopeInfo* getScopeInfo() { return scope_info; }

Expand Down Expand Up @@ -203,9 +203,9 @@ void ASTInterpreter::setGenerator(Box* gen) {
}

void ASTInterpreter::setPassedClosure(Box* closure) {
assert(!this->passed_closure); // This should only used for initialization
assert(closure->cls == closure_cls);
this->passed_closure = static_cast<BoxedClosure*>(closure);
assert(!frame_info.passed_closure); // This should only used for initialization
assert(!closure || closure->cls == closure_cls);
frame_info.passed_closure = static_cast<BoxedClosure*>(closure);
}

void ASTInterpreter::setCreatedClosure(Box* closure) {
Expand Down Expand Up @@ -236,7 +236,6 @@ ASTInterpreter::ASTInterpreter(FunctionMetadata* md, Box** vregs)
phis(NULL),
vregs(vregs),
last_exception(NULL, NULL, NULL),
passed_closure(0),
created_closure(0),
generator(0),
edgecount(0),
Expand All @@ -246,17 +245,18 @@ ASTInterpreter::ASTInterpreter(FunctionMetadata* md, Box** vregs)
should_jit(false) {

scope_info = source_info->getScopeInfo();
frame_info.vregs = vregs;

assert(scope_info);
}

void ASTInterpreter::initArguments(BoxedClosure* _closure, BoxedGenerator* _generator, Box* arg1, Box* arg2, Box* arg3,
Box** args) {
passed_closure = _closure;
setPassedClosure(_closure);
generator = _generator;

if (scope_info->createsClosure())
created_closure = createClosure(passed_closure, scope_info->getClosureSize());
created_closure = createClosure(_closure, scope_info->getClosureSize());

const ParamNames& param_names = md->param_names;

Expand Down Expand Up @@ -724,8 +724,8 @@ Box* ASTInterpreter::doOSR(AST_Jump* node) {
if (generator)
sorted_symbol_table[source_info->getInternedStrings().get(PASSED_GENERATOR_NAME)] = generator;

if (passed_closure)
sorted_symbol_table[source_info->getInternedStrings().get(PASSED_CLOSURE_NAME)] = passed_closure;
if (frame_info.passed_closure)
sorted_symbol_table[source_info->getInternedStrings().get(PASSED_CLOSURE_NAME)] = frame_info.passed_closure;

if (created_closure)
sorted_symbol_table[source_info->getInternedStrings().get(CREATED_CLOSURE_NAME)] = created_closure;
Expand Down Expand Up @@ -1038,9 +1038,9 @@ Value ASTInterpreter::createFunction(AST* node, AST_arguments* args, const std::
closure_var = jit->getInterp()->getAttr(offsetof(ASTInterpreter, created_closure));
} else {
assert(scope_info->passesThroughClosure());
closure = passed_closure;
closure = frame_info.passed_closure;
if (jit)
closure_var = jit->getInterp()->getAttr(offsetof(ASTInterpreter, passed_closure));
closure_var = jit->getInterp()->getAttr(offsetof(ASTInterpreter, frame_info.passed_closure));
}
assert(closure);
}
Expand Down Expand Up @@ -1105,7 +1105,7 @@ Value ASTInterpreter::visit_makeClass(AST_MakeClass* mkclass) {
BoxedClosure* closure = NULL;
if (scope_info->takesClosure()) {
if (this->scope_info->passesThroughClosure())
closure = passed_closure;
closure = getPassedClosure();
else
closure = created_closure;
assert(closure);
Expand Down Expand Up @@ -1633,8 +1633,8 @@ void ASTInterpreterJitInterface::delNameHelper(void* _interpreter, InternedStrin
Box* ASTInterpreterJitInterface::derefHelper(void* _interpreter, InternedString s) {
ASTInterpreter* interpreter = (ASTInterpreter*)_interpreter;
DerefInfo deref_info = interpreter->scope_info->getDerefInfo(s);
assert(interpreter->passed_closure);
BoxedClosure* closure = interpreter->passed_closure;
assert(interpreter->getPassedClosure());
BoxedClosure* closure = interpreter->getPassedClosure();
for (int i = 0; i < deref_info.num_parents_from_passed_closure; i++) {
closure = closure->parent;
}
Expand Down Expand Up @@ -1965,12 +1965,15 @@ FrameInfo* getFrameInfoForInterpretedFrame(void* frame_ptr) {
return interpreter->getFrameInfo();
}

BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg, bool only_user_visible) {
BoxedDict* rtn = new BoxedDict();
for (auto& l : cfg->sym_vreg_map) {
if (only_user_visible && (l.first.s()[0] == '!' || l.first.s()[0] == '#'))
continue;
Box** getVRegsForInterpretedFrame(void* frame_ptr) {
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
return interpreter->getVRegs();
}

BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg) {
BoxedDict* rtn = new BoxedDict();
for (auto& l : cfg->sym_vreg_map_user_visible) {
Box* val = vregs[l.second];
if (val) {
assert(gc::isValidGCObject(val));
Expand All @@ -1981,15 +1984,9 @@ BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg, bool only_user_visib
return rtn;
}

BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible) {
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
return localsForInterpretedFrame(interpreter->getVRegs(), interpreter->getMD()->source->cfg, only_user_visible);
}

BoxedClosure* passedClosureForInterpretedFrame(void* frame_ptr) {
BoxedDict* localsForInterpretedFrame(void* frame_ptr) {
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
assert(interpreter);
return interpreter->getPassedClosure();
return localsForInterpretedFrame(interpreter->getVRegs(), interpreter->getMD()->source->cfg);
}
}
6 changes: 3 additions & 3 deletions src/codegen/ast_interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ Box* getGlobalsForInterpretedFrame(void* frame_ptr);
FunctionMetadata* getMDForInterpretedFrame(void* frame_ptr);
struct FrameInfo;
FrameInfo* getFrameInfoForInterpretedFrame(void* frame_ptr);
BoxedClosure* passedClosureForInterpretedFrame(void* frame_ptr);

BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg, bool only_user_visible);
BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible);
Box** getVRegsForInterpretedFrame(void* frame_ptr);
BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg);
BoxedDict* localsForInterpretedFrame(void* frame_ptr);

// Executes the equivalent of CPython's PRINT_EXPR opcode (call sys.displayhook)
extern "C" void printExprHelper(Box* b);
Expand Down
2 changes: 1 addition & 1 deletion src/codegen/baseline_jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm::

// make space for patchpoint
uint8_t* pp_start = rewrite->getSlotStart() + assembler->bytesWritten();
constexpr int call_size = 16;
constexpr int call_size = 13;
assembler->skipBytes(pp_size + call_size);
uint8_t* pp_end = rewrite->getSlotStart() + assembler->bytesWritten();
assert(assembler->hasFailed() || (pp_start + pp_size + call_size == pp_end));
Expand Down
1 change: 1 addition & 0 deletions src/codegen/entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ void PystonObjectCache::calculateModuleHash(const llvm::Module* M, EffortLevel e
HashOStream hash_stream;
llvm::WriteBitcodeToFile(M, hash_stream);
hash_stream << (int)effort;
hash_stream << USE_REGALLOC_BASIC;
hash_before_codegen = hash_stream.getHash();
}

Expand Down
2 changes: 2 additions & 0 deletions src/codegen/irgen.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ class IREmitter {
virtual void checkAndPropagateCapiException(const UnwindInfo& unw_info, llvm::Value* returned_val,
llvm::Value* exc_val, bool double_check = false) = 0;

virtual llvm::Value* createDeopt(AST_stmt* current_stmt, AST_expr* node, llvm::Value* node_value) = 0;

virtual Box* getIntConstant(int64_t n) = 0;
virtual Box* getFloatConstant(double d) = 0;
};
Expand Down
97 changes: 89 additions & 8 deletions src/codegen/irgen/irgenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ IRGenState::IRGenState(FunctionMetadata* md, CompiledFunction* cf, SourceInfo* s
frame_info(NULL),
frame_info_arg(NULL),
globals(NULL),
vregs(NULL),
scratch_size(0) {
assert(cf->func);
assert(!cf->md); // in this case don't need to pass in sourceinfo
Expand Down Expand Up @@ -143,7 +144,7 @@ static llvm::Value* getExcinfoGep(llvm::IRBuilder<true>& builder, llvm::Value* v
return builder.CreateConstInBoundsGEP2_32(v, 0, 0);
}

static llvm::Value* getFrameObjGep(llvm::IRBuilder<true>& builder, llvm::Value* v) {
template <typename Builder> static llvm::Value* getFrameObjGep(Builder& builder, llvm::Value* v) {
static_assert(offsetof(FrameInfo, exc) == 0, "");
static_assert(sizeof(ExcInfo) == 24, "");
static_assert(sizeof(Box*) == 8, "");
Expand All @@ -153,6 +154,16 @@ static llvm::Value* getFrameObjGep(llvm::IRBuilder<true>& builder, llvm::Value*
// gep->accumulateConstantOffset(g.tm->getDataLayout(), ap_offset)
}

template <typename Builder> static llvm::Value* getPassedClosureGep(Builder& builder, llvm::Value* v) {
static_assert(offsetof(FrameInfo, passed_closure) == 40, "");
return builder.CreateConstInBoundsGEP2_32(v, 0, 3);
}

template <typename Builder> static llvm::Value* getVRegsGep(Builder& builder, llvm::Value* v) {
static_assert(offsetof(FrameInfo, vregs) == 48, "");
return builder.CreateConstInBoundsGEP2_32(v, 0, 4);
}

llvm::Value* IRGenState::getFrameInfoVar() {
/*
There is a matrix of possibilities here.
Expand Down Expand Up @@ -180,10 +191,6 @@ llvm::Value* IRGenState::getFrameInfoVar() {
if (entry_block.begin() != entry_block.end())
builder.SetInsertPoint(&entry_block, entry_block.getFirstInsertionPt());


llvm::AllocaInst* al = builder.CreateAlloca(g.llvm_frame_info_type, NULL, "frame_info");
assert(al->isStaticAlloca());

if (entry_block.getTerminator())
builder.SetInsertPoint(entry_block.getTerminator());
else
Expand All @@ -194,13 +201,34 @@ llvm::Value* IRGenState::getFrameInfoVar() {

this->frame_info = frame_info_arg;

// use vrags array from the interpreter
vregs = builder.CreateLoad(getVRegsGep(builder, frame_info_arg));

if (getScopeInfo()->usesNameLookup()) {
// load frame_info.boxedLocals
this->boxed_locals = builder.CreateLoad(getBoxedLocalsGep(builder, this->frame_info));
}

} else {
// The "normal" case

assert(!vregs);
getMD()->calculateNumVRegs();
int num_user_visible_vregs = getMD()->source->cfg->sym_vreg_map_user_visible.size();
if (num_user_visible_vregs > 0) {
auto* vregs_alloca
= builder.CreateAlloca(g.llvm_value_type_ptr, getConstantInt(num_user_visible_vregs), "vregs");
// Clear the vregs array because 0 means undefined valued.
builder.CreateMemSet(vregs_alloca, getConstantInt(0, g.i8),
getConstantInt(num_user_visible_vregs * sizeof(Box*)),
vregs_alloca->getAlignment());
vregs = vregs_alloca;
} else
vregs = getNullPtr(g.llvm_value_type_ptr_ptr);

llvm::AllocaInst* al = builder.CreateAlloca(g.llvm_frame_info_type, NULL, "frame_info");
assert(al->isStaticAlloca());

// frame_info.exc.type = NULL
llvm::Constant* null_value = getNullPtr(g.llvm_value_type_ptr);
llvm::Value* exc_info = getExcinfoGep(builder, al);
Expand All @@ -223,6 +251,11 @@ llvm::Value* IRGenState::getFrameInfoVar() {
= llvm::cast<llvm::StructType>(g.llvm_frame_info_type)->getElementType(2);
builder.CreateStore(getNullPtr(llvm_frame_obj_type_ptr), getFrameObjGep(builder, al));

// frame_info.passed_closure = NULL
builder.CreateStore(getNullPtr(g.llvm_closure_type_ptr), getPassedClosureGep(builder, al));
// set frame_info.vregs
builder.CreateStore(vregs, getVRegsGep(builder, al));

this->frame_info = al;
}
}
Expand All @@ -237,6 +270,15 @@ llvm::Value* IRGenState::getBoxedLocalsVar() {
return this->boxed_locals;
}

llvm::Value* IRGenState::getVRegsVar() {
if (!vregs) {
// calling this sets also the vregs member
getFrameInfoVar();
assert(vregs);
}
return vregs;
}

ScopeInfo* IRGenState::getScopeInfo() {
return getSourceInfo()->getScopeInfo();
}
Expand Down Expand Up @@ -477,6 +519,14 @@ class IREmitterImpl : public IREmitter {
return rtn.getInstruction();
}

llvm::Value* createDeopt(AST_stmt* current_stmt, AST_expr* node, llvm::Value* node_value) override {
ICSetupInfo* pp = createDeoptIC();
llvm::Value* v
= createIC(pp, (void*)pyston::deopt, { embedRelocatablePtr(node, g.llvm_astexpr_type_ptr), node_value },
UnwindInfo(current_stmt, NULL));
return getBuilder()->CreateIntToPtr(v, g.llvm_value_type_ptr);
}

void checkAndPropagateCapiException(const UnwindInfo& unw_info, llvm::Value* returned_val, llvm::Value* exc_val,
bool double_check = false) override {
assert(!double_check); // need to call PyErr_Occurred
Expand Down Expand Up @@ -625,8 +675,7 @@ class IRGeneratorImpl : public IRGenerator {

curblock = deopt_bb;
emitter.getBuilder()->SetInsertPoint(curblock);
llvm::Value* v = emitter.createCall2(UnwindInfo(current_statement, NULL), g.funcs.deopt,
embedRelocatablePtr(node, g.llvm_astexpr_type_ptr), node_value);
llvm::Value* v = emitter.createDeopt(current_statement, (AST_expr*)node, node_value);
emitter.getBuilder()->CreateRet(v);

curblock = success_bb;
Expand Down Expand Up @@ -1701,6 +1750,22 @@ class IRGeneratorImpl : public IRGenerator {
return rtn;
}

template <typename GetLLVMValCB> void _setVRegIfUserVisible(InternedString name, GetLLVMValCB get_llvm_val_cb) {
auto cfg = irstate->getSourceInfo()->cfg;
if (!cfg->hasVregsAssigned())
irstate->getMD()->calculateNumVRegs();
assert(cfg->sym_vreg_map.count(name));
int vreg = cfg->sym_vreg_map[name];
assert(vreg >= 0);

if (vreg < cfg->sym_vreg_map_user_visible.size()) {
// looks like this store don't have to be volatile because llvm knows that the vregs are visible thru the
// FrameInfo which escapes.
auto* gep = emitter.getBuilder()->CreateConstInBoundsGEP1_64(irstate->getVRegsVar(), vreg);
emitter.getBuilder()->CreateStore(get_llvm_val_cb(), gep);
}
}

// only updates symbol_table if we're *not* setting a global
void _doSet(InternedString name, CompilerVariable* val, const UnwindInfo& unw_info) {
assert(name.s() != "None");
Expand Down Expand Up @@ -1755,6 +1820,9 @@ class IRGeneratorImpl : public IRGenerator {
llvm::Value* gep = getClosureElementGep(emitter, closureValue, offset);
emitter.getBuilder()->CreateStore(val->makeConverted(emitter, UNKNOWN)->getValue(), gep);
}

auto&& get_llvm_val = [&]() { return val->makeConverted(emitter, UNKNOWN)->getValue(); };
_setVRegIfUserVisible(name, get_llvm_val);
}
}

Expand Down Expand Up @@ -1947,6 +2015,8 @@ class IRGeneratorImpl : public IRGenerator {
// SyntaxError: can not delete variable 'x' referenced in nested scope
assert(vst == ScopeInfo::VarScopeType::FAST);

_setVRegIfUserVisible(target->id, []() { return getNullPtr(g.llvm_value_type_ptr); });

if (symbol_table.count(target->id) == 0) {
llvm::CallSite call
= emitter.createCall(unw_info, g.funcs.assertNameDefined,
Expand Down Expand Up @@ -2546,14 +2616,20 @@ class IRGeneratorImpl : public IRGenerator {

pp->addFrameVar("!current_stmt", UNBOXED_INT);

if (ENABLE_FRAME_INTROSPECTION) {
// For deopts we need to add the compiler created names to the stackmap
if (ENABLE_FRAME_INTROSPECTION && pp->isDeopt()) {
// TODO: don't need to use a sorted symbol table if we're explicitly recording the names!
// nice for debugging though.
typedef std::pair<InternedString, CompilerVariable*> Entry;
std::vector<Entry> sorted_symbol_table(symbol_table.begin(), symbol_table.end());
std::sort(sorted_symbol_table.begin(), sorted_symbol_table.end(),
[](const Entry& lhs, const Entry& rhs) { return lhs.first < rhs.first; });
for (const auto& p : sorted_symbol_table) {
// We never have to include non compiler generated vars because the user visible variables are stored
// inside the vregs array.
if (!p.first.isCompilerCreatedName())
continue;

CompilerVariable* v = p.second;
v->serializeToFrame(stackmap_args);
pp->addFrameVar(p.first.s(), v->getType());
Expand Down Expand Up @@ -2687,6 +2763,11 @@ class IRGeneratorImpl : public IRGenerator {
passed_closure = AI;
symbol_table[internString(PASSED_CLOSURE_NAME)]
= new ConcreteCompilerVariable(getPassedClosureType(), AI, true);

// store the passed_closure inside the frame info so that frame introspection can access it without needing
// a stackmap entry
emitter.getBuilder()->CreateStore(passed_closure,
getPassedClosureGep(*emitter.getBuilder(), irstate->getFrameInfoVar()));
++AI;
}

Expand Down
Loading

0 comments on commit e218661

Please sign in to comment.