-
Notifications
You must be signed in to change notification settings - Fork 756
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
asm2wasm debuginfo #895
asm2wasm debuginfo #895
Changes from 4 commits
e5fe58e
4c1e369
7d800b2
3e75a78
f372841
85070ec
e7775bc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ | |
#include "ast_utils.h" | ||
#include "wasm-builder.h" | ||
#include "wasm-emscripten.h" | ||
#include "wasm-printing.h" | ||
#include "wasm-validator.h" | ||
#include "wasm-module-building.h" | ||
|
||
|
@@ -109,7 +110,8 @@ Name I32_CTTZ("i32_cttz"), | |
FTCALL("ftCall_"), | ||
MFTCALL("mftCall_"), | ||
MAX_("max"), | ||
MIN_("min"); | ||
MIN_("min"), | ||
EMSCRIPTEN_DEBUGINFO("emscripten_debuginfo"); | ||
|
||
// Utilities | ||
|
||
|
@@ -155,6 +157,16 @@ std::vector<Ref> AstStackHelper::astStack; | |
|
||
struct Asm2WasmPreProcessor { | ||
bool memoryGrowth = false; | ||
bool debugInfo = false; | ||
|
||
std::vector<std::string> debugInfoFileNames; | ||
std::unordered_map<std::string, Index> debugInfoFileIndices; | ||
|
||
char* allocatedCopy = nullptr; | ||
|
||
~Asm2WasmPreProcessor() { | ||
if (allocatedCopy) free(allocatedCopy); | ||
} | ||
|
||
char* process(char* input) { | ||
// emcc --separate-asm modules can look like | ||
|
@@ -206,6 +218,78 @@ struct Asm2WasmPreProcessor { | |
*marker = START_FUNCS[0]; | ||
} | ||
|
||
// handle debug info, if this build wants that. | ||
if (debugInfo) { | ||
// asm.js debug info comments look like | ||
// ..command..; //@line 4 "tests/hello_world.c" | ||
// we convert those into emscripten_debuginfo(file, line) | ||
// calls, where the params are indices into a mapping. then | ||
// the compiler and optimizer can operate on them. after | ||
// that, we can apply the debug info to the wasm node right | ||
// before it - this is guaranteed to be correct without opts, | ||
// and is usually decently accurate with them. | ||
auto size = strlen(input); | ||
auto upperBound = Index(size * 1.25) + 100; | ||
char* copy = allocatedCopy = (char*)malloc(upperBound); | ||
char* end = copy + upperBound; | ||
char* out = copy; | ||
std::string DEBUGINFO_INTRINSIC = EMSCRIPTEN_DEBUGINFO.str; | ||
auto DEBUGINFO_INTRINSIC_SIZE = DEBUGINFO_INTRINSIC.size(); | ||
bool seenUseAsm = false; | ||
while (input[0]) { | ||
if (out + 100 >= end) { | ||
Fatal() << "error in handling debug info"; | ||
} | ||
if (input[0] == '/' && input[1] == '/' && input[2] == '@' && input[3] == 'l' && input[4] == 'i' && input[5] == 'n' && input[6] == 'e') { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixing. I worry it will be less efficient, but I guess a modern compiler should be able to do this well... |
||
char* linePos = input + 8; | ||
char* lineEnd = strchr(input + 8, ' '); | ||
char* filePos = strchr(lineEnd, '"') + 1; | ||
char* fileEnd = strchr(filePos, '"'); | ||
input = fileEnd + 1; | ||
*lineEnd = 0; | ||
*fileEnd = 0; | ||
std::string line = linePos, file = filePos; | ||
auto iter = debugInfoFileIndices.find(file); | ||
if (iter == debugInfoFileIndices.end()) { | ||
Index index = debugInfoFileNames.size(); | ||
debugInfoFileNames.push_back(file); | ||
debugInfoFileIndices[file] = index; | ||
} | ||
std::string fileIndex = std::to_string(debugInfoFileIndices[file]); | ||
// write out the intrinsic | ||
strcpy(out, DEBUGINFO_INTRINSIC.c_str()); | ||
out += DEBUGINFO_INTRINSIC_SIZE; | ||
*out++ = '('; | ||
strcpy(out, fileIndex.c_str()); | ||
out += fileIndex.size(); | ||
*out++ = ','; | ||
strcpy(out, line.c_str()); | ||
out += line.size(); | ||
*out++ = ')'; | ||
*out++ = ';'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nothing to do here just a thought/comment/observation: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, might be a better way. The code is not expected to change much if at all, though, so I think it's maintainable as it is. If it were evolving code definitely it would need to be refactored. |
||
} else if (!seenUseAsm && input[0] == 'a' && input[1] == 's' && input[2] == 'm' && (input[3] == '"' || input[3] == '\'') && input[4] == ';') { | ||
// end of "use asm" or "almost asm" | ||
seenUseAsm = true; | ||
*out++ = *input++; | ||
*out++ = *input++; | ||
*out++ = *input++; | ||
*out++ = *input++; | ||
*out++ = *input++; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. memcpy? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure. |
||
// add a fake import for the intrinsic, so the module validates | ||
std::string import = "\n var emscripten_debuginfo = env.emscripten_debuginfo;"; | ||
strcpy(out, import.c_str()); | ||
out += import.size(); | ||
} else { | ||
*out++ = *input++; | ||
} | ||
} | ||
if (out >= end) { | ||
Fatal() << "error in handling debug info"; | ||
} | ||
*out = 0; | ||
input = copy; | ||
} | ||
|
||
return input; | ||
} | ||
}; | ||
|
@@ -237,7 +321,7 @@ class Asm2WasmBuilder { | |
// function table | ||
std::map<IString, int> functionTableStarts; // each asm function table gets a range in the one wasm table, starting at a location | ||
|
||
bool memoryGrowth; | ||
Asm2WasmPreProcessor& preprocessor; | ||
bool debug; | ||
bool imprecise; | ||
PassOptions passOptions; | ||
|
@@ -343,11 +427,11 @@ class Asm2WasmBuilder { | |
} | ||
|
||
public: | ||
Asm2WasmBuilder(Module& wasm, bool memoryGrowth, bool debug, bool imprecise, PassOptions passOptions, bool runOptimizationPasses, bool wasmOnly) | ||
Asm2WasmBuilder(Module& wasm, Asm2WasmPreProcessor& preprocessor, bool debug, bool imprecise, PassOptions passOptions, bool runOptimizationPasses, bool wasmOnly) | ||
: wasm(wasm), | ||
allocator(wasm.allocator), | ||
builder(wasm), | ||
memoryGrowth(memoryGrowth), | ||
preprocessor(preprocessor), | ||
debug(debug), | ||
imprecise(imprecise), | ||
passOptions(passOptions), | ||
|
@@ -565,6 +649,16 @@ class Asm2WasmBuilder { | |
} | ||
|
||
Function* processFunction(Ref ast); | ||
|
||
public: | ||
CallImport* isDebugInfo(Expression* curr) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point, yeah. Changing to |
||
if (auto* call = curr->dynCast<CallImport>()) { | ||
if (call->target == EMSCRIPTEN_DEBUGINFO) { | ||
return call; | ||
} | ||
} | ||
return nullptr; | ||
} | ||
}; | ||
|
||
void Asm2WasmBuilder::processAsm(Ref ast) { | ||
|
@@ -1014,6 +1108,43 @@ void Asm2WasmBuilder::processAsm(Ref ast) { | |
} | ||
}; | ||
|
||
// apply debug info, reducing intrinsic calls into annotations on the ast nodes | ||
struct ApplyDebugInfo : public WalkerPass<PostWalker<ApplyDebugInfo, UnifiedExpressionVisitor<ApplyDebugInfo>>> { | ||
bool isFunctionParallel() override { return true; } | ||
|
||
Pass* create() override { return new ApplyDebugInfo(parent); } | ||
|
||
Asm2WasmBuilder* parent; | ||
|
||
ApplyDebugInfo(Asm2WasmBuilder* parent) : parent(parent) { | ||
name = "apply-debug-info"; | ||
} | ||
|
||
Expression* lastExpression = nullptr; | ||
|
||
void visitExpression(Expression* curr) { | ||
if (auto* call = parent->isDebugInfo(curr)) { | ||
// this is a debuginfo node. turn it into an annotation on the last stack | ||
auto* last = lastExpression; | ||
lastExpression = nullptr; | ||
auto& annotations = getFunction()->annotations; | ||
if (last) { | ||
auto fileIndex = call->operands[0]->cast<Const>()->value.geti32(); | ||
auto lineNumber = call->operands[1]->cast<Const>()->value.geti32(); | ||
annotations[last] = parent->preprocessor.debugInfoFileNames[fileIndex] + ":" + std::to_string(lineNumber); | ||
} | ||
// eliminate the debug info call | ||
ExpressionManipulator::nop(curr); | ||
return; | ||
} | ||
// ignore const nodes, as they may be the children of the debug info calls, and they | ||
// don't really need debug info anyhow | ||
if (!curr->is<Const>()) { | ||
lastExpression = curr; | ||
} | ||
} | ||
}; | ||
|
||
PassRunner passRunner(&wasm); | ||
if (debug) { | ||
passRunner.setDebug(true); | ||
|
@@ -1030,13 +1161,22 @@ void Asm2WasmBuilder::processAsm(Ref ast) { | |
passRunner.add("optimize-instructions"); | ||
passRunner.add("post-emscripten"); | ||
} | ||
if (preprocessor.debugInfo) { | ||
passRunner.add<ApplyDebugInfo>(this); | ||
passRunner.add("vacuum"); // FIXME maybe just remove the nops that were debuginfo nodes, if not optimizing? | ||
} | ||
// make sure to not emit unreachable code at all, even in -O0, as wasm rules for it are complex | ||
// and changing. | ||
passRunner.add("dce"); | ||
passRunner.run(); | ||
|
||
// remove the debug info intrinsic | ||
if (preprocessor.debugInfo) { | ||
wasm.removeImport(EMSCRIPTEN_DEBUGINFO); | ||
} | ||
|
||
// apply memory growth, if relevant | ||
if (memoryGrowth) { | ||
if (preprocessor.memoryGrowth) { | ||
emscripten::generateMemoryGrowthFunction(wasm); | ||
wasm.memory.max = Memory::kMaxSize; | ||
} | ||
|
@@ -2273,6 +2413,27 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { | |
}; | ||
// body | ||
function->body = processStatements(body, start); | ||
// debug info cleanup: we add debug info calls after each instruction; as | ||
// a result, | ||
// return 0; //@line file.cpp | ||
// will have code after the return. if the function body is a block, | ||
// it will be forced to the return type of the function, and then | ||
// the unreachable type of the return makes things work, which we break | ||
// if we add a none debug intrinsic call afterwards. so we need to fix | ||
// that up. | ||
if (preprocessor.debugInfo) { | ||
if (function->result != none) { | ||
if (auto* block = function->body->dynCast<Block>()) { | ||
if (block->list.size() > 0) { | ||
if (isDebugInfo(block->list.back())) { | ||
// add an unreachable. both the debug info and it could be dce'd, | ||
// but it makes us validate properly. | ||
block->list.push_back(builder.makeUnreachable()); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
// cleanups/checks | ||
assert(breakStack.size() == 0 && continueStack.size() == 0); | ||
assert(parentLabel.isNull()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
|
||
#include <atomic> | ||
#include <cassert> | ||
#include <cstdlib> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems unused There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's needed for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, makes sense |
||
#include <memory> | ||
#include <mutex> | ||
#include <thread> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Magic numbers are magic :)
Does this 100 match the 100 a few lines down?
Would be good to replace these with named constants for clarity.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, good point, fixing.