-
Notifications
You must be signed in to change notification settings - Fork 756
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
asm2wasm debuginfo #895
asm2wasm debuginfo #895
Changes from 6 commits
e5fe58e
4c1e369
7d800b2
3e75a78
f372841
85070ec
e7775bc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ | |
#include "ast_utils.h" | ||
#include "wasm-builder.h" | ||
#include "wasm-emscripten.h" | ||
#include "wasm-printing.h" | ||
#include "wasm-validator.h" | ||
#include "wasm-module-building.h" | ||
|
||
|
@@ -109,7 +110,8 @@ Name I32_CTTZ("i32_cttz"), | |
FTCALL("ftCall_"), | ||
MFTCALL("mftCall_"), | ||
MAX_("max"), | ||
MIN_("min"); | ||
MIN_("min"), | ||
EMSCRIPTEN_DEBUGINFO("emscripten_debuginfo"); | ||
|
||
// Utilities | ||
|
||
|
@@ -148,13 +150,30 @@ struct AstStackHelper { | |
|
||
std::vector<Ref> AstStackHelper::astStack; | ||
|
||
static bool startsWith(const char* string, const char *prefix) { | ||
while (1) { | ||
if (*prefix == 0) return true; | ||
if (*string++ != *prefix++) return false; | ||
} | ||
}; | ||
|
||
// | ||
// Asm2WasmPreProcessor - does some initial parsing/processing | ||
// of asm.js code. | ||
// | ||
|
||
struct Asm2WasmPreProcessor { | ||
bool memoryGrowth = false; | ||
bool debugInfo = false; | ||
|
||
std::vector<std::string> debugInfoFileNames; | ||
std::unordered_map<std::string, Index> debugInfoFileIndices; | ||
|
||
char* allocatedCopy = nullptr; | ||
|
||
~Asm2WasmPreProcessor() { | ||
if (allocatedCopy) free(allocatedCopy); | ||
} | ||
|
||
char* process(char* input) { | ||
// emcc --separate-asm modules can look like | ||
|
@@ -206,6 +225,78 @@ struct Asm2WasmPreProcessor { | |
*marker = START_FUNCS[0]; | ||
} | ||
|
||
// handle debug info, if this build wants that. | ||
if (debugInfo) { | ||
// asm.js debug info comments look like | ||
// ..command..; //@line 4 "tests/hello_world.c" | ||
// we convert those into emscripten_debuginfo(file, line) | ||
// calls, where the params are indices into a mapping. then | ||
// the compiler and optimizer can operate on them. after | ||
// that, we can apply the debug info to the wasm node right | ||
// before it - this is guaranteed to be correct without opts, | ||
// and is usually decently accurate with them. | ||
const auto SCALE_FACTOR = 1.25; // an upper bound on how much more space we need as a multiple of the original | ||
const auto ADD_FACTOR = 100; // an upper bound on how much we write for each debug info element itself | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These comments are pretty helpful, thanks! |
||
auto size = strlen(input); | ||
auto upperBound = Index(size * SCALE_FACTOR) + ADD_FACTOR; | ||
char* copy = allocatedCopy = (char*)malloc(upperBound); | ||
char* end = copy + upperBound; | ||
char* out = copy; | ||
std::string DEBUGINFO_INTRINSIC = EMSCRIPTEN_DEBUGINFO.str; | ||
auto DEBUGINFO_INTRINSIC_SIZE = DEBUGINFO_INTRINSIC.size(); | ||
bool seenUseAsm = false; | ||
while (input[0]) { | ||
if (out + ADD_FACTOR >= end) { | ||
Fatal() << "error in handling debug info"; | ||
} | ||
if (startsWith(input, "//@line")) { | ||
char* linePos = input + 8; | ||
char* lineEnd = strchr(input + 8, ' '); | ||
char* filePos = strchr(lineEnd, '"') + 1; | ||
char* fileEnd = strchr(filePos, '"'); | ||
input = fileEnd + 1; | ||
*lineEnd = 0; | ||
*fileEnd = 0; | ||
std::string line = linePos, file = filePos; | ||
auto iter = debugInfoFileIndices.find(file); | ||
if (iter == debugInfoFileIndices.end()) { | ||
Index index = debugInfoFileNames.size(); | ||
debugInfoFileNames.push_back(file); | ||
debugInfoFileIndices[file] = index; | ||
} | ||
std::string fileIndex = std::to_string(debugInfoFileIndices[file]); | ||
// write out the intrinsic | ||
strcpy(out, DEBUGINFO_INTRINSIC.c_str()); | ||
out += DEBUGINFO_INTRINSIC_SIZE; | ||
*out++ = '('; | ||
strcpy(out, fileIndex.c_str()); | ||
out += fileIndex.size(); | ||
*out++ = ','; | ||
strcpy(out, line.c_str()); | ||
out += line.size(); | ||
*out++ = ')'; | ||
*out++ = ';'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nothing to do here just a thought/comment/observation: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, might be a better way. The code is not expected to change much if at all, though, so I think it's maintainable as it is. If it were evolving code definitely it would need to be refactored. |
||
} else if (!seenUseAsm && (startsWith(input, "asm'") || startsWith(input, "asm\""))) { | ||
// end of "use asm" or "almost asm" | ||
seenUseAsm = true; | ||
memcpy(out, input, 5); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably worth pulling the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Heh, it's actually the semicolon at the end, |
||
out += 5; | ||
input += 5; | ||
// add a fake import for the intrinsic, so the module validates | ||
std::string import = "\n var emscripten_debuginfo = env.emscripten_debuginfo;"; | ||
strcpy(out, import.c_str()); | ||
out += import.size(); | ||
} else { | ||
*out++ = *input++; | ||
} | ||
} | ||
if (out >= end) { | ||
Fatal() << "error in handling debug info"; | ||
} | ||
*out = 0; | ||
input = copy; | ||
} | ||
|
||
return input; | ||
} | ||
}; | ||
|
@@ -237,7 +328,7 @@ class Asm2WasmBuilder { | |
// function table | ||
std::map<IString, int> functionTableStarts; // each asm function table gets a range in the one wasm table, starting at a location | ||
|
||
bool memoryGrowth; | ||
Asm2WasmPreProcessor& preprocessor; | ||
bool debug; | ||
bool imprecise; | ||
PassOptions passOptions; | ||
|
@@ -343,11 +434,11 @@ class Asm2WasmBuilder { | |
} | ||
|
||
public: | ||
Asm2WasmBuilder(Module& wasm, bool memoryGrowth, bool debug, bool imprecise, PassOptions passOptions, bool runOptimizationPasses, bool wasmOnly) | ||
Asm2WasmBuilder(Module& wasm, Asm2WasmPreProcessor& preprocessor, bool debug, bool imprecise, PassOptions passOptions, bool runOptimizationPasses, bool wasmOnly) | ||
: wasm(wasm), | ||
allocator(wasm.allocator), | ||
builder(wasm), | ||
memoryGrowth(memoryGrowth), | ||
preprocessor(preprocessor), | ||
debug(debug), | ||
imprecise(imprecise), | ||
passOptions(passOptions), | ||
|
@@ -565,6 +656,16 @@ class Asm2WasmBuilder { | |
} | ||
|
||
Function* processFunction(Ref ast); | ||
|
||
public: | ||
CallImport* checkDebugInfo(Expression* curr) { | ||
if (auto* call = curr->dynCast<CallImport>()) { | ||
if (call->target == EMSCRIPTEN_DEBUGINFO) { | ||
return call; | ||
} | ||
} | ||
return nullptr; | ||
} | ||
}; | ||
|
||
void Asm2WasmBuilder::processAsm(Ref ast) { | ||
|
@@ -1014,6 +1115,43 @@ void Asm2WasmBuilder::processAsm(Ref ast) { | |
} | ||
}; | ||
|
||
// apply debug info, reducing intrinsic calls into annotations on the ast nodes | ||
struct ApplyDebugInfo : public WalkerPass<PostWalker<ApplyDebugInfo, UnifiedExpressionVisitor<ApplyDebugInfo>>> { | ||
bool isFunctionParallel() override { return true; } | ||
|
||
Pass* create() override { return new ApplyDebugInfo(parent); } | ||
|
||
Asm2WasmBuilder* parent; | ||
|
||
ApplyDebugInfo(Asm2WasmBuilder* parent) : parent(parent) { | ||
name = "apply-debug-info"; | ||
} | ||
|
||
Expression* lastExpression = nullptr; | ||
|
||
void visitExpression(Expression* curr) { | ||
if (auto* call = parent->checkDebugInfo(curr)) { | ||
// this is a debuginfo node. turn it into an annotation on the last stack | ||
auto* last = lastExpression; | ||
lastExpression = nullptr; | ||
auto& annotations = getFunction()->annotations; | ||
if (last) { | ||
auto fileIndex = call->operands[0]->cast<Const>()->value.geti32(); | ||
auto lineNumber = call->operands[1]->cast<Const>()->value.geti32(); | ||
annotations[last] = parent->preprocessor.debugInfoFileNames[fileIndex] + ":" + std::to_string(lineNumber); | ||
} | ||
// eliminate the debug info call | ||
ExpressionManipulator::nop(curr); | ||
return; | ||
} | ||
// ignore const nodes, as they may be the children of the debug info calls, and they | ||
// don't really need debug info anyhow | ||
if (!curr->is<Const>()) { | ||
lastExpression = curr; | ||
} | ||
} | ||
}; | ||
|
||
PassRunner passRunner(&wasm); | ||
if (debug) { | ||
passRunner.setDebug(true); | ||
|
@@ -1030,13 +1168,22 @@ void Asm2WasmBuilder::processAsm(Ref ast) { | |
passRunner.add("optimize-instructions"); | ||
passRunner.add("post-emscripten"); | ||
} | ||
if (preprocessor.debugInfo) { | ||
passRunner.add<ApplyDebugInfo>(this); | ||
passRunner.add("vacuum"); // FIXME maybe just remove the nops that were debuginfo nodes, if not optimizing? | ||
} | ||
// make sure to not emit unreachable code at all, even in -O0, as wasm rules for it are complex | ||
// and changing. | ||
passRunner.add("dce"); | ||
passRunner.run(); | ||
|
||
// remove the debug info intrinsic | ||
if (preprocessor.debugInfo) { | ||
wasm.removeImport(EMSCRIPTEN_DEBUGINFO); | ||
} | ||
|
||
// apply memory growth, if relevant | ||
if (memoryGrowth) { | ||
if (preprocessor.memoryGrowth) { | ||
emscripten::generateMemoryGrowthFunction(wasm); | ||
wasm.memory.max = Memory::kMaxSize; | ||
} | ||
|
@@ -2273,6 +2420,27 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { | |
}; | ||
// body | ||
function->body = processStatements(body, start); | ||
// debug info cleanup: we add debug info calls after each instruction; as | ||
// a result, | ||
// return 0; //@line file.cpp | ||
// will have code after the return. if the function body is a block, | ||
// it will be forced to the return type of the function, and then | ||
// the unreachable type of the return makes things work, which we break | ||
// if we add a none debug intrinsic call afterwards. so we need to fix | ||
// that up. | ||
if (preprocessor.debugInfo) { | ||
if (function->result != none) { | ||
if (auto* block = function->body->dynCast<Block>()) { | ||
if (block->list.size() > 0) { | ||
if (checkDebugInfo(block->list.back())) { | ||
// add an unreachable. both the debug info and it could be dce'd, | ||
// but it makes us validate properly. | ||
block->list.push_back(builder.makeUnreachable()); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
// cleanups/checks | ||
assert(breakStack.size() == 0 && continueStack.size() == 0); | ||
assert(parentLabel.isNull()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
|
||
#include <atomic> | ||
#include <cassert> | ||
#include <cstdlib> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems unused There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's needed for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, makes sense |
||
#include <memory> | ||
#include <mutex> | ||
#include <thread> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should also probably return false if
*string == 0
for completeness, e.g.startsWith("foo", "foobar")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, fixing.