Skip to content

Commit

Permalink
asm2wasm debuginfo (#895)
Browse files Browse the repository at this point in the history
* parse file/line comments in asm.js into debug intrinsics

* convert debug intrinsics into annotations, and print them

* ignore --debuginfo if not emitting text, as wasm binaries don't support that yet

* emit full debug info when -g and emitting text; when -g and emitting binary, all we can do is the Names section

* update wasm.js
  • Loading branch information
kripken authored Feb 7, 2017
1 parent 985bcba commit 76c8f82
Show file tree
Hide file tree
Showing 17 changed files with 511 additions and 18 deletions.
2 changes: 2 additions & 0 deletions auto_update_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
cmd += ['-O0'] # test that -O0 does nothing
else:
cmd += ['-O']
if 'debugInfo' in asm:
cmd += ['-g']
if precise and opts:
# test mem init importing
open('a.mem', 'wb').write(asm)
Expand Down
21 changes: 11 additions & 10 deletions bin/wasm.js

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions build-js.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,13 @@ echo "building wasm.js"
"$EMSCRIPTEN/em++" \
$EMCC_ARGS \
src/wasm-js.cpp \
src/ast/ExpressionAnalyzer.cpp \
src/ast/ExpressionManipulator.cpp \
src/passes/pass.cpp \
src/passes/DeadCodeElimination.cpp \
src/passes/Print.cpp \
src/passes/LegalizeJSInterface.cpp \
src/passes/Vacuum.cpp \
src/emscripten-optimizer/parser.cpp \
src/emscripten-optimizer/simple_ast.cpp \
src/emscripten-optimizer/optimizer-shared.cpp \
Expand Down
2 changes: 2 additions & 0 deletions check.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@
cmd += ['-O0'] # test that -O0 does nothing
else:
cmd += ['-O']
if 'debugInfo' in asm:
cmd += ['-g']
if precise and opts:
# test mem init importing
open('a.mem', 'wb').write(asm)
Expand Down
180 changes: 175 additions & 5 deletions src/asm2wasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "ast_utils.h"
#include "wasm-builder.h"
#include "wasm-emscripten.h"
#include "wasm-printing.h"
#include "wasm-validator.h"
#include "wasm-module-building.h"

Expand Down Expand Up @@ -109,7 +110,8 @@ Name I32_CTTZ("i32_cttz"),
FTCALL("ftCall_"),
MFTCALL("mftCall_"),
MAX_("max"),
MIN_("min");
MIN_("min"),
EMSCRIPTEN_DEBUGINFO("emscripten_debuginfo");

// Utilities

Expand Down Expand Up @@ -148,13 +150,31 @@ struct AstStackHelper {

std::vector<Ref> AstStackHelper::astStack;

static bool startsWith(const char* string, const char *prefix) {
while (1) {
if (*prefix == 0) return true;
if (*string == 0) return false;
if (*string++ != *prefix++) return false;
}
};

//
// Asm2WasmPreProcessor - does some initial parsing/processing
// of asm.js code.
//

struct Asm2WasmPreProcessor {
bool memoryGrowth = false;
bool debugInfo = false;

std::vector<std::string> debugInfoFileNames;
std::unordered_map<std::string, Index> debugInfoFileIndices;

char* allocatedCopy = nullptr;

~Asm2WasmPreProcessor() {
if (allocatedCopy) free(allocatedCopy);
}

char* process(char* input) {
// emcc --separate-asm modules can look like
Expand Down Expand Up @@ -206,6 +226,79 @@ struct Asm2WasmPreProcessor {
*marker = START_FUNCS[0];
}

// handle debug info, if this build wants that.
if (debugInfo) {
// asm.js debug info comments look like
// ..command..; //@line 4 "tests/hello_world.c"
// we convert those into emscripten_debuginfo(file, line)
// calls, where the params are indices into a mapping. then
// the compiler and optimizer can operate on them. after
// that, we can apply the debug info to the wasm node right
// before it - this is guaranteed to be correct without opts,
// and is usually decently accurate with them.
const auto SCALE_FACTOR = 1.25; // an upper bound on how much more space we need as a multiple of the original
const auto ADD_FACTOR = 100; // an upper bound on how much we write for each debug info element itself
auto size = strlen(input);
auto upperBound = Index(size * SCALE_FACTOR) + ADD_FACTOR;
char* copy = allocatedCopy = (char*)malloc(upperBound);
char* end = copy + upperBound;
char* out = copy;
std::string DEBUGINFO_INTRINSIC = EMSCRIPTEN_DEBUGINFO.str;
auto DEBUGINFO_INTRINSIC_SIZE = DEBUGINFO_INTRINSIC.size();
bool seenUseAsm = false;
while (input[0]) {
if (out + ADD_FACTOR >= end) {
Fatal() << "error in handling debug info";
}
if (startsWith(input, "//@line")) {
char* linePos = input + 8;
char* lineEnd = strchr(input + 8, ' ');
char* filePos = strchr(lineEnd, '"') + 1;
char* fileEnd = strchr(filePos, '"');
input = fileEnd + 1;
*lineEnd = 0;
*fileEnd = 0;
std::string line = linePos, file = filePos;
auto iter = debugInfoFileIndices.find(file);
if (iter == debugInfoFileIndices.end()) {
Index index = debugInfoFileNames.size();
debugInfoFileNames.push_back(file);
debugInfoFileIndices[file] = index;
}
std::string fileIndex = std::to_string(debugInfoFileIndices[file]);
// write out the intrinsic
strcpy(out, DEBUGINFO_INTRINSIC.c_str());
out += DEBUGINFO_INTRINSIC_SIZE;
*out++ = '(';
strcpy(out, fileIndex.c_str());
out += fileIndex.size();
*out++ = ',';
strcpy(out, line.c_str());
out += line.size();
*out++ = ')';
*out++ = ';';
} else if (!seenUseAsm && (startsWith(input, "asm'") || startsWith(input, "asm\""))) {
// end of "use asm" or "almost asm"
const auto SKIP = 5; // skip the end of "use asm"; (5 chars, a,s,m," or ',;)
seenUseAsm = true;
memcpy(out, input, SKIP);
out += SKIP;
input += SKIP;
// add a fake import for the intrinsic, so the module validates
std::string import = "\n var emscripten_debuginfo = env.emscripten_debuginfo;";
strcpy(out, import.c_str());
out += import.size();
} else {
*out++ = *input++;
}
}
if (out >= end) {
Fatal() << "error in handling debug info";
}
*out = 0;
input = copy;
}

return input;
}
};
Expand Down Expand Up @@ -237,7 +330,7 @@ class Asm2WasmBuilder {
// function table
std::map<IString, int> functionTableStarts; // each asm function table gets a range in the one wasm table, starting at a location

bool memoryGrowth;
Asm2WasmPreProcessor& preprocessor;
bool debug;
bool imprecise;
PassOptions passOptions;
Expand Down Expand Up @@ -343,11 +436,11 @@ class Asm2WasmBuilder {
}

public:
Asm2WasmBuilder(Module& wasm, bool memoryGrowth, bool debug, bool imprecise, PassOptions passOptions, bool runOptimizationPasses, bool wasmOnly)
Asm2WasmBuilder(Module& wasm, Asm2WasmPreProcessor& preprocessor, bool debug, bool imprecise, PassOptions passOptions, bool runOptimizationPasses, bool wasmOnly)
: wasm(wasm),
allocator(wasm.allocator),
builder(wasm),
memoryGrowth(memoryGrowth),
preprocessor(preprocessor),
debug(debug),
imprecise(imprecise),
passOptions(passOptions),
Expand Down Expand Up @@ -565,6 +658,16 @@ class Asm2WasmBuilder {
}

Function* processFunction(Ref ast);

public:
CallImport* checkDebugInfo(Expression* curr) {
if (auto* call = curr->dynCast<CallImport>()) {
if (call->target == EMSCRIPTEN_DEBUGINFO) {
return call;
}
}
return nullptr;
}
};

void Asm2WasmBuilder::processAsm(Ref ast) {
Expand Down Expand Up @@ -1014,6 +1117,43 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
}
};

// apply debug info, reducing intrinsic calls into annotations on the ast nodes
struct ApplyDebugInfo : public WalkerPass<PostWalker<ApplyDebugInfo, UnifiedExpressionVisitor<ApplyDebugInfo>>> {
bool isFunctionParallel() override { return true; }

Pass* create() override { return new ApplyDebugInfo(parent); }

Asm2WasmBuilder* parent;

ApplyDebugInfo(Asm2WasmBuilder* parent) : parent(parent) {
name = "apply-debug-info";
}

Expression* lastExpression = nullptr;

void visitExpression(Expression* curr) {
if (auto* call = parent->checkDebugInfo(curr)) {
// this is a debuginfo node. turn it into an annotation on the last stack
auto* last = lastExpression;
lastExpression = nullptr;
auto& annotations = getFunction()->annotations;
if (last) {
auto fileIndex = call->operands[0]->cast<Const>()->value.geti32();
auto lineNumber = call->operands[1]->cast<Const>()->value.geti32();
annotations[last] = parent->preprocessor.debugInfoFileNames[fileIndex] + ":" + std::to_string(lineNumber);
}
// eliminate the debug info call
ExpressionManipulator::nop(curr);
return;
}
// ignore const nodes, as they may be the children of the debug info calls, and they
// don't really need debug info anyhow
if (!curr->is<Const>()) {
lastExpression = curr;
}
}
};

PassRunner passRunner(&wasm);
if (debug) {
passRunner.setDebug(true);
Expand All @@ -1030,13 +1170,22 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
passRunner.add("optimize-instructions");
passRunner.add("post-emscripten");
}
if (preprocessor.debugInfo) {
passRunner.add<ApplyDebugInfo>(this);
passRunner.add("vacuum"); // FIXME maybe just remove the nops that were debuginfo nodes, if not optimizing?
}
// make sure to not emit unreachable code at all, even in -O0, as wasm rules for it are complex
// and changing.
passRunner.add("dce");
passRunner.run();

// remove the debug info intrinsic
if (preprocessor.debugInfo) {
wasm.removeImport(EMSCRIPTEN_DEBUGINFO);
}

// apply memory growth, if relevant
if (memoryGrowth) {
if (preprocessor.memoryGrowth) {
emscripten::generateMemoryGrowthFunction(wasm);
wasm.memory.max = Memory::kMaxSize;
}
Expand Down Expand Up @@ -2273,6 +2422,27 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) {
};
// body
function->body = processStatements(body, start);
// debug info cleanup: we add debug info calls after each instruction; as
// a result,
// return 0; //@line file.cpp
// will have code after the return. if the function body is a block,
// it will be forced to the return type of the function, and then
// the unreachable type of the return makes things work, which we break
// if we add a none debug intrinsic call afterwards. so we need to fix
// that up.
if (preprocessor.debugInfo) {
if (function->result != none) {
if (auto* block = function->body->dynCast<Block>()) {
if (block->list.size() > 0) {
if (checkDebugInfo(block->list.back())) {
// add an unreachable. both the debug info and it could be dce'd,
// but it makes us validate properly.
block->list.push_back(builder.makeUnreachable());
}
}
}
}
}
// cleanups/checks
assert(breakStack.size() == 0 && continueStack.size() == 0);
assert(parentLabel.isNull());
Expand Down
1 change: 1 addition & 0 deletions src/mixed_arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <atomic>
#include <cassert>
#include <cstdlib>
#include <memory>
#include <mutex>
#include <thread>
Expand Down
13 changes: 13 additions & 0 deletions src/passes/Print.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,19 @@ struct PrintSExpression : public Visitor<PrintSExpression> {
}
}

void visit(Expression* curr) {
if (currFunction) {
// show an annotation, if there is one
auto& annotations = currFunction->annotations;
auto iter = annotations.find(curr);
if (iter != annotations.end()) {
o << ";; " << iter->second << '\n';
doIndent(o, indent);
}
}
Visitor<PrintSExpression>::visit(curr);
}

void setMinify(bool minify_) {
minify = minify_;
maybeSpace = minify ? "" : " ";
Expand Down
12 changes: 10 additions & 2 deletions src/tools/asm2wasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ int main(int argc, const char *argv[]) {
[&wasmOnly](Options *o, const std::string &) {
wasmOnly = true;
})
.add("--debuginfo", "-g", "Emit names section and debug info",
.add("--debuginfo", "-g", "Emit names section and debug info (for debug info you must emit text, -S, for this to work)",
Options::Arguments::Zero,
[&](Options *o, const std::string &arguments) { debugInfo = true; })
.add("--symbolmap", "-s", "Emit a symbol map (indexes => names)",
Expand All @@ -99,6 +99,12 @@ int main(int argc, const char *argv[]) {
});
options.parse(argc, argv);

// finalize arguments
if (options.extra["output"].size() == 0) {
// when no output file is specified, we emit text to stdout
emitBinary = false;
}

const auto &tm_it = options.extra.find("total memory");
size_t totalMemory =
tm_it == options.extra.end() ? 16 * 1024 * 1024 : atoi(tm_it->second.c_str());
Expand All @@ -109,6 +115,8 @@ int main(int argc, const char *argv[]) {
}

Asm2WasmPreProcessor pre;
// wasm binaries can contain a names section, but not full debug info
pre.debugInfo = debugInfo && !emitBinary;
auto input(
read_file<std::vector<char>>(options.extra["infile"], Flags::Text, options.debug ? Flags::Debug : Flags::Release));
char *start = pre.process(input.data());
Expand All @@ -120,7 +128,7 @@ int main(int argc, const char *argv[]) {
if (options.debug) std::cerr << "wasming..." << std::endl;
Module wasm;
wasm.memory.initial = wasm.memory.max = totalMemory / Memory::kPageSize;
Asm2WasmBuilder asm2wasm(wasm, pre.memoryGrowth, options.debug, imprecise, passOptions, runOptimizationPasses, wasmOnly);
Asm2WasmBuilder asm2wasm(wasm, pre, options.debug, imprecise, passOptions, runOptimizationPasses, wasmOnly);
asm2wasm.processAsm(asmjs);

// import mem init file, if provided
Expand Down
3 changes: 2 additions & 1 deletion src/wasm-js.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ extern "C" void EMSCRIPTEN_KEEPALIVE load_asm2wasm(char *input) {
prepare2wasm();

Asm2WasmPreProcessor pre;
pre.debugInfo = true; // FIXME: we must do this, as the input asm.js might have debug info
input = pre.process(input);

// proceed to parse and wasmify
Expand All @@ -79,7 +80,7 @@ extern "C" void EMSCRIPTEN_KEEPALIVE load_asm2wasm(char *input) {
module->memory.max = pre.memoryGrowth ? Address(Memory::kMaxSize) : module->memory.initial;

if (wasmJSDebug) std::cerr << "wasming...\n";
asm2wasm = new Asm2WasmBuilder(*module, pre.memoryGrowth, debug, false /* TODO: support imprecise? */, PassOptions(), false /* TODO: support optimizing? */, false /* TODO: support asm2wasm-i64? */);
asm2wasm = new Asm2WasmBuilder(*module, pre, debug, false /* TODO: support imprecise? */, PassOptions(), false /* TODO: support optimizing? */, false /* TODO: support asm2wasm-i64? */);
asm2wasm->processAsm(asmjs);
}

Expand Down
3 changes: 3 additions & 0 deletions src/wasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,9 @@ class Function {
std::vector<Name> localNames;
std::map<Name, Index> localIndices;

// node annotations, printed alongside the node in the text format
std::unordered_map<Expression*, std::string> annotations;

Function() : result(none) {}

size_t getNumParams() {
Expand Down
Loading

0 comments on commit 76c8f82

Please sign in to comment.