From f44912bf234d5e19a1adb34c770335f4b3190e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Wed, 6 Mar 2024 13:20:38 -0500 Subject: [PATCH] Add sourcemap support to wasm-metadce and wasm-merge (#6372) --- src/ir/debug.h | 4 ++ src/ir/module-utils.cpp | 60 +++++++++++++++++++++++++++-- src/ir/module-utils.h | 10 ++++- src/passes/Inlining.cpp | 4 +- src/tools/wasm-merge.cpp | 49 ++++++++++++++++++++++- src/tools/wasm-metadce.cpp | 33 +++++++++++++++- test/lit/help/wasm-merge.test | 12 ++++++ test/lit/help/wasm-metadce.test | 7 ++++ test/lit/merge/sourcemap.wat | 57 +++++++++++++++++++++++++++ test/lit/merge/sourcemap.wat.second | 8 ++++ test/lit/metadce/sourcemap.wat | 26 +++++++++++++ test/lit/metadce/sourcemap.wat.json | 13 +++++++ 12 files changed, 271 insertions(+), 12 deletions(-) create mode 100644 test/lit/merge/sourcemap.wat create mode 100644 test/lit/merge/sourcemap.wat.second create mode 100644 test/lit/metadce/sourcemap.wat create mode 100644 test/lit/metadce/sourcemap.wat.json diff --git a/src/ir/debug.h b/src/ir/debug.h index 6dfd379d6c6..04838137e63 100644 --- a/src/ir/debug.h +++ b/src/ir/debug.h @@ -27,6 +27,10 @@ inline void copyDebugInfo(Expression* origin, Expression* copy, Function* originFunc, Function* copyFunc) { + if (originFunc->debugLocations.empty()) { + return; // No debug info to copy + } + struct Lister : public PostWalker> { std::vector list; void visitExpression(Expression* curr) { list.push_back(curr); } diff --git a/src/ir/module-utils.cpp b/src/ir/module-utils.cpp index c2f48eec852..fdee4f86995 100644 --- a/src/ir/module-utils.cpp +++ b/src/ir/module-utils.cpp @@ -15,6 +15,7 @@ */ #include "module-utils.h" +#include "ir/debug.h" #include "ir/intrinsics.h" #include "ir/manipulation.h" #include "ir/properties.h" @@ -23,17 +24,46 @@ namespace wasm::ModuleUtils { +// Update the file name indices when moving a set of debug locations from one +// module to another. 
+static void updateLocationSet(std::set& locations, + std::vector& fileIndexMap) { + std::set updatedLocations; + + for (auto iter : locations) { + iter.fileIndex = fileIndexMap[iter.fileIndex]; + updatedLocations.insert(iter); + } + locations.clear(); + std::swap(locations, updatedLocations); +} + // Copies a function into a module. If newName is provided it is used as the -// name of the function (otherwise the original name is copied). -Function* copyFunction(Function* func, Module& out, Name newName) { +// name of the function (otherwise the original name is copied). If fileIndexMap +// is specified, it is used to rename source map filename indices when copying +// the function from one module to another one. +Function* copyFunction(Function* func, + Module& out, + Name newName, + std::optional> fileIndexMap) { auto ret = std::make_unique(); ret->name = newName.is() ? newName : func->name; ret->type = func->type; ret->vars = func->vars; ret->localNames = func->localNames; ret->localIndices = func->localIndices; - ret->debugLocations = func->debugLocations; ret->body = ExpressionManipulator::copy(func->body, out); + debug::copyDebugInfo(func->body, ret->body, func, ret.get()); + ret->prologLocation = func->prologLocation; + ret->epilogLocation = func->epilogLocation; + // Update file indices if needed + if (fileIndexMap) { + for (auto& iter : ret->debugLocations) { + iter.second.fileIndex = (*fileIndexMap)[iter.second.fileIndex]; + } + updateLocationSet(ret->prologLocation, *fileIndexMap); + updateLocationSet(ret->epilogLocation, *fileIndexMap); + } ret->module = func->module; ret->base = func->base; ret->noFullInline = func->noFullInline; @@ -136,8 +166,30 @@ DataSegment* copyDataSegment(const DataSegment* segment, Module& out) { // Copies named toplevel module items (things of kind ModuleItemKind). See // copyModule() for something that also copies exports, the start function, etc. 
void copyModuleItems(const Module& in, Module& out) { + // If the source module has some debug information, we first compute how + // to map file name indices from this module to file name indices in + // the target module. + std::optional> fileIndexMap; + if (!in.debugInfoFileNames.empty()) { + std::unordered_map debugInfoFileIndices; + for (Index i = 0; i < out.debugInfoFileNames.size(); i++) { + debugInfoFileIndices[out.debugInfoFileNames[i]] = i; + } + fileIndexMap.emplace(); + for (Index i = 0; i < in.debugInfoFileNames.size(); i++) { + std::string file = in.debugInfoFileNames[i]; + auto iter = debugInfoFileIndices.find(file); + if (iter == debugInfoFileIndices.end()) { + Index index = out.debugInfoFileNames.size(); + out.debugInfoFileNames.push_back(file); + debugInfoFileIndices[file] = index; + } + fileIndexMap->push_back(debugInfoFileIndices[file]); + } + } + for (auto& curr : in.functions) { - copyFunction(curr.get(), out); + copyFunction(curr.get(), out, Name(), fileIndexMap); } for (auto& curr : in.globals) { copyGlobal(curr.get(), out); diff --git a/src/ir/module-utils.h b/src/ir/module-utils.h index d33a47673ea..eabcd036c2b 100644 --- a/src/ir/module-utils.h +++ b/src/ir/module-utils.h @@ -24,8 +24,14 @@ namespace wasm::ModuleUtils { // Copies a function into a module. If newName is provided it is used as the -// name of the function (otherwise the original name is copied). -Function* copyFunction(Function* func, Module& out, Name newName = Name()); +// name of the function (otherwise the original name is copied). If fileIndexMap +// is specified, it is used to rename source map filename indices when copying +// the function from one module to another one.
+Function* +copyFunction(Function* func, + Module& out, + Name newName = Name(), + std::optional> fileIndexMap = std::nullopt); Global* copyGlobal(Global* global, Module& out); diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 3ae63face6c..51dddaaa9bf 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -456,9 +456,7 @@ static Expression* doInlining(Module* module, } // Generate and update the inlined contents auto* contents = ExpressionManipulator::copy(from->body, *module); - if (!from->debugLocations.empty()) { - debug::copyDebugInfo(from->body, contents, from, into); - } + debug::copyDebugInfo(from->body, contents, from, into); updater.walk(contents); block->list.push_back(contents); block->type = retType; diff --git a/src/tools/wasm-merge.cpp b/src/tools/wasm-merge.cpp index 0884350cb5b..ec67d56fc5a 100644 --- a/src/tools/wasm-merge.cpp +++ b/src/tools/wasm-merge.cpp @@ -452,6 +452,9 @@ int main(int argc, const char* argv[]) { std::vector inputFileNames; bool emitBinary = true; bool debugInfo = false; + std::map inputSourceMapFilenames; + std::string outputSourceMapFilename; + std::string outputSourceMapUrl; const std::string WasmMergeOption = "wasm-merge options"; @@ -464,7 +467,11 @@ For example, will read foo.wasm and bar.wasm, with names 'foo' and 'bar' respectively, so if the second imports from 'foo', we will see that as an import from the first module after the merge. The merged output will be written to merged.wasm. -Note that filenames and modules names are interleaved (which is hopefully less confusing).)"); +Note that filenames and modules names are interleaved (which is hopefully less confusing). 
+ +Input source maps can be specified by adding an -ism option right after the module name: + + wasm-merge foo.wasm foo -ism foo.wasm.map ...)"); options .add("--output", @@ -485,6 +492,37 @@ Note that filenames and modules names are interleaved (which is hopefully less c inputFileNames.push_back(argument); } }) + .add("--input-source-map", + "-ism", + "Consume source maps from the specified files", + WasmMergeOption, + Options::Arguments::N, + [&](Options* o, const std::string& argument) { + size_t pos = inputFiles.size(); + if (pos == 0 || pos != inputFileNames.size() || + inputSourceMapFilenames.count(pos - 1)) { + std::cerr << "Option '-ism " << argument + << "' should be right after the module name\n"; + exit(EXIT_FAILURE); + } + inputSourceMapFilenames.insert({pos - 1, argument}); + }) + .add("--output-source-map", + "-osm", + "Emit source map to the specified file", + WasmMergeOption, + Options::Arguments::One, + [&outputSourceMapFilename](Options* o, const std::string& argument) { + outputSourceMapFilename = argument; + }) + .add("--output-source-map-url", + "-osu", + "Emit specified string as source map URL", + WasmMergeOption, + Options::Arguments::One, + [&outputSourceMapUrl](Options* o, const std::string& argument) { + outputSourceMapUrl = argument; + }) .add("--rename-export-conflicts", "-rec", "Rename exports to avoid conflicts (rather than error)", @@ -529,6 +567,9 @@ Note that filenames and modules names are interleaved (which is hopefully less c for (Index i = 0; i < inputFiles.size(); i++) { auto inputFile = inputFiles[i]; auto inputFileName = inputFileNames[i]; + auto iter = inputSourceMapFilenames.find(i); + auto inputSourceMapFilename = + (iter == inputSourceMapFilenames.end()) ? 
"" : iter->second; if (options.debug) { std::cerr << "reading input '" << inputFile << "' as '" << inputFileName @@ -550,7 +591,7 @@ Note that filenames and modules names are interleaved (which is hopefully less c ModuleReader reader; try { - reader.read(inputFile, *currModule); + reader.read(inputFile, *currModule, inputSourceMapFilename); } catch (ParseException& p) { p.dump(std::cerr); Fatal() << "error in parsing wasm input: " << inputFile; @@ -606,6 +647,10 @@ Note that filenames and modules names are interleaved (which is hopefully less c ModuleWriter writer; writer.setBinary(emitBinary); writer.setDebugInfo(debugInfo); + if (outputSourceMapFilename.size()) { + writer.setSourceMapFilename(outputSourceMapFilename); + writer.setSourceMapUrl(outputSourceMapUrl); + } writer.write(merged, options.extra["output"]); } } diff --git a/src/tools/wasm-metadce.cpp b/src/tools/wasm-metadce.cpp index 5be60c2986d..cd8c8546a45 100644 --- a/src/tools/wasm-metadce.cpp +++ b/src/tools/wasm-metadce.cpp @@ -365,6 +365,9 @@ int main(int argc, const char* argv[]) { bool debugInfo = false; std::string graphFile; bool dump = false; + std::string inputSourceMapFilename; + std::string outputSourceMapFilename; + std::string outputSourceMapUrl; const std::string WasmMetaDCEOption = "wasm-opt options"; @@ -423,6 +426,30 @@ int main(int argc, const char* argv[]) { o->extra["output"] = argument; Colors::setEnabled(false); }) + .add("--input-source-map", + "-ism", + "Consume source map from the specified file", + WasmMetaDCEOption, + Options::Arguments::One, + [&inputSourceMapFilename](Options* o, const std::string& argument) { + inputSourceMapFilename = argument; + }) + .add("--output-source-map", + "-osm", + "Emit source map to the specified file", + WasmMetaDCEOption, + Options::Arguments::One, + [&outputSourceMapFilename](Options* o, const std::string& argument) { + outputSourceMapFilename = argument; + }) + .add("--output-source-map-url", + "-osu", + "Emit specified string as source map 
URL", + WasmMetaDCEOption, + Options::Arguments::One, + [&outputSourceMapUrl](Options* o, const std::string& argument) { + outputSourceMapUrl = argument; + }) .add("--emit-text", "-S", "Emit text instead of binary for the output file", @@ -470,7 +497,7 @@ int main(int argc, const char* argv[]) { ModuleReader reader; reader.setDWARF(debugInfo); try { - reader.read(options.extra["infile"], wasm); + reader.read(options.extra["infile"], wasm, inputSourceMapFilename); } catch (ParseException& p) { p.dump(std::cerr); Fatal() << "error in parsing wasm input"; @@ -578,6 +605,10 @@ int main(int argc, const char* argv[]) { ModuleWriter writer; writer.setBinary(emitBinary); writer.setDebugInfo(debugInfo); + if (outputSourceMapFilename.size()) { + writer.setSourceMapFilename(outputSourceMapFilename); + writer.setSourceMapUrl(outputSourceMapUrl); + } writer.write(wasm, options.extra["output"]); } diff --git a/test/lit/help/wasm-merge.test b/test/lit/help/wasm-merge.test index 50719f9882d..7d55074c38d 100644 --- a/test/lit/help/wasm-merge.test +++ b/test/lit/help/wasm-merge.test @@ -14,6 +14,11 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: Note that filenames and modules names are interleaved (which is hopefully less ;; CHECK-NEXT: confusing). +;; CHECK-NEXT: +;; CHECK-NEXT: Input source maps can be specified by adding an -ism option right after the +;; CHECK-NEXT: module name: +;; CHECK-NEXT: +;; CHECK-NEXT: wasm-merge foo.wasm foo -ism foo.wasm.map ... 
;; CHECK-NEXT: ================================================================================ ;; CHECK-NEXT: ;; CHECK-NEXT: @@ -22,6 +27,13 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: --output,-o Output file (stdout if not specified) ;; CHECK-NEXT: +;; CHECK-NEXT: --input-source-map,-ism Consume source maps from the specified +;; CHECK-NEXT: files +;; CHECK-NEXT: +;; CHECK-NEXT: --output-source-map,-osm Emit source map to the specified file +;; CHECK-NEXT: +;; CHECK-NEXT: --output-source-map-url,-osu Emit specified string as source map URL +;; CHECK-NEXT: ;; CHECK-NEXT: --rename-export-conflicts,-rec Rename exports to avoid conflicts (rather ;; CHECK-NEXT: than error) ;; CHECK-NEXT: diff --git a/test/lit/help/wasm-metadce.test b/test/lit/help/wasm-metadce.test index 4334dbd0166..4d706dc0f1d 100644 --- a/test/lit/help/wasm-metadce.test +++ b/test/lit/help/wasm-metadce.test @@ -53,6 +53,13 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: --output,-o Output file (stdout if not specified) ;; CHECK-NEXT: +;; CHECK-NEXT: --input-source-map,-ism Consume source map from the specified +;; CHECK-NEXT: file +;; CHECK-NEXT: +;; CHECK-NEXT: --output-source-map,-osm Emit source map to the specified file +;; CHECK-NEXT: +;; CHECK-NEXT: --output-source-map-url,-osu Emit specified string as source map URL +;; CHECK-NEXT: ;; CHECK-NEXT: --emit-text,-S Emit text instead of binary for the ;; CHECK-NEXT: output file ;; CHECK-NEXT: diff --git a/test/lit/merge/sourcemap.wat b/test/lit/merge/sourcemap.wat new file mode 100644 index 00000000000..7457d4307c4 --- /dev/null +++ b/test/lit/merge/sourcemap.wat @@ -0,0 +1,57 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+ +;; RUN: wasm-merge %s first %s.second second -S -o - | filecheck %s --check-prefix=CHECK-TEXT +;; RUN: wasm-as %s -o %t.wasm --source-map %t.map +;; RUN: wasm-as %s.second -o %t.second.wasm --source-map %t.second.map +;; RUN: wasm-merge %t.wasm first --input-source-map %t.map %t.second.wasm second --input-source-map %t.second.map -o %t.merged.wasm --output-source-map %t.merged.map +;; RUN: wasm-dis %t.merged.wasm --source-map %t.merged.map -o - | filecheck %s --check-prefix=CHECK-BIN + +;; Test that sourcemap information is preserved + +(module + ;;@ a:1:1 + (func (export "f") + ;;@ a:2:1 + (nop) + ;;@ a:3:1 + ) +) +;; CHECK-TEXT: (type $0 (func)) + +;; CHECK-TEXT: (export "f" (func $0)) + +;; CHECK-TEXT: (export "g" (func $0_1)) + +;; CHECK-TEXT: ;;@ a:1:1 +;; CHECK-TEXT-NEXT: (func $0 +;; CHECK-TEXT-NEXT: ;;@ a:2:1 +;; CHECK-TEXT-NEXT: (nop) +;; CHECK-TEXT-NEXT: ;;@ a:3:1 +;; CHECK-TEXT-NEXT: ) + +;; CHECK-TEXT: ;;@ b:1:2 +;; CHECK-TEXT-NEXT: (func $0_1 +;; CHECK-TEXT-NEXT: ;;@ b:2:2 +;; CHECK-TEXT-NEXT: (nop) +;; CHECK-TEXT-NEXT: ;;@ b:3:2 +;; CHECK-TEXT-NEXT: ) + +;; CHECK-BIN: (type $0 (func)) + +;; CHECK-BIN: (export "f" (func $0)) + +;; CHECK-BIN: (export "g" (func $1)) + +;; CHECK-BIN: ;;@ a:1:1 +;; CHECK-BIN-NEXT: (func $0 +;; CHECK-BIN-NEXT: ;;@ a:2:1 +;; CHECK-BIN-NEXT: (nop) +;; CHECK-BIN-NEXT: ;;@ a:3:1 +;; CHECK-BIN-NEXT: ) + +;; CHECK-BIN: ;;@ b:1:2 +;; CHECK-BIN-NEXT: (func $1 +;; CHECK-BIN-NEXT: ;;@ b:2:2 +;; CHECK-BIN-NEXT: (nop) +;; CHECK-BIN-NEXT: ;;@ b:3:2 +;; CHECK-BIN-NEXT: ) diff --git a/test/lit/merge/sourcemap.wat.second b/test/lit/merge/sourcemap.wat.second new file mode 100644 index 00000000000..0ea7c75fa03 --- /dev/null +++ b/test/lit/merge/sourcemap.wat.second @@ -0,0 +1,8 @@ +(module + ;;@ b:1:2 + (func (export "g") + ;;@ b:2:2 + (nop) + ;;@ b:3:2 + ) +) diff --git a/test/lit/metadce/sourcemap.wat b/test/lit/metadce/sourcemap.wat new file mode 100644 index 00000000000..bf695fd2139 --- /dev/null +++ b/test/lit/metadce/sourcemap.wat 
@@ -0,0 +1,26 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-metadce %s --graph-file %s.json -S -o - | filecheck %s +;; RUN: wasm-as %s -o %t.wasm --source-map %t.map +;; RUN: wasm-metadce %t.wasm --input-source-map %t.map --graph-file %s.json -o %t.out.wasm --output-source-map %t.out.map +;; RUN: wasm-dis %t.out.wasm --source-map %t.out.map -o - | filecheck %s + +;; Test that sourcemap information is preserved + +(module + ;;@ a:1:1 + (func (export "f") + ;;@ a:2:1 + (nop) + ;;@ a:3:1 + ) +) +;; CHECK: (type $0 (func)) + +;; CHECK: (export "f" (func $0)) + +;; CHECK: ;;@ a:1:1 +;; CHECK-NEXT: (func $0 +;; CHECK-NEXT: ;;@ a:2:1 +;; CHECK-NEXT: (nop) +;; CHECK-NEXT: ;;@ a:3:1 +;; CHECK-NEXT: ) diff --git a/test/lit/metadce/sourcemap.wat.json b/test/lit/metadce/sourcemap.wat.json new file mode 100644 index 00000000000..4b2028140e7 --- /dev/null +++ b/test/lit/metadce/sourcemap.wat.json @@ -0,0 +1,13 @@ +[ + { + "name": "root", + "reaches": [ + "f" + ], + "root": true + }, + { + "name": "f", + "export": "f" + } +]