From ec75c3a962078b8bf034a874b350c1d977214217 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 24 Oct 2023 14:23:39 -0400 Subject: [PATCH] Compile with WASI --- .github/workflows/javascript-bindings.yml | 48 ++++ .gitignore | 4 + Makefile | 6 + docs/configuration.md | 2 + javascript/package.json | 13 + javascript/src/index.js | 57 ++++ javascript/test.js | 78 ++++++ rakelib/check_manifest.rake | 1 + templates/javascript/src/deserialize.js.erb | 294 ++++++++++++++++++++ templates/javascript/src/nodes.js.erb | 109 ++++++++ templates/src/serialize.c.erb | 4 +- templates/template.rb | 2 + 12 files changed, 616 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/javascript-bindings.yml create mode 100644 javascript/package.json create mode 100644 javascript/src/index.js create mode 100644 javascript/test.js create mode 100644 templates/javascript/src/deserialize.js.erb create mode 100644 templates/javascript/src/nodes.js.erb diff --git a/.github/workflows/javascript-bindings.yml b/.github/workflows/javascript-bindings.yml new file mode 100644 index 00000000000..c381b4d5c6a --- /dev/null +++ b/.github/workflows/javascript-bindings.yml @@ -0,0 +1,48 @@ +name: JavaScript Bindings + +on: + push: + paths: + - ".github/workflows/javascript-bindings.yml" + - "include/" + - "src/" + - "*akefile*" + branches: + - main + pull_request: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: head + bundler-cache: true + + - name: rake templates + run: bundle exec rake templates + + - name: Set up WASI-SDK + run: | + wget https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-20/wasi-sdk-20.0-linux.tar.gz + tar xvf wasi-sdk-20.0-linux.tar.gz + + - name: Build the project + run: make wasm WASI_SDK_PATH=$(pwd)/wasi-sdk-20.0 + + - uses: actions/setup-node@v3 + with: + node-version: 20.x + + - name: Run the tests + run: npm test + working-directory: 
javascript + + - uses: actions/upload-artifact@v3 + with: + name: prism.wasm + path: javascript/src/prism.wasm diff --git a/.gitignore b/.gitignore index a98181371e6..79331e1c4cd 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,10 @@ a.out /ext/prism/api_node.c /fuzz/output/ /include/prism/ast.h +/javascript/src/deserialize.js +/javascript/src/nodes.js +/javascript/src/prism.wasm +/javascript/types/ /java/org/prism/AbstractNodeVisitor.java /java/org/prism/Loader.java /java/org/prism/Nodes.java diff --git a/Makefile b/Makefile index 8e1c4b20d47..dd08ce3b616 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ SOEXT := $(shell ruby -e 'puts RbConfig::CONFIG["SOEXT"]') CPPFLAGS := -Iinclude CFLAGS := -g -O2 -std=c99 -Wall -Werror -Wextra -Wpedantic -Wundef -Wconversion -fPIC -fvisibility=hidden CC := cc +WASI_SDK_PATH := /opt/wasi-sdk HEADERS := $(shell find include -name '*.h') SOURCES := $(shell find src -name '*.c') @@ -23,6 +24,7 @@ all: shared static shared: build/librubyparser.$(SOEXT) static: build/librubyparser.a +wasm: javascript/src/prism.wasm build/librubyparser.$(SOEXT): $(SHARED_OBJECTS) $(ECHO) "linking $@" @@ -32,6 +34,10 @@ build/librubyparser.a: $(STATIC_OBJECTS) $(ECHO) "building $@" $(Q) $(AR) $(ARFLAGS) $@ $(STATIC_OBJECTS) $(Q1:0=>/dev/null) +javascript/src/prism.wasm: Makefile $(SOURCES) $(HEADERS) + $(ECHO) "building $@" + $(Q) $(WASI_SDK_PATH)/bin/clang --sysroot=$(WASI_SDK_PATH)/share/wasi-sysroot/ $(DEBUG_FLAGS) -DPRISM_EXPORT_SYMBOLS -D_WASI_EMULATED_MMAN -lwasi-emulated-mman $(CPPFLAGS) $(CFLAGS) -Wl,--export-all -Wl,--no-entry -mexec-model=reactor -o $@ $(SOURCES) + build/shared/%.o: src/%.c Makefile $(HEADERS) $(ECHO) "compiling $@" $(Q) mkdir -p $(@D) diff --git a/docs/configuration.md b/docs/configuration.md index fe072b8dfa1..5b5744fc56b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -4,6 +4,8 @@ A lot of code in prism's repository is templated from a single configuration fil * `ext/prism/api_node.c` - for 
defining how to build Ruby objects for the nodes out of C structs * `include/prism/ast.h` - for defining the C structs that represent the nodes +* `javascript/src/deserialize.js` - for defining how to deserialize the nodes in JavaScript +* `javascript/src/nodes.js` - for defining the nodes in JavaScript * `java/org/prism/AbstractNodeVisitor.java` - for defining the visitor interface for the nodes in Java * `java/org/prism/Loader.java` - for defining how to deserialize the nodes in Java * `java/org/prism/Nodes.java` - for defining the nodes in Java diff --git a/javascript/package.json b/javascript/package.json new file mode 100644 index 00000000000..4f12a698f13 --- /dev/null +++ b/javascript/package.json @@ -0,0 +1,13 @@ +{ + "name": "ruby-prism", + "version": "0.15.1", + "description": "Prism Ruby parser", + "type": "module", + "main": "src/index.js", + "scripts": { + "test": "node test.js", + "type": "tsc --allowJs -d --emitDeclarationOnly --outDir types src/index.js" + }, + "author": "Shopify ", + "license": "MIT" +} diff --git a/javascript/src/index.js b/javascript/src/index.js new file mode 100644 index 00000000000..ee49b5103f4 --- /dev/null +++ b/javascript/src/index.js @@ -0,0 +1,57 @@ +import { readFile } from "node:fs/promises"; +import url from "node:url"; +import { WASI } from "node:wasi"; +import { ParseResult, deserialize } from "./deserialize.js"; + +/** + * The exports of the Prism wasm module. + * + * @type {WebAssembly.Exports | null} + */ +let prism = null; + +/** + * Load the Prism wasm module. + * + * @returns {Promise} + */ +async function loadPrism() { + const bytes = await readFile(url.fileURLToPath(new URL("prism.wasm", import.meta.url))); + const wasm = await WebAssembly.compile(bytes); + + const wasi = new WASI({ version: "preview1" }); + const instance = await WebAssembly.instantiate(wasm, wasi.getImportObject()); + wasi.initialize(instance); + + return instance.exports; +} + +/** + * Parse the given source code. 
+ *
+ * @param {string} source - the source code to parse
+ * @returns {Promise<ParseResult>} the deserialized result of the parse
+ */
+export async function parse(source) {
+  if (prism === null) {
+    prism = await loadPrism();
+  }
+
+  const sourceArray = new TextEncoder().encode(source);
+  const sourcePointer = prism.calloc(1, sourceArray.length);
+  const bufferPointer = prism.calloc(prism.pm_buffer_sizeof(), 1);
+  prism.pm_buffer_init(bufferPointer);
+
+  try {
+    // Take views only after every allocation: growing wasm memory detaches older views.
+    new Uint8Array(prism.memory.buffer, sourcePointer, sourceArray.length).set(sourceArray);
+    prism.pm_parse_serialize(sourcePointer, sourceArray.length, bufferPointer);
+
+    const serializedView = new Uint8Array(prism.memory.buffer, prism.pm_buffer_value(bufferPointer), prism.pm_buffer_length(bufferPointer));
+    return deserialize(sourceArray, serializedView); // may throw; the finally block still frees
+  } finally {
+    prism.pm_buffer_free(bufferPointer);
+    prism.free(sourcePointer);
+    prism.free(bufferPointer);
+  }
+}
diff --git a/javascript/test.js b/javascript/test.js
new file mode 100644
index 00000000000..e6d2e7f7be3
--- /dev/null
+++ b/javascript/test.js
@@ -0,0 +1,78 @@
+import test from "node:test";
+import assert from "node:assert";
+import { parse } from "./src/index.js";
+import * as nodes from "./src/nodes.js";
+
+test("node", async () => {
+  const result = await parse("foo");
+  assert(result.value instanceof nodes.ProgramNode);
+});
+
+test("node? present", async () => {
+  const result = await parse("foo.bar");
+  assert(result.value.statements.body[0].receiver instanceof nodes.CallNode);
+});
+
+test("node? 
absent", async () => { + const result = await parse("foo"); + assert(result.value.statements.body[0].receiver === null); +}); + +test("node[]", async () => { + const result = await parse("foo.bar"); + assert(result.value.statements.body instanceof Array); +}); + +test("string", async () => { + const result = await parse('"foo"'); + assert(result.value.statements.body[0].unescaped === "foo"); +}); + +test("constant", async () => { + const result = await parse("foo = 1"); + assert(result.value.locals[0] === "foo"); +}); + +test("constant? present", async () => { + const result = await parse("def foo(*bar); end"); + assert(result.value.statements.body[0].parameters.rest.name === "bar"); +}); + +test("constant? absent", async () => { + const result = await parse("def foo(*); end"); + assert(result.value.statements.body[0].parameters.rest.name === null); +}); + +test("constant[]", async() => { + const result = await parse("foo = 1"); + assert(result.value.locals instanceof Array); +}); + +test("location", async () => { + const result = await parse("foo = 1"); + assert(typeof result.value.location.startOffset === "number"); +}); + +test("location? present", async () => { + const result = await parse("def foo = bar"); + assert(result.value.statements.body[0].equalLoc !== null); +}); + +test("location? 
absent", async () => { + const result = await parse("def foo; bar; end"); + assert(result.value.statements.body[0].equalLoc === null); +}); + +test("uint32", async () => { + const result = await parse("foo = 1"); + assert(result.value.statements.body[0].depth === 0); +}); + +test("flags", async () => { + const result = await parse("/foo/mi"); + const regexp = result.value.statements.body[0]; + + assert(regexp.isIgnoreCase()); + assert(regexp.isMultiLine()); + assert(!regexp.isExtended()); +}); diff --git a/rakelib/check_manifest.rake b/rakelib/check_manifest.rake index 1f62c0957fb..531e4c6894b 100644 --- a/rakelib/check_manifest.rake +++ b/rakelib/check_manifest.rake @@ -15,6 +15,7 @@ task :check_manifest => [:templates] do build doc fuzz + javascript java pkg rakelib diff --git a/templates/javascript/src/deserialize.js.erb b/templates/javascript/src/deserialize.js.erb new file mode 100644 index 00000000000..5dbfcf78ed3 --- /dev/null +++ b/templates/javascript/src/deserialize.js.erb @@ -0,0 +1,294 @@ +import * as nodes from "./nodes.js"; + +const MAJOR_VERSION = 0; +const MINOR_VERSION = 15; +const PATCH_VERSION = 1; + +class SerializationBuffer { + constructor(source, array) { + this.source = source; + this.array = array; + this.index = 0; + } + + readByte() { + const result = this.array[this.index]; + this.index += 1; + return result; + } + + readBytes(length) { + const result = this.array.slice(this.index, this.index + length); + this.index += length; + return result; + } + + readString(length) { + return new TextDecoder().decode(this.readBytes(length)); + } + + // Read a 32-bit unsigned integer in little-endian format. 
+  readUint32() {
+    const result = this.scanUint32(this.index);
+    this.index += 4; // a uint32 always occupies four bytes
+    return result;
+  }
+
+  scanUint32(offset) {
+    const bytes = this.array.slice(offset, offset + 4);
+    return (bytes[0] | (bytes[1] << 8) | (bytes[2] << 16) | (bytes[3] << 24)) >>> 0; // >>> 0 keeps the value unsigned
+  }
+
+  readVarInt() {
+    let result = 0;
+    let shift = 0;
+
+    while (true) {
+      const byte = this.readByte();
+      result += (byte & 0x7f) << shift; // low 7 bits carry the payload
+      shift += 7;
+
+      if ((byte & 0x80) === 0) { // high bit clear marks the last byte
+        break;
+      }
+    }
+
+    return result;
+  }
+
+  readLocation() {
+    return { startOffset: this.readVarInt(), length: this.readVarInt() };
+  }
+
+  readOptionalLocationField() {
+    if (this.readByte() !== 0) { // a non-zero marker byte means a location follows
+      return this.readLocation();
+    } else {
+      return null;
+    }
+  }
+
+  readStringField() {
+    const type = this.readByte();
+
+    switch (type) {
+      case 1: { // the string is a slice of the original source
+        const startOffset = this.readVarInt();
+        const length = this.readVarInt();
+        return new TextDecoder().decode(this.source.slice(startOffset, startOffset + length));
+      }
+      case 2: // the string bytes are embedded in the serialized data
+        return this.readString(this.readVarInt());
+      default:
+        throw new Error(`Unknown serialized string type: ${type}`);
+    }
+  }
+
+  scanConstant(constantPoolOffset, constantIndex) {
+    const offset = constantPoolOffset + constantIndex * 8; // each pool entry is two uint32s: offset, length
+    const rawOffset = this.scanUint32(offset);
+    const length = this.scanUint32(offset + 4);
+
+    if (rawOffset & 0x80000000) { // high bit set: the bytes live in the serialized array, not the source
+      const startOffset = rawOffset & 0x7fffffff; // strip the marker bit to get the real offset
+      return new TextDecoder().decode(this.array.slice(startOffset, startOffset + length));
+    } else {
+      return new TextDecoder().decode(this.source.slice(rawOffset, rawOffset + length));
+    }
+  }
+}
+
+/**
+ * A location in the source code.
+ *
+ * @typedef {{ startOffset: number, length: number }} Location
+ */
+
+/**
+ * A comment in the source code.
+ *
+ * @typedef {{ type: number, location: Location }} Comment
+ */
+
+/**
+ * A magic comment in the source code.
+ *
+ * @typedef {{ startLocation: Location, endLocation: Location }} MagicComment
+ */
+
+/**
+ * An error in the source code.
+ * + * @typedef {{ message: string, location: Location }} ParseError + */ + +/** + * A warning in the source code. + * + * @typedef {{ message: string, location: Location }} ParseWarning + */ + +/** + * The result of parsing the source code. + * + * @typedef {{ value: ProgramNode, comments: Comment[], magicComments: MagicComment[], errors: ParseError[], warnings: ParseWarning[] }} ParseResult + */ + +/** + * The result of calling parse. + */ +export class ParseResult { + /** + * @type {nodes.ProgramNode} + */ + value; + + /** + * @type {Comment[]} + */ + comments; + + /** + * @type {MagicComment[]} + */ + magicComments; + + /** + * @type {ParseError[]} + */ + errors; + + /** + * @type {ParseWarning[]} + */ + warnings; + + /** + * @param {nodes.ProgramNode} value + * @param {Comment[]} comments + * @param {MagicComment[]} magicComments + * @param {ParseError[]} errors + * @param {ParseWarning[]} warnings + */ + constructor(value, comments, magicComments, errors, warnings) { + this.value = value; + this.comments = comments; + this.magicComments = magicComments; + this.errors = errors; + this.warnings = warnings; + } +} + +/** + * Accept two Uint8Arrays, one for the source and one for the serialized format. + * Return the AST corresponding to the serialized form. + * + * @param {Uint8Array} source + * @param {Uint8Array} array + * @returns {ParseResult} + * @throws {Error} + */ +export function deserialize(source, array) { + const buffer = new SerializationBuffer(source, array); + + if (buffer.readString(5) !== "PRISM") { + throw new Error("Invalid serialization"); + } + + if ((buffer.readByte() != MAJOR_VERSION) || (buffer.readByte() != MINOR_VERSION) || (buffer.readByte() != PATCH_VERSION)) { + throw new Error("Invalid serialization"); + } + + if (buffer.readByte() != 0) { + throw new Error("Invalid serialization (location fields must be included but are not)"); + } + + // Skip past the encoding, it means nothing to us in JavaScript. 
+ buffer.readString(buffer.readVarInt()); + + const comments = Array.from({ length: buffer.readVarInt() }, () => ({ + type: buffer.readVarInt(), + location: buffer.readLocation() + })); + + const magicComments = Array.from({ length: buffer.readVarInt() }, () => ({ + startLocation: buffer.readLocation(), + endLocation: buffer.readLocation() + })); + + const errors = Array.from({ length: buffer.readVarInt() }, () => ({ + message: buffer.readString(buffer.readVarInt()), + location: buffer.readLocation() + })); + + const warnings = Array.from({ length: buffer.readVarInt() }, () => ({ + message: buffer.readString(buffer.readVarInt()), + location: buffer.readLocation() + })); + + const constantPoolOffset = buffer.readUint32(); + const constants = Array.from({ length: buffer.readVarInt() }, () => null); + + return new ParseResult(readRequiredNode(), comments, magicComments, errors, warnings); + + function readRequiredNode() { + const type = buffer.readByte(); + const location = buffer.readLocation(); + + switch (type) { + <%- nodes.each.with_index(1) do |node, index| -%> + case <%= index %>: + <%- if node.needs_serialized_length? 
-%> + buffer.readUint32(); + <%- end -%> + return new nodes.<%= node.name %>(<%= (node.fields.map { |field| + case field + when Prism::NodeField then "readRequiredNode()" + when Prism::OptionalNodeField then "readOptionalNode()" + when Prism::StringField then "buffer.readStringField()" + when Prism::NodeListField then "Array.from({ length: buffer.readVarInt() }, readRequiredNode)" + when Prism::ConstantField then "readRequiredConstant()" + when Prism::OptionalConstantField then "readOptionalConstant()" + when Prism::ConstantListField then "Array.from({ length: buffer.readVarInt() }, readRequiredConstant)" + when Prism::LocationField then "buffer.readLocation()" + when Prism::OptionalLocationField then "buffer.readOptionalLocationField()" + when Prism::UInt32Field, Prism::FlagsField then "buffer.readVarInt()" + else raise + end + } + ["location"]).join(", ") -%>); + <%- end -%> + default: + throw new Error(`Unknown node type: ${type}`); + } + } + + function readOptionalNode() { + if (buffer.readByte() != 0) { + buffer.index -= 1; + return readRequiredNode(); + } else { + return null; + } + } + + function scanConstant(constantIndex) { + if (constants[constantIndex] === null) { + constants[constantIndex] = buffer.scanConstant(constantPoolOffset, constantIndex); + } + + return constants[constantIndex]; + } + + function readRequiredConstant() { + return scanConstant(buffer.readVarInt() - 1); + } + + function readOptionalConstant() { + const index = buffer.readVarInt(); + if (index === 0) { + return null; + } else { + return scanConstant(index - 1); + } + } +} diff --git a/templates/javascript/src/nodes.js.erb b/templates/javascript/src/nodes.js.erb new file mode 100644 index 00000000000..bca1fe241ae --- /dev/null +++ b/templates/javascript/src/nodes.js.erb @@ -0,0 +1,109 @@ +<%- +def prop(field) + field.name == "arguments" ? 
"arguments_" : field.name.gsub(/_([a-z])/) { $1.upcase } +end + +def jstype(field) + case field + when Prism::NodeField then field.ruby_type + when Prism::OptionalNodeField then "#{field.ruby_type} | null" + when Prism::NodeListField then "Node[]" + when Prism::StringField then "string" + when Prism::ConstantField then "string" + when Prism::OptionalConstantField then "string | null" + when Prism::ConstantListField then "string[]" + when Prism::LocationField then "Location" + when Prism::OptionalLocationField then "Location | null" + when Prism::UInt32Field then "number" + when Prism::FlagsField then "number" + end +end +-%> +<%- flags.each do |flag| -%> + +const <%= flag.name %> = { +<%- flag.values.each_with_index do |value, index| -%> + <%= value.name %>: 1 << <%= index %>, +<%- end -%> +}; +<%- end -%> + +/** + * A location in the source code. + * + * @typedef {{ startOffset: number, length: number }} Location + */ + +/** + * A generic node in the tree. + * + * @typedef {(<%= nodes.map(&:name).join("|") %>)} Node + */ +<%- nodes.each do |node| -%> + +/** +<%= "#{node.comment.split("\n").map { |line| line.empty? ? " *" : " * #{line}" }.join("\n")}" %> + */ +export class <%= node.name -%> { + <%- node.fields.each do |field| -%> + /** + * @type <%= jstype(field) %> + */ + <%= field.is_a?(Prism::FlagsField) ? "#flags" : prop(field) %>; + + <%- end -%> + /** + * @type {Location} + */ + location; + + /** + * Construct a new <%= node.name %>. 
+   *
+  <%- node.fields.each do |field| -%>
+   * @param {<%= jstype(field) %>} <%= prop(field) %>
+  <%- end -%>
+   * @param {Location} location
+   */
+  constructor(<%= (node.fields.map { |field| prop(field) } + ["location"]).join(", ") %>) {
+    <%- node.fields.each do |field| -%>
+    <%- if field.is_a?(Prism::FlagsField) -%>
+    this.#flags = <%= prop(field) %>;
+    <%- else -%>
+    this.<%= prop(field) %> = <%= prop(field) %>;
+    <%- end -%>
+    <%- end -%>
+    this.location = location;
+  }
+  <%- if (flags_field = node.fields.find { |field| field.is_a?(Prism::FlagsField) }) -%>
+  <%- flag = flags.find { |flag| flag.name == flags_field.kind }.tap { |flag| raise "Expected to find #{flags_field.kind}" unless flag } -%>
+  <%- flag.values.each do |value| -%>
+
+  /**
+   * True if this node has the <%= value.name %> flag.
+   *
+   * @returns {boolean}
+   */
+  is<%= value.camelcase %>() {
+    return (this.#flags & <%= flag.name %>.<%= value.name %>) !== 0;
+  }
+  <%- end -%>
+  <%- end -%>
+
+  toJSON() {
+    return {
+      type: "<%= node.name %>",
+      <%- node.fields.each do |field| -%>
+      <%- if field.is_a?(Prism::FlagsField) -%>
+      flags: this.#flags,
+      <%- elsif field.name == "arguments" -%>
+      arguments: this.<%= prop(field) %>,
+      <%- else -%>
+      <%= prop(field) %>: this.<%= prop(field) %>,
+      <%- end -%>
+      <%- end -%>
+      location: this.location
+    };
+  }
+}
+<%- end -%>
diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb
index e985e72ec38..9528496c7df 100644
--- a/templates/src/serialize.c.erb
+++ b/templates/src/serialize.c.erb
@@ -150,11 +150,11 @@ static void
 pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
     // serialize key location
     pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start));
-    pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_length));
+    pm_buffer_append_u32(buffer, pm_sizet_to_u32(magic_comment->key_length));
 
     // serialize value location
     pm_buffer_append_u32(buffer, 
pm_ptrdifft_to_u32(magic_comment->value_start - parser->start)); - pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->value_length)); + pm_buffer_append_u32(buffer, pm_sizet_to_u32(magic_comment->value_length)); } static void diff --git a/templates/template.rb b/templates/template.rb index 79ad84c732e..4435333900c 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -361,6 +361,8 @@ def locals TEMPLATES = [ "ext/prism/api_node.c", "include/prism/ast.h", + "javascript/src/deserialize.js", + "javascript/src/nodes.js", "java/org/prism/Loader.java", "java/org/prism/Nodes.java", "java/org/prism/AbstractNodeVisitor.java",