diff --git a/.github/workflows/rust-bindings.yml b/.github/workflows/rust-bindings.yml new file mode 100644 index 00000000000..80fa75bce50 --- /dev/null +++ b/.github/workflows/rust-bindings.yml @@ -0,0 +1,101 @@ +--- +name: Rust Bindings + +on: + push: + paths: + - ".github/workflows/rust-bindings.yml" + - "include/" + - "src/" + - "rust/" + - "*akefile*" + branches: + - main + pull_request: + +env: + RUSTFLAGS: "-D warnings" + +jobs: + cargo-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: head + bundler-cache: true + - uses: actions/cache@v3 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.toml') }} + restore-keys: | + ${{ runner.os }}-cargo-${{ hashFiles('Cargo.toml') }} + ${{ runner.os }}-cargo + - name: rake compile + run: bundle exec rake compile + - name: cargo test + working-directory: rust/yarp-sys + run: cargo test + + cargo-clippy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: head + bundler-cache: true + - uses: actions/cache@v3 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.toml') }} + restore-keys: | + ${{ runner.os }}-cargo-${{ hashFiles('Cargo.toml') }} + ${{ runner.os }}-cargo + - name: rake compile + run: bundle exec rake compile + - name: cargo clippy + working-directory: rust/yarp-sys + run: cargo clippy --tests -- -W "clippy::pedantic" + + sanitizer-test: + name: Test with -Zsanitizer=${{ matrix.sanitizer }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + sanitizer: [address, leak] + steps: + - uses: actions/checkout@v3 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: head + bundler-cache: true + - name: rake compile + run: bundle exec rake compile + - uses: 
dtolnay/rust-toolchain@nightly + with: + target: "x86_64-unknown-linux-gnu" + components: "rust-src" + - name: Test with sanitizer + env: + RUSTFLAGS: -Zsanitizer=${{ matrix.sanitizer }} + RUSTDOCFLAGS: -Zsanitizer=${{ matrix.sanitizer }} + # only needed by asan + ASAN_OPTIONS: detect_stack_use_after_return=1 + # Asan's leak detection occasionally complains + # about some small leaks if backtraces are captured, + # so ensure they're not + RUST_BACKTRACE: 0 + working-directory: rust/yarp-sys + run: cargo test -Zbuild-std --verbose --target=x86_64-unknown-linux-gnu diff --git a/README.md b/README.md index 884bd70297f..dfb1f29cdc4 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,8 @@ The repository contains the infrastructure for both a shared library (librubypar │   ├── yarp Ruby library files │   └── yarp.rb main entrypoint for the Ruby library ├── rakelib various Rake tasks for the project +├── rust +│   └── yarp-sys FFI binding for Rust ├── src │   ├── enc various encoding files │   ├── util various utility files @@ -45,7 +47,7 @@ The repository contains the infrastructure for both a shared library (librubypar To compile the shared library, you will need: * A C99 compiler -* autotools (autoconf, automake, libtool) +* autotools (autoconf, automake, libtool) * make * Ruby 3.3.0-preview1 or later @@ -84,3 +86,4 @@ See the [CONTRIBUTING.md](CONTRIBUTING.md) file for more information. 
We additio * [Ripper](docs/ripper.md) * [Serialization](docs/serialization.md) * [Testing](docs/testing.md) + diff --git a/rakelib/check_manifest.rake b/rakelib/check_manifest.rake index 94928c452ea..4db9327897f 100644 --- a/rakelib/check_manifest.rake +++ b/rakelib/check_manifest.rake @@ -16,6 +16,7 @@ task :check_manifest => [:templates] do java pkg rakelib + rust templates test tmp diff --git a/rust/.gitignore b/rust/.gitignore new file mode 100644 index 00000000000..b83d22266ac --- /dev/null +++ b/rust/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/rust/yarp-sys/.gitignore b/rust/yarp-sys/.gitignore new file mode 100644 index 00000000000..7f88f54acde --- /dev/null +++ b/rust/yarp-sys/.gitignore @@ -0,0 +1,9 @@ +# will have compiled files and executables +debug/ +target/ + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb diff --git a/rust/yarp-sys/Cargo.lock b/rust/yarp-sys/Cargo.lock new file mode 100644 index 00000000000..2fa042d4da0 --- /dev/null +++ b/rust/yarp-sys/Cargo.lock @@ -0,0 +1,273 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8f9420f797f2d9e935edf629310eb938a0d839f984e25327f3c7eed22300c" +dependencies = [ + "memchr", +] + +[[package]] +name = "bindgen" +version = "0.66.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b84e06fc203107bfbad243f4aba2af864eb7db3b1cf46ea0a023b0b433d2a7" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which", +] + +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clang-sys" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "lazy_static" +version = "1.4.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "log" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "prettyplease" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "shlex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" + +[[package]] +name = "syn" +version = "2.0.28" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" + +[[package]] +name = "which" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +dependencies = [ + "either", + "libc", + "once_cell", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "yarp-sys" +version = "0.1.0" +dependencies = [ + "bindgen", +] diff --git a/rust/yarp-sys/Cargo.toml b/rust/yarp-sys/Cargo.toml new file mode 100644 index 00000000000..91bfe0ec935 --- /dev/null +++ b/rust/yarp-sys/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "yarp-sys" +version = "0.1.0" +edition = "2021" +license-file = "../../LICENSE.md" +repository = "https://github.com/ruby/yarp" +description = "Rust bindings to Ruby's YARP parsing library" +links = "yarp" +authors = [ + "Steve Loveless ", + "Ian Ker-Seymer ", +] +keywords = ["ruby", "parser", "ffi", "bindings"] +categories = 
[ + "api-bindings", + "development-tools::ffi", + "external-ffi-bindings", + "parsing", +] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[build-dependencies] +bindgen = "0.66" diff --git a/rust/yarp-sys/README.md b/rust/yarp-sys/README.md new file mode 100644 index 00000000000..b72f7dd404f --- /dev/null +++ b/rust/yarp-sys/README.md @@ -0,0 +1,40 @@ +# yarp-sys + +Rust bindings to [ruby/yarp](https://github.com/ruby/yarp)'s C API. + +## Examples + +Currently the best examples are found in the integration tests (in `tests/`). + +## Documentation + +Since this crate has not been released, docs are not yet online anywhere. You can generate them, +however, from this directory in this repo by running `cargo doc`, then opening +`target/doc/yarp_sys/index.html` in your browser. (You could, instead, combine those two steps by +doing `cargo doc --open`!) + +## Development + +### Dependencies + +In addition to the Ruby YARP dependencies, you shouldn't need anything else besides Rust. + +### Updating bindings + +`build.rs` (which gets called as part of running `cargo build`, `cargo test`, etc.) is where we tell +`bindgen` which types, functions, etc. we want it to generate for us. It's smart enough to know +to generate dependencies for items we specify in there (e.g. `yp_parser_t` has fields of type +`yp_token_t`, but we don't need to tell `bindgen` about `yp_token_t`--it'll figure it out and +generate bindings for that type too). + +If you want to generate new bindings, update `build.rs` accordingly, then run `cargo doc` and check +the docs; that should tell you if `bindgen` generated all the things you need or not. + +### Testing + +Since almost all of the code is generated by the well-tested +[`bindgen`](https://github.com/rust-lang/rust-bindgen) crate, we only have some cursory integration +tests in `tests/`, really just validating types and functions got generated appropriately. 
(They +also give some hints about how to use the API from Rust!) To run the tests, run `cargo test`. + +Any new publicly exposed C API additions should get a test or two. diff --git a/rust/yarp-sys/build.rs b/rust/yarp-sys/build.rs new file mode 100644 index 00000000000..96e0da545d4 --- /dev/null +++ b/rust/yarp-sys/build.rs @@ -0,0 +1,146 @@ +use std::path::{Path, PathBuf}; + +fn main() { + let ruby_build_path = ruby_build_path(); + let ruby_include_path = ruby_include_path(); + + // Tell cargo/rustc that we want to link against `librubyparser.a`. + println!("cargo:rustc-link-lib=static=rubyparser"); + + // Add `[root]/build/` to the search paths, so it can find `librubyparser.a`. + println!( + "cargo:rustc-link-search=native={}", + ruby_build_path.to_str().unwrap() + ); + + // This is where the magic happens. + let bindings = generate_bindings(&ruby_include_path); + + // Write the bindings to file. + write_bindngs(&bindings); +} + +/// Gets the path to project files (`librubyparser*`) at `[root]/build/`. +/// +fn ruby_build_path() -> PathBuf { + cargo_manifest_path() + .join("../../build/") + .canonicalize() + .unwrap() +} + +/// Gets the path to the header files that `bindgen` needs for doing code generation. +/// +fn ruby_include_path() -> PathBuf { + cargo_manifest_path() + .join("../../include/") + .canonicalize() + .unwrap() +} + +fn cargo_manifest_path() -> PathBuf { + PathBuf::from(std::env::var_os("CARGO_MANIFEST_DIR").unwrap()) +} + +/// Uses `bindgen` to generate bindings to the C API. Update this to allow new types/functions/etc +/// to be generated (it's allowlisted to only expose functions that'd make sense for public +/// consumption). +/// +/// This method only generates code in memory here--it doesn't write it to file. 
+/// +fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings { + bindgen::Builder::default() + .derive_default(true) + .generate_block(true) + .generate_comments(true) + .header(ruby_include_path.join("yarp/defines.h").to_str().unwrap()) + .header(ruby_include_path.join("yarp.h").to_str().unwrap()) + .clang_arg(format!("-I{}", ruby_include_path.to_str().unwrap())) + .clang_arg("-fparse-all-comments") + .impl_debug(true) + .layout_tests(true) + .merge_extern_blocks(true) + .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .prepend_enum_name(false) + .size_t_is_usize(true) + .sort_semantically(true) + // Structs + .allowlist_type(r#"^yp_\w+_node_t"#) + .allowlist_type("yp_buffer_t") + .allowlist_type("yp_comment_t") + .allowlist_type("yp_diagnostic_t") + .allowlist_type("yp_encoding_changed_callback_t") + .allowlist_type("yp_encoding_decode_callback_t") + .allowlist_type("yp_memsize_t") + .allowlist_type("yp_list_t") + .allowlist_type("yp_node_t") + .allowlist_type("yp_node_type") + .allowlist_type("yp_parser_t") + .allowlist_type("yp_pack_size") + // TODO: Commenting this because I can't figure out how to get bindgen to generate the + // inner-unions nicely. Hand-rolling this in src/lib.rs for now. 
+ // .allowlist_type("yp_string_t") + .blocklist_type(r#"^yp_string_t\S*"#) + .allowlist_type("yp_string_list_t") + .allowlist_type("yp_token_type_t") + // Enums + .rustified_non_exhaustive_enum("yp_comment_type_t") + .rustified_non_exhaustive_enum("yp_context_t") + .rustified_non_exhaustive_enum("yp_heredoc_indent_t") + .rustified_non_exhaustive_enum("yp_heredoc_quote_t") + .rustified_non_exhaustive_enum("yp_lex_mode_t") + .rustified_non_exhaustive_enum("yp_lex_state_t") + .rustified_non_exhaustive_enum("yp_node_type") + .rustified_non_exhaustive_enum("yp_pack_encoding") + .rustified_non_exhaustive_enum("yp_pack_endian") + .rustified_non_exhaustive_enum("yp_pack_length_type") + .rustified_non_exhaustive_enum("yp_pack_result") + .rustified_non_exhaustive_enum("yp_pack_signed") + .rustified_non_exhaustive_enum("yp_pack_size") + .rustified_non_exhaustive_enum("yp_pack_type") + .rustified_non_exhaustive_enum("yp_pack_variant") + .rustified_non_exhaustive_enum("yp_token_type") + .rustified_non_exhaustive_enum("yp_unescape_type_t") + // Functions + .allowlist_function("yp_buffer_init") + .allowlist_function("yp_buffer_free") + .allowlist_function("yp_node_destroy") + .allowlist_function("yp_list_empty_p") + .allowlist_function("yp_list_free") + .allowlist_function("yp_list_init") + .allowlist_function("yp_node_memsize") + .allowlist_function("yp_pack_parse") + .allowlist_function("yp_parse") + .allowlist_function("yp_parse_serialize") + .allowlist_function("yp_parser_free") + .allowlist_function("yp_parser_init") + .allowlist_function("yp_parser_register_encoding_changed_callback") + .allowlist_function("yp_parser_register_encoding_decode_callback") + .allowlist_function("yp_prettyprint") + .allowlist_function("yp_regexp_named_capture_group_names") + .allowlist_function("yp_serialize") + .allowlist_function("yp_size_to_native") + .allowlist_function("yp_string_free") + .allowlist_function("yp_string_length") + .allowlist_function("yp_string_source") + 
.allowlist_function("yp_string_list_init") + .allowlist_function("yp_string_list_free") + .allowlist_function("yp_token_type_to_str") + .allowlist_function("yp_unescape_calculate_difference") + .allowlist_function("yp_unescape_manipulate_string") + .allowlist_function("yp_version") + // Vars + .allowlist_var(r#"^yp_encoding\S+"#) + .generate() + .expect("Unable to generate yarp bindings") +} + +/// Write the bindings to the `$OUT_DIR/bindings.rs` file. We'll pull these into the actual library +/// in `src/lib.rs`. +fn write_bindngs(bindings: &bindgen::Bindings) { + let out_path = PathBuf::from(std::env::var_os("OUT_DIR").unwrap()); + + bindings + .write_to_file(out_path.join("bindings.rs")) + .expect("Couldn't write bindings!"); +} diff --git a/rust/yarp-sys/src/lib.rs b/rust/yarp-sys/src/lib.rs new file mode 100644 index 00000000000..d0d0da82b59 --- /dev/null +++ b/rust/yarp-sys/src/lib.rs @@ -0,0 +1,71 @@ +//! # yarp-sys +//! +//! FFI-bindings for `yarp`. +//! +#![deny(unused_extern_crates)] +#![warn( + box_pointers, + clippy::all, + clippy::nursery, + clippy::pedantic, + future_incompatible, + missing_copy_implementations, + missing_docs, + nonstandard_style, + rust_2018_idioms, + trivial_casts, + trivial_numeric_casts, + unreachable_pub, + unused_qualifications +)] + +// Allowing because we're not manually defining anything that would cause this, and +// the bindgen-generated `bindgen_test_layout_yp_parser()` triggers this. 
+#[allow(clippy::cognitive_complexity)] +// Allowing because we're not manually defining anything that would cause this, and +// the following bindgen-generated functions triggers this: +// - `bindgen_test_layout_yp_call_node()` +// - `bindgen_test_layout_yp_def_node()` +// - `bindgen_test_layout_yp_parser()` +#[allow(clippy::too_many_lines)] +#[allow(missing_copy_implementations)] +#[allow(non_upper_case_globals)] +#[allow(non_camel_case_types)] +#[allow(non_snake_case)] +#[allow(missing_docs)] +mod bindings { + use std::ffi::c_char; + + // In `build.rs`, we use `bindgen` to generate bindings based on C headers and `librubyparser`. Here + // is where we pull in those bindings and make them part of our library. + include!(concat!(env!("OUT_DIR"), "/bindings.rs")); + + pub type yp_parser_t_lex_modes = yp_parser__bindgen_ty_1; + + // ╭──────────────────────────────────────────────────────────────────────────────╮ + // │ Hand-rolling `yp_string_t` things │ + // │ │ + // │ `bindgen` can generate these, but because of the nested `union`s, the │ + // │ types end up with weird names. I'm sure there's a way to fix that, but I │ + // │ haven't been able to. So I'm hand-rolling these for now. 
│ + // ╰──────────────────────────────────────────────────────────────────────────────╯ + #[repr(C)] + #[derive(Clone, Copy, Debug)] + pub struct yp_string_t { + pub type_: yp_string_t_type, + pub source: *const c_char, + pub length: usize, + } + + #[repr(u32)] + #[non_exhaustive] + #[derive(Clone, Copy, Debug)] + pub enum yp_string_t_type { + YP_STRING_SHARED, + YP_STRING_OWNED, + YP_STRING_CONSTANT, + YP_STRING_MAPPED, + } +} + +pub use self::bindings::*; diff --git a/rust/yarp-sys/tests/node_tests.rs b/rust/yarp-sys/tests/node_tests.rs new file mode 100644 index 00000000000..83ef5c81667 --- /dev/null +++ b/rust/yarp-sys/tests/node_tests.rs @@ -0,0 +1,38 @@ +use std::{ffi::CString, mem::MaybeUninit}; + +use yarp_sys::{ + yp_memsize_t, yp_node_destroy, yp_node_memsize, yp_node_type, yp_parse, yp_parser_free, + yp_parser_init, yp_parser_t, +}; + +#[test] +fn node_test() { + let mut parser = MaybeUninit::::uninit(); + let code = CString::new("class Foo; end").unwrap(); + let mut memsize = MaybeUninit::::uninit(); + + unsafe { + yp_parser_init( + parser.as_mut_ptr(), + code.as_ptr(), + code.as_bytes().len(), + std::ptr::null(), + ); + + let parser = parser.assume_init_mut(); + let parsed_node = yp_parse(parser); + + assert_eq!( + (*parsed_node).type_, + yp_node_type::YP_NODE_PROGRAM_NODE as u16 + ); + + yp_node_memsize(parsed_node, memsize.as_mut_ptr()); + let memsize = memsize.assume_init(); + assert_eq!(memsize.memsize, 296); + assert_eq!(memsize.node_count, 4); + + yp_node_destroy(parser, parsed_node); + yp_parser_free(parser); + } +} diff --git a/rust/yarp-sys/tests/pack_tests.rs b/rust/yarp-sys/tests/pack_tests.rs new file mode 100644 index 00000000000..13504241b08 --- /dev/null +++ b/rust/yarp-sys/tests/pack_tests.rs @@ -0,0 +1,57 @@ +use std::{ffi::CString, mem::MaybeUninit}; + +use yarp_sys::{ + yp_pack_encoding, yp_pack_endian, yp_pack_length_type, yp_pack_parse, yp_pack_result, + yp_pack_signed, yp_pack_size, yp_pack_type, yp_pack_variant, 
yp_size_to_native, +}; + +#[test] +fn pack_parse_test() { + let variant_arg = yp_pack_variant::YP_PACK_VARIANT_PACK; + let first_format = CString::new("C").unwrap(); + let end_format = CString::new("").unwrap(); + let mut format = vec![first_format.as_ptr(), end_format.as_ptr()]; + + let mut type_out = MaybeUninit::::uninit(); + let mut signed_type_out = MaybeUninit::::uninit(); + let mut endian_out = MaybeUninit::::uninit(); + let mut size_out = MaybeUninit::::uninit(); + let mut length_type_out = MaybeUninit::::uninit(); + let mut length_out = 0_u64; + let mut encoding_out = MaybeUninit::::uninit(); + + unsafe { + let result = yp_pack_parse( + variant_arg, + format.as_mut_ptr(), + end_format.as_ptr(), + type_out.as_mut_ptr(), + signed_type_out.as_mut_ptr(), + endian_out.as_mut_ptr(), + size_out.as_mut_ptr(), + length_type_out.as_mut_ptr(), + &mut length_out, + encoding_out.as_mut_ptr(), + ); + + assert_eq!(result, yp_pack_result::YP_PACK_OK); + + let type_out = type_out.assume_init(); + let signed_type_out = signed_type_out.assume_init(); + let endian_out = endian_out.assume_init(); + let size_out = size_out.assume_init(); + let length_type_out = length_type_out.assume_init(); + let encoding_out = encoding_out.assume_init(); + + assert_eq!(type_out, yp_pack_type::YP_PACK_INTEGER); + assert_eq!(signed_type_out, yp_pack_signed::YP_PACK_UNSIGNED); + assert_eq!(endian_out, yp_pack_endian::YP_PACK_AGNOSTIC_ENDIAN); + assert_eq!(size_out, yp_pack_size::YP_PACK_SIZE_8); + assert_eq!(length_type_out, yp_pack_length_type::YP_PACK_LENGTH_FIXED); + assert_eq!(length_out, 1); + assert_eq!(encoding_out, yp_pack_encoding::YP_PACK_ENCODING_ASCII_8BIT); + + let native_size = yp_size_to_native(size_out); + assert_eq!(native_size, 1); + } +} diff --git a/rust/yarp-sys/tests/parser_tests.rs b/rust/yarp-sys/tests/parser_tests.rs new file mode 100644 index 00000000000..889967026f1 --- /dev/null +++ b/rust/yarp-sys/tests/parser_tests.rs @@ -0,0 +1,234 @@ +use std::{ + ffi::{c_char, 
CStr, CString}, + mem::MaybeUninit, + path::Path, + slice, str, + sync::OnceLock, +}; + +use yarp_sys::{ + yp_buffer_free, yp_buffer_init, yp_buffer_t, yp_comment_t, yp_comment_type_t, yp_diagnostic_t, + yp_encoding_ascii, yp_encoding_t, yp_node_destroy, yp_parse, yp_parser_free, yp_parser_init, + yp_parser_register_encoding_changed_callback, yp_parser_register_encoding_decode_callback, + yp_parser_t, yp_prettyprint, +}; + +fn ruby_file_contents() -> (CString, usize) { + let rust_path = Path::new(env!("CARGO_MANIFEST_DIR")); + let ruby_file_path = rust_path.join("../../lib/yarp.rb").canonicalize().unwrap(); + let file_contents = std::fs::read_to_string(ruby_file_path).unwrap(); + let len = file_contents.len(); + + (CString::new(file_contents).unwrap(), len) +} + +#[test] +fn init_test() { + let (ruby_file_contents, len) = ruby_file_contents(); + let source = ruby_file_contents.as_ptr(); + let mut parser = MaybeUninit::::uninit(); + + unsafe { + yp_parser_init(parser.as_mut_ptr(), source, len, std::ptr::null()); + let parser = parser.assume_init_mut(); + + yp_parser_free(parser); + } +} + +#[test] +fn parse_and_print_test() { + let (ruby_file_contents, len) = ruby_file_contents(); + let source = ruby_file_contents.as_ptr(); + let mut parser = MaybeUninit::::uninit(); + let mut buffer = MaybeUninit::::uninit(); + + unsafe { + yp_parser_init(parser.as_mut_ptr(), source, len, std::ptr::null()); + let parser = parser.assume_init_mut(); + let node = yp_parse(parser); + + assert!(yp_buffer_init(buffer.as_mut_ptr()), "Failed to init buffer"); + + let buffer = buffer.assume_init_mut(); + yp_prettyprint(parser, node, buffer); + + let slice = slice::from_raw_parts(buffer.value.cast::(), buffer.length); + let string = str::from_utf8(slice).unwrap(); + assert!(string.starts_with("ProgramNode")); + + yp_node_destroy(parser, node); + yp_parser_free(parser); + yp_buffer_free(buffer); + } +} + +#[test] +fn comments_test() { + let source = CString::new("# Meow!").unwrap(); + let mut 
parser = MaybeUninit::::uninit(); + + unsafe { + yp_parser_init( + parser.as_mut_ptr(), + source.as_ptr(), + source.as_bytes().len(), + std::ptr::null(), + ); + let parser = parser.assume_init_mut(); + let node = yp_parse(parser); + + let comment_list = &parser.comment_list; + let comment = comment_list.head as *const yp_comment_t; + assert_eq!((*comment).type_, yp_comment_type_t::YP_COMMENT_INLINE); + + let location = { + let start = (*comment).start.offset_from(parser.start); + let end = (*comment).end.offset_from(parser.start); + start..end + }; + assert_eq!(location, 0..7); + + yp_node_destroy(parser, node); + yp_parser_free(parser); + } +} + +#[test] +fn diagnostics_test() { + let source = CString::new("class Foo;").unwrap(); + let mut parser = MaybeUninit::::uninit(); + + unsafe { + yp_parser_init( + parser.as_mut_ptr(), + source.as_ptr(), + source.as_bytes().len(), + std::ptr::null(), + ); + let parser = parser.assume_init_mut(); + let node = yp_parse(parser); + + let error_list = &parser.error_list; + // TODO: error_list.head used to get set, but after rebasing `87e02c0b`, this behavior changed. (This pointer used to not be null). + assert!(!error_list.head.is_null()); + + let error = error_list.head as *const yp_diagnostic_t; + let message = CStr::from_ptr((*error).message); + assert_eq!( + message.to_string_lossy(), + "Expected to be able to parse an expression." 
+ ); + + let location = { + let start = (*error).start.offset_from(parser.start); + let end = (*error).end.offset_from(parser.start); + start..end + }; + assert_eq!(location, 10..10); + + yp_node_destroy(parser, node); + yp_parser_free(parser); + } +} + +#[test] +fn encoding_change_test() { + unsafe extern "C" fn callback(_parser: *mut yp_parser_t) { + let _ = THING.set(42).ok(); + } + + static THING: OnceLock = OnceLock::new(); + + let source = CString::new( + "# encoding: ascii\nclass Foo; end + ", + ) + .unwrap(); + let mut parser = MaybeUninit::::uninit(); + + unsafe { + yp_parser_init( + parser.as_mut_ptr(), + source.as_ptr(), + source.as_bytes().len(), + std::ptr::null(), + ); + let parser = parser.assume_init_mut(); + + yp_parser_register_encoding_changed_callback(parser, Some(callback)); + + let node = yp_parse(parser); + // TODO: This used to get set (assumingly from encountering the 'ascii' encoding directive + // in `source`), but after rebasing `87e02c0b`, this behavior changed. + assert!(parser.encoding_changed); + + // This value should have been mutated inside the callback when the encoding changed. 
+ assert_eq!(*THING.get().unwrap(), 42); + + yp_node_destroy(parser, node); + yp_parser_free(parser); + } +} + +#[test] +fn encoding_decode_test() { + unsafe extern "C" fn callback( + _parser: *mut yp_parser_t, + name: *const c_char, + width: usize, + ) -> *mut yp_encoding_t { + let c_name = CStr::from_ptr(name); + + let _ = THING + .set(Output { + name: c_name.to_string_lossy().to_string(), + width, + }) + .ok(); + + let encoding = &mut yp_encoding_ascii; + let encoding_ptr: *mut yp_encoding_t = encoding; + + encoding_ptr + } + + struct Output { + name: String, + width: usize, + } + + static THING: OnceLock = OnceLock::new(); + + let source = CString::new("# encoding: meow").unwrap(); + let mut parser = MaybeUninit::::uninit(); + + unsafe { + yp_parser_init( + parser.as_mut_ptr(), + source.as_ptr(), + source.as_bytes().len(), + std::ptr::null(), + ); + let parser = parser.assume_init_mut(); + + yp_parser_register_encoding_decode_callback(parser, Some(callback)); + + let node = yp_parse(parser); + // TODO: parser.encoding.name used to get set to "ascii" (via the callback), + // but stopped after I rebased on `87e02c0b`. 
+ assert!(!parser.encoding.name.is_null()); + assert!(!yp_encoding_ascii.name.is_null()); + assert_eq!( + CStr::from_ptr(parser.encoding.name).to_string_lossy(), + CStr::from_ptr(yp_encoding_ascii.name).to_string_lossy() + ); + + let output = THING.get().unwrap(); + assert_eq!(&output.name, "meow"); + assert_eq!(output.width, 4); + + yp_node_destroy(parser, node); + yp_parser_free(parser); + } +} diff --git a/rust/yarp-sys/tests/serialize_tests.rs b/rust/yarp-sys/tests/serialize_tests.rs new file mode 100644 index 00000000000..5949f21866a --- /dev/null +++ b/rust/yarp-sys/tests/serialize_tests.rs @@ -0,0 +1,106 @@ +use std::{ + ffi::{c_char, CString}, + mem::MaybeUninit, + path::Path, + slice, str, +}; + +use yarp_sys::{ + yp_buffer_free, yp_buffer_init, yp_buffer_t, yp_node_destroy, yp_parse, yp_parse_serialize, + yp_parser_free, yp_parser_init, yp_parser_t, yp_serialize, +}; + +fn ruby_file_contents() -> (CString, usize) { + let rust_path = Path::new(env!("CARGO_MANIFEST_DIR")); + let ruby_file_path = rust_path.join("../../lib/yarp.rb").canonicalize().unwrap(); + let file_contents = std::fs::read_to_string(ruby_file_path).unwrap(); + let len = file_contents.len(); + + (CString::new(file_contents).unwrap(), len) +} + +#[test] +fn serialize_test() { + let (ruby_file_contents, len) = ruby_file_contents(); + let source = ruby_file_contents.as_ptr(); + let mut parser = MaybeUninit::::uninit(); + let mut buffer = MaybeUninit::::uninit(); + + unsafe { + yp_parser_init(parser.as_mut_ptr(), source, len, std::ptr::null()); + let parser = parser.assume_init_mut(); + let node = yp_parse(parser); + + assert!(yp_buffer_init(buffer.as_mut_ptr()), "Failed to init buffer"); + + let buffer = buffer.assume_init_mut(); + yp_serialize(parser, node, buffer); + + let serialized = std::slice::from_raw_parts(buffer.value.cast::(), buffer.length); + + assert_eq!(&serialized[0..4], b"YARP"); + assert_eq!(serialized[4..5][0], 0); // YP_VERSION_MAJOR + assert_eq!(serialized[5..6][0], 6); // 
YP_VERSION_MINOR + assert_eq!(serialized[6..7][0], 0); // YP_VERSION_PATCH + + yp_buffer_free(buffer); + yp_node_destroy(parser, node); + yp_parser_free(parser); + } +} + +#[test] +fn parse_serialize_test() { + let (ruby_file_contents, len) = ruby_file_contents(); + let source = ruby_file_contents.as_ptr(); + let mut parser = MaybeUninit::::uninit(); + let mut serialize_buffer = MaybeUninit::::uninit(); + let mut parse_serialize_buffer = MaybeUninit::::uninit(); + + let serialized = unsafe { + yp_parser_init(parser.as_mut_ptr(), source, len, std::ptr::null()); + let parser = parser.assume_init_mut(); + let node = yp_parse(parser); + + assert!( + yp_buffer_init(serialize_buffer.as_mut_ptr()), + "Failed to init buffer" + ); + + let serialize_buffer = serialize_buffer.assume_init_mut(); + yp_serialize(parser, node, serialize_buffer); + + yp_node_destroy(parser, node); + yp_parser_free(parser); + + // Can't use String -> CString here because `value` contains nul bytes. + slice::from_raw_parts(serialize_buffer.value.cast::(), serialize_buffer.length) + }; + + unsafe { + assert!( + yp_buffer_init(parse_serialize_buffer.as_mut_ptr()), + "Failed to init buffer" + ); + + let parse_serialize_buffer = parse_serialize_buffer.assume_init_mut(); + let metadata = std::ptr::null(); + + yp_parse_serialize( + serialized.as_ptr().cast::(), + serialized.len(), + parse_serialize_buffer, + metadata, + ); + + let slice = slice::from_raw_parts( + parse_serialize_buffer.value.cast::(), + parse_serialize_buffer.length, + ); + let string = str::from_utf8(slice).unwrap(); + assert!(string.starts_with("YARP")); + + yp_buffer_free(serialize_buffer.as_mut_ptr()); + yp_buffer_free(parse_serialize_buffer); + } +} diff --git a/rust/yarp-sys/tests/utils_tests.rs b/rust/yarp-sys/tests/utils_tests.rs new file mode 100644 index 00000000000..6d0fa144f75 --- /dev/null +++ b/rust/yarp-sys/tests/utils_tests.rs @@ -0,0 +1,138 @@ +use std::{ + ffi::{CStr, CString}, + mem::MaybeUninit, +}; + +#[test] +fn 
version_test() { + use yarp_sys::yp_version; + + let cstring = unsafe { + let version = yp_version(); + CStr::from_ptr(version) + }; + + assert_eq!(&cstring.to_string_lossy(), "0.6.0"); +} + +#[test] +fn list_test() { + use yarp_sys::{yp_list_empty_p, yp_list_free, yp_list_t}; + + let mut list = MaybeUninit::::zeroed(); + + unsafe { + let list = list.assume_init_mut(); + + assert!(yp_list_empty_p(list)); + + yp_list_free(list); + } +} + +mod string { + use std::ffi::c_char; + + use yarp_sys::{ + yp_string_free, yp_string_length, yp_string_source, yp_string_t, yp_string_t_type, + }; + + use super::*; + + struct S { + c_string: CString, + yp_string: yp_string_t, + } + + impl S { + fn start_ptr(&self) -> *const c_char { + self.c_string.as_ptr() + } + } + + fn make_string(string_type: yp_string_t_type) -> S { + let c_string = CString::new("0123456789012345").unwrap(); + + let yp_string = yp_string_t { + type_: string_type, + source: c_string.as_ptr(), + length: c_string.as_bytes().len(), + }; + + S { + c_string, + yp_string, + } + } + + #[test] + fn shared_string_test() { + let mut s = make_string(yp_string_t_type::YP_STRING_SHARED); + + unsafe { + let len = yp_string_length(&s.yp_string); + assert_eq!(len, 16); + + let result_start = yp_string_source(&s.yp_string); + assert_eq!(s.start_ptr(), result_start); + + yp_string_free(&mut s.yp_string); + } + } + + #[test] + fn owned_string_test() { + let s = make_string(yp_string_t_type::YP_STRING_OWNED); + + unsafe { + let result_len = yp_string_length(&s.yp_string); + assert_eq!(result_len, 16); + + let result_start = yp_string_source(&s.yp_string); + assert_eq!(s.yp_string.source, result_start); + + // Don't drop the yp_string--we don't own it anymore! 
+ } + } + + #[test] + fn constant_string_test() { + let mut s = make_string(yp_string_t_type::YP_STRING_CONSTANT); + + unsafe { + let result_len = yp_string_length(&s.yp_string); + assert_eq!(result_len, 16); + + let result_start = yp_string_source(&s.yp_string); + assert_eq!(s.yp_string.source, result_start); + + yp_string_free(&mut s.yp_string); + } + } + + #[test] + fn mapped_string_test() { + let s = make_string(yp_string_t_type::YP_STRING_MAPPED); + + unsafe { + let result_len = yp_string_length(&s.yp_string); + assert_eq!(result_len, 16); + + let result_start = yp_string_source(&s.yp_string); + assert_eq!(s.yp_string.source, result_start); + } + } +} + +#[test] +fn string_list_test() { + use yarp_sys::{yp_string_list_free, yp_string_list_init, yp_string_list_t}; + + let mut list = MaybeUninit::::uninit(); + + unsafe { + yp_string_list_init(list.as_mut_ptr()); + let list = list.assume_init_mut(); + yp_string_list_free(list); + } +}