diff --git a/Cargo.lock b/Cargo.lock index 405710cb8b9..c5bfefe8d13 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1405,6 +1405,7 @@ dependencies = [ "tinystr 0.7.0", "unicode-bidi", "writeable", + "zerovec", ] [[package]] @@ -1929,13 +1930,11 @@ dependencies = [ "icu_segmenter", "icu_timezone", "lazy_static", - "litemap", "log", "reqwest", "serde", "serde_json", "simple_logger", - "tinystr 0.7.0", "tokio", "writeable", "zerovec", diff --git a/ffi/capi_cdylib/Cargo.toml b/ffi/capi_cdylib/Cargo.toml index 9e1a0aee36b..43982d75aed 100644 --- a/ffi/capi_cdylib/Cargo.toml +++ b/ffi/capi_cdylib/Cargo.toml @@ -41,6 +41,7 @@ icu_provider = { version = "1.0.0", path = "../../provider/core", default-featur default = ["icu_capi/default"] any_provider = ["icu_capi/any_provider"] buffer_provider = ["icu_capi/buffer_provider"] +baked_provider = ["icu_capi/baked_provider"] provider_fs = ["icu_capi/provider_fs"] # Indirectly implies buffer_provider provider_test = ["icu_capi/provider_test"] logging = ["icu_capi/logging"] diff --git a/ffi/capi_staticlib/Cargo.toml b/ffi/capi_staticlib/Cargo.toml index e346753c145..2782db11d0b 100644 --- a/ffi/capi_staticlib/Cargo.toml +++ b/ffi/capi_staticlib/Cargo.toml @@ -41,6 +41,7 @@ icu_provider = { version = "1.0.0", path = "../../provider/core", default-featur default = ["icu_capi/default"] any_provider = ["icu_capi/any_provider"] buffer_provider = ["icu_capi/buffer_provider"] +baked_provider = ["icu_capi/baked_provider"] provider_fs = ["icu_capi/provider_fs"] # Indirectly implies buffer_provider provider_test = ["icu_capi/provider_test"] logging = ["icu_capi/logging"] diff --git a/ffi/diplomat/Cargo.toml b/ffi/diplomat/Cargo.toml index ebe6b98dc49..a5d4fe0660c 100644 --- a/ffi/diplomat/Cargo.toml +++ b/ffi/diplomat/Cargo.toml @@ -32,8 +32,8 @@ all-features = true skip_optional_dependencies = true # Bench feature gets tested separately and is only relevant for CI. # logging enables a feature of a dependency that has no externally visible API changes -# serde enables dependency features but buffer_provider affects the actual code -denylist = ["bench", "logging"] +# baked_provider requires datagen. +denylist = ["bench", "logging", "baked_provider"] # Please keep the features list in sync with the icu_capi_staticlib/icu_capi_cdylib crates [features] @@ -61,6 +61,7 @@ provider_test = ["dep:icu_testdata"] logging = ["icu_provider/log_error_context", "dep:log"] # Use the env_logger functionality to log based on environment variables simple_logger = ["dep:simple_logger"] +baked_provider = ["any_provider", "dep:zerovec"] # meta feature for things we enable by default in C and C++ cpp_default = ["provider_test", "logging", "simple_logger"] @@ -94,6 +95,8 @@ serde = { version = "1.0", default-features = false, optional = true } icu_testdata = { version = "1.0.0", path = "../../provider/testdata", optional = true, features = ["icu_segmenter"] } +zerovec = { version = "*", path = "../../utils/zerovec", optional = true} + # Run `cargo make diplomat-install` to get the right diplomat binary installed # The version here can either be a `version = ".."` spec or `git = "https://github.com/rust-diplomat/diplomat", rev = ".."` # Since this crate is published, Diplomat must be published preceding a new ICU4X release but may use git versions in between diff --git a/ffi/diplomat/c/examples/fixeddecimal_tiny/.gitignore b/ffi/diplomat/c/examples/fixeddecimal_tiny/.gitignore index 36fd6b8da6c..2ddcea818e6 100644 --- a/ffi/diplomat/c/examples/fixeddecimal_tiny/.gitignore +++ b/ffi/diplomat/c/examples/fixeddecimal_tiny/.gitignore @@ -5,5 +5,4 @@ optim* *.elf *.o a.out.dSYM -decimal-bn-en.postcard -decimal_bn_en.h +baked diff --git a/ffi/diplomat/c/examples/fixeddecimal_tiny/Makefile b/ffi/diplomat/c/examples/fixeddecimal_tiny/Makefile index 8fa4bbdefd0..2b5b23ed183 100644 --- a/ffi/diplomat/c/examples/fixeddecimal_tiny/Makefile +++ b/ffi/diplomat/c/examples/fixeddecimal_tiny/Makefile @@ -17,51 +17,49 @@ GCC := gcc CLANG := clang-14 LLD := lld-14 +ROOT_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST)))) -../../../../../target/debug/libicu_capi_staticlib.a: $(ALL_RUST) - cargo build -p icu_capi_staticlib --no-default-features --features buffer_provider +../../../../../target/debug/libicu_capi_staticlib.a: $(ALL_RUST) baked/mod.rs + ICU4X_FFI_BAKED_ROOT=$(ROOT_DIR)/baked/mod.rs cargo build -p icu_capi_staticlib --no-default-features --features baked_provider -icu_capi_staticlib_tiny/target/x86_64-unknown-linux-gnu/debug/libicu_capi_staticlib_tiny.a: $(ALL_RUST) +icu_capi_staticlib_tiny/target/x86_64-unknown-linux-gnu/debug/libicu_capi_staticlib_tiny.a: $(ALL_RUST) baked/mod.rs cd icu_capi_staticlib_tiny && \ - RUSTFLAGS="-Clinker-plugin-lto -Clinker=$(CLANG) -Ccodegen-units=1 -Clink-arg=-flto -Cpanic=abort" cargo +${ICU4X_NIGHTLY_TOOLCHAIN} panic-abort-build --target x86_64-unknown-linux-gnu + ICU4X_FFI_BAKED_ROOT=$(ROOT_DIR)/baked/mod.rs RUSTFLAGS="-Clinker-plugin-lto -Clinker=$(CLANG) -Ccodegen-units=1 -Clink-arg=-flto -Cpanic=abort" cargo +${ICU4X_NIGHTLY_TOOLCHAIN} panic-abort-build --target x86_64-unknown-linux-gnu -icu_capi_staticlib_tiny/target/x86_64-unknown-linux-gnu/release-opt-size/libicu_capi_staticlib_tiny.a: $(ALL_RUST) +icu_capi_staticlib_tiny/target/x86_64-unknown-linux-gnu/release-opt-size/libicu_capi_staticlib_tiny.a: $(ALL_RUST) baked/mod.rs cd icu_capi_staticlib_tiny && \ - RUSTFLAGS="-Clinker-plugin-lto -Clinker=$(CLANG) -Ccodegen-units=1 -Clink-arg=-flto -Cpanic=abort" cargo +${ICU4X_NIGHTLY_TOOLCHAIN} panic-abort-build --target x86_64-unknown-linux-gnu --profile=release-opt-size + ICU4X_FFI_BAKED_ROOT=$(ROOT_DIR)/baked/mod.rs RUSTFLAGS="-Clinker-plugin-lto -Clinker=$(CLANG) -Ccodegen-units=1 -Clink-arg=-flto -Cpanic=abort" cargo +${ICU4X_NIGHTLY_TOOLCHAIN} panic-abort-build --target x86_64-unknown-linux-gnu --profile=release-opt-size -decimal-bn-en.postcard: - cargo run -p icu_datagen --features bin -- --locales en bn --keys "decimal/symbols@1" --cldr-root ../../../../../provider/testdata/data/cldr/ --format blob --out decimal-bn-en.postcard - -decimal_bn_en.h: decimal-bn-en.postcard - xxd -i -C decimal-bn-en.postcard > decimal_bn_en.h +baked/mod.rs: + cargo run -p icu_datagen --features bin,icu_segmenter -- --locales en bn --all-keys --cldr-tag 42.0.0 --icuexport-tag release-72-1 --format mod --out baked --use-separate-crates # Naive target: no optimizations, full std -optim0.elf: ../../../../../target/debug/libicu_capi_staticlib.a $(ALL_HEADERS) test.c decimal_bn_en.h +optim0.elf: ../../../../../target/debug/libicu_capi_staticlib.a $(ALL_HEADERS) test.c $(GCC) test.c ../../../../../target/debug/libicu_capi_staticlib.a -ldl -lpthread -lm -g -o optim0.elf # optim.elf: gcc with maximum link-time code stripping (gc-sections and strip-all) -optim1.elf: ../../../../../target/debug/libicu_capi_staticlib.a $(ALL_HEADERS) test.c decimal_bn_en.h +optim1.elf: ../../../../../target/debug/libicu_capi_staticlib.a $(ALL_HEADERS) test.c $(GCC) -fdata-sections -ffunction-sections test.c ../../../../../target/debug/libicu_capi_staticlib.a -ldl -lpthread -lm -g -o optim1.elf -Wl,--gc-sections -Wl,--strip-all # optim2.elf: clang single-step with gc-sections -optim2.elf: icu_capi_staticlib_tiny/target/x86_64-unknown-linux-gnu/debug/libicu_capi_staticlib_tiny.a $(ALL_HEADERS) test.c decimal_bn_en.h +optim2.elf: icu_capi_staticlib_tiny/target/x86_64-unknown-linux-gnu/debug/libicu_capi_staticlib_tiny.a $(ALL_HEADERS) test.c $(CLANG) -flto -fdata-sections -ffunction-sections test.c icu_capi_staticlib_tiny/target/x86_64-unknown-linux-gnu/debug/libicu_capi_staticlib_tiny.a -g -o optim2.elf -Wl,--gc-sections -optim3.o: $(ALL_HEADERS) test.c decimal_bn_en.h +optim3.o: $(ALL_HEADERS) test.c $(CLANG) -c -flto=thin -fdata-sections -ffunction-sections --target=x86_64-unknown-linux-gnu test.c -g -o optim3.o # optim3.elf: clang two-step with lld, debug mode optim3.elf: optim3.o icu_capi_staticlib_tiny/target/x86_64-unknown-linux-gnu/debug/libicu_capi_staticlib_tiny.a $(CLANG) -flto=thin -fuse-ld=$(LLD) -L . -o optim3.elf optim3.o icu_capi_staticlib_tiny/target/x86_64-unknown-linux-gnu/debug/libicu_capi_staticlib_tiny.a -Wl,--gc-sections -optim4.o: $(ALL_HEADERS) test.c decimal_bn_en.h +optim4.o: $(ALL_HEADERS) test.c $(CLANG) -c -flto=thin -fdata-sections -ffunction-sections --target=x86_64-unknown-linux-gnu test.c -g -o optim4.o # optim4.elf: clang two-step with lld, release mode with debug symbols optim4.elf: optim4.o icu_capi_staticlib_tiny/target/x86_64-unknown-linux-gnu/release-opt-size/libicu_capi_staticlib_tiny.a $(CLANG) -flto=thin -fuse-ld=$(LLD) -L . -o optim4.elf optim4.o icu_capi_staticlib_tiny/target/x86_64-unknown-linux-gnu/release-opt-size/libicu_capi_staticlib_tiny.a -Wl,--gc-sections -optim5.o: $(ALL_HEADERS) test.c decimal_bn_en.h +optim5.o: $(ALL_HEADERS) test.c $(CLANG) -c -flto=thin -fdata-sections -ffunction-sections --target=x86_64-unknown-linux-gnu test.c -o optim5.o # optim5.elf: clang two-step with lld, release mode stripped of debug symbols diff --git a/ffi/diplomat/c/examples/fixeddecimal_tiny/icu_capi_staticlib_tiny/Cargo.lock b/ffi/diplomat/c/examples/fixeddecimal_tiny/icu_capi_staticlib_tiny/Cargo.lock index cfb43cf00c1..14c45b46fb8 100644 --- a/ffi/diplomat/c/examples/fixeddecimal_tiny/icu_capi_staticlib_tiny/Cargo.lock +++ b/ffi/diplomat/c/examples/fixeddecimal_tiny/icu_capi_staticlib_tiny/Cargo.lock @@ -8,19 +8,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "cobs" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" - -[[package]] -name = "deduplicating_array" -version = "0.1.3" -dependencies = [ - "serde", -] - [[package]] name = "diplomat" version = "0.4.2" @@ -94,7 +81,6 @@ dependencies = [ "displaydoc", "icu_locid", "icu_provider", - "serde", "tinystr", "writeable", "zerovec", @@ -120,15 +106,13 @@ dependencies = [ "icu_properties", "icu_provider", "icu_provider_adapters", - "icu_provider_blob", "icu_segmenter", - "icu_testdata", "icu_timezone", "log", - "serde", "tinystr", "unicode-bidi", "writeable", + "zerovec", ] [[package]] @@ -141,7 +125,7 @@ dependencies = [ [[package]] name = "icu_collator" -version = "1.0.0" +version = "1.0.1" dependencies = [ "displaydoc", "icu_collections", @@ -149,7 +133,6 @@ dependencies = [ "icu_normalizer", "icu_properties", "icu_provider", - "serde", "smallvec", "utf16_iter", "utf8_iter", @@ -162,7 +145,6 @@ name = "icu_collections" version = "1.0.0" dependencies = [ "displaydoc", - "serde", "yoke", "zerofrom", "zerovec", @@ -182,7 +164,6 @@ dependencies = [ "icu_provider", "icu_timezone", "litemap", - "serde", "smallvec", "tinystr", "writeable", @@ -197,7 +178,6 @@ dependencies = [ "fixed_decimal", "icu_locid", "icu_provider", - "serde", "writeable", ] @@ -205,12 +185,10 @@ dependencies = [ name = "icu_list" version = "1.0.0" dependencies = [ - "deduplicating_array", "displaydoc", "icu_locid", "icu_provider", "regex-automata", - "serde", "writeable", "zerovec", ] @@ -221,7 +199,6 @@ version = "1.0.0" dependencies = [ "displaydoc", "litemap", - "serde", "tinystr", "writeable", "zerovec", @@ -234,7 +211,6 @@ dependencies = [ "displaydoc", "icu_locid", "icu_provider", - "serde", "tinystr", "zerovec", ] @@ -247,7 +223,6 @@ dependencies = [ "icu_collections", "icu_properties", "icu_provider", - "serde", "smallvec", "utf16_iter", "utf8_iter", @@ -264,7 +239,6 @@ dependencies = [ "fixed_decimal", "icu_locid", "icu_provider", - "serde", "zerovec", ] @@ -275,7 +249,6 @@ dependencies = [ "displaydoc", "icu_collections", "icu_provider", - "serde", "unicode-bidi", "zerovec", ] @@ -287,8 +260,6 @@ dependencies = [ "displaydoc", "icu_locid", "icu_provider_macros", - "postcard", - "serde", "stable_deref_trait", "writeable", "yoke", @@ -302,24 +273,11 @@ version = "1.0.0" dependencies = [ "icu_locid", "icu_provider", - "serde", "tinystr", "yoke", "zerovec", ] -[[package]] -name = "icu_provider_blob" -version = "1.0.0" -dependencies = [ - "icu_provider", - "postcard", - "serde", - "writeable", - "yoke", - "zerovec", -] - [[package]] name = "icu_provider_macros" version = "1.0.0" @@ -337,38 +295,10 @@ dependencies = [ "icu_collections", "icu_locid", "icu_provider", - "serde", - "serde_json", "utf8_iter", "zerovec", ] -[[package]] -name = "icu_testdata" -version = "1.0.0" -dependencies = [ - "icu_calendar", - "icu_collator", - "icu_collections", - "icu_datetime", - "icu_decimal", - "icu_list", - "icu_locid", - "icu_locid_transform", - "icu_normalizer", - "icu_plurals", - "icu_properties", - "icu_provider", - "icu_provider_adapters", - "icu_provider_blob", - "icu_segmenter", - "icu_timezone", - "lazy_static", - "litemap", - "tinystr", - "zerovec", -] - [[package]] name = "icu_timezone" version = "1.0.0" @@ -377,25 +307,15 @@ dependencies = [ "icu_calendar", "icu_locid", "icu_provider", - "serde", "tinystr", "zerovec", ] -[[package]] -name = "itoa" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" - [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" -dependencies = [ - "spin", -] [[package]] name = "libc" @@ -406,9 +326,6 @@ checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b" [[package]] name = "litemap" version = "0.6.0" -dependencies = [ - "serde", -] [[package]] name = "log" @@ -425,16 +342,6 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" -[[package]] -name = "postcard" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c2b180dc0bade59f03fd005cb967d3f1e5f69b13922dad0cd6e047cb8af2363" -dependencies = [ - "cobs", - "serde", -] - [[package]] name = "proc-macro2" version = "1.0.32" @@ -488,31 +395,11 @@ dependencies = [ "syn", ] -[[package]] -name = "serde_json" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e466864e431129c7e0d3476b92f20458e5879919a0596c6472738d9fa2d342f8" -dependencies = [ - "itoa", - "ryu", - "serde", -] - [[package]] name = "smallvec" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" -dependencies = [ - "serde", -] - -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "stable_deref_trait" @@ -564,7 +451,6 @@ name = "tinystr" version = "0.7.0" dependencies = [ "displaydoc", - "serde", "zerovec", ] @@ -649,7 +535,6 @@ dependencies = [ name = "zerovec" version = "0.9.1" dependencies = [ - "serde", "yoke", "zerofrom", "zerovec-derive", diff --git a/ffi/diplomat/c/examples/fixeddecimal_tiny/icu_capi_staticlib_tiny/Cargo.toml b/ffi/diplomat/c/examples/fixeddecimal_tiny/icu_capi_staticlib_tiny/Cargo.toml index 33347a3fbe3..4231cc62b1a 100644 --- a/ffi/diplomat/c/examples/fixeddecimal_tiny/icu_capi_staticlib_tiny/Cargo.toml +++ b/ffi/diplomat/c/examples/fixeddecimal_tiny/icu_capi_staticlib_tiny/Cargo.toml @@ -12,7 +12,7 @@ publish = false crate-type = ["staticlib"] [dependencies] -icu_capi = { path = "../../../..", default-features = false, features = ["buffer_provider"] } +icu_capi = { path = "../../../..", default-features = false, features = ["baked_provider"] } dlmalloc = { version = "0.2", features = ["global"] } [profile.release] diff --git a/ffi/diplomat/c/examples/fixeddecimal_tiny/test.c b/ffi/diplomat/c/examples/fixeddecimal_tiny/test.c index 9a1881f6272..7e597f0d630 100644 --- a/ffi/diplomat/c/examples/fixeddecimal_tiny/test.c +++ b/ffi/diplomat/c/examples/fixeddecimal_tiny/test.c @@ -3,18 +3,12 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). #include "../../include/ICU4XFixedDecimalFormatter.h" -#include "decimal_bn_en.h" #include #include int main() { ICU4XLocale* locale = ICU4XLocale_create_bn(); - diplomat_result_box_ICU4XDataProvider_ICU4XError provider_result = ICU4XDataProvider_create_from_byte_slice(DECIMAL_BN_EN_POSTCARD, DECIMAL_BN_EN_POSTCARD_LEN); - if (!provider_result.is_ok) { - printf("Failed to create ICU4XDataProvider\n"); - return 1; - } - ICU4XDataProvider* provider = provider_result.ok; + ICU4XDataProvider* provider = ICU4XDataProvider_create_baked(); ICU4XFixedDecimal* decimal = ICU4XFixedDecimal_create_from_u64(1000007); diplomat_result_box_ICU4XFixedDecimalFormatter_ICU4XError fdf_result = diff --git a/ffi/diplomat/c/include/ICU4XDataProvider.h b/ffi/diplomat/c/include/ICU4XDataProvider.h index fd8b35b7877..39d43b2f8e3 100644 --- a/ffi/diplomat/c/include/ICU4XDataProvider.h +++ b/ffi/diplomat/c/include/ICU4XDataProvider.h @@ -30,6 +30,8 @@ diplomat_result_box_ICU4XDataProvider_ICU4XError ICU4XDataProvider_create_from_b ICU4XDataProvider* ICU4XDataProvider_create_empty(); +ICU4XDataProvider* ICU4XDataProvider_create_baked(); + diplomat_result_void_ICU4XError ICU4XDataProvider_fork_by_key(ICU4XDataProvider* self, ICU4XDataProvider* other); diplomat_result_void_ICU4XError ICU4XDataProvider_fork_by_locale(ICU4XDataProvider* self, ICU4XDataProvider* other); diff --git a/ffi/diplomat/cpp/docs/source/provider_ffi.rst b/ffi/diplomat/cpp/docs/source/provider_ffi.rst index fba4764e6ca..d59810beb25 100644 --- a/ffi/diplomat/cpp/docs/source/provider_ffi.rst +++ b/ffi/diplomat/cpp/docs/source/provider_ffi.rst @@ -38,6 +38,13 @@ See the `Rust documentation for EmptyDataProvider `__ for more information. + .. cpp:function:: static ICU4XDataProvider create_baked() + + Constructs a :cpp:class:`ICU4XDataProvider` containing baked data. + + When compiling the Rust library, set the ``ICU4X_FFI_BAKED_ROOT`` environment variable to the baked data's ``mod.rs``'s path. + + .. cpp:function:: diplomat::result fork_by_key(ICU4XDataProvider& other) Creates a provider that tries the current provider and then, if the current provider doesn't support the data key, another provider ``other``. diff --git a/ffi/diplomat/cpp/include/ICU4XDataProvider.h b/ffi/diplomat/cpp/include/ICU4XDataProvider.h index fd8b35b7877..39d43b2f8e3 100644 --- a/ffi/diplomat/cpp/include/ICU4XDataProvider.h +++ b/ffi/diplomat/cpp/include/ICU4XDataProvider.h @@ -30,6 +30,8 @@ diplomat_result_box_ICU4XDataProvider_ICU4XError ICU4XDataProvider_create_from_b ICU4XDataProvider* ICU4XDataProvider_create_empty(); +ICU4XDataProvider* ICU4XDataProvider_create_baked(); + diplomat_result_void_ICU4XError ICU4XDataProvider_fork_by_key(ICU4XDataProvider* self, ICU4XDataProvider* other); diplomat_result_void_ICU4XError ICU4XDataProvider_fork_by_locale(ICU4XDataProvider* self, ICU4XDataProvider* other); diff --git a/ffi/diplomat/cpp/include/ICU4XDataProvider.hpp b/ffi/diplomat/cpp/include/ICU4XDataProvider.hpp index 461d2aa25f4..fbd086955a0 100644 --- a/ffi/diplomat/cpp/include/ICU4XDataProvider.hpp +++ b/ffi/diplomat/cpp/include/ICU4XDataProvider.hpp @@ -65,6 +65,14 @@ class ICU4XDataProvider { */ static ICU4XDataProvider create_empty(); + /** + * Constructs a [`ICU4XDataProvider`] containing baked data. + * + * When compiling the Rust library, set the `ICU4X_FFI_BAKED_ROOT` environment + * variable to the baked data's `mod.rs`'s path. + */ + static ICU4XDataProvider create_baked(); + /** * Creates a provider that tries the current provider and then, if the current provider * doesn't support the data key, another provider `other`. @@ -143,6 +151,9 @@ inline diplomat::result ICU4XDataProvider::create inline ICU4XDataProvider ICU4XDataProvider::create_empty() { return ICU4XDataProvider(capi::ICU4XDataProvider_create_empty()); } +inline ICU4XDataProvider ICU4XDataProvider::create_baked() { + return ICU4XDataProvider(capi::ICU4XDataProvider_create_baked()); +} inline diplomat::result ICU4XDataProvider::fork_by_key(ICU4XDataProvider& other) { auto diplomat_result_raw_out_value = capi::ICU4XDataProvider_fork_by_key(this->inner.get(), other.AsFFIMut()); diplomat::result diplomat_result_out_value; diff --git a/ffi/diplomat/js/docs/source/provider_ffi.rst b/ffi/diplomat/js/docs/source/provider_ffi.rst index c827a7dcee7..a98740e020b 100644 --- a/ffi/diplomat/js/docs/source/provider_ffi.rst +++ b/ffi/diplomat/js/docs/source/provider_ffi.rst @@ -40,6 +40,13 @@ See the `Rust documentation for EmptyDataProvider `__ for more information. + .. js:function:: create_baked() + + Constructs a :js:class:`ICU4XDataProvider` containing baked data. + + When compiling the Rust library, set the ``ICU4X_FFI_BAKED_ROOT`` environment variable to the baked data's ``mod.rs``'s path. + + .. js:function:: fork_by_key(other) Creates a provider that tries the current provider and then, if the current provider doesn't support the data key, another provider ``other``. diff --git a/ffi/diplomat/js/include/ICU4XDataProvider.d.ts b/ffi/diplomat/js/include/ICU4XDataProvider.d.ts index 83cd1f51bce..45ac699dee3 100644 --- a/ffi/diplomat/js/include/ICU4XDataProvider.d.ts +++ b/ffi/diplomat/js/include/ICU4XDataProvider.d.ts @@ -44,6 +44,14 @@ export class ICU4XDataProvider { */ static create_empty(): ICU4XDataProvider; + /** + + * Constructs a {@link ICU4XDataProvider `ICU4XDataProvider`} containing baked data. + + * When compiling the Rust library, set the `ICU4X_FFI_BAKED_ROOT` environment variable to the baked data's `mod.rs`'s path. + */ + static create_baked(): ICU4XDataProvider; + /** * Creates a provider that tries the current provider and then, if the current provider doesn't support the data key, another provider `other`. diff --git a/ffi/diplomat/js/include/ICU4XDataProvider.js b/ffi/diplomat/js/include/ICU4XDataProvider.js index 3b1354f8248..f129fda425f 100644 --- a/ffi/diplomat/js/include/ICU4XDataProvider.js +++ b/ffi/diplomat/js/include/ICU4XDataProvider.js @@ -64,6 +64,10 @@ export class ICU4XDataProvider { return new ICU4XDataProvider(wasm.ICU4XDataProvider_create_empty(), true, []); } + static create_baked() { + return new ICU4XDataProvider(wasm.ICU4XDataProvider_create_baked(), true, []); + } + fork_by_key(arg_other) { return (() => { const diplomat_receive_buffer = wasm.diplomat_alloc(5, 4); diff --git a/ffi/diplomat/src/provider.rs b/ffi/diplomat/src/provider.rs index 8e95b848cbd..674fb085dfd 100644 --- a/ffi/diplomat/src/provider.rs +++ b/ffi/diplomat/src/provider.rs @@ -17,6 +17,8 @@ pub enum ICU4XDataProviderInner { Empty, #[cfg(feature = "any_provider")] Any(Box), + #[cfg(feature = "baked_provider")] + Baked, #[cfg(feature = "buffer_provider")] Buffer(Box), } @@ -27,9 +29,21 @@ impl Default for ICU4XDataProviderInner { } } +#[cfg(feature = "baked_provider")] +struct BakedProvider; +#[cfg(feature = "baked_provider")] +mod baked { + include!(core::env!("ICU4X_FFI_BAKED_ROOT")); + impl_data_provider!(super::BakedProvider); + #[cfg(feature = "any_provider")] + impl_any_provider!(super::BakedProvider); +} + #[diplomat::bridge] pub mod ffi { use super::ICU4XDataProviderInner; + #[cfg(feature = "baked_provider")] + use super::BakedProvider; use crate::errors::ffi::ICU4XError; use crate::fallbacker::ffi::ICU4XLocaleFallbacker; use alloc::boxed::Box; @@ -49,18 +63,18 @@ pub mod ffi { fn convert_any_provider( x: D, ) -> Box { - Box::new(ICU4XDataProvider( - super::ICU4XDataProviderInner::from_any_provider(x), - )) + Box::new(ICU4XDataProvider(super::ICU4XDataProviderInner::Any( + Box::new(x), + ))) } #[cfg(feature = "buffer_provider")] fn convert_buffer_provider( x: D, ) -> Box { - Box::new(ICU4XDataProvider( - super::ICU4XDataProviderInner::from_buffer_provider(x), - )) + Box::new(ICU4XDataProvider(super::ICU4XDataProviderInner::Buffer( + Box::new(x), + ))) } impl ICU4XDataProvider { @@ -145,6 +159,18 @@ pub mod ffi { Box::new(ICU4XDataProvider(ICU4XDataProviderInner::Empty)) } + /// Constructs a [`ICU4XDataProvider`] containing baked data. + /// + /// When compiling the Rust library, set the `ICU4X_FFI_BAKED_ROOT` environment + /// variable to the baked data's `mod.rs`'s path. + pub fn create_baked() -> Box { + #[cfg(not(feature = "baked_provider"))] + panic!("Requires feature 'baked_provider'"); + + #[cfg(feature = "baked_provider")] + Box::new(ICU4XDataProvider(ICU4XDataProviderInner::Baked)) + } + /// Creates a provider that tries the current provider and then, if the current provider /// doesn't support the data key, another provider `other`. /// @@ -173,6 +199,14 @@ pub mod ffi { )); Ok(()) } + #[cfg(all(feature = "any_provider", feature = "baked_provider"))] + (ICU4XDataProviderInner::Baked, ICU4XDataProviderInner::Any(a)) + | (ICU4XDataProviderInner::Any(a), ICU4XDataProviderInner::Baked) => { + self.0 = ICU4XDataProviderInner::Any(Box::from( + icu_provider_adapters::fork::ForkByKeyProvider::new(a, BakedProvider), + )); + Ok(()) + } #[cfg(feature = "buffer_provider")] (ICU4XDataProviderInner::Buffer(a), ICU4XDataProviderInner::Buffer(b)) => { self.0 = ICU4XDataProviderInner::Buffer(Box::from( @@ -215,6 +249,18 @@ pub mod ffi { )); Ok(()) } + #[cfg(all(feature = "any_provider", feature = "baked_provider"))] + (ICU4XDataProviderInner::Baked, ICU4XDataProviderInner::Any(a)) + | (ICU4XDataProviderInner::Any(a), ICU4XDataProviderInner::Baked) => { + self.0 = ICU4XDataProviderInner::Any(Box::from( + icu_provider_adapters::fork::ForkByErrorProvider::new_with_predicate( + a, + BakedProvider, + MissingLocalePredicate, + ), + )); + Ok(()) + } #[cfg(feature = "buffer_provider")] (ICU4XDataProviderInner::Buffer(a), ICU4XDataProviderInner::Buffer(b)) => { self.0 = ICU4XDataProviderInner::Buffer(Box::from( @@ -265,6 +311,16 @@ pub mod ffi { Err(e) => Err(e.into()), } } + #[cfg(all(feature = "any_provider", feature = "baked_provider"))] + ICU4XDataProviderInner::Baked => { + match LocaleFallbackProvider::try_new_with_any_provider(BakedProvider) { + Ok(x) => { + self.0 = ICU4XDataProviderInner::Any(Box::new(x)); + Ok(()) + } + Err(e) => Err(e.into()), + } + } #[cfg(feature = "buffer_provider")] ICU4XDataProviderInner::Buffer(inner) => { match LocaleFallbackProvider::try_new_with_buffer_provider(inner) { @@ -304,6 +360,13 @@ pub mod ffi { )); Ok(()) } + #[cfg(all(feature = "any_provider", feature = "baked_provider"))] + ICU4XDataProviderInner::Baked => { + self.0 = ICU4XDataProviderInner::Any(Box::new( + LocaleFallbackProvider::new_with_fallbacker(BakedProvider, fallbacker.0.clone()), + )); + Ok(()) + } #[cfg(feature = "buffer_provider")] ICU4XDataProviderInner::Buffer(inner) => { self.0 = ICU4XDataProviderInner::Buffer(Box::new( @@ -317,7 +380,11 @@ pub mod ffi { } } -#[cfg(not(any(feature = "any_provider", feature = "buffer_provider")))] +#[cfg(not(any( + feature = "any_provider", + feature = "baked_provider", + feature = "buffer_provider" +)))] impl DataProvider for ICU4XDataProviderInner where M: KeyedDataMarker + 'static, @@ -327,7 +394,11 @@ where } } -#[cfg(all(feature = "buffer_provider", not(feature = "any_provider")))] +#[cfg(all( + feature = "buffer_provider", + not(feature = "any_provider"), + not(feature = "baked_provider") +))] impl DataProvider for ICU4XDataProviderInner where M: KeyedDataMarker + 'static, @@ -339,6 +410,8 @@ where fn load(&self, req: DataRequest) -> Result, DataError> { match self { ICU4XDataProviderInner::Empty => EmptyDataProvider::new().load(req), + #[cfg(feature = "baked_provider")] + ICU4XDataProviderInner::Baked => BakedProvider.load(req), ICU4XDataProviderInner::Buffer(buffer_provider) => { buffer_provider.as_deserializing().load(req) } @@ -346,7 +419,11 @@ where } } -#[cfg(all(feature = "any_provider", not(feature = "buffer_provider")))] +#[cfg(all( + feature = "any_provider", + not(feature = "buffer_provider"), + not(feature = "baked_provider") +))] impl DataProvider for ICU4XDataProviderInner where M: KeyedDataMarker + 'static, @@ -362,7 +439,28 @@ where } } -#[cfg(all(feature = "buffer_provider", feature = "any_provider"))] +#[cfg(all( + feature = "baked_provider", + not(feature = "buffer_provider"), + not(feature = "any_provider") +))] +impl DataProvider for ICU4XDataProviderInner +where + BakedProvider: DataProvider, +{ + fn load(&self, req: DataRequest) -> Result, DataError> { + match self { + ICU4XDataProviderInner::Empty => EmptyDataProvider::new().load(req), + ICU4XDataProviderInner::Baked => BakedProvider.load(req), + } + } +} + +#[cfg(all( + feature = "buffer_provider", + feature = "any_provider", + not(feature = "baked_provider") +))] impl DataProvider for ICU4XDataProviderInner where M: KeyedDataMarker + 'static, @@ -385,13 +483,81 @@ where } } -impl ICU4XDataProviderInner { - #[cfg(feature = "any_provider")] - fn from_any_provider(any_provider: impl AnyProvider + 'static) -> Self { - Self::Any(Box::new(any_provider)) +#[cfg(all( + feature = "buffer_provider", + feature = "baked_provider", + not(feature = "any_provider") +))] +impl DataProvider for ICU4XDataProviderInner +where + M: KeyedDataMarker + 'static, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + // Actual bound: + // for<'de> >::Output: Deserialize<'de>, + // Necessary workaround bound (see `yoke::trait_hack` docs): + for<'de> YokeTraitHack<>::Output>: serde::Deserialize<'de>, + BakedProvider: DataProvider, +{ + fn load(&self, req: DataRequest) -> Result, DataError> { + match self { + ICU4XDataProviderInner::Empty => EmptyDataProvider::new().load(req), + ICU4XDataProviderInner::Baked => BakedProvider.load(req), + ICU4XDataProviderInner::Buffer(buffer_provider) => { + buffer_provider.as_deserializing().load(req) + } + } } - #[cfg(feature = "buffer_provider")] - fn from_buffer_provider(buffer_provider: impl BufferProvider + 'static) -> Self { - Self::Buffer(Box::new(buffer_provider)) +} + +#[cfg(all( + feature = "any_provider", + feature = "baked_provider", + not(feature = "buffer_provider") +))] +impl DataProvider for ICU4XDataProviderInner +where + M: KeyedDataMarker + 'static, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + BakedProvider: DataProvider, +{ + fn load(&self, req: DataRequest) -> Result, DataError> { + match self { + ICU4XDataProviderInner::Empty => EmptyDataProvider::new().load(req), + ICU4XDataProviderInner::Baked => BakedProvider.load(req), + ICU4XDataProviderInner::Any(any_provider) => any_provider.as_downcasting().load(req), + } + } +} + +#[cfg(all( + feature = "any_provider", + feature = "baked_provider", + feature = "buffer_provider" +))] +impl DataProvider for ICU4XDataProviderInner +where + M: KeyedDataMarker + 'static, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, + // Actual bound: + // for<'de> >::Output: Deserialize<'de>, + // Necessary workaround bound (see `yoke::trait_hack` docs): + for<'de> YokeTraitHack<>::Output>: serde::Deserialize<'de>, + BakedProvider: DataProvider, +{ + fn load(&self, req: DataRequest) -> Result, DataError> { + match self { + ICU4XDataProviderInner::Empty => EmptyDataProvider::new().load(req), + ICU4XDataProviderInner::Baked => BakedProvider.load(req), + ICU4XDataProviderInner::Any(any_provider) => any_provider.as_downcasting().load(req), + ICU4XDataProviderInner::Buffer(buffer_provider) => { + buffer_provider.as_deserializing().load(req) + } + } } } diff --git a/provider/datagen/src/registry.rs b/provider/datagen/src/registry.rs index 665db48dbfc..94be411dc54 100644 --- a/provider/datagen/src/registry.rs +++ b/provider/datagen/src/registry.rs @@ -26,11 +26,11 @@ use icu_compactdecimal::provider::*; use icu_displaynames::provider::*; #[cfg(feature = "experimental")] use icu_relativetime::provider::*; -#[cfg(feature = "experimental")] +#[cfg(feature = "icu_segmenter")] use icu_segmenter::provider::*; macro_rules! registry { - ($($marker:ident,)+ #[cfg(feature = "experimental")] { $($exp_marker:ident,)+ }) => { + ($($marker:ident,)+ #[cfg(feature = "experimental")] { $($exp_marker:ident,)+ } #[cfg(feature = "icu_segmenter")] { $($seg_marker:ident,)+ }) => { /// List of all supported keys pub fn all_keys() -> Vec { vec![ @@ -41,6 +41,10 @@ macro_rules! registry { #[cfg(feature = "experimental")] <$exp_marker>::KEY, )+ + $( + #[cfg(feature = "icu_segmenter")] + <$seg_marker>::KEY, + )+ ] } @@ -51,10 +55,21 @@ macro_rules! registry { HelloWorldV1Marker, $($marker,)+ $($exp_marker,)+ + $($seg_marker,)+ ] ); - #[cfg(not(feature = "experimental"))] + #[cfg(all(feature = "icu_segmenter", not(feature = "experimental")))] + icu_provider::make_exportable_provider!( + crate::DatagenProvider, + [ + HelloWorldV1Marker, + $($marker,)+ + $($seg_marker,)+ + ] + ); + + #[cfg(all(not(feature = "icu_segmenter"), not(feature = "experimental")))] icu_provider::make_exportable_provider!( crate::DatagenProvider, [ @@ -81,6 +96,12 @@ macro_rules! registry { return $exp_marker.bake(env); } )+ + $( + #[cfg(feature = "icu_segmenter")] + if key == $seg_marker::KEY { + return $seg_marker.bake(env); + } + )+ unreachable!("unregistered marker") } } @@ -224,12 +245,6 @@ registry!( DateSkeletonPatternsV1Marker, RegionDisplayNamesV1Marker, LanguageDisplayNamesV1Marker, - GraphemeClusterBreakDataV1Marker, - LineBreakDataV1Marker, - LstmDataV1Marker, - SentenceBreakDataV1Marker, - UCharDictionaryBreakDataV1Marker, - WordBreakDataV1Marker, LongSecondRelativeTimeFormatDataV1Marker, ShortSecondRelativeTimeFormatDataV1Marker, NarrowSecondRelativeTimeFormatDataV1Marker, @@ -257,6 +272,16 @@ registry!( LongCompactDecimalFormatDataV1Marker, ShortCompactDecimalFormatDataV1Marker, } + #[cfg(feature = "icu_segmenter")] + { + GraphemeClusterBreakDataV1Marker, + LineBreakDataV1Marker, + LstmDataV1Marker, + SentenceBreakDataV1Marker, + UCharDictionaryBreakDataV1Marker, + WordBreakDataV1Marker, + } + ); #[test] diff --git a/provider/datagen/src/transform/mod.rs b/provider/datagen/src/transform/mod.rs index d587125ae24..905cdd10562 100644 --- a/provider/datagen/src/transform/mod.rs +++ b/provider/datagen/src/transform/mod.rs @@ -4,7 +4,7 @@ pub mod cldr; pub mod icuexport; -#[cfg(feature = "experimental")] +#[cfg(feature = "icu_segmenter")] pub mod segmenter; use icu_provider::datagen::*; diff --git a/provider/testdata/Cargo.toml b/provider/testdata/Cargo.toml index ac28e330a8a..df77be2511d 100644 --- a/provider/testdata/Cargo.toml +++ b/provider/testdata/Cargo.toml @@ -317,8 +317,6 @@ writeable = { version = "0.5", path = "../../utils/writeable", optional = true } # databake deps icu_locid = { version = "1.0.0", path = "../../components/locid" } icu_collections = { version = "1.0.0", path = "../../components/collections" } -litemap = { version = "0.6", path = "../../utils/litemap", default-features = false } -tinystr = { version = "0.7", path = "../../utils/tinystr" } zerovec = { version = "0.9", path = "../../utils/zerovec" } # databake options