Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce borrowed variants of normalizer structs #5413

Merged
8 commits were merged on Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions components/casemap/benches/casemap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion};
use icu_casemap::CaseMapper;
use icu_locale_core::langid;
#[cfg(feature = "bench")]
use icu_normalizer::DecomposingNormalizer;
use icu_normalizer::DecomposingNormalizerBorrowed;

const TEST_STRING_EN: &str = "One of the key design principles of ICU4X is to make locale data small and portable, allowing it to be pulled from multiple sources depending on the needs of the application. This document explains how that goal can be achieved.";

Expand Down Expand Up @@ -59,7 +59,7 @@ fn greek_uppercasing(_c: &mut Criterion) {
let el = langid!("el");

let iliad_lowercase = casemapper.lowercase_to_string(ILIAD, &root);
let decomposer = DecomposingNormalizer::new_nfd();
let decomposer = DecomposingNormalizerBorrowed::new_nfd();
let nfd = decomposer.normalize_utf8(ILIAD.as_bytes());
let nfd_lowercase = decomposer.normalize_utf8(iliad_lowercase.as_bytes());

Expand Down
4 changes: 2 additions & 2 deletions components/casemap/tests/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,8 @@ fn test_dutch() {

#[test]
fn test_greek_upper() {
let nfc = icu_normalizer::ComposingNormalizer::new_nfc();
let nfd = icu_normalizer::DecomposingNormalizer::new_nfd();
let nfc = icu_normalizer::ComposingNormalizerBorrowed::new_nfc();
let nfd = icu_normalizer::DecomposingNormalizerBorrowed::new_nfd();

let cm = CaseMapper::new();
let modern_greek = &langid!("el");
Expand Down
4 changes: 2 additions & 2 deletions components/casemap/tests/gen_greek_to_me.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ use icu_casemap::greek_to_me::{
self, GreekDiacritics, GreekPrecomposedLetterData, GreekVowel, PackedGreekPrecomposedLetterData,
};
use icu_casemap::CaseMapper;
use icu_normalizer::DecomposingNormalizer;
use icu_normalizer::DecomposingNormalizerBorrowed;
use icu_properties::{maps, GeneralCategoryGroup, Script};
use std::collections::BTreeMap;
use std::fmt::Write;

fn main() {
let decomposer = DecomposingNormalizer::new_nfd();
let decomposer = DecomposingNormalizerBorrowed::new_nfd();
let script = maps::script();
let gc = maps::general_category();
let cm = CaseMapper::new();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ impl ComposingTransliterator {
// would be cool to use `normalize_to` and pass Insertable, but we need to know the
// input string, which gets replaced by the normalized string.

let buf = self.0.normalize(rep.as_str_modifiable());
let buf = self.0.as_borrowed().normalize(rep.as_str_modifiable());
rep.replace_modifiable_with_str(&buf);
}
}
Expand Down Expand Up @@ -115,7 +115,7 @@ impl DecomposingTransliterator {
// would be cool to use `normalize_to` and pass Insertable, but we need to know the
// input string, which gets replaced by the normalized string.

let buf = self.0.normalize(rep.as_str_modifiable());
let buf = self.0.as_borrowed().normalize(rep.as_str_modifiable());
rep.replace_modifiable_with_str(&buf);
}
}
Expand Down
14 changes: 8 additions & 6 deletions components/normalizer/benches/canonical_composition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
use criterion::{black_box, BenchmarkId, Criterion};
use detone::IterDecomposeVietnamese;

use icu_normalizer::properties::{CanonicalComposition, CanonicalDecomposition, Decomposed};
use icu_normalizer::ComposingNormalizer;
use icu_normalizer::properties::{
CanonicalCompositionBorrowed, CanonicalDecompositionBorrowed, Decomposed,
};
use icu_normalizer::ComposingNormalizerBorrowed;

struct BenchDataContent {
pub file_name: String,
Expand All @@ -23,7 +25,7 @@ fn strip_headers(content: &str) -> String {
}

fn normalizer_bench_data() -> [BenchDataContent; 16] {
let nfc_normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
let nfc_normalizer = ComposingNormalizerBorrowed::new_nfc();

return [
BenchDataContent {
Expand Down Expand Up @@ -147,7 +149,7 @@ fn normalizer_bench_data() -> [BenchDataContent; 16] {
}

fn function_under_bench(
canonical_composer: &CanonicalComposition,
canonical_composer: &CanonicalCompositionBorrowed,
composable_points: &[(char, char)],
) {
for pair in composable_points.iter() {
Expand All @@ -159,7 +161,7 @@ pub fn criterion_benchmark(criterion: &mut Criterion) {
let group_name = "canonical_composition";
let mut group = criterion.benchmark_group(group_name);

let composer = CanonicalComposition::new();
let composer = CanonicalCompositionBorrowed::new();

for bench_data_content in black_box(normalizer_bench_data()) {
group.bench_function(
Expand All @@ -172,7 +174,7 @@ pub fn criterion_benchmark(criterion: &mut Criterion) {
}

fn decompose_data(nfc: &str) -> Vec<(char, char)> {
let decomposer = CanonicalDecomposition::new();
let decomposer = CanonicalDecompositionBorrowed::new();
nfc.chars()
.map(|c| decomposer.decompose(c))
.filter_map(|decomposed| {
Expand Down
21 changes: 12 additions & 9 deletions components/normalizer/benches/canonical_decomposition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

use criterion::{black_box, BenchmarkId, Criterion};

use icu_normalizer::properties::CanonicalDecomposition;
use icu_normalizer::{ComposingNormalizer, DecomposingNormalizer};
use icu_normalizer::properties::CanonicalDecompositionBorrowed;
use icu_normalizer::{ComposingNormalizerBorrowed, DecomposingNormalizerBorrowed};

struct BenchDataContent {
pub file_name: String,
Expand All @@ -25,10 +25,10 @@ fn strip_headers(content: &str) -> String {
}

fn normalizer_bench_data() -> [BenchDataContent; 15] {
let nfc_normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
let nfd_normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
let nfkc_normalizer: ComposingNormalizer = ComposingNormalizer::new_nfkc();
let nfkd_normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
let nfc_normalizer = ComposingNormalizerBorrowed::new_nfc();
let nfd_normalizer = DecomposingNormalizerBorrowed::new_nfd();
let nfkc_normalizer = ComposingNormalizerBorrowed::new_nfkc();
let nfkd_normalizer = DecomposingNormalizerBorrowed::new_nfkd();

let content_latin: (&str, &str) = (
"TestNames_Latin",
Expand Down Expand Up @@ -116,7 +116,7 @@ fn normalizer_bench_data() -> [BenchDataContent; 15] {

#[cfg(debug_assertions)]
fn function_under_bench(
_canonical_decomposer: &CanonicalDecomposition,
_canonical_decomposer: &CanonicalDecompositionBorrowed,
_decomposable_points: &str,
) {
// using debug assertion fails some test.
Expand All @@ -125,7 +125,10 @@ fn function_under_bench(
}

#[cfg(not(debug_assertions))]
fn function_under_bench(canonical_decomposer: &CanonicalDecomposition, decomposable_points: &str) {
fn function_under_bench(
canonical_decomposer: &CanonicalDecompositionBorrowed,
decomposable_points: &str,
) {
decomposable_points.chars().for_each(|point| {
canonical_decomposer.decompose(point);
});
Expand All @@ -135,7 +138,7 @@ pub fn criterion_benchmark(criterion: &mut Criterion) {
let group_name = "canonical_decomposition";
let mut group = criterion.benchmark_group(group_name);

let decomposer = CanonicalDecomposition::new();
let decomposer = CanonicalDecompositionBorrowed::new();

for bench_data_content in black_box(normalizer_bench_data()) {
group.bench_function(
Expand Down
16 changes: 8 additions & 8 deletions components/normalizer/benches/composing_normalizer_nfc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use criterion::{black_box, BenchmarkId, Criterion};

use icu_normalizer::{ComposingNormalizer, DecomposingNormalizer};
use icu_normalizer::{ComposingNormalizerBorrowed, DecomposingNormalizerBorrowed};

struct BenchDataContent {
pub file_name: String,
Expand All @@ -28,10 +28,10 @@ fn strip_headers(content: &str) -> String {
}

fn normalizer_bench_data() -> [BenchDataContent; 15] {
let nfc_normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
let nfd_normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
let nfkc_normalizer: ComposingNormalizer = ComposingNormalizer::new_nfkc();
let nfkd_normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
let nfc_normalizer = ComposingNormalizerBorrowed::new_nfc();
let nfd_normalizer = DecomposingNormalizerBorrowed::new_nfd();
let nfkc_normalizer = ComposingNormalizerBorrowed::new_nfkc();
let nfkd_normalizer = DecomposingNormalizerBorrowed::new_nfkd();

let content_latin: (&str, &str) = (
"TestNames_Latin",
Expand Down Expand Up @@ -127,18 +127,18 @@ fn normalizer_bench_data() -> [BenchDataContent; 15] {
})
}

fn function_under_bench(normalizer: &ComposingNormalizer, text: &str) {
fn function_under_bench(normalizer: &ComposingNormalizerBorrowed, text: &str) {
normalizer.normalize(text);
}

fn function_under_bench_utf16(normalizer: &ComposingNormalizer, text: &[u16]) {
fn function_under_bench_utf16(normalizer: &ComposingNormalizerBorrowed, text: &[u16]) {
normalizer.normalize_utf16(text);
}

pub fn criterion_benchmark(criterion: &mut Criterion) {
let group_name = "composing_normalizer_nfc";

let normalizer_under_bench: ComposingNormalizer = ComposingNormalizer::new_nfc();
let normalizer_under_bench = ComposingNormalizerBorrowed::new_nfc();

let mut group = criterion.benchmark_group(group_name);

Expand Down
16 changes: 8 additions & 8 deletions components/normalizer/benches/composing_normalizer_nfkc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use criterion::{black_box, BenchmarkId, Criterion};

use icu_normalizer::{ComposingNormalizer, DecomposingNormalizer};
use icu_normalizer::{ComposingNormalizerBorrowed, DecomposingNormalizerBorrowed};

struct BenchDataContent {
pub file_name: String,
Expand All @@ -28,10 +28,10 @@ fn strip_headers(content: &str) -> String {
}

fn normalizer_bench_data() -> [BenchDataContent; 15] {
let nfc_normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
let nfd_normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
let nfkc_normalizer: ComposingNormalizer = ComposingNormalizer::new_nfkc();
let nfkd_normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
let nfc_normalizer = ComposingNormalizerBorrowed::new_nfc();
let nfd_normalizer = DecomposingNormalizerBorrowed::new_nfd();
let nfkc_normalizer = ComposingNormalizerBorrowed::new_nfkc();
let nfkd_normalizer = DecomposingNormalizerBorrowed::new_nfkd();

let content_latin: (&str, &str) = (
"TestNames_Latin",
Expand Down Expand Up @@ -127,18 +127,18 @@ fn normalizer_bench_data() -> [BenchDataContent; 15] {
})
}

fn function_under_bench(normalizer: &ComposingNormalizer, text: &str) {
fn function_under_bench(normalizer: &ComposingNormalizerBorrowed, text: &str) {
normalizer.normalize(text);
}

fn function_under_bench_u16(normalizer: &ComposingNormalizer, text: &[u16]) {
fn function_under_bench_u16(normalizer: &ComposingNormalizerBorrowed, text: &[u16]) {
normalizer.normalize_utf16(text);
}

pub fn criterion_benchmark(criterion: &mut Criterion) {
let group_name = "composing_normalizer_nfkc";

let normalizer_under_bench: ComposingNormalizer = ComposingNormalizer::new_nfkc();
let normalizer_under_bench = ComposingNormalizerBorrowed::new_nfkc();

let mut group = criterion.benchmark_group(group_name);

Expand Down
16 changes: 8 additions & 8 deletions components/normalizer/benches/decomposing_normalizer_nfd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use criterion::{black_box, BenchmarkId, Criterion};

use icu_normalizer::{ComposingNormalizer, DecomposingNormalizer};
use icu_normalizer::{ComposingNormalizerBorrowed, DecomposingNormalizerBorrowed};

struct BenchDataContent {
pub file_name: String,
Expand All @@ -28,10 +28,10 @@ fn strip_headers(content: &str) -> String {
}

fn normalizer_bench_data() -> [BenchDataContent; 15] {
let nfc_normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
let nfd_normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
let nfkc_normalizer: ComposingNormalizer = ComposingNormalizer::new_nfkc();
let nfkd_normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
let nfc_normalizer = ComposingNormalizerBorrowed::new_nfc();
let nfd_normalizer = DecomposingNormalizerBorrowed::new_nfd();
let nfkc_normalizer = ComposingNormalizerBorrowed::new_nfkc();
let nfkd_normalizer = DecomposingNormalizerBorrowed::new_nfkd();

let content_latin: (&str, &str) = (
"TestNames_Latin",
Expand Down Expand Up @@ -127,18 +127,18 @@ fn normalizer_bench_data() -> [BenchDataContent; 15] {
})
}

fn function_under_bench(normalizer: &DecomposingNormalizer, text: &str) {
fn function_under_bench(normalizer: &DecomposingNormalizerBorrowed, text: &str) {
normalizer.normalize(text);
}

fn function_under_bench_u16(normalizer: &DecomposingNormalizer, text: &[u16]) {
fn function_under_bench_u16(normalizer: &DecomposingNormalizerBorrowed, text: &[u16]) {
normalizer.normalize_utf16(text);
}

pub fn criterion_benchmark(criterion: &mut Criterion) {
let group_name = "decomposing_normalizer_nfd";

let normalizer_under_bench: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
let normalizer_under_bench = DecomposingNormalizerBorrowed::new_nfd();

let mut group = criterion.benchmark_group(group_name);

Expand Down
16 changes: 8 additions & 8 deletions components/normalizer/benches/decomposing_normalizer_nfkd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use criterion::{black_box, BenchmarkId, Criterion};

use icu_normalizer::{ComposingNormalizer, DecomposingNormalizer};
use icu_normalizer::{ComposingNormalizerBorrowed, DecomposingNormalizerBorrowed};

struct BenchDataContent {
pub file_name: String,
Expand All @@ -28,10 +28,10 @@ fn strip_headers(content: &str) -> String {
}

fn normalizer_bench_data() -> [BenchDataContent; 15] {
let nfc_normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
let nfd_normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
let nfkc_normalizer: ComposingNormalizer = ComposingNormalizer::new_nfkc();
let nfkd_normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
let nfc_normalizer = ComposingNormalizerBorrowed::new_nfc();
let nfd_normalizer = DecomposingNormalizerBorrowed::new_nfd();
let nfkc_normalizer = ComposingNormalizerBorrowed::new_nfkc();
let nfkd_normalizer = DecomposingNormalizerBorrowed::new_nfkd();

let content_latin: (&str, &str) = (
"TestNames_Latin",
Expand Down Expand Up @@ -127,18 +127,18 @@ fn normalizer_bench_data() -> [BenchDataContent; 15] {
})
}

fn function_under_bench(normalizer: &DecomposingNormalizer, text: &str) {
fn function_under_bench(normalizer: &DecomposingNormalizerBorrowed, text: &str) {
normalizer.normalize(text);
}

fn function_under_bench_u16(normalizer: &DecomposingNormalizer, text: &[u16]) {
fn function_under_bench_u16(normalizer: &DecomposingNormalizerBorrowed, text: &[u16]) {
normalizer.normalize_utf16(text);
}

pub fn criterion_benchmark(criterion: &mut Criterion) {
let group_name = "decomposing_normalizer_nfkd";

let normalizer_under_bench: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
let normalizer_under_bench = DecomposingNormalizerBorrowed::new_nfkd();

let mut group = criterion.benchmark_group(group_name);
for bench_data_content in black_box(normalizer_bench_data()) {
Expand Down
12 changes: 6 additions & 6 deletions components/normalizer/fuzz/fuzz_targets/compare_self.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@

#![no_main]
use libfuzzer_sys::fuzz_target;
use icu_normalizer::ComposingNormalizer;
use icu_normalizer::DecomposingNormalizer;
use icu_normalizer::ComposingNormalizerBorrowed;
use icu_normalizer::DecomposingNormalizerBorrowed;
use utf16_iter::Utf16CharsEx;
use utf8_iter::Utf8CharsEx;

fuzz_target!(|data: &[u8]| {
let well_formed = String::from_utf8_lossy(data);
let utf16: Vec<u16> = well_formed.encode_utf16().collect();

let nfd = DecomposingNormalizer::new_nfd();
let nfkd = DecomposingNormalizer::new_nfkd();
let nfc = ComposingNormalizer::new_nfc();
let nfkc = ComposingNormalizer::new_nfkc();
let nfd = DecomposingNormalizerBorrowed::new_nfd();
let nfkd = DecomposingNormalizerBorrowed::new_nfkd();
let nfc = ComposingNormalizerBorrowed::new_nfc();
let nfkc = ComposingNormalizerBorrowed::new_nfkc();

// Not macroizing these to get nice line numbers by default.

Expand Down
4 changes: 2 additions & 2 deletions components/normalizer/fuzz/fuzz_targets/compare_utf16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
use libfuzzer_sys::fuzz_target;
use rust_icu_ustring::UChar;
use rust_icu_unorm2::UNormalizer;
use icu_normalizer::DecomposingNormalizer;
use icu_normalizer::DecomposingNormalizerBorrowed;

// Sadly, UNormalizer doesn't take slices
fn slice_to_icu4c(slice: &[u16]) -> UChar {
Expand All @@ -23,7 +23,7 @@ fn slice_from_icu4c(string: &UChar) -> &[u16] {
}

fn normalize_icu4x(buffer: &[u16]) -> Vec<u16> {
let normalizer = DecomposingNormalizer::new_nfd();
let normalizer = DecomposingNormalizerBorrowed::new_nfd();
normalizer.normalize_utf16(buffer)
}

Expand Down
Loading
Loading