From 47ca9c1225906e592b85cb76b6b8af8fb26ee275 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Wed, 29 Nov 2023 19:32:16 -0700
Subject: [PATCH 01/21] Add debug_unwrap helper macro

---
 experimental/zerotrie/src/helpers.rs | 21 +++++++++++++++++++++
 experimental/zerotrie/src/lib.rs     |  1 +
 experimental/zerotrie/src/varint.rs  | 10 ++--------
 3 files changed, 24 insertions(+), 8 deletions(-)
diff --git a/experimental/zerotrie/src/helpers.rs b/experimental/zerotrie/src/helpers.rs
index e7983b490e1..43baf7fff81 100644
--- a/experimental/zerotrie/src/helpers.rs
+++ b/experimental/zerotrie/src/helpers.rs
@@ -54,3 +54,24 @@ pub(crate) fn debug_get_range(slice: &[u8], range: Range<usize>) -> Option<&[u8]
         }
     }
 }
+
+macro_rules! debug_unwrap {
+    ($expr:expr, return $retval:expr, $($arg:tt)+) => {
+        match $expr {
+            Some(x) => x,
+            None => {
+                debug_assert!(false, $($arg)*);
+                return $retval;
+            }
+        }
+    };
+    ($expr:expr, return $retval:expr) => {
+        debug_unwrap!($expr, return $retval, "invalid trie")
+    };
+    ($expr:expr, $($arg:tt)+) => {
+        debug_unwrap!($expr, return (), $($arg)*)
+    };
+    ($expr:expr) => {
+        debug_unwrap!($expr, return ())
+    };
+}
diff --git a/experimental/zerotrie/src/lib.rs b/experimental/zerotrie/src/lib.rs
index 3fa8ed6ee27..817093d2f36 100644
--- a/experimental/zerotrie/src/lib.rs
+++ b/experimental/zerotrie/src/lib.rs
@@ -57,6 +57,7 @@ extern crate alloc;
 mod builder;
 mod byte_phf;
 mod error;
+#[macro_use]
 mod helpers;
 mod reader;
 #[cfg(feature = "serde")]
diff --git a/experimental/zerotrie/src/varint.rs b/experimental/zerotrie/src/varint.rs
index 420bc4d9c13..5364aaa8ba4 100644
--- a/experimental/zerotrie/src/varint.rs
+++ b/experimental/zerotrie/src/varint.rs
@@ -53,10 +53,7 @@ pub const fn read_varint_meta2(start: u8, remainder: &[u8]) -> Option<(usize, &[
     if (start & 0b00100000) != 0 {
         loop {
             let next;
-            (next, remainder) = match remainder.split_first() {
-                Some(t) => t,
-                None => return None,
-            };
+            (next, remainder) = debug_unwrap!(remainder.split_first(), return None);
             // Note: value << 7 could drop high bits. The first addition can't overflow.
             // The second addition could overflow; in such a case we just inform the
             // developer via the debug assertion.
@@ -78,10 +75,7 @@ pub const fn read_varint_meta3(start: u8, remainder: &[u8]) -> Option<(usize, &[
     if (start & 0b00010000) != 0 {
         loop {
             let next;
-            (next, remainder) = match remainder.split_first() {
-                Some(t) => t,
-                None => return None,
-            };
+            (next, remainder) = debug_unwrap!(remainder.split_first(), return None);
             // Note: value << 7 could drop high bits. The first addition can't overflow.
             // The second addition could overflow; in such a case we just inform the
             // developer via the debug assertion.

From 92a420763ba77940ccc96577f4b8a5af667dd5e9 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Wed, 29 Nov 2023 19:50:39 -0700
Subject: [PATCH 02/21] Clean up varint functions to not double assert

---
 experimental/zerotrie/src/helpers.rs | 12 +++++++
 experimental/zerotrie/src/reader.rs  | 16 ++++-----
 experimental/zerotrie/src/varint.rs  | 53 +++++++++++++++-------------
 3 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/experimental/zerotrie/src/helpers.rs b/experimental/zerotrie/src/helpers.rs
index 43baf7fff81..73393b18a41 100644
--- a/experimental/zerotrie/src/helpers.rs
+++ b/experimental/zerotrie/src/helpers.rs
@@ -68,6 +68,18 @@ macro_rules! debug_unwrap {
     ($expr:expr, return $retval:expr) => {
         debug_unwrap!($expr, return $retval, "invalid trie")
     };
+    ($expr:expr, break, $($arg:tt)+) => {
+        match $expr {
+            Some(x) => x,
+            None => {
+                debug_assert!(false, $($arg)*);
+                break;
+            }
+        }
+    };
+    ($expr:expr, break) => {
+        debug_unwrap!($expr, break, "invalid trie")
+    };
     ($expr:expr, $($arg:tt)+) => {
         debug_unwrap!($expr, return (), $($arg)*)
     };
diff --git a/experimental/zerotrie/src/reader.rs b/experimental/zerotrie/src/reader.rs
index ba0345538cf..ad48f0fa480 100644
--- a/experimental/zerotrie/src/reader.rs
+++ b/experimental/zerotrie/src/reader.rs
@@ -311,8 +311,8 @@ pub fn get_bsearch_only(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
         let byte_type = byte_type(*b);
         (x, trie) = match byte_type {
             NodeType::Ascii => (0, trie),
-            NodeType::Span | NodeType::Value => read_varint_meta3(*b, trie)?,
-            NodeType::Branch => read_varint_meta2(*b, trie)?,
+            NodeType::Span | NodeType::Value => read_varint_meta3(*b, trie),
+            NodeType::Branch => read_varint_meta2(*b, trie),
         };
         if let Some((c, temp)) = ascii.split_first() {
             if matches!(byte_type, NodeType::Ascii) {
@@ -375,8 +375,8 @@ pub fn get_phf_limited(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
         let byte_type = byte_type(*b);
         (x, trie) = match byte_type {
             NodeType::Ascii => (0, trie),
-            NodeType::Span | NodeType::Value => read_varint_meta3(*b, trie)?,
-            NodeType::Branch => read_varint_meta2(*b, trie)?,
+            NodeType::Span | NodeType::Value => read_varint_meta3(*b, trie),
+            NodeType::Branch => read_varint_meta2(*b, trie),
         };
         if let Some((c, temp)) = ascii.split_first() {
             if matches!(byte_type, NodeType::Ascii) {
@@ -445,8 +445,8 @@ pub fn get_phf_extended(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
         let byte_type = byte_type(*b);
         (x, trie) = match byte_type {
             NodeType::Ascii => (0, trie),
-            NodeType::Span | NodeType::Value => read_varint_meta3(*b, trie)?,
-            NodeType::Branch => read_varint_meta2(*b, trie)?,
+            NodeType::Span | NodeType::Value => read_varint_meta3(*b, trie),
+            NodeType::Branch => read_varint_meta2(*b, trie),
         };
         if let Some((c, temp)) = ascii.split_first() {
             if matches!(byte_type, NodeType::Ascii) {
@@ -554,8 +554,8 @@ impl<'a> Iterator for ZeroTrieIterator<'a> {
             }
             (x, trie) = match byte_type {
                 NodeType::Ascii => (0, trie),
-                NodeType::Span | NodeType::Value => read_varint_meta3(*b, trie)?,
-                NodeType::Branch => read_varint_meta2(*b, trie)?,
+                NodeType::Span | NodeType::Value => read_varint_meta3(*b, trie),
+                NodeType::Branch => read_varint_meta2(*b, trie),
             };
             if matches!(byte_type, NodeType::Span) {
                 (span, trie) = debug_split_at(trie, x)?;
diff --git a/experimental/zerotrie/src/varint.rs b/experimental/zerotrie/src/varint.rs
index 5364aaa8ba4..5af0ea664be 100644
--- a/experimental/zerotrie/src/varint.rs
+++ b/experimental/zerotrie/src/varint.rs
@@ -47,13 +47,16 @@ use crate::builder::nonconst::TrieBuilderStore;
 /// Reads a varint with 2 bits of metadata in the lead byte.
 ///
 /// Returns the varint value and a subslice of `remainder` with the varint bytes removed.
-pub const fn read_varint_meta2(start: u8, remainder: &[u8]) -> Option<(usize, &[u8])> {
+///
+/// If the varint spills off the end of the slice, a debug assertion will fail,
+/// and the function will return the value up to that point.
+pub const fn read_varint_meta2(start: u8, remainder: &[u8]) -> (usize, &[u8]) {
     let mut value = (start & 0b00011111) as usize;
     let mut remainder = remainder;
     if (start & 0b00100000) != 0 {
         loop {
             let next;
-            (next, remainder) = debug_unwrap!(remainder.split_first(), return None);
+            (next, remainder) = debug_unwrap!(remainder.split_first(), break, "invalid varint");
             // Note: value << 7 could drop high bits. The first addition can't overflow.
             // The second addition could overflow; in such a case we just inform the
             // developer via the debug assertion.
@@ -63,19 +66,22 @@ pub const fn read_varint_meta2(start: u8, remainder: &[u8]) -> Option<(usize, &[
             }
         }
     }
-    Some((value, remainder))
+    (value, remainder)
 }
 
 /// Reads a varint with 3 bits of metadata in the lead byte.
 ///
 /// Returns the varint value and a subslice of `remainder` with the varint bytes removed.
-pub const fn read_varint_meta3(start: u8, remainder: &[u8]) -> Option<(usize, &[u8])> {
+///
+/// If the varint spills off the end of the slice, a debug assertion will fail,
+/// and the function will return the value up to that point.
+pub const fn read_varint_meta3(start: u8, remainder: &[u8]) -> (usize, &[u8]) {
     let mut value = (start & 0b00001111) as usize;
     let mut remainder = remainder;
     if (start & 0b00010000) != 0 {
         loop {
             let next;
-            (next, remainder) = debug_unwrap!(remainder.split_first(), return None);
+            (next, remainder) = debug_unwrap!(remainder.split_first(), break, "invalid varint");
             // Note: value << 7 could drop high bits. The first addition can't overflow.
             // The second addition could overflow; in such a case we just inform the
             // developer via the debug assertion.
@@ -85,7 +91,7 @@ pub const fn read_varint_meta3(start: u8, remainder: &[u8]) -> Option<(usize, &[
             }
         }
     }
-    Some((value, remainder))
+    (value, remainder)
 }
 
 /// Reads and removes a varint with 3 bits of metadata from a [`TrieBuilderStore`].
@@ -356,7 +362,7 @@ mod tests {
     #[test]
     fn test_read() {
         for cas in CASES {
-            let recovered = read_varint_meta2(cas.bytes[0], &cas.bytes[1..]).unwrap();
+            let recovered = read_varint_meta2(cas.bytes[0], &cas.bytes[1..]);
             assert_eq!(recovered, (cas.value, cas.remainder), "{:?}", cas);
         }
     }
@@ -377,7 +383,7 @@ mod tests {
                 "{:?}",
                 cas
             );
-            let recovered = read_varint_meta2(cas.bytes[0], &cas.bytes[1..]).unwrap();
+            let recovered = read_varint_meta2(cas.bytes[0], &cas.bytes[1..]);
             assert_eq!(recovered, (cas.value, cas.remainder), "{:?}", cas);
             let write_bytes = write_varint_meta2(cas.value);
             assert_eq!(
@@ -395,8 +401,7 @@ mod tests {
         while i < MAX_VARINT {
             let bytes = write_varint_meta2(i);
             let recovered = read_varint_meta2(bytes.as_slice()[0], &bytes.as_slice()[1..]);
-            assert!(recovered.is_some(), "{:?}", i);
-            assert_eq!(i, recovered.unwrap().0, "{:?}", bytes.as_slice());
+            assert_eq!(i, recovered.0, "{:?}", bytes.as_slice());
             i <<= 1;
             i += 1;
         }
@@ -408,8 +413,7 @@ mod tests {
         while i < MAX_VARINT {
             let bytes = write_varint_meta3(i);
             let recovered = read_varint_meta3(bytes.as_slice()[0], &bytes.as_slice()[1..]);
-            assert!(recovered.is_some(), "{:?}", i);
-            assert_eq!(i, recovered.unwrap().0, "{:?}", bytes.as_slice());
+            assert_eq!(i, recovered.0, "{:?}", bytes.as_slice());
             i <<= 1;
             i += 1;
         }
@@ -427,8 +431,7 @@ mod tests {
         let (recovered_value, remainder) = read_varint_meta2(
             *write_bytes.as_const_slice().first().unwrap(),
             subarray.as_slice(),
-        )
-        .unwrap();
+        );
         assert!(remainder.is_empty());
         assert_eq!(recovered_value, MAX_VARINT);
         assert_eq!(
@@ -453,7 +456,7 @@ mod tests {
         let write_bytes = write_varint_meta3(MAX_VARINT);
         assert_eq!(write_bytes.len(), MAX_VARINT_LENGTH);
         let (lead, trailing) = write_bytes.as_slice().split_first().unwrap();
-        let (recovered_value, remainder) = read_varint_meta3(*lead, trailing).unwrap();
+        let (recovered_value, remainder) = read_varint_meta3(*lead, trailing);
         assert!(remainder.is_empty());
         assert_eq!(recovered_value, MAX_VARINT);
         assert_eq!(
@@ -477,18 +480,18 @@ mod tests {
     fn test_latent_values() {
         // Same values documented in the module docs: M=2
         let m2 = read_varint_meta2;
-        assert_eq!(m2(0, &[]).unwrap().0, 0);
-        assert_eq!(m2(0x20, &[0x00]).unwrap().0, 32);
-        assert_eq!(m2(0x20, &[0x80, 0x00]).unwrap().0, 4128);
-        assert_eq!(m2(0x20, &[0x80, 0x80, 0x00]).unwrap().0, 528416);
-        assert_eq!(m2(0x20, &[0x80, 0x80, 0x80, 0x00]).unwrap().0, 67637280);
+        assert_eq!(m2(0, &[]).0, 0);
+        assert_eq!(m2(0x20, &[0x00]).0, 32);
+        assert_eq!(m2(0x20, &[0x80, 0x00]).0, 4128);
+        assert_eq!(m2(0x20, &[0x80, 0x80, 0x00]).0, 528416);
+        assert_eq!(m2(0x20, &[0x80, 0x80, 0x80, 0x00]).0, 67637280);
 
         // Same values documented in the module docs: M=3
         let m3 = read_varint_meta3;
-        assert_eq!(m3(0, &[]).unwrap().0, 0);
-        assert_eq!(m3(0x10, &[0x00]).unwrap().0, 16);
-        assert_eq!(m3(0x10, &[0x80, 0x00]).unwrap().0, 2064);
-        assert_eq!(m3(0x10, &[0x80, 0x80, 0x00]).unwrap().0, 264208);
-        assert_eq!(m3(0x10, &[0x80, 0x80, 0x80, 0x00]).unwrap().0, 33818640);
+        assert_eq!(m3(0, &[]).0, 0);
+        assert_eq!(m3(0x10, &[0x00]).0, 16);
+        assert_eq!(m3(0x10, &[0x80, 0x00]).0, 2064);
+        assert_eq!(m3(0x10, &[0x80, 0x80, 0x00]).0, 264208);
+        assert_eq!(m3(0x10, &[0x80, 0x80, 0x80, 0x00]).0, 33818640);
     }
 }

From 512ebc646af90906cc1d340f10d578b61f427196 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Wed, 29 Nov 2023 20:09:05 -0700
Subject: [PATCH 03/21] Change debug_split_at to return a value in the GIGO
 case

---
 experimental/zerotrie/src/byte_phf/mod.rs |  8 +++----
 experimental/zerotrie/src/helpers.rs      | 11 +++++-----
 experimental/zerotrie/src/reader.rs       | 26 +++++++++++------------
 3 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/experimental/zerotrie/src/byte_phf/mod.rs b/experimental/zerotrie/src/byte_phf/mod.rs
index cbbb01a175d..bdfad27b977 100644
--- a/experimental/zerotrie/src/byte_phf/mod.rs
+++ b/experimental/zerotrie/src/byte_phf/mod.rs
@@ -213,7 +213,7 @@ where
         if n == 0 {
             return None;
         }
-        let (qq, eks) = debug_split_at(buffer, n)?;
+        let (qq, eks) = debug_split_at(buffer, n);
         debug_assert_eq!(qq.len(), eks.len());
         let q = debug_get(qq, f1(key, *p, n))?;
         let l2 = f2(key, q, n);
@@ -232,9 +232,7 @@ where
     /// Get an iterator over the keys in the order in which they are stored in the map.
     pub fn keys(&self) -> &[u8] {
         let n = self.num_items();
-        debug_split_at(self.0.as_ref(), 1 + n)
-            .map(|s| s.1)
-            .unwrap_or(&[])
+        debug_split_at(self.0.as_ref(), 1 + n).1
     }
     /// Diagnostic function that returns `p` and the maximum value of `q`
     #[cfg(test)]
@@ -244,7 +242,7 @@ where
         if n == 0 {
             return None;
         }
-        let (qq, _) = debug_split_at(buffer, n)?;
+        let (qq, _) = debug_split_at(buffer, n);
         Some((*p, *qq.iter().max().unwrap()))
     }
     /// Returns the map as bytes. The map can be recovered with [`Self::from_store`]
diff --git a/experimental/zerotrie/src/helpers.rs b/experimental/zerotrie/src/helpers.rs
index 73393b18a41..b4bbb08815f 100644
--- a/experimental/zerotrie/src/helpers.rs
+++ b/experimental/zerotrie/src/helpers.rs
@@ -4,18 +4,17 @@
 
 use core::ops::Range;
 
-/// Like slice::split_at but returns an Option instead of panicking.
-///
-/// Debug-panics if `mid` is out of range.
+/// Like slice::split_at but debug-panics and returns an empty second slice
+/// if the index is out of range.
 #[inline]
-pub(crate) fn debug_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
+pub(crate) fn debug_split_at(slice: &[u8], mid: usize) -> (&[u8], &[u8]) {
     if mid > slice.len() {
         debug_assert!(false, "debug_split_at: index expected to be in range");
-        None
+        (slice, &[])
     } else {
         // Note: We're trusting the compiler to inline this and remove the assertion
         // hiding on the top of slice::split_at: `assert(mid <= self.len())`
-        Some(slice.split_at(mid))
+        slice.split_at(mid)
     }
 }
 
diff --git a/experimental/zerotrie/src/reader.rs b/experimental/zerotrie/src/reader.rs
index ad48f0fa480..56ec575ab50 100644
--- a/experimental/zerotrie/src/reader.rs
+++ b/experimental/zerotrie/src/reader.rs
@@ -224,7 +224,7 @@ fn get_branch(mut trie: &[u8], i: usize, n: usize, mut w: usize) -> Option<&[u8]
     let mut q = 0usize;
     loop {
         let indices;
-        (indices, trie) = debug_split_at(trie, n - 1)?;
+        (indices, trie) = debug_split_at(trie, n - 1);
         p = (p << 8)
             + if i == 0 {
                 0
@@ -247,7 +247,7 @@ fn get_branch(mut trie: &[u8], i: usize, n: usize, mut w: usize) -> Option<&[u8]
 #[inline]
 fn get_branch_w0(mut trie: &[u8], i: usize, n: usize) -> Option<&[u8]> {
     let indices;
-    (indices, trie) = debug_split_at(trie, n - 1)?;
+    (indices, trie) = debug_split_at(trie, n - 1);
     let p = if i == 0 {
         0
     } else {
@@ -331,7 +331,7 @@ pub fn get_bsearch_only(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             }
             if matches!(byte_type, NodeType::Span) {
                 let (trie_span, ascii_span);
-                (trie_span, trie) = debug_split_at(trie, x)?;
+                (trie_span, trie) = debug_split_at(trie, x);
                 (ascii_span, ascii) = maybe_split_at(ascii, x)?;
                 if trie_span == ascii_span {
                     // Matched a byte span
@@ -348,7 +348,7 @@ pub fn get_bsearch_only(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             let w = w & 0x3;
             let x = if x == 0 { 256 } else { x };
             // Always use binary search
-            (search, trie) = debug_split_at(trie, x)?;
+            (search, trie) = debug_split_at(trie, x);
             i = search.binary_search(c).ok()?;
             trie = if w == 0 {
                 get_branch_w0(trie, i, x)
@@ -395,7 +395,7 @@ pub fn get_phf_limited(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             }
             if matches!(byte_type, NodeType::Span) {
                 let (trie_span, ascii_span);
-                (trie_span, trie) = debug_split_at(trie, x)?;
+                (trie_span, trie) = debug_split_at(trie, x);
                 (ascii_span, ascii) = maybe_split_at(ascii, x)?;
                 if trie_span == ascii_span {
                     // Matched a byte span
@@ -413,11 +413,11 @@ pub fn get_phf_limited(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             let x = if x == 0 { 256 } else { x };
             if x < 16 {
                 // binary search
-                (search, trie) = debug_split_at(trie, x)?;
+                (search, trie) = debug_split_at(trie, x);
                 i = search.binary_search(c).ok()?;
             } else {
                 // phf
-                (search, trie) = debug_split_at(trie, x * 2 + 1)?;
+                (search, trie) = debug_split_at(trie, x * 2 + 1);
                 i = PerfectByteHashMap::from_store(search).get(*c)?;
             }
             trie = if w == 0 {
@@ -465,7 +465,7 @@ pub fn get_phf_extended(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             }
             if matches!(byte_type, NodeType::Span) {
                 let (trie_span, ascii_span);
-                (trie_span, trie) = debug_split_at(trie, x)?;
+                (trie_span, trie) = debug_split_at(trie, x);
                 (ascii_span, ascii) = maybe_split_at(ascii, x)?;
                 if trie_span == ascii_span {
                     // Matched a byte span
@@ -480,11 +480,11 @@ pub fn get_phf_extended(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             let x = if x == 0 { 256 } else { x };
             if x < 16 {
                 // binary search
-                (search, trie) = debug_split_at(trie, x)?;
+                (search, trie) = debug_split_at(trie, x);
                 i = search.binary_search(c).ok()?;
             } else {
                 // phf
-                (search, trie) = debug_split_at(trie, x * 2 + 1)?;
+                (search, trie) = debug_split_at(trie, x * 2 + 1);
                 i = PerfectByteHashMap::from_store(search).get(*c)?;
             }
             trie = if w == 0 {
@@ -558,7 +558,7 @@ impl<'a> Iterator for ZeroTrieIterator<'a> {
                 NodeType::Branch => read_varint_meta2(*b, trie),
             };
             if matches!(byte_type, NodeType::Span) {
-                (span, trie) = debug_split_at(trie, x)?;
+                (span, trie) = debug_split_at(trie, x);
                 string.extend(span);
                 continue;
             }
@@ -578,11 +578,11 @@ impl<'a> Iterator for ZeroTrieIterator<'a> {
             }
             let byte = if x < 16 || !self.use_phf {
                 // binary search
-                (search, trie) = debug_split_at(trie, x)?;
+                (search, trie) = debug_split_at(trie, x);
                 debug_get(search, branch_idx)?
             } else {
                 // phf
-                (search, trie) = debug_split_at(trie, x * 2 + 1)?;
+                (search, trie) = debug_split_at(trie, x * 2 + 1);
                 debug_get(search, branch_idx + x + 1)?
             };
             string.push(byte);

From ed19daa9456ee1cdbeaf3a5d3284a9400f1d77f7 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Wed, 29 Nov 2023 23:06:13 -0600
Subject: [PATCH 04/21] Refactor helpers to be trait-added functions

---
 experimental/zerotrie/src/byte_phf/mod.rs | 14 ++--
 experimental/zerotrie/src/helpers.rs      | 83 ++++++++++++-----------
 experimental/zerotrie/src/reader.rs       | 58 ++++++++--------
 3 files changed, 78 insertions(+), 77 deletions(-)

diff --git a/experimental/zerotrie/src/byte_phf/mod.rs b/experimental/zerotrie/src/byte_phf/mod.rs
index bdfad27b977..e1623641012 100644
--- a/experimental/zerotrie/src/byte_phf/mod.rs
+++ b/experimental/zerotrie/src/byte_phf/mod.rs
@@ -213,12 +213,12 @@ where
         if n == 0 {
             return None;
         }
-        let (qq, eks) = debug_split_at(buffer, n);
+        let (qq, eks) = buffer.debug_split_at(n);
         debug_assert_eq!(qq.len(), eks.len());
-        let q = debug_get(qq, f1(key, *p, n))?;
-        let l2 = f2(key, q, n);
-        let ek = debug_get(eks, l2)?;
-        if ek == key {
+        let q = debug_unwrap!(qq.get(f1(key, *p, n)), return None);
+        let l2 = f2(key, *q, n);
+        let ek = debug_unwrap!(eks.get(l2), return None);
+        if *ek == key {
             Some(l2)
         } else {
             None
@@ -232,7 +232,7 @@ where
     /// Get an iterator over the keys in the order in which they are stored in the map.
     pub fn keys(&self) -> &[u8] {
         let n = self.num_items();
-        debug_split_at(self.0.as_ref(), 1 + n).1
+        self.0.as_ref().debug_split_at(1 + n).1
     }
     /// Diagnostic function that returns `p` and the maximum value of `q`
     #[cfg(test)]
@@ -242,7 +242,7 @@ where
         if n == 0 {
             return None;
         }
-        let (qq, _) = debug_split_at(buffer, n);
+        let (qq, _) = buffer.debug_split_at(n);
         Some((*p, *qq.iter().max().unwrap()))
     }
     /// Returns the map as bytes. The map can be recovered with [`Self::from_store`]
diff --git a/experimental/zerotrie/src/helpers.rs b/experimental/zerotrie/src/helpers.rs
index b4bbb08815f..4bb3abe2ca1 100644
--- a/experimental/zerotrie/src/helpers.rs
+++ b/experimental/zerotrie/src/helpers.rs
@@ -2,54 +2,53 @@
 // called LICENSE at the top level of the ICU4X source tree
 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
 
-use core::ops::Range;
-
-/// Like slice::split_at but debug-panics and returns an empty second slice
-/// if the index is out of range.
-#[inline]
-pub(crate) fn debug_split_at(slice: &[u8], mid: usize) -> (&[u8], &[u8]) {
-    if mid > slice.len() {
-        debug_assert!(false, "debug_split_at: index expected to be in range");
-        (slice, &[])
-    } else {
-        // Note: We're trusting the compiler to inline this and remove the assertion
-        // hiding on the top of slice::split_at: `assert(mid <= self.len())`
-        slice.split_at(mid)
-    }
+pub(crate) trait MaybeSplitAt<T> {
+    /// Like `slice::split_at`, but returns `None` instead of panicking
+    /// if `mid` is out of range (`mid > self.len()`).
+    fn maybe_split_at(&self, mid: usize) -> Option<(&Self, &Self)>;
+    /// Like `slice::split_at`, but if `mid` is out of range it debug-panics and,
+    /// with debug assertions off, returns the whole slice and an empty slice.
+    fn debug_split_at(&self, mid: usize) -> (&Self, &Self);
 }
 
-/// Like slice::split_at but returns an Option instead of panicking.
-#[inline]
-pub(crate) fn maybe_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
-    if mid > slice.len() {
-        None
-    } else {
-        // Note: We're trusting the compiler to inline this and remove the assertion
-        // hiding on the top of slice::split_at: `assert(mid <= self.len())`
-        Some(slice.split_at(mid))
-    }
-}
-
-/// Gets the item at the specified index, panicking in debug mode if it is not there.
-#[inline]
-pub(crate) fn debug_get(slice: &[u8], index: usize) -> Option<u8> {
-    match slice.get(index) {
-        Some(x) => Some(*x),
-        None => {
-            debug_assert!(false, "debug_get: index expected to be in range");
+impl<T> MaybeSplitAt<T> for [T] {
+    #[inline]
+    fn maybe_split_at(&self, mid: usize) -> Option<(&Self, &Self)> {
+        if mid > self.len() {
             None
+        } else {
+            // Note: We're trusting the compiler to inline this and remove the assertion
+            // hiding on the top of slice::split_at: `assert!(mid <= self.len())`
+            Some(self.split_at(mid))
+        }
+    }
+    #[inline]
+    fn debug_split_at(&self, mid: usize) -> (&Self, &Self) {
+        if mid > self.len() {
+            debug_assert!(false, "debug_split_at: index expected to be in range");
+            (self, &[])
+        } else {
+            // Note: We're trusting the compiler to inline this and remove the assertion
+            // hiding on the top of slice::split_at: `assert!(mid <= self.len())`
+            self.split_at(mid)
         }
     }
 }
 
-/// Gets the range between the specified indices, panicking in debug mode if not in bounds.
-#[inline]
-pub(crate) fn debug_get_range(slice: &[u8], range: Range<usize>) -> Option<&[u8]> {
-    match slice.get(range) {
-        Some(x) => Some(x),
-        None => {
-            debug_assert!(false, "debug_get_range: indices expected to be in range");
-            None
+pub(crate) trait DebugUnwrapOr<T> {
+    /// Returns the contained value; on `None`, debug-panics and falls back to `gigo_value`.
+    fn debug_unwrap_or(self, gigo_value: T) -> T;
+}
+
+impl<T> DebugUnwrapOr<T> for Option<T> {
+    #[inline]
+    fn debug_unwrap_or(self, gigo_value: T) -> T {
+        match self {
+            Some(x) => x,
+            None => {
+                debug_assert!(false, "debug_unwrap_or called on a None value");
+                gigo_value
+            }
         }
     }
 }
@@ -86,3 +85,5 @@ macro_rules! debug_unwrap {
         debug_unwrap!($expr, return ())
     };
 }
+
+pub(crate) use debug_unwrap;
diff --git a/experimental/zerotrie/src/reader.rs b/experimental/zerotrie/src/reader.rs
index 56ec575ab50..afd447c5b40 100644
--- a/experimental/zerotrie/src/reader.rs
+++ b/experimental/zerotrie/src/reader.rs
@@ -219,17 +219,17 @@ use alloc::string::String;
 /// - `n` = the number of items in the offset table
 /// - `w` = the width of the offset table items minus one
 #[inline]
-fn get_branch(mut trie: &[u8], i: usize, n: usize, mut w: usize) -> Option<&[u8]> {
+fn get_branch(mut trie: &[u8], i: usize, n: usize, mut w: usize) -> &[u8] {
     let mut p = 0usize;
     let mut q = 0usize;
     loop {
         let indices;
-        (indices, trie) = debug_split_at(trie, n - 1);
+        (indices, trie) = trie.debug_split_at(n - 1);
         p = (p << 8)
             + if i == 0 {
                 0
             } else {
-                debug_get(indices, i - 1)? as usize
+                *indices.get(i - 1).debug_unwrap_or(&0) as usize
             };
         q = match indices.get(i) {
             Some(x) => (q << 8) + *x as usize,
@@ -240,24 +240,24 @@ fn get_branch(mut trie: &[u8], i: usize, n: usize, mut w: usize) -> Option<&[u8]
         }
         w -= 1;
     }
-    debug_get_range(trie, p..q)
+    trie.get(p..q).debug_unwrap_or(&[])
 }
 
 /// Version of [`get_branch()`] specialized for the case `w == 0` for performance
 #[inline]
-fn get_branch_w0(mut trie: &[u8], i: usize, n: usize) -> Option<&[u8]> {
+fn get_branch_w0(mut trie: &[u8], i: usize, n: usize) -> &[u8] {
     let indices;
-    (indices, trie) = debug_split_at(trie, n - 1);
+    (indices, trie) = trie.debug_split_at(n - 1);
     let p = if i == 0 {
         0
     } else {
-        debug_get(indices, i - 1)? as usize
+        *indices.get(i - 1).debug_unwrap_or(&0) as usize
     };
     let q = match indices.get(i) {
         Some(x) => *x as usize,
         None => trie.len(),
     };
-    debug_get_range(trie, p..q)
+    trie.get(p..q).debug_unwrap_or(&[])
 }
 
 /// The node type. See the module-level docs for more explanation of the four node types.
@@ -331,8 +331,8 @@ pub fn get_bsearch_only(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             }
             if matches!(byte_type, NodeType::Span) {
                 let (trie_span, ascii_span);
-                (trie_span, trie) = debug_split_at(trie, x);
-                (ascii_span, ascii) = maybe_split_at(ascii, x)?;
+                (trie_span, trie) = trie.debug_split_at(x);
+                (ascii_span, ascii) = ascii.maybe_split_at(x)?;
                 if trie_span == ascii_span {
                     // Matched a byte span
                     continue;
@@ -348,13 +348,13 @@ pub fn get_bsearch_only(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             let w = w & 0x3;
             let x = if x == 0 { 256 } else { x };
             // Always use binary search
-            (search, trie) = debug_split_at(trie, x);
+            (search, trie) = trie.debug_split_at(x);
             i = search.binary_search(c).ok()?;
             trie = if w == 0 {
                 get_branch_w0(trie, i, x)
             } else {
                 get_branch(trie, i, x, w)
-            }?;
+            };
             ascii = temp;
             continue;
         } else {
@@ -395,8 +395,8 @@ pub fn get_phf_limited(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             }
             if matches!(byte_type, NodeType::Span) {
                 let (trie_span, ascii_span);
-                (trie_span, trie) = debug_split_at(trie, x);
-                (ascii_span, ascii) = maybe_split_at(ascii, x)?;
+                (trie_span, trie) = trie.debug_split_at(x);
+                (ascii_span, ascii) = ascii.maybe_split_at(x)?;
                 if trie_span == ascii_span {
                     // Matched a byte span
                     continue;
@@ -413,18 +413,18 @@ pub fn get_phf_limited(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             let x = if x == 0 { 256 } else { x };
             if x < 16 {
                 // binary search
-                (search, trie) = debug_split_at(trie, x);
+                (search, trie) = trie.debug_split_at(x);
                 i = search.binary_search(c).ok()?;
             } else {
                 // phf
-                (search, trie) = debug_split_at(trie, x * 2 + 1);
+                (search, trie) = trie.debug_split_at(x * 2 + 1);
                 i = PerfectByteHashMap::from_store(search).get(*c)?;
             }
             trie = if w == 0 {
                 get_branch_w0(trie, i, x)
             } else {
                 get_branch(trie, i, x, w)
-            }?;
+            };
             ascii = temp;
             continue;
         } else {
@@ -465,8 +465,8 @@ pub fn get_phf_extended(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             }
             if matches!(byte_type, NodeType::Span) {
                 let (trie_span, ascii_span);
-                (trie_span, trie) = debug_split_at(trie, x);
-                (ascii_span, ascii) = maybe_split_at(ascii, x)?;
+                (trie_span, trie) = trie.debug_split_at(x);
+                (ascii_span, ascii) = ascii.maybe_split_at(x)?;
                 if trie_span == ascii_span {
                     // Matched a byte span
                     continue;
@@ -480,18 +480,18 @@ pub fn get_phf_extended(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
             let x = if x == 0 { 256 } else { x };
             if x < 16 {
                 // binary search
-                (search, trie) = debug_split_at(trie, x);
+                (search, trie) = trie.debug_split_at(x);
                 i = search.binary_search(c).ok()?;
             } else {
                 // phf
-                (search, trie) = debug_split_at(trie, x * 2 + 1);
+                (search, trie) = trie.debug_split_at(x * 2 + 1);
                 i = PerfectByteHashMap::from_store(search).get(*c)?;
             }
             trie = if w == 0 {
                 get_branch_w0(trie, i, x)
             } else {
                 get_branch(trie, i, x, w)
-            }?;
+            };
             ascii = temp;
             continue;
         } else {
@@ -558,7 +558,7 @@ impl<'a> Iterator for ZeroTrieIterator<'a> {
                 NodeType::Branch => read_varint_meta2(*b, trie),
             };
             if matches!(byte_type, NodeType::Span) {
-                (span, trie) = debug_split_at(trie, x);
+                (span, trie) = trie.debug_split_at(x);
                 string.extend(span);
                 continue;
             }
@@ -578,19 +578,19 @@ impl<'a> Iterator for ZeroTrieIterator<'a> {
             }
             let byte = if x < 16 || !self.use_phf {
                 // binary search
-                (search, trie) = debug_split_at(trie, x);
-                debug_get(search, branch_idx)?
+                (search, trie) = trie.debug_split_at(x);
+                debug_unwrap!(search.get(branch_idx), return None)
             } else {
                 // phf
-                (search, trie) = debug_split_at(trie, x * 2 + 1);
-                debug_get(search, branch_idx + x + 1)?
+                (search, trie) = trie.debug_split_at(x * 2 + 1);
+                debug_unwrap!(search.get(branch_idx + x + 1), return None)
             };
-            string.push(byte);
+            string.push(*byte);
             trie = if w == 0 {
                 get_branch_w0(trie, branch_idx, x)
             } else {
                 get_branch(trie, branch_idx, x, w)
-            }?;
+            };
             branch_idx = 0;
         }
     }

From 3743661150a9cb80ab37431f0c82b83b076b9774 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Wed, 29 Nov 2023 19:34:35 -0700
Subject: [PATCH 05/21] Add as_borrowed_slice and AsRef impl. Need to bikeshed
 the name.

---
 experimental/zerotrie/src/zerotrie.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/experimental/zerotrie/src/zerotrie.rs b/experimental/zerotrie/src/zerotrie.rs
index f319ed9324a..deedcb81443 100644
--- a/experimental/zerotrie/src/zerotrie.rs
+++ b/experimental/zerotrie/src/zerotrie.rs
@@ -231,6 +231,20 @@ macro_rules! impl_zerotrie_subtype {
             pub fn as_borrowed(&self) -> &$name<[u8]> {
                 $name::from_bytes(self.store.as_ref())
             }
+            /// Returns a trie with a store borrowing from this trie.
+            #[inline]
+            pub fn as_borrowed_slice(&self) -> $name<&[u8]> {
+                $name::from_store(self.store.as_ref())
+            }
+        }
+        impl<Store> AsRef<$name<[u8]>> for $name<Store>
+        where
+        Store: AsRef<[u8]> + ?Sized,
+        {
+            #[inline]
+            fn as_ref(&self) -> &$name<[u8]> {
+                self.as_borrowed()
+            }
         }
         #[cfg(feature = "alloc")]
         impl<Store> $name<Store>

From 46316a09e499f7c06ffbf8121c6c34b492ba00f6 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Wed, 29 Nov 2023 23:14:42 -0600
Subject: [PATCH 06/21] Initial implementation of ZeroTrie step function

---
 experimental/zerotrie/src/reader.rs   | 71 ++++++++++++++++++++++++
 experimental/zerotrie/src/zerotrie.rs | 78 +++++++++++++++++++++++++++
 2 files changed, 149 insertions(+)

diff --git a/experimental/zerotrie/src/reader.rs b/experimental/zerotrie/src/reader.rs
index afd447c5b40..94bf1b2c190 100644
--- a/experimental/zerotrie/src/reader.rs
+++ b/experimental/zerotrie/src/reader.rs
@@ -504,6 +504,77 @@ pub fn get_phf_extended(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
     }
 }
 
+pub(crate) fn step_bsearch_only(trie: &mut &[u8], c: u8)  {
+    let (mut b, x, search);
+    loop {
+        (b, *trie) = match trie.split_first() {
+            Some(v) => v,
+            None => {
+                // Empty trie or only a value node
+                return
+            }
+        };
+        match byte_type(*b) {
+            NodeType::Ascii if *b == c => {
+                // Matched a byte
+                return;
+            }
+            NodeType::Ascii => {
+                // Byte that doesn't match
+                *trie = &[];
+                return;
+            }
+            NodeType::Branch => {
+                // Proceed to the branch node logic below
+                (x, *trie) = read_varint_meta2(*b, trie);
+                break;
+            }
+            NodeType::Span => {
+                // Question: Should we put the trie back into a valid state?
+                // Currently this code is unreachable so let's not worry about it.
+                debug_assert!(false, "span nodes not supported in stepping");
+                return;
+            }
+            NodeType::Value => {
+                // Skip the value node and go to the next node
+                (_, *trie) = read_varint_meta3(*b, trie);
+                continue;
+            }
+        };
+    }
+    // Branch node
+    let (x, w) = if x >= 256 { (x & 0xff, x >> 8) } else { (x, 0) };
+    // See comment above regarding this assertion
+    debug_assert!(w <= 3, "get: w > 3 but we assume w <= 3");
+    let w = w & 0x3;
+    let x = if x == 0 { 256 } else { x };
+    // Always use binary search
+    (search, *trie) = trie.debug_split_at(x);
+    match search.binary_search(&c) {
+        Ok(i) => {
+            // Matched a byte
+            *trie = if w == 0 {
+                get_branch_w0(trie, i, x)
+            } else {
+                get_branch(trie, i, x, w)
+            };
+        },
+        Err(_) => {
+            // Byte that doesn't match
+            *trie = &[]
+        }
+    };
+}
+
+pub(crate) fn peek_value(mut trie: &[u8]) -> Option<usize>  {
+    let b;
+    (b, trie) = trie.split_first()?;
+    match byte_type(*b) {
+        NodeType::Ascii | NodeType::Span | NodeType::Branch => None,
+        NodeType::Value => Some(read_varint_meta3(*b, trie).0),
+    }
+}
+
 #[cfg(feature = "alloc")]
 use alloc::vec::Vec;
 
diff --git a/experimental/zerotrie/src/zerotrie.rs b/experimental/zerotrie/src/zerotrie.rs
index deedcb81443..90cf22c2c87 100644
--- a/experimental/zerotrie/src/zerotrie.rs
+++ b/experimental/zerotrie/src/zerotrie.rs
@@ -204,6 +204,24 @@ macro_rules! impl_zerotrie_subtype {
             pub fn is_empty(&self) -> bool {
                 self.store.as_ref().is_empty()
             }
+            /// Gets the value at the head of the trie. This is equivalent to
+            /// calling `get` with the empty string.
+            ///
+            /// # Examples
+            ///
+            /// ```
+            #[doc = concat!("use zerotrie::", stringify!($name), ";")]
+            ///
+            /// // A trie with two values: "" and "abc"
+            #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"\\x80abc\\x81\");")]
+            ///
+            /// assert_eq!(Some(0), trie.head_value());
+            /// assert_eq!(Some(0), trie.get(""));
+            /// ```
+            #[inline]
+            pub fn head_value(&self) -> Option<usize> {
+                peek_value(self.store.as_ref())
+            }
             /// Returns the size of the trie in number of bytes.
             ///
             /// To get the number of keys in the trie, use `.iter().count()`:
@@ -565,6 +583,66 @@ impl_zerotrie_subtype!(
     Vec::into_boxed_slice
 );
 
+impl ZeroTrieSimpleAscii<&[u8]> {
+    /// Steps one node into the trie, mutating self.
+    ///
+    /// Useful to query a trie with data that is not a slice. Use
+    /// [`Self::head_value()`] to check for the presence of a string
+    /// in the trie.
+    ///
+    /// This is only supported on `ZeroTrieSimpleAscii` because other trie
+    /// types may contain span nodes, which cannot be split.
+    ///
+    /// # Examples
+    ///
+    /// Get a value out of a trie by manually iterating over the bytes:
+    ///
+    /// ```
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "abc" and "abcdef"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+    ///
+    /// // Get out the value for "abc"
+    /// let mut it = trie.as_borrowed_slice();
+    /// for c in b"abc".iter() {
+    ///     it.step(*c);
+    /// }
+    /// assert_eq!(it.head_value(), Some(0));
+    /// ```
+    ///
+    /// Unrolled loop checking for string presence at every step:
+    ///
+    /// ```
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "abc" and "abcdef"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+    ///
+    /// // Search the trie for the string "abcdxy"
+    /// let mut it = trie.as_borrowed_slice();
+    /// assert_eq!(it.head_value(), None); // ""
+    /// it.step(b'a');
+    /// assert_eq!(it.head_value(), None); // "a"
+    /// it.step(b'b');
+    /// assert_eq!(it.head_value(), None); // "ab"
+    /// it.step(b'c');
+    /// assert_eq!(it.head_value(), Some(0)); // "abc"
+    /// it.step(b'd');
+    /// assert_eq!(it.head_value(), None); // "abcd"
+    /// assert!(!it.is_empty());
+    /// it.step(b'x'); // no strings have the prefix "abcdx"
+    /// assert!(it.is_empty());
+    /// assert_eq!(it.head_value(), None); // "abcdx"
+    /// it.step(b'y');
+    /// assert_eq!(it.head_value(), None); // "abcdxy"
+    /// ```
+    #[inline]
+    pub fn step(&mut self, byte: u8) {
+        step_bsearch_only(&mut self.store, byte)
+    }
+}
+
 macro_rules! impl_dispatch {
     ($self:ident, $inner_fn:ident()) => {
         match $self.0 {

From 2d8d8f9cced0f04b68431ad0df4a4347be2ba2cd Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Wed, 29 Nov 2023 23:49:39 -0600
Subject: [PATCH 07/21] Use ZeroTrie stepping in BlobSchemaV2 to avoid
 allocations

---
 provider/blob/src/blob_schema.rs | 37 ++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/provider/blob/src/blob_schema.rs b/provider/blob/src/blob_schema.rs
index 4475b2e3236..0bee8c6c3c3 100644
--- a/provider/blob/src/blob_schema.rs
+++ b/provider/blob/src/blob_schema.rs
@@ -3,6 +3,7 @@
 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
 
 use alloc::boxed::Box;
+use core::fmt;
 use icu_provider::prelude::*;
 use serde::Deserialize;
 use writeable::Writeable;
@@ -149,6 +150,28 @@ impl Default for BlobSchemaV2<'_> {
     }
 }
 
+/// A struct that steps through a ZeroTrie when fed data from fmt::Write
+struct ZeroTrieStepWrite<'a> {
+    trie: ZeroTrieSimpleAscii<&'a [u8]>,
+}
+
+impl<'a> fmt::Write for ZeroTrieStepWrite<'a> {
+    fn write_str(&mut self, s: &str) -> fmt::Result {
+        for b in s.bytes() {
+            self.trie.step(b);
+        }
+        Ok(())
+    }
+    fn write_char(&mut self, c: char) -> fmt::Result {
+        debug_assert!(c.is_ascii());
+        self.trie.step(c as u8);
+        Ok(())
+    }
+    fn write_fmt(&mut self, _: fmt::Arguments<'_>) -> fmt::Result {
+        unreachable!()
+    }
+}
+
 impl<'data> BlobSchemaV2<'data> {
     pub fn load(&self, key: DataKey, req: DataRequest) -> Result<&'data [u8], DataError> {
         let key_index = self
@@ -163,10 +186,16 @@ impl<'data> BlobSchemaV2<'data> {
             .locales
             .get(key_index)
             .ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(key, req))?;
-        // TODO(#4249): Add a lookup function to zerotrie so we don't need to stringify
-        let locale_str = req.locale.write_to_string();
-        let blob_index = ZeroTrieSimpleAscii::from_store(zerotrie)
-            .get(locale_str.as_bytes())
+        let mut trie_write = ZeroTrieStepWrite {
+            trie: ZeroTrieSimpleAscii::from_store(zerotrie),
+        };
+        #[allow(clippy::unwrap_used)] // infallible impl
+        req.locale
+            .write_to(&mut trie_write)
+            .unwrap();
+        let blob_index = trie_write
+            .trie
+            .head_value()
             .ok_or_else(|| DataErrorKind::MissingLocale.with_req(key, req))?;
         let buffer = self
             .buffers

From f9a614e7079ac3db2707ea471b9bab92c06995ef Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 00:56:07 -0600
Subject: [PATCH 08/21] Add APIs for ZeroTrieSimpleAsciiCursor and use them in
 BlobSchemaV2

---
 experimental/zerotrie/src/lib.rs      |  1 +
 experimental/zerotrie/src/zerotrie.rs | 93 +++++++++++++++++++++++----
 provider/blob/src/blob_schema.rs      | 14 ++--
 3 files changed, 87 insertions(+), 21 deletions(-)

diff --git a/experimental/zerotrie/src/lib.rs b/experimental/zerotrie/src/lib.rs
index 817093d2f36..61fca0555b9 100644
--- a/experimental/zerotrie/src/lib.rs
+++ b/experimental/zerotrie/src/lib.rs
@@ -69,6 +69,7 @@ pub use crate::zerotrie::ZeroTrie;
 pub use crate::zerotrie::ZeroTrieExtendedCapacity;
 pub use crate::zerotrie::ZeroTriePerfectHash;
 pub use crate::zerotrie::ZeroTrieSimpleAscii;
+pub use crate::zerotrie::ZeroTrieSimpleAsciiCursor;
 pub use error::Error as ZeroTrieError;
 
 #[doc(hidden)]
diff --git a/experimental/zerotrie/src/zerotrie.rs b/experimental/zerotrie/src/zerotrie.rs
index 90cf22c2c87..9e37a3dbfd7 100644
--- a/experimental/zerotrie/src/zerotrie.rs
+++ b/experimental/zerotrie/src/zerotrie.rs
@@ -583,15 +583,17 @@ impl_zerotrie_subtype!(
     Vec::into_boxed_slice
 );
 
-impl ZeroTrieSimpleAscii<&[u8]> {
+impl<Store> ZeroTrieSimpleAscii<Store>
+where
+    Store: AsRef<[u8]> + ?Sized,
+{
     /// Steps one node into the trie, mutating self.
     ///
     /// Useful to query a trie with data that is not a slice. Use
     /// [`Self::head_value()`] to check for the presence of a string
     /// in the trie.
     ///
-    /// This is only supported on `ZeroTrieSimpleAscii` because other trie
-    /// types may contain span nodes, which cannot be split.
+    /// This is currently supported only on `ZeroTrieSimpleAscii`.
     ///
     /// # Examples
     ///
@@ -604,11 +606,16 @@ impl ZeroTrieSimpleAscii<&[u8]> {
     /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
     ///
     /// // Get out the value for "abc"
-    /// let mut it = trie.as_borrowed_slice();
+    /// let mut it = trie.cursor();
     /// for c in b"abc".iter() {
+    ///     // Checking is_empty() is not required, but it is
+    ///     // good for efficiency
+    ///     if it.is_empty() {
+    ///         break;
+    ///     }
     ///     it.step(*c);
     /// }
-    /// assert_eq!(it.head_value(), Some(0));
+    /// assert_eq!(it.value(), Some(0));
     /// ```
     ///
     /// Unrolled loop checking for string presence at every step:
@@ -620,26 +627,84 @@ impl ZeroTrieSimpleAscii<&[u8]> {
     /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
     ///
     /// // Search the trie for the string "abcdxy"
-    /// let mut it = trie.as_borrowed_slice();
-    /// assert_eq!(it.head_value(), None); // ""
+    /// let mut it = trie.cursor();
+    /// assert_eq!(it.value(), None); // ""
     /// it.step(b'a');
-    /// assert_eq!(it.head_value(), None); // "a"
+    /// assert_eq!(it.value(), None); // "a"
     /// it.step(b'b');
-    /// assert_eq!(it.head_value(), None); // "ab"
+    /// assert_eq!(it.value(), None); // "ab"
     /// it.step(b'c');
-    /// assert_eq!(it.head_value(), Some(0)); // "abc"
+    /// assert_eq!(it.value(), Some(0)); // "abc"
     /// it.step(b'd');
-    /// assert_eq!(it.head_value(), None); // "abcd"
+    /// assert_eq!(it.value(), None); // "abcd"
     /// assert!(!it.is_empty());
     /// it.step(b'x'); // no strings have the prefix "abcdx"
     /// assert!(it.is_empty());
-    /// assert_eq!(it.head_value(), None); // "abcdx"
+    /// assert_eq!(it.value(), None); // "abcdx"
     /// it.step(b'y');
-    /// assert_eq!(it.head_value(), None); // "abcdxy"
+    /// assert_eq!(it.value(), None); // "abcdxy"
     /// ```
     #[inline]
+    pub fn cursor(&self) -> ZeroTrieSimpleAsciiCursor {
+        ZeroTrieSimpleAsciiCursor {
+            trie: self.as_borrowed_slice(),
+        }
+    }
+}
+
+impl<'a> ZeroTrieSimpleAscii<&'a [u8]> {
+    /// Same as [`ZeroTrieSimpleAscii::cursor()`] but moves self to avoid
+    /// having to doubly anchor the trie to the stack.
+    #[inline]
+    pub fn into_cursor(self) -> ZeroTrieSimpleAsciiCursor<'a> {
+        ZeroTrieSimpleAsciiCursor {
+            trie: self
+        }
+    }
+}
+
+/// A cursor into a [`ZeroTrieSimpleAscii`], useful for stepwise lookup.
+///
+/// For examples, see [`ZeroTrieSimpleAscii::cursor()`].
+#[derive(Debug)]
+pub struct ZeroTrieSimpleAsciiCursor<'a> {
+    trie: ZeroTrieSimpleAscii<&'a [u8]>,
+}
+
+impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
+    /// Steps the cursor one byte into the trie.
+    #[inline]
     pub fn step(&mut self, byte: u8) {
-        step_bsearch_only(&mut self.store, byte)
+        step_bsearch_only(&mut self.trie.store, byte)
+    }
+
+    /// Gets the value at the current position in the trie.
+    ///
+    /// Calling this function on a new cursor is equivalent to calling `.get()`
+    /// with the empty string.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "" and "abc"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"\x80abc\x81");
+    ///
+    /// assert_eq!(Some(0), trie.get(""));
+    /// assert_eq!(Some(0), trie.cursor().value());
+    /// ```
+    #[inline]
+    pub fn value(&self) -> Option<usize> {
+        peek_value(self.trie.store.as_ref())
+    }
+
+    /// Checks whether the cursor points to an empty trie.
+    ///
+    /// Use this to determine when to stop iterating.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.trie.is_empty()
     }
 }
 
diff --git a/provider/blob/src/blob_schema.rs b/provider/blob/src/blob_schema.rs
index 0bee8c6c3c3..431e1a2b7de 100644
--- a/provider/blob/src/blob_schema.rs
+++ b/provider/blob/src/blob_schema.rs
@@ -7,7 +7,7 @@ use core::fmt;
 use icu_provider::prelude::*;
 use serde::Deserialize;
 use writeable::Writeable;
-use zerotrie::ZeroTrieSimpleAscii;
+use zerotrie::{ZeroTrieSimpleAscii, ZeroTrieSimpleAsciiCursor};
 use zerovec::maps::{ZeroMap2dBorrowed, ZeroMapKV};
 use zerovec::vecs::{Index32, VarZeroSlice, VarZeroVec, ZeroSlice};
 
@@ -152,19 +152,19 @@ impl Default for BlobSchemaV2<'_> {
 
 /// A struct that steps through a ZeroTrie when fed data from fmt::Write
 struct ZeroTrieStepWrite<'a> {
-    trie: ZeroTrieSimpleAscii<&'a [u8]>,
+    cursor: ZeroTrieSimpleAsciiCursor<'a>,
 }
 
 impl<'a> fmt::Write for ZeroTrieStepWrite<'a> {
     fn write_str(&mut self, s: &str) -> fmt::Result {
         for b in s.bytes() {
-            self.trie.step(b);
+            self.cursor.step(b);
         }
         Ok(())
     }
     fn write_char(&mut self, c: char) -> fmt::Result {
         debug_assert!(c.is_ascii());
-        self.trie.step(c as u8);
+        self.cursor.step(c as u8);
         Ok(())
     }
     fn write_fmt(&mut self, _: fmt::Arguments<'_>) -> fmt::Result {
@@ -187,15 +187,15 @@ impl<'data> BlobSchemaV2<'data> {
             .get(key_index)
             .ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(key, req))?;
         let mut trie_write = ZeroTrieStepWrite {
-            trie: ZeroTrieSimpleAscii::from_store(zerotrie),
+            cursor: ZeroTrieSimpleAscii::from_store(zerotrie).into_cursor(),
         };
         #[allow(clippy::unwrap_used)] // infallible impl
         req.locale
             .write_to(&mut trie_write)
             .unwrap();
         let blob_index = trie_write
-            .trie
-            .head_value()
+            .cursor
+            .value()
             .ok_or_else(|| DataErrorKind::MissingLocale.with_req(key, req))?;
         let buffer = self
             .buffers

From 929f81fcf0cc4d20024a852cd86d83bcb8525fad Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 00:57:10 -0600
Subject: [PATCH 09/21] Move around examples and cargo fmt

---
 experimental/zerotrie/src/reader.rs   |  8 +--
 experimental/zerotrie/src/zerotrie.rs | 80 ++++++++++++++++++---------
 2 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/experimental/zerotrie/src/reader.rs b/experimental/zerotrie/src/reader.rs
index 94bf1b2c190..c27592f7816 100644
--- a/experimental/zerotrie/src/reader.rs
+++ b/experimental/zerotrie/src/reader.rs
@@ -504,14 +504,14 @@ pub fn get_phf_extended(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
     }
 }
 
-pub(crate) fn step_bsearch_only(trie: &mut &[u8], c: u8)  {
+pub(crate) fn step_bsearch_only(trie: &mut &[u8], c: u8) {
     let (mut b, x, search);
     loop {
         (b, *trie) = match trie.split_first() {
             Some(v) => v,
             None => {
                 // Empty trie or only a value node
-                return
+                return;
             }
         };
         match byte_type(*b) {
@@ -558,7 +558,7 @@ pub(crate) fn step_bsearch_only(trie: &mut &[u8], c: u8)  {
             } else {
                 get_branch(trie, i, x, w)
             };
-        },
+        }
         Err(_) => {
             // Byte that doesn't match
             *trie = &[]
@@ -566,7 +566,7 @@ pub(crate) fn step_bsearch_only(trie: &mut &[u8], c: u8)  {
     };
 }
 
-pub(crate) fn peek_value(mut trie: &[u8]) -> Option<usize>  {
+pub(crate) fn peek_value(mut trie: &[u8]) -> Option<usize> {
     let b;
     (b, trie) = trie.split_first()?;
     match byte_type(*b) {
diff --git a/experimental/zerotrie/src/zerotrie.rs b/experimental/zerotrie/src/zerotrie.rs
index 9e37a3dbfd7..00cd71de1e8 100644
--- a/experimental/zerotrie/src/zerotrie.rs
+++ b/experimental/zerotrie/src/zerotrie.rs
@@ -587,11 +587,9 @@ impl<Store> ZeroTrieSimpleAscii<Store>
 where
     Store: AsRef<[u8]> + ?Sized,
 {
-    /// Steps one node into the trie, mutating self.
+    /// Gets a cursor into the current trie.
     ///
-    /// Useful to query a trie with data that is not a slice. Use
-    /// [`Self::head_value()`] to check for the presence of a string
-    /// in the trie.
+    /// Useful to query a trie with data that is not a slice.
     ///
     /// This is currently supported only on `ZeroTrieSimpleAscii`.
     ///
@@ -606,19 +604,19 @@ where
     /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
     ///
     /// // Get out the value for "abc"
-    /// let mut it = trie.cursor();
-    /// for c in b"abc".iter() {
+    /// let mut cursor = trie.cursor();
+    /// for b in b"abc".iter() {
     ///     // Checking is_empty() is not required, but it is
     ///     // good for efficiency
-    ///     if it.is_empty() {
+    ///     if cursor.is_empty() {
     ///         break;
     ///     }
-    ///     it.step(*c);
+    ///     cursor.step(*b);
     /// }
-    /// assert_eq!(it.value(), Some(0));
+    /// assert_eq!(cursor.value(), Some(0));
     /// ```
     ///
-    /// Unrolled loop checking for string presence at every step:
+    /// Find the longest prefix match:
     ///
     /// ```
     /// use zerotrie::ZeroTrieSimpleAscii;
@@ -626,23 +624,22 @@ where
     /// // A trie with two values: "abc" and "abcdef"
     /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
     ///
-    /// // Search the trie for the string "abcdxy"
-    /// let mut it = trie.cursor();
-    /// assert_eq!(it.value(), None); // ""
-    /// it.step(b'a');
-    /// assert_eq!(it.value(), None); // "a"
-    /// it.step(b'b');
-    /// assert_eq!(it.value(), None); // "ab"
-    /// it.step(b'c');
-    /// assert_eq!(it.value(), Some(0)); // "abc"
-    /// it.step(b'd');
-    /// assert_eq!(it.value(), None); // "abcd"
-    /// assert!(!it.is_empty());
-    /// it.step(b'x'); // no strings have the prefix "abcdx"
-    /// assert!(it.is_empty());
-    /// assert_eq!(it.value(), None); // "abcdx"
-    /// it.step(b'y');
-    /// assert_eq!(it.value(), None); // "abcdxy"
+    /// // Find the longest prefix of the string "abcdxy":
+    /// let query = b"abcdxy";
+    /// let mut longest_prefix = 0;
+    /// let mut cursor = trie.cursor();
+    /// for (i, b) in query.iter().enumerate() {
+    ///     if cursor.is_empty() {
+    ///         break;
+    ///     }
+    ///     if cursor.value().is_some() {
+    ///         longest_prefix = i;
+    ///     }
+    ///     cursor.step(*b);
+    /// }
+    ///
+    /// // The longest prefix is "abc" which is length 3:
+    /// assert_eq!(longest_prefix, 3);
     /// ```
     #[inline]
     pub fn cursor(&self) -> ZeroTrieSimpleAsciiCursor {
@@ -673,6 +670,35 @@ pub struct ZeroTrieSimpleAsciiCursor<'a> {
 
 impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
     /// Steps the cursor one byte into the trie.
+    ///
+    /// # Examples
+    ///
+    /// Unrolled loop checking for string presence at every step:
+    ///
+    /// ```
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "abc" and "abcdef"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+    ///
+    /// // Search the trie for the string "abcdxy"
+    /// let mut cursor = trie.cursor();
+    /// assert_eq!(cursor.value(), None); // ""
+    /// cursor.step(b'a');
+    /// assert_eq!(cursor.value(), None); // "a"
+    /// cursor.step(b'b');
+    /// assert_eq!(cursor.value(), None); // "ab"
+    /// cursor.step(b'c');
+    /// assert_eq!(cursor.value(), Some(0)); // "abc"
+    /// cursor.step(b'd');
+    /// assert_eq!(cursor.value(), None); // "abcd"
+    /// assert!(!cursor.is_empty());
+    /// cursor.step(b'x'); // no strings have the prefix "abcdx"
+    /// assert!(cursor.is_empty());
+    /// assert_eq!(cursor.value(), None); // "abcdx"
+    /// cursor.step(b'y');
+    /// assert_eq!(cursor.value(), None); // "abcdxy"
+    /// ```
     #[inline]
     pub fn step(&mut self, byte: u8) {
         step_bsearch_only(&mut self.trie.store, byte)

From 79fc8907fd5c132c10279e04ce364d27c2bdc52a Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 01:02:24 -0600
Subject: [PATCH 10/21] Return core::fmt::Error instead of asserting ascii

---
 provider/blob/src/blob_schema.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/provider/blob/src/blob_schema.rs b/provider/blob/src/blob_schema.rs
index 431e1a2b7de..66b1dda3ac7 100644
--- a/provider/blob/src/blob_schema.rs
+++ b/provider/blob/src/blob_schema.rs
@@ -158,12 +158,17 @@ struct ZeroTrieStepWrite<'a> {
 impl<'a> fmt::Write for ZeroTrieStepWrite<'a> {
     fn write_str(&mut self, s: &str) -> fmt::Result {
         for b in s.bytes() {
+            if !b.is_ascii() {
+                return Err(fmt::Error);
+            }
             self.cursor.step(b);
         }
         Ok(())
     }
     fn write_char(&mut self, c: char) -> fmt::Result {
-        debug_assert!(c.is_ascii());
+        if !c.is_ascii() {
+            return Err(fmt::Error);
+        }
         self.cursor.step(c as u8);
         Ok(())
     }

From 3a8efa16cce85e0fd30bd27aeae8589f1181f065 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 01:05:54 -0600
Subject: [PATCH 11/21] Move the core::fmt::Write impl into the zerotrie crate

---
 experimental/zerotrie/src/zerotrie.rs | 23 ++++++++++++++++
 provider/blob/src/blob_schema.rs      | 39 +++------------------------
 2 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/experimental/zerotrie/src/zerotrie.rs b/experimental/zerotrie/src/zerotrie.rs
index 00cd71de1e8..8416c142f3e 100644
--- a/experimental/zerotrie/src/zerotrie.rs
+++ b/experimental/zerotrie/src/zerotrie.rs
@@ -4,6 +4,7 @@
 
 use crate::reader::*;
 
+use core::fmt;
 use core::borrow::Borrow;
 
 #[cfg(feature = "alloc")]
@@ -734,6 +735,28 @@ impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
     }
 }
 
+impl<'a> fmt::Write for ZeroTrieSimpleAsciiCursor<'a> {
+    fn write_str(&mut self, s: &str) -> fmt::Result {
+        for b in s.bytes() {
+            if !b.is_ascii() {
+                return Err(fmt::Error);
+            }
+            self.step(b);
+        }
+        Ok(())
+    }
+    fn write_char(&mut self, c: char) -> fmt::Result {
+        if !c.is_ascii() {
+            return Err(fmt::Error);
+        }
+        self.step(c as u8);
+        Ok(())
+    }
+    fn write_fmt(&mut self, _: fmt::Arguments<'_>) -> fmt::Result {
+        unreachable!()
+    }
+}
+
 macro_rules! impl_dispatch {
     ($self:ident, $inner_fn:ident()) => {
         match $self.0 {
diff --git a/provider/blob/src/blob_schema.rs b/provider/blob/src/blob_schema.rs
index 66b1dda3ac7..7c385679337 100644
--- a/provider/blob/src/blob_schema.rs
+++ b/provider/blob/src/blob_schema.rs
@@ -3,11 +3,10 @@
 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
 
 use alloc::boxed::Box;
-use core::fmt;
 use icu_provider::prelude::*;
 use serde::Deserialize;
 use writeable::Writeable;
-use zerotrie::{ZeroTrieSimpleAscii, ZeroTrieSimpleAsciiCursor};
+use zerotrie::ZeroTrieSimpleAscii;
 use zerovec::maps::{ZeroMap2dBorrowed, ZeroMapKV};
 use zerovec::vecs::{Index32, VarZeroSlice, VarZeroVec, ZeroSlice};
 
@@ -150,33 +149,6 @@ impl Default for BlobSchemaV2<'_> {
     }
 }
 
-/// A struct that steps through a ZeroTrie when fed data from fmt::Write
-struct ZeroTrieStepWrite<'a> {
-    cursor: ZeroTrieSimpleAsciiCursor<'a>,
-}
-
-impl<'a> fmt::Write for ZeroTrieStepWrite<'a> {
-    fn write_str(&mut self, s: &str) -> fmt::Result {
-        for b in s.bytes() {
-            if !b.is_ascii() {
-                return Err(fmt::Error);
-            }
-            self.cursor.step(b);
-        }
-        Ok(())
-    }
-    fn write_char(&mut self, c: char) -> fmt::Result {
-        if !c.is_ascii() {
-            return Err(fmt::Error);
-        }
-        self.cursor.step(c as u8);
-        Ok(())
-    }
-    fn write_fmt(&mut self, _: fmt::Arguments<'_>) -> fmt::Result {
-        unreachable!()
-    }
-}
-
 impl<'data> BlobSchemaV2<'data> {
     pub fn load(&self, key: DataKey, req: DataRequest) -> Result<&'data [u8], DataError> {
         let key_index = self
@@ -191,15 +163,12 @@ impl<'data> BlobSchemaV2<'data> {
             .locales
             .get(key_index)
             .ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(key, req))?;
-        let mut trie_write = ZeroTrieStepWrite {
-            cursor: ZeroTrieSimpleAscii::from_store(zerotrie).into_cursor(),
-        };
+        let mut cursor = ZeroTrieSimpleAscii::from_store(zerotrie).into_cursor();
         #[allow(clippy::unwrap_used)] // infallible impl
         req.locale
-            .write_to(&mut trie_write)
+            .write_to(&mut cursor)
             .unwrap();
-        let blob_index = trie_write
-            .cursor
+        let blob_index = cursor
             .value()
             .ok_or_else(|| DataErrorKind::MissingLocale.with_req(key, req))?;
         let buffer = self

From 838ad05f6b9d8a4d25782f9226ab893f6d5a6f94 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 01:06:16 -0600
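Subject: [PATCH 12/21] Don't assert unreachable anymore

The `write!` macro dispatches through `fmt::Write::write_fmt`, so overriding
it with `unreachable!()` makes `write!(cursor, ...)` panic. Remove the
override and rely on the trait's default implementation, which forwards the
formatted output to `write_str`.
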
---
 experimental/zerotrie/src/zerotrie.rs | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/experimental/zerotrie/src/zerotrie.rs b/experimental/zerotrie/src/zerotrie.rs
index 8416c142f3e..0a6d2486476 100644
--- a/experimental/zerotrie/src/zerotrie.rs
+++ b/experimental/zerotrie/src/zerotrie.rs
@@ -752,9 +752,6 @@ impl<'a> fmt::Write for ZeroTrieSimpleAsciiCursor<'a> {
         self.step(c as u8);
         Ok(())
     }
-    fn write_fmt(&mut self, _: fmt::Arguments<'_>) -> fmt::Result {
-        unreachable!()
-    }
 }
 
 macro_rules! impl_dispatch {

From 4e706186392868bd0901a5f1e3398d7db8139474 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 01:18:02 -0600
Subject: [PATCH 13/21] Docs for the new core::fmt::Write impl

---
 experimental/zerotrie/src/zerotrie.rs | 48 ++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/experimental/zerotrie/src/zerotrie.rs b/experimental/zerotrie/src/zerotrie.rs
index 0a6d2486476..5608927c8c1 100644
--- a/experimental/zerotrie/src/zerotrie.rs
+++ b/experimental/zerotrie/src/zerotrie.rs
@@ -596,9 +596,10 @@ where
     ///
     /// # Examples
     ///
-    /// Get a value out of a trie by manually iterating over the bytes:
+    /// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
     ///
     /// ```
+    /// use core::fmt::Write;
     /// use zerotrie::ZeroTrieSimpleAscii;
     ///
     /// // A trie with two values: "abc" and "abcdef"
@@ -606,14 +607,7 @@ where
     ///
     /// // Get out the value for "abc"
     /// let mut cursor = trie.cursor();
-    /// for b in b"abc".iter() {
-    ///     // Checking is_empty() is not required, but it is
-    ///     // good for efficiency
-    ///     if cursor.is_empty() {
-    ///         break;
-    ///     }
-    ///     cursor.step(*b);
-    /// }
+    /// write!(&mut cursor, "abc");
     /// assert_eq!(cursor.value(), Some(0));
     /// ```
     ///
@@ -630,6 +624,8 @@ where
     /// let mut longest_prefix = 0;
     /// let mut cursor = trie.cursor();
     /// for (i, b) in query.iter().enumerate() {
+    ///     // Checking is_empty() is not required, but it is
+    ///     // good for efficiency
     ///     if cursor.is_empty() {
     ///         break;
     ///     }
@@ -736,6 +732,23 @@ impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
 }
 
 impl<'a> fmt::Write for ZeroTrieSimpleAsciiCursor<'a> {
+    /// Steps the cursor through each ASCII byte of the string.
+    ///
+    /// If the string contains non-ASCII chars, an error is returned.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use core::fmt::Write;
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "abc" and "abcdef"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+    ///
+    /// let mut cursor = trie.cursor();
+    /// cursor.write_str("abcdxy").expect("all ASCII");
+    /// cursor.write_str("🚂").expect_err("non-ASCII");
+    /// ```
     fn write_str(&mut self, s: &str) -> fmt::Result {
         for b in s.bytes() {
             if !b.is_ascii() {
@@ -745,6 +758,23 @@ impl<'a> fmt::Write for ZeroTrieSimpleAsciiCursor<'a> {
         }
         Ok(())
     }
+    /// Equivalent to [`ZeroTrieSimpleAsciiCursor::step()`], except returns
+    /// an error if the char is non-ASCII.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use core::fmt::Write;
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "abc" and "abcdef"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+    ///
+    /// let mut cursor = trie.cursor();
+    /// cursor.write_char('a').expect("ASCII");
+    /// cursor.write_char('x').expect("ASCII");
+    /// cursor.write_char('🚂').expect_err("non-ASCII");
+    /// ```
     fn write_char(&mut self, c: char) -> fmt::Result {
         if !c.is_ascii() {
             return Err(fmt::Error);

From 8f754cc48e4b40e8c7b10e22a44bae3876d5f439 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 01:20:15 -0600
Subject: [PATCH 14/21] Move cursor impls to their own file

---
 experimental/zerotrie/src/cursor.rs   | 208 ++++++++++++++++++++++++++
 experimental/zerotrie/src/lib.rs      |   3 +-
 experimental/zerotrie/src/zerotrie.rs | 201 -------------------------
 3 files changed, 210 insertions(+), 202 deletions(-)
 create mode 100644 experimental/zerotrie/src/cursor.rs

diff --git a/experimental/zerotrie/src/cursor.rs b/experimental/zerotrie/src/cursor.rs
new file mode 100644
index 00000000000..e03360abb45
--- /dev/null
+++ b/experimental/zerotrie/src/cursor.rs
@@ -0,0 +1,208 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ZeroTrieSimpleAscii;
+use crate::reader::*;
+
+use core::fmt;
+
+impl<Store> ZeroTrieSimpleAscii<Store>
+where
+    Store: AsRef<[u8]> + ?Sized,
+{
+    /// Gets a cursor into the current trie.
+    ///
+    /// Useful to query a trie with data that is not a slice.
+    ///
+    /// This is currently supported only on `ZeroTrieSimpleAscii`.
+    ///
+    /// # Examples
+    ///
+    /// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
+    ///
+    /// ```
+    /// use core::fmt::Write;
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "abc" and "abcdef"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+    ///
+    /// // Get out the value for "abc"
+    /// let mut cursor = trie.cursor();
+    /// write!(&mut cursor, "abc");
+    /// assert_eq!(cursor.value(), Some(0));
+    /// ```
+    ///
+    /// Find the longest prefix match:
+    ///
+    /// ```
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "abc" and "abcdef"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+    ///
+    /// // Find the longest prefix of the string "abcdxy":
+    /// let query = b"abcdxy";
+    /// let mut longest_prefix = 0;
+    /// let mut cursor = trie.cursor();
+    /// for (i, b) in query.iter().enumerate() {
+    ///     // Checking is_empty() is not required, but it is
+    ///     // good for efficiency
+    ///     if cursor.is_empty() {
+    ///         break;
+    ///     }
+    ///     if cursor.value().is_some() {
+    ///         longest_prefix = i;
+    ///     }
+    ///     cursor.step(*b);
+    /// }
+    ///
+    /// // The longest prefix is "abc" which is length 3:
+    /// assert_eq!(longest_prefix, 3);
+    /// ```
+    #[inline]
+    pub fn cursor(&self) -> ZeroTrieSimpleAsciiCursor {
+        ZeroTrieSimpleAsciiCursor {
+            trie: self.as_borrowed_slice(),
+        }
+    }
+}
+
+impl<'a> ZeroTrieSimpleAscii<&'a [u8]> {
+    /// Same as [`ZeroTrieSimpleAscii::cursor()`] but moves self to avoid
+    /// having to doubly anchor the trie to the stack.
+    #[inline]
+    pub fn into_cursor(self) -> ZeroTrieSimpleAsciiCursor<'a> {
+        ZeroTrieSimpleAsciiCursor {
+            trie: self
+        }
+    }
+}
+
+/// A cursor into a [`ZeroTrieSimpleAscii`], useful for stepwise lookup.
+///
+/// For examples, see [`ZeroTrieSimpleAscii::cursor()`].
+#[derive(Debug)]
+pub struct ZeroTrieSimpleAsciiCursor<'a> {
+    trie: ZeroTrieSimpleAscii<&'a [u8]>,
+}
+
+impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
+    /// Steps the cursor one byte into the trie.
+    ///
+    /// # Examples
+    ///
+    /// Unrolled loop checking for string presence at every step:
+    ///
+    /// ```
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "abc" and "abcdef"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+    ///
+    /// // Search the trie for the string "abcdxy"
+    /// let mut cursor = trie.cursor();
+    /// assert_eq!(cursor.value(), None); // ""
+    /// cursor.step(b'a');
+    /// assert_eq!(cursor.value(), None); // "a"
+    /// cursor.step(b'b');
+    /// assert_eq!(cursor.value(), None); // "ab"
+    /// cursor.step(b'c');
+    /// assert_eq!(cursor.value(), Some(0)); // "abc"
+    /// cursor.step(b'd');
+    /// assert_eq!(cursor.value(), None); // "abcd"
+    /// assert!(!cursor.is_empty());
+    /// cursor.step(b'x'); // no strings have the prefix "abcdx"
+    /// assert!(cursor.is_empty());
+    /// assert_eq!(cursor.value(), None); // "abcdx"
+    /// cursor.step(b'y');
+    /// assert_eq!(cursor.value(), None); // "abcdxy"
+    /// ```
+    #[inline]
+    pub fn step(&mut self, byte: u8) {
+        step_bsearch_only(&mut self.trie.store, byte)
+    }
+
+    /// Gets the value at the current position in the trie.
+    ///
+    /// Calling this function on a new cursor is equivalent to calling `.get()`
+    /// with the empty string.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "" and "abc"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"\x80abc\x81");
+    ///
+    /// assert_eq!(Some(0), trie.get(""));
+    /// assert_eq!(Some(0), trie.cursor().value());
+    /// ```
+    #[inline]
+    pub fn value(&self) -> Option<usize> {
+        peek_value(self.trie.store.as_ref())
+    }
+
+    /// Checks whether the cursor points to an empty trie.
+    ///
+    /// Use this to determine when to stop iterating.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.trie.is_empty()
+    }
+}
+
+impl<'a> fmt::Write for ZeroTrieSimpleAsciiCursor<'a> {
+    /// Steps the cursor through each ASCII byte of the string.
+    ///
+    /// If the string contains non-ASCII chars, an error is returned.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use core::fmt::Write;
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "abc" and "abcdef"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+    ///
+    /// let mut cursor = trie.cursor();
+    /// cursor.write_str("abcdxy").expect("all ASCII");
+    /// cursor.write_str("🚂").expect_err("non-ASCII");
+    /// ```
+    fn write_str(&mut self, s: &str) -> fmt::Result {
+        for b in s.bytes() {
+            if !b.is_ascii() {
+                return Err(fmt::Error);
+            }
+            self.step(b);
+        }
+        Ok(())
+    }
+    /// Equivalent to [`ZeroTrieSimpleAsciiCursor::step()`], except returns
+    /// an error if the char is non-ASCII.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use core::fmt::Write;
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "abc" and "abcdef"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+    ///
+    /// let mut cursor = trie.cursor();
+    /// cursor.write_char('a').expect("ASCII");
+    /// cursor.write_char('x').expect("ASCII");
+    /// cursor.write_char('🚂').expect_err("non-ASCII");
+    /// ```
+    fn write_char(&mut self, c: char) -> fmt::Result {
+        if !c.is_ascii() {
+            return Err(fmt::Error);
+        }
+        self.step(c as u8);
+        Ok(())
+    }
+}
diff --git a/experimental/zerotrie/src/lib.rs b/experimental/zerotrie/src/lib.rs
index 61fca0555b9..f03c56122e8 100644
--- a/experimental/zerotrie/src/lib.rs
+++ b/experimental/zerotrie/src/lib.rs
@@ -56,6 +56,7 @@ extern crate alloc;
 
 mod builder;
 mod byte_phf;
+mod cursor;
 mod error;
 #[macro_use]
 mod helpers;
@@ -65,11 +66,11 @@ mod serde;
 mod varint;
 mod zerotrie;
 
+pub use crate::cursor::ZeroTrieSimpleAsciiCursor;
 pub use crate::zerotrie::ZeroTrie;
 pub use crate::zerotrie::ZeroTrieExtendedCapacity;
 pub use crate::zerotrie::ZeroTriePerfectHash;
 pub use crate::zerotrie::ZeroTrieSimpleAscii;
-pub use crate::zerotrie::ZeroTrieSimpleAsciiCursor;
 pub use error::Error as ZeroTrieError;
 
 #[doc(hidden)]
diff --git a/experimental/zerotrie/src/zerotrie.rs b/experimental/zerotrie/src/zerotrie.rs
index 5608927c8c1..34921efd5ad 100644
--- a/experimental/zerotrie/src/zerotrie.rs
+++ b/experimental/zerotrie/src/zerotrie.rs
@@ -4,7 +4,6 @@
 
 use crate::reader::*;
 
-use core::fmt;
 use core::borrow::Borrow;
 
 #[cfg(feature = "alloc")]
@@ -584,206 +583,6 @@ impl_zerotrie_subtype!(
     Vec::into_boxed_slice
 );
 
-impl<Store> ZeroTrieSimpleAscii<Store>
-where
-    Store: AsRef<[u8]> + ?Sized,
-{
-    /// Gets a cursor into the current trie.
-    ///
-    /// Useful to query a trie with data that is not a slice.
-    ///
-    /// This is currently supported only on `ZeroTrieSimpleAscii`.
-    ///
-    /// # Examples
-    ///
-    /// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
-    ///
-    /// ```
-    /// use core::fmt::Write;
-    /// use zerotrie::ZeroTrieSimpleAscii;
-    ///
-    /// // A trie with two values: "abc" and "abcdef"
-    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
-    ///
-    /// // Get out the value for "abc"
-    /// let mut cursor = trie.cursor();
-    /// write!(&mut cursor, "abc");
-    /// assert_eq!(cursor.value(), Some(0));
-    /// ```
-    ///
-    /// Find the longest prefix match:
-    ///
-    /// ```
-    /// use zerotrie::ZeroTrieSimpleAscii;
-    ///
-    /// // A trie with two values: "abc" and "abcdef"
-    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
-    ///
-    /// // Find the longest prefix of the string "abcdxy":
-    /// let query = b"abcdxy";
-    /// let mut longest_prefix = 0;
-    /// let mut cursor = trie.cursor();
-    /// for (i, b) in query.iter().enumerate() {
-    ///     // Checking is_empty() is not required, but it is
-    ///     // good for efficiency
-    ///     if cursor.is_empty() {
-    ///         break;
-    ///     }
-    ///     if cursor.value().is_some() {
-    ///         longest_prefix = i;
-    ///     }
-    ///     cursor.step(*b);
-    /// }
-    ///
-    /// // The longest prefix is "abc" which is length 3:
-    /// assert_eq!(longest_prefix, 3);
-    /// ```
-    #[inline]
-    pub fn cursor(&self) -> ZeroTrieSimpleAsciiCursor {
-        ZeroTrieSimpleAsciiCursor {
-            trie: self.as_borrowed_slice(),
-        }
-    }
-}
-
-impl<'a> ZeroTrieSimpleAscii<&'a [u8]> {
-    /// Same as [`ZeroTrieSimpleAscii::cursor()`] but moves self to avoid
-    /// having to doubly anchor the trie to the stack.
-    #[inline]
-    pub fn into_cursor(self) -> ZeroTrieSimpleAsciiCursor<'a> {
-        ZeroTrieSimpleAsciiCursor {
-            trie: self
-        }
-    }
-}
-
-/// A cursor into a [`ZeroTrieSimpleAscii`], useful for stepwise lookup.
-///
-/// For examples, see [`ZeroTrieSimpleAscii::cursor()`].
-#[derive(Debug)]
-pub struct ZeroTrieSimpleAsciiCursor<'a> {
-    trie: ZeroTrieSimpleAscii<&'a [u8]>,
-}
-
-impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
-    /// Steps the cursor one byte into the trie.
-    ///
-    /// # Examples
-    ///
-    /// Unrolled loop checking for string presence at every step:
-    ///
-    /// ```
-    /// use zerotrie::ZeroTrieSimpleAscii;
-    ///
-    /// // A trie with two values: "abc" and "abcdef"
-    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
-    ///
-    /// // Search the trie for the string "abcdxy"
-    /// let mut cursor = trie.cursor();
-    /// assert_eq!(cursor.value(), None); // ""
-    /// cursor.step(b'a');
-    /// assert_eq!(cursor.value(), None); // "a"
-    /// cursor.step(b'b');
-    /// assert_eq!(cursor.value(), None); // "ab"
-    /// cursor.step(b'c');
-    /// assert_eq!(cursor.value(), Some(0)); // "abc"
-    /// cursor.step(b'd');
-    /// assert_eq!(cursor.value(), None); // "abcd"
-    /// assert!(!cursor.is_empty());
-    /// cursor.step(b'x'); // no strings have the prefix "abcdx"
-    /// assert!(cursor.is_empty());
-    /// assert_eq!(cursor.value(), None); // "abcdx"
-    /// cursor.step(b'y');
-    /// assert_eq!(cursor.value(), None); // "abcdxy"
-    /// ```
-    #[inline]
-    pub fn step(&mut self, byte: u8) {
-        step_bsearch_only(&mut self.trie.store, byte)
-    }
-
-    /// Gets the value at the current position in the trie.
-    ///
-    /// Calling this function on a new cursor is equivalent to calling `.get()`
-    /// with the empty string.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use zerotrie::ZeroTrieSimpleAscii;
-    ///
-    /// // A trie with two values: "" and "abc"
-    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"\x80abc\x81");
-    ///
-    /// assert_eq!(Some(0), trie.get(""));
-    /// assert_eq!(Some(0), trie.cursor().value());
-    /// ```
-    #[inline]
-    pub fn value(&self) -> Option<usize> {
-        peek_value(self.trie.store.as_ref())
-    }
-
-    /// Checks whether the cursor points to an empty trie.
-    ///
-    /// Use this to determine when to stop iterating.
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.trie.is_empty()
-    }
-}
-
-impl<'a> fmt::Write for ZeroTrieSimpleAsciiCursor<'a> {
-    /// Steps the cursor through each ASCII byte of the string.
-    ///
-    /// If the string contains non-ASCII chars, an error is returned.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use core::fmt::Write;
-    /// use zerotrie::ZeroTrieSimpleAscii;
-    ///
-    /// // A trie with two values: "abc" and "abcdef"
-    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
-    ///
-    /// let mut cursor = trie.cursor();
-    /// cursor.write_str("abcdxy").expect("all ASCII");
-    /// cursor.write_str("🚂").expect_err("non-ASCII");
-    /// ```
-    fn write_str(&mut self, s: &str) -> fmt::Result {
-        for b in s.bytes() {
-            if !b.is_ascii() {
-                return Err(fmt::Error);
-            }
-            self.step(b);
-        }
-        Ok(())
-    }
-    /// Equivalent to [`ZeroTrieSimpleAsciiCursor::step()`], except returns
-    /// an error if the char is non-ASCII.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use core::fmt::Write;
-    /// use zerotrie::ZeroTrieSimpleAscii;
-    ///
-    /// // A trie with two values: "abc" and "abcdef"
-    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
-    ///
-    /// let mut cursor = trie.cursor();
-    /// cursor.write_char('a').expect("ASCII");
-    /// cursor.write_char('x').expect("ASCII");
-    /// cursor.write_char('🚂').expect_err("non-ASCII");
-    /// ```
-    fn write_char(&mut self, c: char) -> fmt::Result {
-        if !c.is_ascii() {
-            return Err(fmt::Error);
-        }
-        self.step(c as u8);
-        Ok(())
-    }
-}
-
 macro_rules! impl_dispatch {
     ($self:ident, $inner_fn:ident()) => {
         match $self.0 {

From bb4d1e700b5a09b89a684e899da6074cef75d9b2 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 01:23:33 -0600
Subject: [PATCH 15/21] fmt, clippy

---
 experimental/zerotrie/src/cursor.rs | 8 +++-----
 provider/blob/src/blob_schema.rs    | 4 +---
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/experimental/zerotrie/src/cursor.rs b/experimental/zerotrie/src/cursor.rs
index e03360abb45..4f63fda2b1f 100644
--- a/experimental/zerotrie/src/cursor.rs
+++ b/experimental/zerotrie/src/cursor.rs
@@ -2,8 +2,8 @@
 // called LICENSE at the top level of the ICU4X source tree
 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
 
-use crate::ZeroTrieSimpleAscii;
 use crate::reader::*;
+use crate::ZeroTrieSimpleAscii;
 
 use core::fmt;
 
@@ -74,9 +74,7 @@ impl<'a> ZeroTrieSimpleAscii<&'a [u8]> {
     /// having to doubly anchor the trie to the stack.
     #[inline]
     pub fn into_cursor(self) -> ZeroTrieSimpleAsciiCursor<'a> {
-        ZeroTrieSimpleAsciiCursor {
-            trie: self
-        }
+        ZeroTrieSimpleAsciiCursor { trie: self }
     }
 }
 
@@ -142,7 +140,7 @@ impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
     /// ```
     #[inline]
     pub fn value(&self) -> Option<usize> {
-        peek_value(self.trie.store.as_ref())
+        peek_value(self.trie.store)
     }
 
     /// Checks whether the cursor points to an empty trie.
diff --git a/provider/blob/src/blob_schema.rs b/provider/blob/src/blob_schema.rs
index 7c385679337..cf99364e196 100644
--- a/provider/blob/src/blob_schema.rs
+++ b/provider/blob/src/blob_schema.rs
@@ -165,9 +165,7 @@ impl<'data> BlobSchemaV2<'data> {
             .ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(key, req))?;
         let mut cursor = ZeroTrieSimpleAscii::from_store(zerotrie).into_cursor();
         #[allow(clippy::unwrap_used)] // infallible impl
-        req.locale
-            .write_to(&mut cursor)
-            .unwrap();
+        req.locale.write_to(&mut cursor).unwrap();
         let blob_index = cursor
             .value()
             .ok_or_else(|| DataErrorKind::MissingLocale.with_req(key, req))?;

From babaf98bf29e23b5681dfc7256e06b73c208db82 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 01:24:12 -0600
Subject: [PATCH 16/21] Delete the `.head_value()` function in favor of
 `.cursor().value()`

---
 experimental/zerotrie/src/zerotrie.rs | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/experimental/zerotrie/src/zerotrie.rs b/experimental/zerotrie/src/zerotrie.rs
index 34921efd5ad..deedcb81443 100644
--- a/experimental/zerotrie/src/zerotrie.rs
+++ b/experimental/zerotrie/src/zerotrie.rs
@@ -204,24 +204,6 @@ macro_rules! impl_zerotrie_subtype {
             pub fn is_empty(&self) -> bool {
                 self.store.as_ref().is_empty()
             }
-            /// Gets the value at the head of the trie. This is equivalent to
-            /// calling `get` with the empty string.
-            ///
-            /// # Examples
-            ///
-            /// ```
-            #[doc = concat!("use zerotrie::", stringify!($name), ";")]
-            ///
-            /// // A trie with two values: "" and "abc"
-            #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"\\x80abc\\x81\");")]
-            ///
-            /// assert_eq!(Some(0), trie.head_value());
-            /// assert_eq!(Some(0), trie.get(""));
-            /// ```
-            #[inline]
-            pub fn head_value(&self) -> Option<usize> {
-                peek_value(self.store.as_ref())
-            }
             /// Returns the size of the trie in number of bytes.
             ///
             /// To get the number of keys in the trie, use `.iter().count()`:

From d5e9e2e86fcd6f11a355e73caafcc0a992be1551 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 01:40:01 -0600
Subject: [PATCH 17/21] Change .value() to a mutating function and add
 .peek_value()

---
 experimental/zerotrie/src/cursor.rs | 38 ++++++++++++++++++++++++++---
 experimental/zerotrie/src/reader.rs | 11 ++++++---
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/experimental/zerotrie/src/cursor.rs b/experimental/zerotrie/src/cursor.rs
index 4f63fda2b1f..ea5323f054e 100644
--- a/experimental/zerotrie/src/cursor.rs
+++ b/experimental/zerotrie/src/cursor.rs
@@ -122,7 +122,33 @@ impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
         step_bsearch_only(&mut self.trie.store, byte)
     }
 
-    /// Gets the value at the current position in the trie.
+    /// Takes the value at the current position and moves the cursor.
+    ///
+    /// Calling this function on a new cursor is equivalent to calling `.get()`
+    /// with the empty string.
+    ///
+    /// This is slightly more efficient than [`Self::peek_value()`] if you
+    /// check the value at each step.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerotrie::ZeroTrieSimpleAscii;
+    ///
+    /// // A trie with two values: "" and "abc"
+    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"\x80abc\x81");
+    ///
+    /// assert_eq!(Some(0), trie.get(""));
+    /// let mut cursor = trie.cursor();
+    /// assert_eq!(Some(0), cursor.value());
+    /// assert_eq!(None, cursor.value());
+    /// ```
+    #[inline]
+    pub fn value(&mut self) -> Option<usize> {
+        take_value(&mut self.trie.store)
+    }
+
+    /// Gets the value at the current position without moving the cursor.
     ///
     /// Calling this function on a new cursor is equivalent to calling `.get()`
     /// with the empty string.
@@ -136,11 +162,14 @@ impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
     /// let trie = ZeroTrieSimpleAscii::from_bytes(b"\x80abc\x81");
     ///
     /// assert_eq!(Some(0), trie.get(""));
-    /// assert_eq!(Some(0), trie.cursor().value());
+    /// let cursor = trie.cursor();
+    /// assert_eq!(Some(0), cursor.peek_value());
+    /// assert_eq!(Some(0), cursor.peek_value());
     /// ```
     #[inline]
-    pub fn value(&self) -> Option<usize> {
-        peek_value(self.trie.store)
+    pub fn peek_value(&self) -> Option<usize> {
+        let mut temp = self.trie.store;
+        take_value(&mut temp)
     }
 
     /// Checks whether the cursor points to an empty trie.
@@ -179,6 +208,7 @@ impl<'a> fmt::Write for ZeroTrieSimpleAsciiCursor<'a> {
         }
         Ok(())
     }
+
     /// Equivalent to [`ZeroTrieSimpleAsciiCursor::step()`], except returns
     /// an error if the char is non-ASCII.
     ///
diff --git a/experimental/zerotrie/src/reader.rs b/experimental/zerotrie/src/reader.rs
index c27592f7816..d49bab5212b 100644
--- a/experimental/zerotrie/src/reader.rs
+++ b/experimental/zerotrie/src/reader.rs
@@ -566,12 +566,15 @@ pub(crate) fn step_bsearch_only(trie: &mut &[u8], c: u8) {
     };
 }
 
-pub(crate) fn peek_value(mut trie: &[u8]) -> Option<usize> {
-    let b;
-    (b, trie) = trie.split_first()?;
+pub(crate) fn take_value(trie: &mut &[u8]) -> Option<usize> {
+    let (b, new_trie) = trie.split_first()?;
     match byte_type(*b) {
         NodeType::Ascii | NodeType::Span | NodeType::Branch => None,
-        NodeType::Value => Some(read_varint_meta3(*b, trie).0),
+        NodeType::Value => {
+            let x;
+            (x, *trie) = read_varint_meta3(*b, new_trie);
+            Some(x)
+        }
     }
 }
 

From fb05eb002e7f5c5cdcccba3de8cd0d2d2ae0bd76 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 13:24:56 -0600
Subject: [PATCH 18/21] Update provider/blob/src/blob_schema.rs

Co-authored-by: Robert Bastian <4706271+robertbastian@users.noreply.github.com>
---
 provider/blob/src/blob_schema.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/provider/blob/src/blob_schema.rs b/provider/blob/src/blob_schema.rs
index cf99364e196..b1651204b9e 100644
--- a/provider/blob/src/blob_schema.rs
+++ b/provider/blob/src/blob_schema.rs
@@ -164,7 +164,7 @@ impl<'data> BlobSchemaV2<'data> {
             .get(key_index)
             .ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(key, req))?;
         let mut cursor = ZeroTrieSimpleAscii::from_store(zerotrie).into_cursor();
-        #[allow(clippy::unwrap_used)] // infallible impl
+        #[allow(clippy::unwrap_used)] // DataLocale::write_to produces ASCII only
         req.locale.write_to(&mut cursor).unwrap();
         let blob_index = cursor
             .value()

From 5ce1b16c0b1e654b00b5937465fa79216f2062e1 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 13:33:00 -0600
Subject: [PATCH 19/21] Remove peek_value and impl Clone instead

---
 experimental/zerotrie/src/cursor.rs | 34 ++++-------------------------
 1 file changed, 4 insertions(+), 30 deletions(-)

diff --git a/experimental/zerotrie/src/cursor.rs b/experimental/zerotrie/src/cursor.rs
index ea5323f054e..dbb91f9e3dd 100644
--- a/experimental/zerotrie/src/cursor.rs
+++ b/experimental/zerotrie/src/cursor.rs
@@ -81,7 +81,8 @@ impl<'a> ZeroTrieSimpleAscii<&'a [u8]> {
 /// A cursor into a [`ZeroTrieSimpleAscii`], useful for stepwise lookup.
 ///
 /// For examples, see [`ZeroTrieSimpleAscii::cursor()`].
-#[derive(Debug)]
+// Clone but not Copy: <https://stackoverflow.com/q/32324251/1407170>
+#[derive(Debug, Clone)]
 pub struct ZeroTrieSimpleAsciiCursor<'a> {
     trie: ZeroTrieSimpleAscii<&'a [u8]>,
 }
@@ -122,13 +123,10 @@ impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
         step_bsearch_only(&mut self.trie.store, byte)
     }
 
-    /// Takes the value at the current position and moves the cursor.
+    /// Takes the value at the current position.
     ///
     /// Calling this function on a new cursor is equivalent to calling `.get()`
-    /// with the empty string.
-    ///
-    /// This is slightly more efficient than [`Self::peek_value()`] if you
-    /// check the value at each step.
+    /// with the empty string (except that it can only be called once).
     ///
     /// # Examples
     ///
@@ -148,30 +146,6 @@ impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
         take_value(&mut self.trie.store)
     }
 
-    /// Gets the value at the current position without moving the cursor.
-    ///
-    /// Calling this function on a new cursor is equivalent to calling `.get()`
-    /// with the empty string.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use zerotrie::ZeroTrieSimpleAscii;
-    ///
-    /// // A trie with two values: "" and "abc"
-    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"\x80abc\x81");
-    ///
-    /// assert_eq!(Some(0), trie.get(""));
-    /// let cursor = trie.cursor();
-    /// assert_eq!(Some(0), cursor.peek_value());
-    /// assert_eq!(Some(0), cursor.peek_value());
-    /// ```
-    #[inline]
-    pub fn peek_value(&self) -> Option<usize> {
-        let mut temp = self.trie.store;
-        take_value(&mut temp)
-    }
-
     /// Checks whether the cursor points to an empty trie.
     ///
     /// Use this to determine when to stop iterating.

From 7017868df8866f6a9c0f57326eb24d878673fed3 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Fri, 1 Dec 2023 16:38:08 -0600
Subject: [PATCH 20/21] value -> take_value

---
 experimental/zerotrie/src/cursor.rs | 24 ++++++++++++------------
 provider/blob/src/blob_schema.rs    |  2 +-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/experimental/zerotrie/src/cursor.rs b/experimental/zerotrie/src/cursor.rs
index dbb91f9e3dd..a7a88645415 100644
--- a/experimental/zerotrie/src/cursor.rs
+++ b/experimental/zerotrie/src/cursor.rs
@@ -31,7 +31,7 @@ where
     /// // Get out the value for "abc"
     /// let mut cursor = trie.cursor();
     /// write!(&mut cursor, "abc");
-    /// assert_eq!(cursor.value(), Some(0));
+    /// assert_eq!(cursor.take_value(), Some(0));
     /// ```
     ///
     /// Find the longest prefix match:
@@ -52,7 +52,7 @@ where
     ///     if cursor.is_empty() {
     ///         break;
     ///     }
-    ///     if cursor.value().is_some() {
+    ///     if cursor.take_value().is_some() {
     ///         longest_prefix = i;
     ///     }
     ///     cursor.step(*b);
@@ -102,21 +102,21 @@ impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
     ///
     /// // Search the trie for the string "abcdxy"
     /// let mut cursor = trie.cursor();
-    /// assert_eq!(cursor.value(), None); // ""
+    /// assert_eq!(cursor.take_value(), None); // ""
     /// cursor.step(b'a');
-    /// assert_eq!(cursor.value(), None); // "a"
+    /// assert_eq!(cursor.take_value(), None); // "a"
     /// cursor.step(b'b');
-    /// assert_eq!(cursor.value(), None); // "ab"
+    /// assert_eq!(cursor.take_value(), None); // "ab"
     /// cursor.step(b'c');
-    /// assert_eq!(cursor.value(), Some(0)); // "abc"
+    /// assert_eq!(cursor.take_value(), Some(0)); // "abc"
     /// cursor.step(b'd');
-    /// assert_eq!(cursor.value(), None); // "abcd"
+    /// assert_eq!(cursor.take_value(), None); // "abcd"
     /// assert!(!cursor.is_empty());
     /// cursor.step(b'x'); // no strings have the prefix "abcdx"
     /// assert!(cursor.is_empty());
-    /// assert_eq!(cursor.value(), None); // "abcdx"
+    /// assert_eq!(cursor.take_value(), None); // "abcdx"
     /// cursor.step(b'y');
-    /// assert_eq!(cursor.value(), None); // "abcdxy"
+    /// assert_eq!(cursor.take_value(), None); // "abcdxy"
     /// ```
     #[inline]
     pub fn step(&mut self, byte: u8) {
@@ -138,11 +138,11 @@ impl<'a> ZeroTrieSimpleAsciiCursor<'a> {
     ///
     /// assert_eq!(Some(0), trie.get(""));
     /// let mut cursor = trie.cursor();
-    /// assert_eq!(Some(0), cursor.value());
-    /// assert_eq!(None, cursor.value());
+    /// assert_eq!(Some(0), cursor.take_value());
+    /// assert_eq!(None, cursor.take_value());
     /// ```
     #[inline]
-    pub fn value(&mut self) -> Option<usize> {
+    pub fn take_value(&mut self) -> Option<usize> {
         take_value(&mut self.trie.store)
     }
 
diff --git a/provider/blob/src/blob_schema.rs b/provider/blob/src/blob_schema.rs
index b1651204b9e..37cd2b3bfa5 100644
--- a/provider/blob/src/blob_schema.rs
+++ b/provider/blob/src/blob_schema.rs
@@ -167,7 +167,7 @@ impl<'data> BlobSchemaV2<'data> {
         #[allow(clippy::unwrap_used)] // DataLocale::write_to produces ASCII only
         req.locale.write_to(&mut cursor).unwrap();
         let blob_index = cursor
-            .value()
+            .take_value()
             .ok_or_else(|| DataErrorKind::MissingLocale.with_req(key, req))?;
         let buffer = self
             .buffers

From 09b56c03b07db0f89bb7cff02bf9d413bf778a66 Mon Sep 17 00:00:00 2001
From: "Shane F. Carr" <shane@unicode.org>
Date: Mon, 4 Dec 2023 13:59:13 -0800
Subject: [PATCH 21/21] Docs for internal functions

---
 experimental/zerotrie/src/reader.rs | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/experimental/zerotrie/src/reader.rs b/experimental/zerotrie/src/reader.rs
index d49bab5212b..bc5df09808e 100644
--- a/experimental/zerotrie/src/reader.rs
+++ b/experimental/zerotrie/src/reader.rs
@@ -504,6 +504,11 @@ pub fn get_phf_extended(mut trie: &[u8], mut ascii: &[u8]) -> Option<usize> {
     }
 }
 
+/// Steps one node into the trie, assuming that all branch nodes are binary search nodes
+/// and that there are no span nodes.
+///
+/// The input-output argument `trie` starts at the original trie and ends pointing to
+/// the sub-trie reachable by `c`.
 pub(crate) fn step_bsearch_only(trie: &mut &[u8], c: u8) {
     let (mut b, x, search);
     loop {
@@ -566,6 +571,11 @@ pub(crate) fn step_bsearch_only(trie: &mut &[u8], c: u8) {
     };
 }
 
+/// Steps one node into the trie if the head node is a value node, returning the value.
+/// If the head node is not a value node, no change is made.
+///
+/// The input-output argument `trie` starts at the original trie and ends pointing to
+/// the sub-trie with the value node removed.
 pub(crate) fn take_value(trie: &mut &[u8]) -> Option<usize> {
     let (b, new_trie) = trie.split_first()?;
     match byte_type(*b) {