open-i18n · CAD97 · Aug 12, 2017 · Aug 12, 2017 · Aug 12, 2017 · Aug 12, 2017
diff --git a/unic/char/Cargo.toml b/unic/char/Cargo.toml
@@ -16,3 +16,4 @@ travis-ci = { repository = "behnam/rust-unic", branch = "master" }
 
 [dependencies]
 unic-char-property = { path = "property/", version = "0.5.0" }
+unic-char-range = { path = "range/", version = "0.5.0" }
diff --git a/unic/char/range/Cargo.toml b/unic/char/range/Cargo.toml
@@ -0,0 +1,27 @@
+[package]
+name = "unic-char-range"
+version = "0.5.0"
+authors = ["The UNIC Project Developers"]
+repository = "https://github.com/behnam/rust-unic/"
+license = "MIT/Apache-2.0"
+keywords = ["text", "unicode", "iteration"]
+description = "UNIC - CharRange"
+categories = ["text-processing", "iteration"]
+readme = "README.md"
+
+# No tests/benches that depend on /data/
+exclude = []
+
+[features]
+default = []
+
+# Unstable features
+unstable = [ "associated-consts", "fused", "trusted-len" ]
+associated-consts = []
+fused = []
+trusted-len = []
+
+[dependencies]
+
+[badges]
+travis-ci = { repository = "behnam/rust-unic", branch = "master" }
diff --git a/unic/char/range/benches/benchmarks.rs b/unic/char/range/benches/benchmarks.rs
@@ -0,0 +1,17 @@
+#![feature(test)]
+
+extern crate test;
+extern crate unic_char_range;
+
+use unic_char_range::*;
+
+#[bench]
+// Measures counting every `char` yielded by iterating `CharRange::all()`.
+fn count(b: &mut test::Bencher) {
+    b.iter(|| CharRange::all().iter().count())
+}
+
+#[bench]
+// Baseline for `count`: walks the plain integer range `0..0x110000`, i.e. the
+// same span of code points but without skipping the surrogates.
+// NOTE(review): the always-true `take_while` presumably keeps `count` from
+// being answered straight from the range bounds — confirm before relying on
+// the comparison.
+fn count_baseline(b: &mut test::Bencher) {
+    b.iter(|| (0..0x110000).take_while(|_| true).count())
+}
diff --git a/unic/char/range/src/iter.rs b/unic/char/range/src/iter.rs
@@ -0,0 +1,112 @@
+use std::{char, iter};
+use CharRange;
+
+/// Range of Surrogate Code Points.
+///
+/// Reference: <http://unicode.org/glossary/#surrogate_code_point>
+const SURROGATE_RANGE: ::std::ops::Range<u32> = 0xD800..0xE000;
+
+/// An iterator over `char`.
+///
+/// The code points still to be yielded are tracked as the half-open `u32`
+/// interval `low..high`. The iterator can be advanced from either end, and
+/// both ends step over the surrogate code points (`0xD800..0xE000`).
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct CharIter {
+    /// The smallest codepoint that has not been yielded yet.
+    ///
+    /// If greater than or equal to `high`, iteration is finished.
+    ///
+    /// # Safety
+    ///
+    /// Must be a valid, non-surrogate codepoint while iteration is unfinished.
+    low: u32,
+
+    /// One greater than the largest codepoint that has not been yielded yet.
+    ///
+    /// If less than or equal to `low`, iteration is finished.
+    ///
+    /// # Safety
+    ///
+    /// Must be one greater than a valid, non-surrogate codepoint while iteration is unfinished.
+    high: u32,
+}
+
+impl<'a> From<&'a CharRange> for CharIter {
+    /// Builds an iterator covering `range.first()` through `range.last()`.
+    ///
+    /// The back cursor is stored one past the last character, so the pair
+    /// forms a half-open interval of code points.
+    fn from(range: &CharRange) -> CharIter {
+        let front = range.first() as u32;
+        let past_back = range.last() as u32 + 1;
+        CharIter {
+            low: front,
+            high: past_back,
+        }
+    }
+}
+
+impl CharIter {
+    /// Returns `true` once nothing is left to yield, i.e. once the back
+    /// cursor no longer lies above the front cursor.
+    #[inline]
+    fn is_finished(&self) -> bool {
+        self.high <= self.low
+    }
+}
+
+impl Iterator for CharIter {
+    type Item = char;
+
+    /// Yields the lowest remaining `char` and advances the front cursor, or
+    /// returns `None` when iteration is finished.
+    #[inline]
+    #[allow(unsafe_code)]
+    fn next(&mut self) -> Option<char> {
+        if self.is_finished() {
+            return None;
+        }
+
+        // SAFETY: relies on the invariant documented on `low`: while iteration
+        // is unfinished it holds a valid, non-surrogate code point.
+        let char = unsafe { char::from_u32_unchecked(self.low) };
+        self.low += 1;
+
+        // ensure `low` is never a surrogate code point: when the increment
+        // lands on the first surrogate, jump to the first code point after them
+        if self.low == SURROGATE_RANGE.start {
+            self.low = SURROGATE_RANGE.end;
+        }
+
+        Some(char)
+    }
+
+    /// Reports the value of `len()` as both the lower and the upper bound.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let len = self.len();
+        (len, Some(len))
+    }
+}
+
+impl iter::DoubleEndedIterator for CharIter {
+    /// Yields the highest remaining `char` and moves the back cursor down, or
+    /// returns `None` when iteration is finished.
+    #[allow(unsafe_code)]
+    fn next_back(&mut self) -> Option<char> {
+        if self.is_finished() {
+            return None;
+        }
+
+        // `high` is one past the largest remaining code point, so step down
+        // first and then read it.
+        self.high -= 1;
+        // SAFETY: relies on the invariant documented on `high`: while iteration
+        // is unfinished it is one greater than a valid, non-surrogate code
+        // point, so after the decrement it is that code point.
+        let char = unsafe { char::from_u32_unchecked(self.high) };
+
+        // ensure `high` is never one greater than a surrogate code point: after
+        // yielding the first code point past the surrogates, move the cursor
+        // down to the start of the surrogate range
+        if self.high == SURROGATE_RANGE.end {
+            self.high = SURROGATE_RANGE.start;
+        }
+
+        Some(char)
+    }
+}
+
+impl iter::ExactSizeIterator for CharIter {
+    /// Number of characters still to be yielded.
+    ///
+    /// This is the width of `low..high`, minus the size of the surrogate
+    /// range when that range lies entirely between the two cursors.
+    fn len(&self) -> usize {
+        if self.is_finished() {
+            return 0;
+        }
+        let width = self.high as usize - self.low as usize;
+        let spans_surrogates =
+            self.low <= SURROGATE_RANGE.start && SURROGATE_RANGE.end <= self.high;
+        let skipped = if spans_surrogates {
+            SURROGATE_RANGE.len()
+        } else {
+            0
+        };
+        width - skipped
+    }
+}
+
+// Only compiled with the `fused` feature. Both `next` and `next_back` return
+// `None` without touching any state once `is_finished()` holds, so a finished
+// iterator stays finished.
+#[cfg(feature = "fused")]
+impl iter::FusedIterator for CharIter {}
+
+// Only compiled with the `trusted-len` feature.
+// SAFETY: `size_hint` reports `len()` as both its lower and upper bound.
+// NOTE(review): soundness therefore rests on `len()` being exact for every
+// reachable state of the iterator — re-verify whenever `len` or the cursor
+// updates change.
+#[allow(unsafe_code)]
+#[cfg(feature = "trusted-len")]
+unsafe impl iter::TrustedLen for CharIter {}
diff --git a/unic/char/range/src/lib.rs b/unic/char/range/src/lib.rs
@@ -0,0 +1,33 @@
+//! # Unic - Char - Range
+//!
+//! A simple way to control iteration over a range of characters.
+//!
+//! # Examples
+//!
+//! ```
+//! # #[macro_use] extern crate unic_char_range;
+//! # use unic_char_range::*;
+//!
+//! # fn main() {
+//! for character in chars!('a'=..='z') {
+//!     // character is each letter of the lowercase English alphabet, in order
+//! }
+//!
+//! for character in CharRange::all() {
+//!     // character is every valid char from lowest codepoint to highest
+//! }
+//! # }
+//! ```
+//!
+#![forbid(bad_style, missing_debug_implementations, unconditional_recursion)]
+#![deny(missing_docs, unsafe_code, unused, future_incompatible)]
+#![cfg_attr(feature = "fused", feature(fused))]
+#![cfg_attr(feature = "trusted-len", feature(trusted_len))]
+
+mod range;
+mod iter;
+mod step;
+mod macros;
+
+pub use range::CharRange;
+pub use iter::CharIter;
diff --git a/unic/char/range/src/macros.rs b/unic/char/range/src/macros.rs
@@ -0,0 +1,74 @@
+#[macro_export]
+/// Convenience macro to allow simple construction of character ranges.
+///
+/// The symbol next to each side of `..` says whether that endpoint is part of
+/// the range: `=` includes the endpoint, `<` excludes it.
+///
+/// # Syntax
+///
+/// ```
+/// # #[macro_use] extern crate unic_char_range;
+/// # fn main() {
+/// chars!('a'=..='z'); // iterates the inclusive range 'a' through 'z'
+/// chars!('a'=..<'z'); // iterates the inclusive range 'a' through 'y'
+/// chars!('a'<..='z'); // iterates the inclusive range 'b' through 'z'
+/// chars!('a'<..<'z'); // iterates the inclusive range 'b' through 'y'
+/// # }
+/// ```
+macro_rules! chars {
+    // An `$:expr` fragment may only be followed by `=>`, `,` or `;`, so the
+    // endpoints are matched as `$:tt` instead. Each endpoint must therefore be
+    // a single token tree (e.g. a literal, an identifier, or a parenthesized
+    // expression).
+    ( $start:tt =..= $end:tt ) => ( $crate::CharRange::closed_range($start, $end) );
+    ( $start:tt =..< $end:tt ) => ( $crate::CharRange::half_open_right_range($start, $end) );
+    ( $start:tt <..= $end:tt ) => ( $crate::CharRange::half_open_left_range($start, $end) );
+    ( $start:tt <..< $end:tt ) => ( $crate::CharRange::open_range($start, $end) );
+}
+
+#[cfg(test)]
+mod test {
+    use std::char;
+
+    /// `=..=` yields every character from 'a' through 'z', in order.
+    #[test]
+    fn char_closed_iteration_works() {
+        // code point the next yielded character is expected to have
+        let mut expected = 'a' as u32;
+
+        for ch in chars!('a'=..='z') {
+            assert_eq!(Some(ch), char::from_u32(expected));
+            expected += 1;
+        }
+
+        assert_eq!(expected - 1, 'z' as u32, "All characters were iterated");
+    }
+
+    /// `=..<` yields every character from 'a' through 'y', in order.
+    #[test]
+    fn char_half_open_right_iteration_works() {
+        // code point the next yielded character is expected to have
+        let mut expected = 'a' as u32;
+
+        for ch in chars!('a'=..<'z') {
+            assert_eq!(Some(ch), char::from_u32(expected));
+            expected += 1;
+        }
+
+        assert_eq!(expected - 1, 'y' as u32, "All characters were iterated");
+    }
+
+    /// `<..=` yields every character from 'b' through 'z', in order.
+    #[test]
+    fn char_half_open_left_iteration_works() {
+        // code point the next yielded character is expected to have
+        let mut expected = 'b' as u32;
+
+        for ch in chars!('a'<..='z') {
+            assert_eq!(Some(ch), char::from_u32(expected));
+            expected += 1;
+        }
+
+        assert_eq!(expected - 1, 'z' as u32, "All characters were iterated");
+    }
+
+    /// `<..<` yields every character from 'b' through 'y', in order.
+    #[test]
+    fn char_open_iteration_works() {
+        // code point the next yielded character is expected to have
+        let mut expected = 'b' as u32;
+
+        for ch in chars!('a'<..<'z') {
+            assert_eq!(Some(ch), char::from_u32(expected));
+            expected += 1;
+        }
+
+        assert_eq!(expected - 1, 'y' as u32, "All characters were iterated");
+    }
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -16,3 +16,4 @@ travis-ci = { repository = "behnam/rust-unic", branch = "master" }

		[dependencies]
		unic-char-property = { path = "property/", version = "0.5.0" }
		unic-char-range = { path = "range/", version = "0.5.0" }