open-i18n · bors · Aug 13, 2017 · Aug 12, 2017 · Aug 12, 2017 · Aug 12, 2017
diff --git a/unic/char/Cargo.toml b/unic/char/Cargo.toml
@@ -16,3 +16,4 @@ travis-ci = { repository = "behnam/rust-unic", branch = "master" }
 
 [dependencies]
 unic-char-property = { path = "property/", version = "0.5.0" }
+unic-char-range = { path = "range/", version = "0.5.0" }
diff --git a/unic/char/range/Cargo.toml b/unic/char/range/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "unic-char-range"
+version = "0.5.0"
+authors = ["The UNIC Project Developers"]
+repository = "https://github.com/behnam/rust-unic/"
+license = "MIT/Apache-2.0"
+keywords = ["text", "unicode", "iteration"]
+description = "UNIC - Unicode Characters - Character Range and Iteration"
+categories = ["text-processing"]
+
+# No tests/benches that depend on /data/
+exclude = []
+
+[features]
+default = []
+
+# Unstable features
+unstable = [ "exact-size-is-empty", "fused", "trusted-len" ]
+exact-size-is-empty = []
+fused = []
+trusted-len = []
+
+
+[badges]
+travis-ci = { repository = "behnam/rust-unic", branch = "master" }
diff --git a/unic/char/range/benches/benchmarks.rs b/unic/char/range/benches/benchmarks.rs
@@ -0,0 +1,32 @@
+#![feature(test)]
+
+extern crate test;
+extern crate unic_char_range;
+
+use std::char;
+use unic_char_range::CharRange;
+
+/// Benchmarks counting the items yielded by forward iteration over `CharRange::all()`.
+#[bench]
+fn forward_iteration(b: &mut test::Bencher) {
+    b.iter(|| {
+        let every_char = CharRange::all();
+        every_char.iter().count()
+    })
+}
+
+/// Baseline for `forward_iteration`: counts the values in `0..0x11_0000`
+/// that `char::from_u32` accepts, walking the integers in ascending order.
+#[bench]
+fn forward_iteration_baseline(b: &mut test::Bencher) {
+    b.iter(|| {
+        let code_points = 0..0x11_0000;
+        code_points.filter_map(char::from_u32).count()
+    })
+}
+
+/// Benchmarks counting the items yielded by reversed iteration over `CharRange::all()`.
+#[bench]
+fn reverse_iteration(b: &mut test::Bencher) {
+    b.iter(|| {
+        let every_char = CharRange::all();
+        every_char.iter().rev().count()
+    })
+}
+
+/// Baseline for `reverse_iteration`: counts the values in `0..0x11_0000`
+/// that `char::from_u32` accepts, walking the integers in descending order.
+#[bench]
+fn reverse_iteration_baseline(b: &mut test::Bencher) {
+    b.iter(|| {
+        let code_points = 0..0x11_0000;
+        code_points.rev().filter_map(char::from_u32).count()
+    })
+}
+
+/// Benchmarks computing the length of `CharRange::all()`.
+///
+/// The range is passed through `test::black_box` so the optimizer cannot treat
+/// it as a known constant and fold the whole `len()` computation away, which
+/// would leave the benchmark measuring nothing.
+#[bench]
+fn range_length(b: &mut test::Bencher) {
+    b.iter(|| test::black_box(CharRange::all()).len())
+}
diff --git a/unic/char/range/src/iter.rs b/unic/char/range/src/iter.rs
@@ -0,0 +1,151 @@
+use std::char;
+use std::ops::Range;
+use {step, CharRange};
+
+/// The surrogate code points `U+D800..U+E000` (end exclusive), as raw `u32` values.
+///
+/// No `char` can hold a value in this range, so `CharIter::len` subtracts the
+/// length of this range when the remaining span covers it.
+const SURROGATE_RANGE: Range<u32> = 0xD800..0xE000;
+
+/// An iterator over a range of Unicode code points.
+///
+/// Constructed via `CharRange::iter`. See `CharRange` for more information.
+///
+/// Both bounds are inclusive. The iterator is exhausted exactly when
+/// `low > high`; the stepping helpers force that state when a bound cannot
+/// move any further.
+#[derive(Clone, Debug)]
+pub struct CharIter {
+    /// The lowest uniterated character (inclusive).
+    ///
+    /// Iteration is finished if this is higher than `high`.
+    low: char,
+
+    /// The highest uniterated character (inclusive).
+    ///
+    /// Iteration is finished if this is lower than `low`.
+    high: char,
+}
+
+/// Builds an iterator whose inclusive bounds are the range's `low` and `high`.
+impl From<CharRange> for CharIter {
+    fn from(range: CharRange) -> CharIter {
+        let (low, high) = (range.low, range.high);
+        CharIter { low, high }
+    }
+}
+
+/// Turns the iterator's current inclusive bounds back into a range.
+impl From<CharIter> for CharRange {
+    fn from(iter: CharIter) -> CharRange {
+        let (low, high) = (iter.low, iter.high);
+        CharRange { low, high }
+    }
+}
+
+impl CharIter {
+    #[inline]
+    #[allow(unsafe_code)]
+    // Consumes the lowest remaining character by advancing `self.low`.
+    //
+    // When stepping `self.low` forward would go over `char::MAX`,
+    // set `self.high` to `'\0'` instead. It will have the same effect --
+    // consuming the last element from the iterator and ending iteration.
+    fn step_forward(&mut self) {
+        if self.low == char::MAX {
+            self.high = '\0'
+        } else {
+            // SAFETY: NOTE(review) -- `step::forward` is defined outside this file;
+            // presumably it requires a successor `char` to exist, which the
+            // `char::MAX` check above guarantees. Confirm against `step`.
+            self.low = unsafe { step::forward(self.low) }
+        }
+    }
+
+    #[inline]
+    #[allow(unsafe_code)]
+    // Consumes the highest remaining character by moving `self.high` down.
+    //
+    // When stepping `self.high` backward would cause underflow,
+    // set `self.low` to `char::MAX` instead. It will have the same effect --
+    // consuming the last element from the iterator and ending iteration.
+    fn step_backward(&mut self) {
+        if self.high == '\0' {
+            self.low = char::MAX;
+        } else {
+            // SAFETY: NOTE(review) -- `step::backward` is defined outside this file;
+            // presumably it requires a predecessor `char` to exist, which the
+            // `'\0'` check above guarantees. Confirm against `step`.
+            self.high = unsafe { step::backward(self.high) }
+        }
+    }
+
+    #[inline]
+    /// Returns `true` once no characters remain, i.e. when `low > high`.
+    ///
+    /// Stand-in for `ExactSizeIterator::is_empty()` that is usable on stable.
+    fn is_finished(&self) -> bool {
+        self.low > self.high
+    }
+}
+
+impl Iterator for CharIter {
+    type Item = char;
+
+    /// Yields the lowest remaining character, or `None` once exhausted.
+    #[inline]
+    fn next(&mut self) -> Option<char> {
+        if self.is_finished() {
+            None
+        } else {
+            let current = self.low;
+            self.step_forward();
+            Some(current)
+        }
+    }
+
+    /// The remaining length is known exactly, so both bounds are equal.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let remaining = self.len();
+        (remaining, Some(remaining))
+    }
+
+    /// The final item is the upper bound; no iteration is needed to reach it.
+    fn last(self) -> Option<char> {
+        if self.is_finished() {
+            return None;
+        }
+        Some(self.high)
+    }
+
+    /// Items are yielded in ascending order, so the maximum is the last item.
+    fn max(self) -> Option<char> {
+        self.last()
+    }
+
+    /// Items are yielded in ascending order, so the minimum is the next item.
+    fn min(mut self) -> Option<char> {
+        self.next()
+    }
+}
+
+impl DoubleEndedIterator for CharIter {
+    /// Yields the highest remaining character, or `None` once exhausted.
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.is_finished() {
+            return None;
+        }
+
+        let current = self.high;
+        self.step_backward();
+        Some(current)
+    }
+}
+
+impl ExactSizeIterator for CharIter {
+    /// Number of characters left to yield.
+    ///
+    /// Counts the code points from `low` through `high`, leaving out the
+    /// surrogate block when the remaining span covers it.
+    fn len(&self) -> usize {
+        if self.is_finished() {
+            return 0;
+        }
+
+        let first = self.low as u32;
+        let past_last = self.high as u32 + 1;
+        let code_points = (first..past_last).len();
+
+        let covers_surrogates =
+            first <= SURROGATE_RANGE.start && past_last >= SURROGATE_RANGE.end;
+        if covers_surrogates {
+            code_points - SURROGATE_RANGE.len()
+        } else {
+            code_points
+        }
+    }
+
+    #[cfg(feature = "exact-size-is-empty")]
+    fn is_empty(&self) -> bool {
+        self.is_finished()
+    }
+}
+
+#[cfg(any(feature = "fused", feature = "trusted-len"))]
+use std::iter;
+
+// Once `low > high`, `next` and `next_back` return `None` without touching the
+// bounds, so the iterator keeps returning `None` after it is exhausted.
+#[cfg(feature = "fused")]
+impl iter::FusedIterator for CharIter {}
+
+// SAFETY: `TrustedLen` requires `size_hint` to report the exact remaining length.
+// `size_hint` returns `(len, Some(len))`, where `len` comes from
+// `ExactSizeIterator::len`.
+// NOTE(review): soundness rests on `len` being exact for every reachable state.
+#[allow(unsafe_code)]
+#[cfg(feature = "trusted-len")]
+unsafe impl iter::TrustedLen for CharIter {}
diff --git a/unic/char/range/src/lib.rs b/unic/char/range/src/lib.rs
@@ -0,0 +1,46 @@
+//! # Unic - Char - Range
+//!
+//! A simple way to control iteration over a range of characters.
+//!
+//! # Examples
+//!
+//! ```
+//! #[macro_use] extern crate unic_char_range;
+//!
+//! # fn main() {
+//! for character in chars!('a'..='z') {
+//!     // character is each character in the lowercase English alphabet in order
+//! }
+//!
+//! for character in chars!(..) {
+//!     // character is every valid char from lowest codepoint to highest
+//! }
+//! # }
+//! ```
+//!
+//! # Features
+//!
+//! None of these features are included by default; they rely on unstable Rust feature gates.
+//!
+//! - `unstable`: enables all features
+//! - `exact-size-is-empty`: provide a specific impl of [`ExactSizeIterator::is_empty`][is_empty]
+//! - `fused`: impl the [`FusedIterator`] contract
+//! - `trusted-len`: impl the [`TrustedLen`] contract
+//!
+//! [is_empty]: https://doc.rust-lang.org/std/iter/trait.ExactSizeIterator.html#method.is_empty
+//! [`FusedIterator`]: https://doc.rust-lang.org/std/iter/trait.FusedIterator.html
+//! [`TrustedLen`]: https://doc.rust-lang.org/std/iter/trait.TrustedLen.html
+//!
+#![forbid(bad_style, missing_debug_implementations, unconditional_recursion)]
+#![deny(missing_docs, unsafe_code, unused, future_incompatible)]
+#![cfg_attr(feature = "exact-size-is-empty", feature(exact_size_is_empty))]
+#![cfg_attr(feature = "fused", feature(fused))]
+#![cfg_attr(feature = "trusted-len", feature(trusted_len))]
+
+mod macros;
+mod range;
+mod iter;
+mod step;
+
+pub use range::CharRange;
+pub use iter::CharIter;
diff --git a/unic/char/range/src/macros.rs b/unic/char/range/src/macros.rs
@@ -0,0 +1,25 @@
+#[macro_export]
+/// Convenience macro for the initialization of `CharRange`s.
+///
+/// # Syntax
+///
+/// ```
+/// # #[macro_use] extern crate unic_char_range;
+/// # fn main() {
+/// chars!('a'..'z'); // The half open range including 'a' and excluding 'z'
+/// chars!('a'..='z'); // The closed range including 'a' and including 'z'
+/// chars!(..); // All characters
+/// # }
+/// ```
+///
+/// `chars!('a'..='z')` and `chars!(..)` are constant expressions, and can be used
+/// where such are required, such as in the initialization of constant data structures.
+///
+/// Note that because an `expr` capture cannot be followed by a `..`/`..=`,
+/// this macro captures token trees. This means that if you want to pass more than one token,
+/// you must parenthesize it (e.g. `chars!('\0' ..= (char::MAX))`).
+macro_rules! chars {
+    // Half-open form: delegates to `CharRange::open_right`.
+    ( $low:tt .. $high:tt ) => ( $crate::CharRange::open_right($low, $high) );
+    // Closed form: expands to a plain struct literal.
+    ( $low:tt ..= $high:tt ) => ( $crate::CharRange { low: $low, high: $high } );
+    // Full range: `char::MAX` is a multi-token path, hence the parentheses.
+    ( .. ) => ( chars!( '\0' ..= (::std::char::MAX) ) );
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -16,3 +16,4 @@ travis-ci = { repository = "behnam/rust-unic", branch = "master" }

		[dependencies]
		unic-char-property = { path = "property/", version = "0.5.0" }
		unic-char-range = { path = "range/", version = "0.5.0" }