From 070ce235f2c1b72be10ac10cd5474b0ce80a1071 Mon Sep 17 00:00:00 2001
From: The 8472
Date: Mon, 22 May 2023 16:14:19 +0200
Subject: [PATCH] Specialize StepBy<Range<{integer}>>

For ranges of unsigned integer types no wider than usize we determine
the number of items StepBy would yield and then store that in range.end
instead of the actual end. This significantly simplifies calculation of
the loop induction variable, especially in cases where StepBy::step
(a usize) could overflow the Range's item type.
---
 library/core/benches/iter.rs                |  52 +++
 library/core/src/iter/adapters/step_by.rs   | 357 ++++++++++++++++++--
 library/core/tests/iter/adapters/step_by.rs |  55 +++
 3 files changed, 428 insertions(+), 36 deletions(-)

diff --git a/library/core/benches/iter.rs b/library/core/benches/iter.rs
index 60ef83223d104..5ec22e5147b16 100644
--- a/library/core/benches/iter.rs
+++ b/library/core/benches/iter.rs
@@ -2,6 +2,7 @@ use core::borrow::Borrow;
 use core::iter::*;
 use core::mem;
 use core::num::Wrapping;
+use core::ops::Range;
 use test::{black_box, Bencher};
 
 #[bench]
@@ -69,6 +70,57 @@ fn bench_max(b: &mut Bencher) {
     })
 }
 
+#[bench]
+fn bench_range_step_by_sum_reducible(b: &mut Bencher) {
+    let r = 0u32..1024;
+    b.iter(|| {
+        let r = black_box(r.clone()).step_by(8);
+
+        let mut sum: u32 = 0;
+        for i in r {
+            sum += i;
+        }
+
+        sum
+    })
+}
+
+#[bench]
+fn bench_range_step_by_loop_u32(b: &mut Bencher) {
+    let r = 0..(u16::MAX as u32);
+    b.iter(|| {
+        let r = black_box(r.clone()).step_by(64);
+
+        let mut sum: u32 = 0;
+        for i in r {
+            let i = i ^ i.wrapping_sub(1);
+            sum = sum.wrapping_add(i);
+        }
+
+        sum
+    })
+}
+
+#[bench]
+fn bench_range_step_by_fold_usize(b: &mut Bencher) {
+    let r: Range<usize> = 0..(u16::MAX as usize);
+    b.iter(|| {
+        let r = black_box(r.clone());
+        r.step_by(64)
+            .map(|x: usize| x ^ (x.wrapping_sub(1)))
+            .fold(0usize, |acc, i| acc.wrapping_add(i))
+    })
+}
+
+#[bench]
+fn bench_range_step_by_fold_u16(b: &mut Bencher) {
+    let r: Range<u16> = 0..u16::MAX;
+    b.iter(|| {
+        let r = black_box(r.clone());
+        r.step_by(64).map(|x: u16| x ^ (x.wrapping_sub(1))).fold(0u16, |acc, i| acc.wrapping_add(i))
+    })
+}
+
 pub fn copy_zip(xs: &[u8], ys: &mut [u8]) {
     for (a, b) in ys.iter_mut().zip(xs) {
         *a = *b;
diff --git a/library/core/src/iter/adapters/step_by.rs b/library/core/src/iter/adapters/step_by.rs
index 4252c34a0e0fc..2f7e66e8c6083 100644
--- a/library/core/src/iter/adapters/step_by.rs
+++ b/library/core/src/iter/adapters/step_by.rs
@@ -1,4 +1,9 @@
-use crate::{intrinsics, iter::from_fn, ops::Try};
+use crate::convert::TryFrom;
+use crate::{
+    intrinsics,
+    iter::from_fn,
+    ops::{Range, Try},
+};
 
 /// An iterator for stepping iterators by a custom amount.
 ///
@@ -17,8 +22,10 @@ pub struct StepBy<I> {
 }
 
 impl<I> StepBy<I> {
+    #[inline]
     pub(in crate::iter) fn new(iter: I, step: usize) -> StepBy<I> {
         assert!(step != 0);
+        let iter = <I as SpecRangeSetup<I>>::setup(iter, step);
         StepBy { iter, step: step - 1, first_take: true }
     }
 }
@@ -32,16 +39,154 @@ where
 
     #[inline]
     fn next(&mut self) -> Option<Self::Item> {
+        self.spec_next()
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.spec_size_hint()
+    }
+
+    #[inline]
+    fn nth(&mut self, n: usize) -> Option<Self::Item> {
+        self.spec_nth(n)
+    }
+
+    fn try_fold<Acc, F, R>(&mut self, acc: Acc, f: F) -> R
+    where
+        F: FnMut(Acc, Self::Item) -> R,
+        R: Try<Output = Acc>,
+    {
+        self.spec_try_fold(acc, f)
+    }
+
+    #[inline]
+    fn fold<Acc, F>(self, acc: Acc, f: F) -> Acc
+    where
+        F: FnMut(Acc, Self::Item) -> Acc,
+    {
+        self.spec_fold(acc, f)
+    }
+}
+
+impl<I> StepBy<I>
+where
+    I: ExactSizeIterator,
+{
+    // The zero-based index starting from the end of the iterator of the
+    // last element. Used in the `DoubleEndedIterator` implementation.
+    fn next_back_index(&self) -> usize {
+        let rem = self.iter.len() % (self.step + 1);
         if self.first_take {
-            self.first_take = false;
-            self.iter.next()
+            if rem == 0 { self.step } else { rem - 1 }
         } else {
-            self.iter.nth(self.step)
+            rem
         }
     }
+}
 
+#[stable(feature = "double_ended_step_by_iterator", since = "1.38.0")]
+impl<I> DoubleEndedIterator for StepBy<I>
+where
+    I: DoubleEndedIterator + ExactSizeIterator,
+{
     #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.spec_next_back()
+    }
+
+    #[inline]
+    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
+        self.spec_nth_back(n)
+    }
+
+    fn try_rfold<Acc, F, R>(&mut self, init: Acc, f: F) -> R
+    where
+        F: FnMut(Acc, Self::Item) -> R,
+        R: Try<Output = Acc>,
+    {
+        self.spec_try_rfold(init, f)
+    }
+
+    #[inline]
+    fn rfold<Acc, F>(self, init: Acc, f: F) -> Acc
+    where
+        Self: Sized,
+        F: FnMut(Acc, Self::Item) -> Acc,
+    {
+        self.spec_rfold(init, f)
+    }
+}
+
+// StepBy can only make the iterator shorter, so the len will still fit.
+#[stable(feature = "iterator_step_by", since = "1.28.0")]
+impl<I> ExactSizeIterator for StepBy<I> where I: ExactSizeIterator {}
+
+trait SpecRangeSetup<T> {
+    fn setup(inner: T, step: usize) -> T;
+}
+
+impl<T> SpecRangeSetup<T> for T {
+    #[inline]
+    default fn setup(inner: T, _step: usize) -> T {
+        inner
+    }
+}
+
+trait StepByImpl<I> {
+    type Item;
+
+    fn spec_next(&mut self) -> Option<Self::Item>;
+
+    fn spec_size_hint(&self) -> (usize, Option<usize>);
+
+    fn spec_nth(&mut self, n: usize) -> Option<Self::Item>;
+
+    fn spec_try_fold<Acc, F, R>(&mut self, acc: Acc, f: F) -> R
+    where
+        F: FnMut(Acc, Self::Item) -> R,
+        R: Try<Output = Acc>;
+
+    fn spec_fold<Acc, F>(self, acc: Acc, f: F) -> Acc
+    where
+        F: FnMut(Acc, Self::Item) -> Acc;
+}
+
+trait StepByBackImpl<I> {
+    type Item;
+
+    fn spec_next_back(&mut self) -> Option<Self::Item>
+    where
+        I: DoubleEndedIterator + ExactSizeIterator;
+
+    fn spec_nth_back(&mut self, n: usize) -> Option<Self::Item>
+    where
+        I: DoubleEndedIterator + ExactSizeIterator;
+
+    fn spec_try_rfold<Acc, F, R>(&mut self, init: Acc, f: F) -> R
+    where
+        I: DoubleEndedIterator + ExactSizeIterator,
+        F: FnMut(Acc, Self::Item) -> R,
+        R: Try<Output = Acc>;
+
+    fn spec_rfold<Acc, F>(self, init: Acc, f: F) -> Acc
+    where
+        I: DoubleEndedIterator + ExactSizeIterator,
+        F: FnMut(Acc, Self::Item) -> Acc;
+}
+
+impl<I: Iterator> StepByImpl<I> for StepBy<I> {
+    type Item = I::Item;
+
+    #[inline]
+    default fn spec_next(&mut self) -> Option<I::Item> {
+        let step_size = if self.first_take { 0 } else { self.step };
+        self.first_take = false;
+        self.iter.nth(step_size)
+    }
+
+    #[inline]
+    default fn spec_size_hint(&self) -> (usize, Option<usize>) {
         #[inline]
         fn first_size(step: usize) -> impl Fn(usize) -> usize {
             move |n| if n == 0 { 0 } else { 1 + (n - 1) / (step + 1) }
@@ -64,7 +209,7 @@ where
     }
 
     #[inline]
-    fn nth(&mut self, mut n: usize) -> Option<Self::Item> {
+    default fn spec_nth(&mut self, mut n: usize) -> Option<I::Item> {
         if self.first_take {
             self.first_take = false;
             let first = self.iter.next();
@@ -108,7 +253,7 @@ where
         }
     }
 
-    fn try_fold<Acc, F, R>(&mut self, mut acc: Acc, mut f: F) -> R
+    default fn spec_try_fold<Acc, F, R>(&mut self, mut acc: Acc, mut f: F) -> R
     where
         F: FnMut(Acc, Self::Item) -> R,
         R: Try<Output = Acc>,
@@ -128,7 +273,7 @@ where
         from_fn(nth(&mut self.iter, self.step)).try_fold(acc, f)
     }
 
-    fn fold<Acc, F>(mut self, mut acc: Acc, mut f: F) -> Acc
+    default fn spec_fold<Acc, F>(mut self, mut acc: Acc, mut f: F) -> Acc
     where
         F: FnMut(Acc, Self::Item) -> Acc,
     {
@@ -148,34 +293,16 @@ where
     }
 }
 
-impl<I> StepBy<I>
-where
-    I: ExactSizeIterator,
-{
-    // The zero-based index starting from the end of the iterator of the
-    // last element. Used in the `DoubleEndedIterator` implementation.
-    fn next_back_index(&self) -> usize {
-        let rem = self.iter.len() % (self.step + 1);
-        if self.first_take {
-            if rem == 0 { self.step } else { rem - 1 }
-        } else {
-            rem
-        }
-    }
-}
+impl<I: DoubleEndedIterator + ExactSizeIterator> StepByBackImpl<I> for StepBy<I> {
+    type Item = I::Item;
 
-#[stable(feature = "double_ended_step_by_iterator", since = "1.38.0")]
-impl<I> DoubleEndedIterator for StepBy<I>
-where
-    I: DoubleEndedIterator + ExactSizeIterator,
-{
     #[inline]
-    fn next_back(&mut self) -> Option<Self::Item> {
+    default fn spec_next_back(&mut self) -> Option<I::Item> {
         self.iter.nth_back(self.next_back_index())
     }
 
     #[inline]
-    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
+    default fn spec_nth_back(&mut self, n: usize) -> Option<I::Item> {
         // `self.iter.nth_back(usize::MAX)` does the right thing here when `n`
         // is out of bounds because the length of `self.iter` does not exceed
         // `usize::MAX` (because `I: ExactSizeIterator`) and `nth_back` is
         // zero-indexed
         let n = n.saturating_mul(self.step + 1).saturating_add(self.next_back_index());
         self.iter.nth_back(n)
     }
 
-    fn try_rfold<Acc, F, R>(&mut self, init: Acc, mut f: F) -> R
+    default fn spec_try_rfold<Acc, F, R>(&mut self, init: Acc, mut f: F) -> R
     where
         F: FnMut(Acc, Self::Item) -> R,
         R: Try<Output = Acc>,
@@ -207,10 +334,10 @@ where
     }
 
     #[inline]
-    fn rfold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
+    default fn spec_rfold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
     where
         Self: Sized,
-        F: FnMut(Acc, Self::Item) -> Acc,
+        F: FnMut(Acc, I::Item) -> Acc,
     {
         #[inline]
         fn nth_back<I: DoubleEndedIterator>(
             iter: &mut I,
             step: usize,
         ) -> impl FnMut() -> Option<I::Item> + '_ {
             move || iter.nth_back(step)
         }
@@ -230,6 +357,164 @@ where
     }
 }
 
-// StepBy can only make the iterator shorter, so the len will still fit.
-#[stable(feature = "iterator_step_by", since = "1.28.0")]
-impl<I> ExactSizeIterator for StepBy<I> where I: ExactSizeIterator {}
+macro_rules! spec_int_ranges {
+    ($($t:ty)*) => ($(
+
+        const _: () = assert!(usize::BITS >= <$t>::BITS);
+
+        impl SpecRangeSetup<Range<$t>> for Range<$t> {
+            #[inline]
+            fn setup(mut r: Range<$t>, step: usize) -> Range<$t> {
+                let inner_len = r.size_hint().0;
+                // If step exceeds $t::MAX, then the count will be at most 1 and
+                // thus always fit into $t.
+                let yield_count = inner_len.div_ceil(step);
+                // Turn the range end into an iteration counter
+                r.end = yield_count as $t;
+                r
+            }
+        }
+
+        impl StepByImpl<Range<$t>> for StepBy<Range<$t>> {
+            #[inline]
+            fn spec_next(&mut self) -> Option<$t> {
+                // If a step size larger than the type has been specified, fall back to
+                // $t::MAX, in which case remaining will be at most 1.
+                // The `+ 1` can't overflow since the constructor subtracted 1 from the original value.
+                let step = <$t>::try_from(self.step + 1).unwrap_or(<$t>::MAX);
+                let remaining = self.iter.end;
+                if remaining > 0 {
+                    let val = self.iter.start;
+                    // this can only overflow during the last step, after which the value
+                    // will not be used
+                    self.iter.start = val.wrapping_add(step);
+                    self.iter.end = remaining - 1;
+                    Some(val)
+                } else {
+                    None
+                }
+            }
+
+            fn spec_size_hint(&self) -> (usize, Option<usize>) {
+                let remaining = self.iter.end as usize;
+                (remaining, Some(remaining))
+            }
+
+            // The methods below are all copied from the Iterator trait default impls.
+            // We have to repeat them here so that the specialization overrides the StepByImpl defaults
+
+            fn spec_nth(&mut self, n: usize) -> Option<Self::Item> {
+                self.advance_by(n).ok()?;
+                self.next()
+            }
+
+            fn spec_try_fold<Acc, F, R>(&mut self, init: Acc, mut f: F) -> R
+            where
+                F: FnMut(Acc, Self::Item) -> R,
+                R: Try<Output = Acc>
+            {
+                let mut accum = init;
+                while let Some(x) = self.next() {
+                    accum = f(accum, x)?;
+                }
+                try { accum }
+            }
+
+            #[inline]
+            fn spec_fold<Acc, F>(self, init: Acc, mut f: F) -> Acc
+            where
+                F: FnMut(Acc, Self::Item) -> Acc
+            {
+                // If a step size larger than the type has been specified, fall back to
+                // $t::MAX, in which case remaining will be at most 1.
+                let step = <$t>::try_from(self.step + 1).unwrap_or(<$t>::MAX);
+                let remaining = self.iter.end;
+                let mut acc = init;
+                let mut val = self.iter.start;
+                for _ in 0..remaining {
+                    acc = f(acc, val);
+                    // this can only overflow during the last step, after which the value
+                    // will no longer be used
+                    val = val.wrapping_add(step);
+                }
+                acc
+            }
+        }
+    )*)
+}
+
+macro_rules! spec_int_ranges_r {
+    ($($t:ty)*) => ($(
+        const _: () = assert!(usize::BITS >= <$t>::BITS);
+
+        impl StepByBackImpl<Range<$t>> for StepBy<Range<$t>> {
+
+            fn spec_next_back(&mut self) -> Option<Self::Item>
+            where Range<$t>: DoubleEndedIterator + ExactSizeIterator,
+            {
+                let step = (self.step + 1) as $t;
+                let remaining = self.iter.end;
+                if remaining > 0 {
+                    let start = self.iter.start;
+                    self.iter.end = remaining - 1;
+                    Some(start + step * (remaining - 1))
+                } else {
+                    None
+                }
+            }
+
+            // The methods below are all copied from the Iterator trait default impls.
+            // We have to repeat them here so that the specialization overrides the StepByBackImpl defaults
+
+            fn spec_nth_back(&mut self, n: usize) -> Option<Self::Item>
+            where Self: DoubleEndedIterator,
+            {
+                if self.advance_back_by(n).is_err() {
+                    return None;
+                }
+                self.next_back()
+            }
+
+            fn spec_try_rfold<Acc, F, R>(&mut self, init: Acc, mut f: F) -> R
+            where
+                Self: DoubleEndedIterator,
+                F: FnMut(Acc, Self::Item) -> R,
+                R: Try<Output = Acc>
+            {
+                let mut accum = init;
+                while let Some(x) = self.next_back() {
+                    accum = f(accum, x)?;
+                }
+                try { accum }
+            }
+
+            fn spec_rfold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
+            where
+                Self: DoubleEndedIterator,
+                F: FnMut(Acc, Self::Item) -> Acc
+            {
+                let mut accum = init;
+                while let Some(x) = self.next_back() {
+                    accum = f(accum, x);
+                }
+                accum
+            }
+        }
+    )*)
+}
+
+#[cfg(target_pointer_width = "64")]
+spec_int_ranges!(u8 u16 u32 u64 usize);
+// DoubleEndedIterator requires ExactSizeIterator, which isn't implemented for Range<u64>
+#[cfg(target_pointer_width = "64")]
+spec_int_ranges_r!(u8 u16 u32 usize);
+
+#[cfg(target_pointer_width = "32")]
+spec_int_ranges!(u8 u16 u32 usize);
+#[cfg(target_pointer_width = "32")]
+spec_int_ranges_r!(u8 u16 u32 usize);
+
+#[cfg(target_pointer_width = "16")]
+spec_int_ranges!(u8 u16 usize);
+#[cfg(target_pointer_width = "16")]
+spec_int_ranges_r!(u8 u16 usize);
diff --git a/library/core/tests/iter/adapters/step_by.rs b/library/core/tests/iter/adapters/step_by.rs
index 94f2fa8c25e2d..4c5b1dd9a6bd1 100644
--- a/library/core/tests/iter/adapters/step_by.rs
+++ b/library/core/tests/iter/adapters/step_by.rs
@@ -244,3 +244,58 @@ fn test_step_by_skip() {
     assert_eq!((0..=50).step_by(10).nth(3), Some(30));
     assert_eq!((200..=255u8).step_by(10).nth(3), Some(230));
 }
+
+
+struct DeOpt<I: Iterator>(I);
+
+impl<I: Iterator> Iterator for DeOpt<I> {
+    type Item = I::Item;
+
+    fn next(&mut self) -> core::option::Option<Self::Item> {
+        self.0.next()
+    }
+}
+
+impl<I: DoubleEndedIterator> DoubleEndedIterator for DeOpt<I> {
+    fn next_back(&mut self) -> core::option::Option<Self::Item> {
+        self.0.next_back()
+    }
+}
+
+#[test]
+fn test_step_by_fold_range_specialization() {
+    macro_rules! t {
+        ($range:expr, $var: ident, $body:tt) => {
+            {
+                // run the same tests for the non-optimized version
+                let mut $var = DeOpt($range);
+                $body
+            }
+            {
+                let mut $var = $range;
+                $body
+            }
+        }
+    }
+
+    t!((1usize..5).step_by(1), r, {
+        assert_eq!(r.next_back(), Some(4));
+        assert_eq!(r.sum::<usize>(), 6);
+    });
+
+    t!((0usize..4).step_by(2), r, {
+        assert_eq!(r.next(), Some(0));
+        assert_eq!(r.sum::<usize>(), 2);
+    });
+
+
+    t!((0usize..5).step_by(2), r, {
+        assert_eq!(r.next(), Some(0));
+        assert_eq!(r.sum::<usize>(), 6);
+    });
+
+    t!((usize::MAX - 6 .. usize::MAX).step_by(5), r, {
+        assert_eq!(r.next(), Some(usize::MAX - 6));
+        assert_eq!(r.sum::<usize>(), usize::MAX - 1);
+    });
+}
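
A standalone sketch of the counting trick the patch relies on (illustrative
only: the function below is hypothetical and not code from this patch). The
specialized `setup` rewrites `start..end` into `start..yield_count`, so the
loop induction variable becomes a plain counter and each yielded value is
derived from `start` by wrapping addition of the (possibly truncated) step:

    /// Sum of `(start..end).step_by(step)` for `u8`, iterated the way the
    /// specialized fold does. Hypothetical helper, for illustration.
    fn step_by_sum_u8(start: u8, end: u8, step: usize) -> u32 {
        assert!(step != 0);
        let inner_len = (end as usize).saturating_sub(start as usize);
        // Number of items the iterator will yield; this is what the
        // specialization stores in `range.end`. It always fits in u8
        // because it is at most `inner_len`.
        let yield_count = inner_len.div_ceil(step);
        // If the step exceeds u8::MAX, at most one item is yielded, so the
        // truncated step is never added to a value that is still used.
        let step = u8::try_from(step).unwrap_or(u8::MAX);

        let mut acc: u32 = 0;
        let mut val = start;
        for _ in 0..yield_count {
            acc += val as u32;
            // May wrap only after the last yielded value, which is then unused.
            val = val.wrapping_add(step);
        }
        acc
    }

Because the trip count is fixed before the loop starts and the body needs no
overflow branches even when a usize step drives a narrower item type, the
backend can unroll or vectorize the loop, or reduce folds like the
`bench_range_step_by_*` benchmarks above to closed-form code.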