Skip to content

Commit

Permalink
Auto merge of rust-lang#83515 - tamird:string-remove-matches-rev, r=m-ou-se
Browse files Browse the repository at this point in the history

String::remove_matches O(n^2) -> O(n)

Copy only non-matching bytes. Replace collection of matches into a
vector with iteration over rejections, exploiting the guarantee that we
only mutate parts of the haystack that have already been searched.

r? `@joshtriplett`
  • Loading branch information
bors committed Jun 8, 2021
2 parents e4a6032 + 977903b commit dda4a88
Showing 1 changed file with 39 additions and 22 deletions.
61 changes: 39 additions & 22 deletions library/alloc/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ use core::fmt;
use core::hash;
#[cfg(not(no_global_oom_handling))]
use core::iter::FromIterator;
use core::iter::FusedIterator;
use core::iter::{from_fn, FusedIterator};
#[cfg(not(no_global_oom_handling))]
use core::ops::Add;
#[cfg(not(no_global_oom_handling))]
Expand Down Expand Up @@ -1290,32 +1290,49 @@ impl String {
{
use core::str::pattern::Searcher;

let matches = {
let rejections = {
let mut searcher = pat.into_searcher(self);
let mut matches = Vec::new();

while let Some(m) = searcher.next_match() {
matches.push(m);
}

matches
// Per Searcher::next:
//
// A Match result needs to contain the whole matched pattern,
// however Reject results may be split up into arbitrarily many
// adjacent fragments. Both ranges may have zero length.
//
// In practice the implementation of Searcher::next_match tends to
// be more efficient, so we use it here and do some work to invert
// matches into rejections since that's what we want to copy below.
let mut front = 0;
let rejections: Vec<_> = from_fn(|| {
let (start, end) = searcher.next_match()?;
let prev_front = front;
front = end;
Some((prev_front, start))
})
.collect();
rejections.into_iter().chain(core::iter::once((front, self.len())))
};

let len = self.len();
let mut shrunk_by = 0;
let mut len = 0;
let ptr = self.vec.as_mut_ptr();

for (start, end) in rejections {
let count = end - start;
if start != len {
// SAFETY: per Searcher::next:
//
// The stream of Match and Reject values up to a Done will
// contain index ranges that are adjacent, non-overlapping,
// covering the whole haystack, and lying on UTF-8
// boundaries.
unsafe {
ptr::copy(ptr.add(start), ptr.add(len), count);
}
}
len += count;
}

// SAFETY: start and end will be on utf8 byte boundaries per
// the Searcher docs
unsafe {
for (start, end) in matches {
ptr::copy(
self.vec.as_mut_ptr().add(end - shrunk_by),
self.vec.as_mut_ptr().add(start - shrunk_by),
len - end,
);
shrunk_by += end - start;
}
self.vec.set_len(len - shrunk_by);
self.vec.set_len(len);
}
}

Expand Down

0 comments on commit dda4a88

Please sign in to comment.