From dc973417a8ae30b0e1e52c341fa666090a4b045f Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 26 Sep 2016 18:15:50 +0200 Subject: [PATCH 1/4] Remove duplicate test. test_dedup_shared has been exactly the same as test_dedup_unique since 6f16df4aa, three years ago. --- src/libcollectionstest/slice.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/libcollectionstest/slice.rs b/src/libcollectionstest/slice.rs index 5b341ab62d097..6dc7737bb2194 100644 --- a/src/libcollectionstest/slice.rs +++ b/src/libcollectionstest/slice.rs @@ -344,18 +344,6 @@ fn test_dedup_unique() { // and/or rt should raise errors. } -#[test] -fn test_dedup_shared() { - let mut v0: Vec> = vec![box 1, box 1, box 2, box 3]; - v0.dedup(); - let mut v1: Vec> = vec![box 1, box 2, box 2, box 3]; - v1.dedup(); - let mut v2: Vec> = vec![box 1, box 2, box 3, box 3]; - v2.dedup(); - // If the pointers were leaked or otherwise misused, valgrind and/or - // rt should raise errors. -} - #[test] fn test_retain() { let mut v = vec![1, 2, 3, 4, 5]; From f14f4db6e88f4d1788033c220d9b6b6f41420789 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 26 Sep 2016 18:17:38 +0200 Subject: [PATCH 2/4] Move Vec::dedup tests from slice.rs to vec.rs --- src/libcollectionstest/slice.rs | 29 ----------------------------- src/libcollectionstest/vec.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/libcollectionstest/slice.rs b/src/libcollectionstest/slice.rs index 6dc7737bb2194..6d11ca1fbcfc3 100644 --- a/src/libcollectionstest/slice.rs +++ b/src/libcollectionstest/slice.rs @@ -315,35 +315,6 @@ fn test_clear() { // If the unsafe block didn't drop things properly, we blow up here. } -#[test] -fn test_dedup() { - fn case(a: Vec, b: Vec) { - let mut v = a; - v.dedup(); - assert_eq!(v, b); - } - case(vec![], vec![]); - case(vec![1], vec![1]); - case(vec![1, 1], vec![1]); - case(vec![1, 2, 3], vec![1, 2, 3]); - case(vec![1, 1, 2, 3], vec![1, 2, 3]); - case(vec![1, 2, 2, 3], vec![1, 2, 3]); - case(vec![1, 2, 3, 3], vec![1, 2, 3]); - case(vec![1, 1, 2, 2, 2, 3, 3], vec![1, 2, 3]); -} - -#[test] -fn test_dedup_unique() { - let mut v0: Vec> = vec![box 1, box 1, box 2, box 3]; - v0.dedup(); - let mut v1: Vec> = vec![box 1, box 2, box 2, box 3]; - v1.dedup(); - let mut v2: Vec> = vec![box 1, box 2, box 3, box 3]; - v2.dedup(); - // If the boxed pointers were leaked or otherwise misused, valgrind - // and/or rt should raise errors. -} - #[test] fn test_retain() { let mut v = vec![1, 2, 3, 4, 5]; diff --git a/src/libcollectionstest/vec.rs b/src/libcollectionstest/vec.rs index ee2b898d5c2c2..1f4ab7f51a2c7 100644 --- a/src/libcollectionstest/vec.rs +++ b/src/libcollectionstest/vec.rs @@ -213,6 +213,35 @@ fn test_retain() { assert_eq!(vec, [2, 4]); } +#[test] +fn test_dedup() { + fn case(a: Vec, b: Vec) { + let mut v = a; + v.dedup(); + assert_eq!(v, b); + } + case(vec![], vec![]); + case(vec![1], vec![1]); + case(vec![1, 1], vec![1]); + case(vec![1, 2, 3], vec![1, 2, 3]); + case(vec![1, 1, 2, 3], vec![1, 2, 3]); + case(vec![1, 2, 2, 3], vec![1, 2, 3]); + case(vec![1, 2, 3, 3], vec![1, 2, 3]); + case(vec![1, 1, 2, 2, 2, 3, 3], vec![1, 2, 3]); +} + +#[test] +fn test_dedup_unique() { + let mut v0: Vec> = vec![box 1, box 1, box 2, box 3]; + v0.dedup(); + let mut v1: Vec> = vec![box 1, box 2, box 2, box 3]; + v1.dedup(); + let mut v2: Vec> = vec![box 1, box 2, box 3, box 3]; + v2.dedup(); + // If the boxed pointers were leaked or otherwise misused, valgrind + // and/or rt should raise errors. +} + #[test] fn zero_sized_values() { let mut v = Vec::new(); From be34bac1abf16f1d741c7cfe56ab04c4b9ebf48f Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 26 Sep 2016 18:26:49 +0200 Subject: [PATCH 3/4] Add Vec::dedup_by and Vec::dedup_by_key --- src/libcollections/vec.rs | 47 ++++++++++++++++++++++++++++++++++- src/libcollectionstest/lib.rs | 1 + src/libcollectionstest/vec.rs | 26 +++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs index f8b4a92df2c5d..efbabb5a6f4ea 100644 --- a/src/libcollections/vec.rs +++ b/src/libcollections/vec.rs @@ -1156,7 +1156,52 @@ impl Vec { /// assert_eq!(vec, [1, 2, 3, 2]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn dedup(&mut self) { + self.dedup_by(|a, b| a == b) + } +} + +impl Vec { + /// Removes consecutive elements in the vector that resolve to the same key. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// #![feature(dedup_by)] + /// + /// let mut vec = vec![10, 20, 21, 30, 20]; + /// + /// vec.dedup_by_key(|i| *i / 10); + /// + /// assert_eq!(vec, [10, 20, 30, 20]); + /// ``` + #[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")] + #[inline] + pub fn dedup_by_key(&mut self, mut key: F) where F: FnMut(&mut T) -> K, K: PartialEq { + self.dedup_by(|a, b| key(a) == key(b)) + } + + /// Removes consecutive elements in the vector that resolve to the same key. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// #![feature(dedup_by)] + /// use std::ascii::AsciiExt; + /// + /// let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"]; + /// + /// vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b)); + /// + /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]); + /// ``` + #[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")] + pub fn dedup_by(&mut self, mut same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool { unsafe { // Although we have a mutable reference to `self`, we cannot make // *arbitrary* changes. The `PartialEq` comparisons could panic, so we @@ -1228,7 +1273,7 @@ impl Vec { while r < ln { let p_r = p.offset(r as isize); let p_wm1 = p.offset((w - 1) as isize); - if *p_r != *p_wm1 { + if !same_bucket(&mut *p_r, &mut *p_wm1) { if r != w { let p_w = p_wm1.offset(1); mem::swap(&mut *p_r, &mut *p_w); diff --git a/src/libcollectionstest/lib.rs b/src/libcollectionstest/lib.rs index 878581a4f296e..d6bad46c44dfb 100644 --- a/src/libcollectionstest/lib.rs +++ b/src/libcollectionstest/lib.rs @@ -16,6 +16,7 @@ #![feature(collections)] #![feature(collections_bound)] #![feature(const_fn)] +#![feature(dedup_by)] #![feature(enumset)] #![feature(pattern)] #![feature(rand)] diff --git a/src/libcollectionstest/vec.rs b/src/libcollectionstest/vec.rs index 1f4ab7f51a2c7..8417be289eb9e 100644 --- a/src/libcollectionstest/vec.rs +++ b/src/libcollectionstest/vec.rs @@ -8,6 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use std::ascii::AsciiExt; use std::borrow::Cow; use std::iter::{FromIterator, repeat}; use std::mem::size_of; @@ -230,6 +231,31 @@ fn test_dedup() { case(vec![1, 1, 2, 2, 2, 3, 3], vec![1, 2, 3]); } +#[test] +fn test_dedup_by_key() { + fn case(a: Vec, b: Vec) { + let mut v = a; + v.dedup_by_key(|i| *i / 10); + assert_eq!(v, b); + } + case(vec![], vec![]); + case(vec![10], vec![10]); + case(vec![10, 11], vec![10]); + case(vec![10, 20, 30], vec![10, 20, 30]); + case(vec![10, 11, 20, 30], vec![10, 20, 30]); + case(vec![10, 20, 21, 30], vec![10, 20, 30]); + case(vec![10, 20, 30, 31], vec![10, 20, 30]); + case(vec![10, 11, 20, 21, 22, 30, 31], vec![10, 20, 30]); +} + +#[test] +fn test_dedup_by() { + let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"]; + vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b)); + + assert_eq!(vec, ["foo", "bar", "baz", "bar"]); +} + #[test] fn test_dedup_unique() { let mut v0: Vec> = vec![box 1, box 1, box 2, box 3]; From 401f1c45db5343dc0189b4f89d4160bba24facfb Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 26 Sep 2016 18:28:00 +0200 Subject: [PATCH 4/4] Merge two `impl Vec` blocks. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The show up separately in rustdoc. This is a separate commit to keep the previous one’s diff shorter. --- src/libcollections/vec.rs | 250 +++++++++++++++++++------------------- 1 file changed, 124 insertions(+), 126 deletions(-) diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs index efbabb5a6f4ea..bf9c2ae49a0c5 100644 --- a/src/libcollections/vec.rs +++ b/src/libcollections/vec.rs @@ -782,6 +782,130 @@ impl Vec { } } + /// Removes consecutive elements in the vector that resolve to the same key. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// #![feature(dedup_by)] + /// + /// let mut vec = vec![10, 20, 21, 30, 20]; + /// + /// vec.dedup_by_key(|i| *i / 10); + /// + /// assert_eq!(vec, [10, 20, 30, 20]); + /// ``` + #[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")] + #[inline] + pub fn dedup_by_key(&mut self, mut key: F) where F: FnMut(&mut T) -> K, K: PartialEq { + self.dedup_by(|a, b| key(a) == key(b)) + } + + /// Removes consecutive elements in the vector that resolve to the same key. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// #![feature(dedup_by)] + /// use std::ascii::AsciiExt; + /// + /// let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"]; + /// + /// vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b)); + /// + /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]); + /// ``` + #[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")] + pub fn dedup_by(&mut self, mut same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool { + unsafe { + // Although we have a mutable reference to `self`, we cannot make + // *arbitrary* changes. The `PartialEq` comparisons could panic, so we + // must ensure that the vector is in a valid state at all time. + // + // The way that we handle this is by using swaps; we iterate + // over all the elements, swapping as we go so that at the end + // the elements we wish to keep are in the front, and those we + // wish to reject are at the back. We can then truncate the + // vector. This operation is still O(n). + // + // Example: We start in this state, where `r` represents "next + // read" and `w` represents "next_write`. + // + // r + // +---+---+---+---+---+---+ + // | 0 | 1 | 1 | 2 | 3 | 3 | + // +---+---+---+---+---+---+ + // w + // + // Comparing self[r] against self[w-1], this is not a duplicate, so + // we swap self[r] and self[w] (no effect as r==w) and then increment both + // r and w, leaving us with: + // + // r + // +---+---+---+---+---+---+ + // | 0 | 1 | 1 | 2 | 3 | 3 | + // +---+---+---+---+---+---+ + // w + // + // Comparing self[r] against self[w-1], this value is a duplicate, + // so we increment `r` but leave everything else unchanged: + // + // r + // +---+---+---+---+---+---+ + // | 0 | 1 | 1 | 2 | 3 | 3 | + // +---+---+---+---+---+---+ + // w + // + // Comparing self[r] against self[w-1], this is not a duplicate, + // so swap self[r] and self[w] and advance r and w: + // + // r + // +---+---+---+---+---+---+ + // | 0 | 1 | 2 | 1 | 3 | 3 | + // +---+---+---+---+---+---+ + // w + // + // Not a duplicate, repeat: + // + // r + // +---+---+---+---+---+---+ + // | 0 | 1 | 2 | 3 | 1 | 3 | + // +---+---+---+---+---+---+ + // w + // + // Duplicate, advance r. End of vec. Truncate to w. + + let ln = self.len(); + if ln <= 1 { + return; + } + + // Avoid bounds checks by using raw pointers. + let p = self.as_mut_ptr(); + let mut r: usize = 1; + let mut w: usize = 1; + + while r < ln { + let p_r = p.offset(r as isize); + let p_wm1 = p.offset((w - 1) as isize); + if !same_bucket(&mut *p_r, &mut *p_wm1) { + if r != w { + let p_w = p_wm1.offset(1); + mem::swap(&mut *p_r, &mut *p_w); + } + w += 1; + } + r += 1; + } + + self.truncate(w); + } + } + /// Appends an element to the back of a collection. /// /// # Panics @@ -1162,132 +1286,6 @@ impl Vec { } } -impl Vec { - /// Removes consecutive elements in the vector that resolve to the same key. - /// - /// If the vector is sorted, this removes all duplicates. - /// - /// # Examples - /// - /// ``` - /// #![feature(dedup_by)] - /// - /// let mut vec = vec![10, 20, 21, 30, 20]; - /// - /// vec.dedup_by_key(|i| *i / 10); - /// - /// assert_eq!(vec, [10, 20, 30, 20]); - /// ``` - #[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")] - #[inline] - pub fn dedup_by_key(&mut self, mut key: F) where F: FnMut(&mut T) -> K, K: PartialEq { - self.dedup_by(|a, b| key(a) == key(b)) - } - - /// Removes consecutive elements in the vector that resolve to the same key. - /// - /// If the vector is sorted, this removes all duplicates. - /// - /// # Examples - /// - /// ``` - /// #![feature(dedup_by)] - /// use std::ascii::AsciiExt; - /// - /// let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"]; - /// - /// vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b)); - /// - /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]); - /// ``` - #[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")] - pub fn dedup_by(&mut self, mut same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool { - unsafe { - // Although we have a mutable reference to `self`, we cannot make - // *arbitrary* changes. The `PartialEq` comparisons could panic, so we - // must ensure that the vector is in a valid state at all time. - // - // The way that we handle this is by using swaps; we iterate - // over all the elements, swapping as we go so that at the end - // the elements we wish to keep are in the front, and those we - // wish to reject are at the back. We can then truncate the - // vector. This operation is still O(n). - // - // Example: We start in this state, where `r` represents "next - // read" and `w` represents "next_write`. - // - // r - // +---+---+---+---+---+---+ - // | 0 | 1 | 1 | 2 | 3 | 3 | - // +---+---+---+---+---+---+ - // w - // - // Comparing self[r] against self[w-1], this is not a duplicate, so - // we swap self[r] and self[w] (no effect as r==w) and then increment both - // r and w, leaving us with: - // - // r - // +---+---+---+---+---+---+ - // | 0 | 1 | 1 | 2 | 3 | 3 | - // +---+---+---+---+---+---+ - // w - // - // Comparing self[r] against self[w-1], this value is a duplicate, - // so we increment `r` but leave everything else unchanged: - // - // r - // +---+---+---+---+---+---+ - // | 0 | 1 | 1 | 2 | 3 | 3 | - // +---+---+---+---+---+---+ - // w - // - // Comparing self[r] against self[w-1], this is not a duplicate, - // so swap self[r] and self[w] and advance r and w: - // - // r - // +---+---+---+---+---+---+ - // | 0 | 1 | 2 | 1 | 3 | 3 | - // +---+---+---+---+---+---+ - // w - // - // Not a duplicate, repeat: - // - // r - // +---+---+---+---+---+---+ - // | 0 | 1 | 2 | 3 | 1 | 3 | - // +---+---+---+---+---+---+ - // w - // - // Duplicate, advance r. End of vec. Truncate to w. - - let ln = self.len(); - if ln <= 1 { - return; - } - - // Avoid bounds checks by using raw pointers. - let p = self.as_mut_ptr(); - let mut r: usize = 1; - let mut w: usize = 1; - - while r < ln { - let p_r = p.offset(r as isize); - let p_wm1 = p.offset((w - 1) as isize); - if !same_bucket(&mut *p_r, &mut *p_wm1) { - if r != w { - let p_w = p_wm1.offset(1); - mem::swap(&mut *p_r, &mut *p_w); - } - w += 1; - } - r += 1; - } - - self.truncate(w); - } - } -} - //////////////////////////////////////////////////////////////////////////////// // Internal methods and functions ////////////////////////////////////////////////////////////////////////////////