From 2d61d67c062c98f9b2af8c1c9e5c8b467a6a5012 Mon Sep 17 00:00:00 2001
From: VasanthakumarV
Date: Thu, 16 Sep 2021 18:22:13 +0530
Subject: [PATCH] [WIP] Add extend methods for `MutableUtf8Array`

---
 src/array/utf8/mutable.rs | 138 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 138 insertions(+)

diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs
index 80e1e292ae6..7a712dcefba 100644
--- a/src/array/utf8/mutable.rs
+++ b/src/array/utf8/mutable.rs
@@ -223,7 +223,145 @@ impl<O: Offset, P: AsRef<str>> FromIterator<Option<P>> for MutableUtf8Array<O> {
     }
 }
 
+// TODO Move to tests/
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::bitmap::Bitmap;
+
+    #[test]
+    fn test_extend_trusted_len_values() {
+        let mut array = MutableUtf8Array::<i32>::new();
+
+        array.extend_trusted_len_values(["hi", "there"].iter());
+        array.extend_trusted_len_values(["hello"].iter());
+
+        assert_eq!(array.values().as_slice(), b"hitherehello");
+        assert_eq!(array.offsets().as_slice(), &[0, 2, 7, 12]);
+        assert!(array.validity().is_none()); // no nulls were pushed
+    }
+
+    #[test]
+    fn test_extend_trusted_len() {
+        let mut array = MutableUtf8Array::<i32>::new();
+
+        // NOTE(review): `[Some("hi"), Some("there")].into_iter()` is not used here; before the
+        // 2021 edition it presumably yields `&Option<&str>`, not `Option<P>` - confirm.
+
+        array.extend_trusted_len(vec![Some("hi"), Some("there")].into_iter());
+        array.extend_trusted_len(vec![None, Some("hello")].into_iter());
+
+        let array: Utf8Array<i32> = array.into();
+
+        assert_eq!(array.values().as_slice(), b"hitherehello");
+        assert_eq!(array.offsets().as_slice(), &[0, 2, 7, 7, 12]); // the null repeats offset 7
+        assert_eq!(
+            array.validity(),
+            &Some(Bitmap::from_u8_slice(&[0b00001011], 4)) // expected: valid, valid, null, valid
+        );
+    }
+}
+
 impl<O: Offset> MutableUtf8Array<O> {
+    /// Extends the [`MutableUtf8Array`] from an iterator of values of trusted len.
+    /// This differs from `extend_trusted_len` which accepts iterator of optional values.
+    #[inline]
+    pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
+    where
+        P: AsRef<str>,
+        I: TrustedLen<Item = P>,
+    {
+        // SAFETY: the `TrustedLen` bound is the contract the unchecked variant asks for.
+        unsafe { self.extend_trusted_len_values_unchecked(iterator) }
+    }
+
+    /// Extends the [`MutableUtf8Array`] from an iterator of values of trusted len.
+    /// This differs from `extend_trusted_len_unchecked` which accepts iterator of optional
+    /// values.
+    ///
+    /// # Panics
+    /// Panics if the iterator's `size_hint` has no upper bound, or if `O::from_usize`
+    /// fails for the byte length of an item.
+    ///
+    /// # Safety
+    /// The iterator must be trusted len.
+    #[inline]
+    pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)
+    where
+        P: AsRef<str>,
+        I: Iterator<Item = P>,
+    {
+        let (_, upper) = iterator.size_hint();
+        let additional = upper.expect("extend_trusted_len_values requires an upper limit");
+
+        // Exactly one offset is appended per item, so reserve them all up front; the
+        // `push_unchecked` calls below rely on this reservation.
+        self.offsets.reserve(additional);
+
+        // Running end offset, continuing from the array's current last offset.
+        let mut length = self.last_offset();
+
+        for item in iterator {
+            let bytes = item.as_ref().as_bytes();
+
+            length += O::from_usize(bytes.len()).unwrap();
+
+            // Write straight into the array's own buffers rather than staging the data
+            // in temporary `Vec`s and copying it a second time.
+            self.values.extend_from_slice(bytes);
+            self.offsets.push_unchecked(length);
+        }
+
+        // Plain values are never null: only an already-existing bitmap needs extending.
+        if let Some(validity) = self.validity.as_mut() {
+            validity.extend_constant(additional, true);
+        }
+    }
+
+    /// Extends the [`MutableUtf8Array`] from an iterator of trusted len.
+    #[inline]
+    pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
+    where
+        P: AsRef<str>,
+        I: TrustedLen<Item = Option<P>>,
+    {
+        // SAFETY: the `TrustedLen` bound is the contract the unchecked variant asks for.
+        unsafe { self.extend_trusted_len_unchecked(iterator) }
+    }
+
+    /// Extends [`MutableUtf8Array`] from an iterator of trusted len.
+    /// # Safety
+    /// The iterator must be trusted len.
+    ///
+    /// # Panics
+    /// Panics if the iterator's `size_hint` has no upper bound, or if `O::from_usize`
+    /// fails for the total length of the values buffer.
+    #[inline]
+    pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
+    where
+        P: AsRef<str>,
+        I: Iterator<Item = Option<P>>,
+    {
+        let (_, upper) = iterator.size_hint();
+        let additional = upper.expect("extend_trusted_len requires an upper limit");
+
+        // TODO How to reserve space for `values` and `validity` and take advantage
+        // of `push_unchecked`
+        // Every item, null or not, appends exactly one offset; the `push_unchecked`
+        // calls below rely on this reservation.
+        self.offsets.reserve(additional);
+
+        for item in iterator {
+            match item {
+                Some(item) => {
+                    let bytes = item.as_ref().as_bytes();
+                    self.values.extend_from_slice(bytes);
+
+                    // The new end offset is the length of the values buffer after the append.
+                    let size = O::from_usize(self.values.len()).unwrap();
+                    self.offsets.push_unchecked(size);
+
+                    // A bitmap only exists once a null has been seen; keep it in step.
+                    if let Some(validity) = self.validity.as_mut() {
+                        validity.push(true);
+                    }
+                }
+                None => {
+                    // A null occupies no bytes: repeat the previous offset.
+                    let offset = self.last_offset();
+                    self.offsets.push_unchecked(offset);
+
+                    match &mut self.validity {
+                        Some(validity) => validity.push(false),
+                        // NOTE(review): `init_validity` is expected to create the bitmap and
+                        // mark this freshly pushed slot as null - confirm against its definition.
+                        None => self.init_validity(),
+                    }
+                }
+            }
+        }
+    }
+
     /// Creates a [`MutableUtf8Array`] from an iterator of trusted length.
     /// # Safety
     /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).