Skip to content

Commit

Permalink
Try suggested approach.
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed Feb 14, 2022
1 parent acc547a commit 7f59d2e
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 121 deletions.
41 changes: 27 additions & 14 deletions arrow/src/array/array_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ use std::iter::IntoIterator;
use std::{convert::From, iter::FromIterator};

use super::{
make_array, Array, ArrayData, ArrayRef, DictionaryIter, PrimitiveArray,
PrimitiveBuilder, StringArray, StringBuilder, StringDictionaryBuilder,
make_array, Array, ArrayData, ArrayRef, PrimitiveArray, PrimitiveBuilder,
StringArray, StringBuilder, StringDictionaryBuilder,
};
use crate::datatypes::ArrowNativeType;
use crate::datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType, DataType};
Expand Down Expand Up @@ -105,10 +105,17 @@ impl<'a, K: ArrowPrimitiveType> DictionaryArray<K> {
self.keys.is_empty()
}

// Currently exists for compatibility purposes with Arrow IPC.
/// Currently exists for compatibility purposes with Arrow IPC.
pub fn is_ordered(&self) -> bool {
self.is_ordered
}

/// Return an iterator over the keys (indexes into the dictionary)
pub fn keys_iter(&self) -> impl Iterator<Item = Option<usize>> + '_ {
self.keys
.iter()
.map(|key| key.map(|k| k.to_usize().expect("Dictionary index not usize")))
}
}

/// Constructs a `DictionaryArray` from an array data reference.
Expand Down Expand Up @@ -252,17 +259,11 @@ impl<T: ArrowPrimitiveType> fmt::Debug for DictionaryArray<T> {
}
}

impl<'a, K: ArrowPrimitiveType> DictionaryArray<K> {
/// constructs a new iterator
pub fn iter<T: ArrowPrimitiveType>(&'a self) -> DictionaryIter<'a, K, T> {
DictionaryIter::<'a, K, T>::new(self)
}
}

#[cfg(test)]
mod tests {
use super::*;

use crate::array::Int8Array;
use crate::{
array::Int16Array,
datatypes::{Int32Type, Int8Type, UInt32Type, UInt8Type},
Expand Down Expand Up @@ -449,15 +450,27 @@ mod tests {
let value_type = DataType::Int8;
let dict_data_type =
DataType::Dictionary(Box::new(key_type), Box::new(value_type));
let dict_data = ArrayData::builder(dict_data_type.clone())
let dict_data = ArrayData::builder(dict_data_type)
.len(3)
.add_buffer(keys.clone())
.add_child_data(value_data.clone())
.add_buffer(keys)
.add_child_data(value_data)
.build()
.unwrap();
let dict_array = Int16DictionaryArray::from(dict_data);

let mut iter = dict_array.iter::<Int8Type>();
let mut key_iter = dict_array.keys_iter();
assert_eq!(2, key_iter.next().unwrap().unwrap());
assert_eq!(3, key_iter.next().unwrap().unwrap());
assert_eq!(4, key_iter.next().unwrap().unwrap());
assert!(key_iter.next().is_none());

let mut iter = dict_array
.values()
.as_any()
.downcast_ref::<Int8Array>()
.unwrap()
.take_iter(dict_array.keys_iter());

assert_eq!(12, iter.next().unwrap().unwrap());
assert_eq!(13, iter.next().unwrap().unwrap());
assert_eq!(14, iter.next().unwrap().unwrap());
Expand Down
10 changes: 10 additions & 0 deletions arrow/src/array/array_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,16 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
};
PrimitiveArray::from(data)
}

/// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i`
pub fn take_iter<'a>(
&'a self,
indexes: impl Iterator<Item = Option<usize>> + 'a,
) -> impl Iterator<Item = Option<T::Native>> + 'a {
indexes.map(|opt_index| {
opt_index.map(|index| unsafe { self.value_unchecked(index) })
})
}
}

impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
Expand Down
111 changes: 4 additions & 107 deletions arrow/src/array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
// specific language governing permissions and limitations
// under the License.

use crate::datatypes::{ArrowNativeType, ArrowPrimitiveType};
use crate::datatypes::ArrowPrimitiveType;

use super::{
Array, ArrayRef, BinaryOffsetSizeTrait, BooleanArray, DictionaryArray,
GenericBinaryArray, GenericListArray, GenericStringArray, OffsetSizeTrait,
PrimitiveArray, StringOffsetSizeTrait,
Array, ArrayRef, BinaryOffsetSizeTrait, BooleanArray, GenericBinaryArray,
GenericListArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray,
StringOffsetSizeTrait,
};

/// an iterator that returns Some(T) or None, that can be used on any PrimitiveArray
Expand Down Expand Up @@ -403,109 +403,6 @@ impl<'a, S: OffsetSizeTrait> std::iter::ExactSizeIterator
{
}

/// an iterator that returns Some(T) or None, that can be used on any DictionaryArray
// Note: This implementation is based on std's [Vec]s' [IntoIter].
#[derive(Debug)]
pub struct DictionaryIter<'a, K: ArrowPrimitiveType, T: ArrowPrimitiveType> {
array: &'a DictionaryArray<K>,
values: &'a PrimitiveArray<T>,
current: usize,
current_end: usize,
}

impl<'a, K: ArrowPrimitiveType, T: ArrowPrimitiveType> DictionaryIter<'a, K, T> {
/// create a new iterator
pub fn new(array: &'a DictionaryArray<K>) -> Self {
DictionaryIter::<K, T> {
array,
values: array
.values()
.as_any()
.downcast_ref::<PrimitiveArray<T>>()
.unwrap(),
current: 0,
current_end: array.len(),
}
}
}

impl<'a, K: ArrowPrimitiveType, T: ArrowPrimitiveType> std::iter::Iterator
for DictionaryIter<'a, K, T>
{
type Item = Option<T::Native>;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.current == self.current_end {
None
} else if self.array.keys().is_null(self.current) {
self.current += 1;
Some(None)
} else {
let old = self.current;
self.current += 1;
// Safety:
// we just checked bounds in `self.current_end == self.current`
// this is safe on the premise that this struct is initialized with
// current = array.len()
// and that current_end is ever only decremented

unsafe {
let key = self
.array
.keys()
.value_unchecked(old)
.to_usize()
.expect("Dictionary index not usize");
Some(Some(self.values.value_unchecked(key)))
}
}
}

fn size_hint(&self) -> (usize, Option<usize>) {
(
self.array.len() - self.current,
Some(self.array.len() - self.current),
)
}
}

impl<'a, K: ArrowPrimitiveType, T: ArrowPrimitiveType> std::iter::DoubleEndedIterator
for DictionaryIter<'a, K, T>
{
fn next_back(&mut self) -> Option<Self::Item> {
if self.current_end == self.current {
None
} else {
self.current_end -= 1;
Some(if self.array.keys().is_null(self.current_end) {
None
} else {
// Safety:
// we just checked bounds in `self.current_end == self.current`
// this is safe on the premise that this struct is initialized with
// current = array.len()
// and that current_end is ever only decremented
unsafe {
let key = self
.array
.keys()
.value_unchecked(self.current_end)
.to_usize()
.expect("Dictionary index not usize");
Some(self.values.value_unchecked(key))
}
})
}
}
}

/// all arrays have known size.
impl<'a, K: ArrowPrimitiveType, T: ArrowPrimitiveType> std::iter::ExactSizeIterator
for DictionaryIter<'a, K, T>
{
}

#[cfg(test)]
mod tests {
use std::sync::Arc;
Expand Down

0 comments on commit 7f59d2e

Please sign in to comment.