From 98aed87aaa66bd593152ca2414493f51a9ea2108 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh
Date: Fri, 22 Apr 2022 19:29:11 -0700
Subject: [PATCH 1/2] Read/Write nested dictionaries under FixedSizeList in IPC

---
Notes (ignored when the patch is applied):
  * writer.rs: `IpcDataGenerator::encode_dictionaries` gains a
    `DataType::FixedSizeList` arm that downcasts the column to
    `FixedSizeListArray` and recurses into `list.values()` with the list's
    child field; the TODO that asked for this is removed.
  * reader.rs: the new test builds a dictionary whose values are a
    FixedSizeList (size 3) of dictionary-encoded strings and asserts that the
    batch returned by `roundtrip_ipc_stream` equals the input batch.

 arrow/src/ipc/reader.rs | 39 +++++++++++++++++++++++++++++++++++++++
 arrow/src/ipc/writer.rs | 16 ++++++++++++++--
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs
index 33d6085764be..a3ccf902c624 100644
--- a/arrow/src/ipc/reader.rs
+++ b/arrow/src/ipc/reader.rs
@@ -1573,4 +1573,43 @@ mod tests {
             offsets,
         );
     }
+
+    #[test]
+    fn test_roundtrip_stream_dict_of_fixed_size_list_of_dict() {
+        let values = StringArray::from(vec![Some("a"), None, Some("c"), None]);
+        let keys = Int8Array::from_iter_values([0, 0, 1, 2, 0, 1, 3, 1, 2]);
+        let dict_array = DictionaryArray::<Int8Type>::try_new(&keys, &values).unwrap();
+        let dict_data = dict_array.data();
+
+        let list_data_type = DataType::FixedSizeList(
+            Box::new(Field::new_dict(
+                "item",
+                DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
+                true,
+                1,
+                false,
+            )),
+            3,
+        );
+        let list_data = ArrayData::builder(list_data_type.clone())
+            .len(3)
+            .add_child_data(dict_data.clone())
+            .build()
+            .unwrap();
+        let list_array = FixedSizeListArray::from(list_data);
+
+        let keys_for_dict = Int8Array::from_iter_values([0, 1, 0, 1, 1, 2, 0, 1, 2]);
+        let dict_dict_array =
+            DictionaryArray::<Int8Type>::try_new(&keys_for_dict, &list_array).unwrap();
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "f1",
+            dict_dict_array.data_type().clone(),
+            false,
+        )]));
+        let input_batch =
+            RecordBatch::try_new(schema, vec![Arc::new(dict_dict_array)]).unwrap();
+        let output_batch = roundtrip_ipc_stream(&input_batch);
+        assert_eq!(input_batch, output_batch);
+    }
 }
diff --git a/arrow/src/ipc/writer.rs b/arrow/src/ipc/writer.rs
index 1f73d16d2b74..efc878a12109 100644
--- a/arrow/src/ipc/writer.rs
+++ b/arrow/src/ipc/writer.rs
@@ -27,7 +27,7 @@ use flatbuffers::FlatBufferBuilder;
 
 use crate::array::{
     as_large_list_array, as_list_array, as_map_array, as_struct_array, as_union_array,
-    make_array, Array, ArrayData, ArrayRef,
+    make_array, Array, ArrayData, ArrayRef, FixedSizeListArray,
 };
 use crate::buffer::{Buffer, MutableBuffer};
 use crate::datatypes::*;
@@ -147,7 +147,6 @@ impl IpcDataGenerator {
         dictionary_tracker: &mut DictionaryTracker,
         write_options: &IpcWriteOptions,
     ) -> Result<()> {
-        // TODO: Handle other nested types (FixedSizeList)
         match column.data_type() {
             DataType::Struct(fields) => {
                 let s = as_struct_array(column);
@@ -181,6 +180,19 @@ impl IpcDataGenerator {
                     write_options,
                 )?;
             }
+            DataType::FixedSizeList(field, _) => {
+                let list = column
+                    .as_any()
+                    .downcast_ref::<FixedSizeListArray>()
+                    .expect("Unable to downcast to fixed size list array");
+                self.encode_dictionaries(
+                    field,
+                    &list.values(),
+                    encoded_dictionaries,
+                    dictionary_tracker,
+                    write_options,
+                )?;
+            }
             DataType::Map(field, _) => {
                 let map_array = as_map_array(column);
 

From f645b649944d22ac07b8b2b1676bd457b1914cef Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh
Date: Fri, 22 Apr 2022 19:43:47 -0700
Subject: [PATCH 2/2] Fix clippy

---
Notes (ignored when the patch is applied):
  * `list_data_type` is not used again after being handed to
    `ArrayData::builder` in the test added by patch 1/2, so it is moved
    instead of cloned.

 arrow/src/ipc/reader.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs
index a3ccf902c624..8a26167dbae8 100644
--- a/arrow/src/ipc/reader.rs
+++ b/arrow/src/ipc/reader.rs
@@ -1591,7 +1591,7 @@ mod tests {
             )),
             3,
         );
-        let list_data = ArrayData::builder(list_data_type.clone())
+        let list_data = ArrayData::builder(list_data_type)
             .len(3)
             .add_child_data(dict_data.clone())
             .build()