Skip to content

Commit

Permalink
fix(data): map type ID to child index before indexing union child array
Browse files Browse the repository at this point in the history
  • Loading branch information
kawadakk committed Jul 31, 2023
1 parent 5a5a113 commit dcf5a1b
Showing 1 changed file with 17 additions and 3 deletions.
20 changes: 17 additions & 3 deletions arrow-data/src/transform/union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,17 @@ pub(super) fn build_extend_dense(array: &ArrayData) -> Extend {
let type_ids = array.buffer::<i8>(0);
let offsets = array.buffer::<i32>(1);

// Construct a mapping from type IDs to child indices
let arrow_schema::DataType::Union(fields, _) = array.data_type()
else { unreachable!() };

// Reinterpret the signed type ID as an unsigned byte before widening, so every
// possible `i8` value lands inside the 256-entry lookup table below. Widening an
// `i8` straight to `usize` sign-extends, which would turn a negative ID into an
// out-of-range index instead of a `None` table entry.
let type_id_to_usize = |x: i8| -> usize { x as u8 as usize };
// `None` marks type IDs that do not correspond to any child of this union.
let mut type_id_to_child_idx = [None::<u8>; 256];
for (i, (type_id, _)) in fields.iter().enumerate() {
    type_id_to_child_idx[type_id_to_usize(type_id)] =
        Some(i.try_into().expect("too many union fields"));
}

Box::new(
move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
// extends type_ids
Expand All @@ -48,14 +59,17 @@ pub(super) fn build_extend_dense(array: &ArrayData) -> Extend {
.extend_from_slice(&type_ids[start..start + len]);

(start..start + len).for_each(|i| {
let type_id = type_ids[i] as usize;
let type_id = type_ids[i];
let child_idx: usize = type_id_to_child_idx[type_id_to_usize(type_id)]
.expect("invalid type ID")
.into();
let src_offset = offsets[i] as usize;
let child_data = &mut mutable.child_data[type_id];
let child_data = &mut mutable.child_data[child_idx];
let dst_offset = child_data.len();

// Extend offsets
mutable.buffer2.push(dst_offset as i32);
mutable.child_data[type_id].extend(index, src_offset, src_offset + 1)
mutable.child_data[child_idx].extend(index, src_offset, src_offset + 1)
})
},
)
Expand Down

0 comments on commit dcf5a1b

Please sign in to comment.