Skip to content

Commit

Permalink
Fix FFI and add support for Struct type (#287)
Browse files Browse the repository at this point in the history
* fix: support nested types in FFI

Ported from https://github.com/jorgecarleitao/arrow2

Fix #20
Fix #251

Signed-off-by: roee88 <roee88@gmail.com>

* Removed Clone from FFI_ArrowArray

Signed-off-by: roee88 <roee88@gmail.com>

* Add nesting to FFI struct test

Signed-off-by: roee88 <roee88@gmail.com>
  • Loading branch information
roee88 authored May 17, 2021
1 parent de44c8c commit c863a2c
Show file tree
Hide file tree
Showing 2 changed files with 387 additions and 341 deletions.
122 changes: 42 additions & 80 deletions arrow/src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,111 +22,40 @@ use std::convert::TryFrom;
use crate::{
error::{ArrowError, Result},
ffi,
ffi::ArrowArrayRef,
};

use super::ArrayData;
use crate::datatypes::DataType;
use crate::ffi::ArrowArray;

/// Conversion from an FFI [`ffi::ArrowArray`] into [`ArrayData`].
// NOTE(review): this span looks like a rendered diff with removed and added
// lines interleaved (the commit is titled "Fix FFI and add support for Struct
// type"). The manual `ArrayData::new(..)` construction below is presumably the
// removed implementation and the trailing `value.to_data()` its replacement —
// confirm against the actual file before relying on either path.
impl TryFrom<ffi::ArrowArray> for ArrayData {
type Error = ArrowError;

/// Builds an [`ArrayData`] from `value`, propagating any error reported
/// while reading its children, data type or buffers.
fn try_from(value: ffi::ArrowArray) -> Result<Self> {
let child_data = value.children()?;

// Only the first child's data type is forwarded as a hint to
// `value.data_type(..)`; the types of any further children are not consulted.
let child_type = if !child_data.is_empty() {
Some(child_data[0].data_type().clone())
} else {
None
};

let data_type = value.data_type(child_type)?;

let len = value.len();
let offset = value.offset();
let null_count = value.null_count();
let buffers = value.buffers()?;
let null_bit_buffer = value.null_bit_buffer();

Ok(ArrayData::new(
data_type,
len,
Some(null_count),
null_bit_buffer,
offset,
buffers,
child_data,
))
// Delegates the whole conversion to the FFI array itself.
value.to_data()
}
}

/// Conversion from [`ArrayData`] into an FFI [`ffi::ArrowArray`].
// NOTE(review): this span looks like a rendered diff with removed and added
// lines interleaved. The field-by-field export below is presumably the removed
// implementation and the final `ffi::ArrowArray::try_new(value)` its
// replacement — confirm against the actual file.
impl TryFrom<ArrayData> for ffi::ArrowArray {
type Error = ArrowError;

/// Exports `value` as an [`ffi::ArrowArray`], returning whatever error
/// `ffi::ArrowArray::try_new` reports.
fn try_from(value: ArrayData) -> Result<Self> {
// If parent is nullable, then children also must be nullable
// so we pass this nullable to the creation of the child data
// Only `List` and `LargeList` take nullability from their field; every other
// data type is treated as non-nullable here.
let nullable = match value.data_type() {
DataType::List(field) => field.is_nullable(),
DataType::LargeList(field) => field.is_nullable(),
_ => false,
};

let len = value.len();
let offset = value.offset() as usize;
let null_count = value.null_count();
let buffers = value.buffers().to_vec();
let null_buffer = value.null_buffer().cloned();
// Export each direct child. Children are created with an empty child list
// (`vec![]`), so anything nested below the first level is not passed along
// by this code path.
let child_data = value
.child_data()
.iter()
.map(|arr| {
let len = arr.len();
let offset = arr.offset() as usize;
let null_count = arr.null_count();
let buffers = arr.buffers().to_vec();
let null_buffer = arr.null_buffer().cloned();

// Note: the nullable comes from the parent data.
// NOTE(review): the safety contract of `try_new` is not visible in this
// file — confirm what the caller must uphold. The `expect` panics if
// `try_new` ever returns an error for a child.
unsafe {
ArrowArray::try_new(
arr.data_type(),
len,
null_count,
null_buffer,
offset,
buffers,
vec![],
nullable,
)
.expect("infallible")
}
})
.collect::<Vec<_>>();

unsafe {
ffi::ArrowArray::try_new(
value.data_type(),
len,
null_count,
null_buffer,
offset,
buffers,
child_data,
nullable,
)
}
// Hands the whole `ArrayData` to `try_new` in a single call.
// NOTE(review): safety requirements of `try_new` are defined elsewhere — confirm.
unsafe { ffi::ArrowArray::try_new(value) }
}
}

#[cfg(test)]
mod tests {
use crate::error::Result;
use crate::{
array::{Array, ArrayData, Int64Array, UInt32Array, UInt64Array},
array::{
Array, ArrayData, BooleanArray, Int64Array, StructArray, UInt32Array,
UInt64Array,
},
datatypes::{DataType, Field},
ffi::ArrowArray,
};
use std::convert::TryFrom;
use std::sync::Arc;

fn test_round_trip(expected: &ArrayData) -> Result<()> {
// create a `ArrowArray` from the data.
Expand Down Expand Up @@ -165,4 +94,37 @@ mod tests {
let data = array.data();
test_round_trip(data)
}

/// Builds a struct array whose first column is itself a struct array and
/// hands its data to `test_round_trip`.
#[test]
fn test_struct() -> Result<()> {
    // Columns of the nested (inner) struct.
    let a1: Arc<dyn Array> =
        Arc::new(BooleanArray::from(vec![true, true, false, false]));
    let a2: Arc<dyn Array> = Arc::new(UInt32Array::from(vec![1, 2, 3, 4]));
    let nested = StructArray::from(vec![
        (Field::new("a1", DataType::Boolean, false), a1),
        (Field::new("a2", DataType::UInt32, false), a2),
    ]);

    // The field must be described before `nested` is moved into its `Arc`.
    let nested_field = Field::new("a", nested.data_type().clone(), false);
    let a: Arc<dyn Array> = Arc::new(nested);
    let b: Arc<dyn Array> =
        Arc::new(BooleanArray::from(vec![false, false, true, true]));
    let c: Arc<dyn Array> = Arc::new(UInt32Array::from(vec![42, 28, 19, 31]));

    // Outer struct: one nested struct column followed by two primitive columns.
    let outer = StructArray::from(vec![
        (nested_field, a),
        (Field::new("b", DataType::Boolean, false), b),
        (Field::new("c", DataType::UInt32, false), c),
    ]);

    test_round_trip(outer.data())
}
}
Loading

0 comments on commit c863a2c

Please sign in to comment.