Skip to content

Commit

Permalink
ARROW-10826: [Rust] Add support for FixedSizeBinaryArray to MutableArrayData
Browse files Browse the repository at this point in the history

Closes #8852 from jorgecarleitao/mutable_fixed_binary

Authored-by: Jorge C. Leitao <jorgecarleitao@gmail.com>
Signed-off-by: Jorge C. Leitao <jorgecarleitao@gmail.com>
  • Loading branch information
jorgecarleitao committed Dec 8, 2020
1 parent 6ba022f commit db94f24
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 6 deletions.
15 changes: 15 additions & 0 deletions rust/arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,21 @@ impl FixedSizeBinaryArray {
}
}

/// Builds a `FixedSizeBinaryArray` from a list of byte vectors that all share
/// the same length; that common length becomes the fixed size of the array.
///
/// # Panics
///
/// Panics when `data` is empty or when the items do not all have the same length.
impl From<Vec<Vec<u8>>> for FixedSizeBinaryArray {
    fn from(data: Vec<Vec<u8>>) -> Self {
        let len = data.len();
        // the width of the array is taken from the first item, so one must exist
        assert!(len > 0);
        let size = data[0].len();
        assert!(data.iter().all(|item| item.len() == size));

        // lay all items out back to back in a single contiguous values buffer
        let bytes: Vec<u8> = data.concat();
        let values = Buffer::from(&bytes);

        let builder = ArrayData::builder(DataType::FixedSizeBinary(size as i32))
            .len(len)
            .add_buffer(values);
        FixedSizeBinaryArray::from(builder.build())
    }
}

impl From<ArrayDataRef> for FixedSizeBinaryArray {
fn from(data: ArrayDataRef) -> Self {
assert_eq!(
Expand Down
65 changes: 65 additions & 0 deletions rust/arrow/src/array/transform/fixed_binary.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::{array::ArrayData, datatypes::DataType};

use super::{Extend, _MutableArrayData};

/// Builds the closure used to copy slots of a `FixedSizeBinary` `array` into the
/// values buffer (`buffers[0]`) of a `_MutableArrayData`.
///
/// The returned closure receives the mutable data, an index that is ignored here,
/// and the `start` / `len` of the range of slots to copy. Every slot contributes
/// exactly `size` bytes to the output, where `size` is the width declared by the
/// array's `DataType::FixedSizeBinary`.
///
/// # Panics
///
/// Reaches `unreachable!()` when `array` is not of type `DataType::FixedSizeBinary`.
pub(super) fn build_extend(array: &ArrayData) -> Extend {
    let size = match array.data_type() {
        DataType::FixedSizeBinary(i) => *i as usize,
        _ => unreachable!(),
    };

    // drop the bytes that precede the array's offset, so that slot `i` of the
    // array starts at byte `i * size` of `values`
    let values = &array.buffers()[0].data()[array.offset() * size..];
    if array.null_count() == 0 {
        // fast case where we can copy regions without null issues
        Box::new(
            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
                let buffer = &mut mutable.buffers[0];
                buffer.extend_from_slice(&values[start * size..(start + len) * size]);
            },
        )
    } else {
        Box::new(
            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
                // nulls present: append slot by slot so that null slots are not
                // copied from the source
                let values_buffer = &mut mutable.buffers[0];

                (start..start + len).for_each(|i| {
                    // NOTE(review): relies on `is_valid` taking the array's offset
                    // into account, like `values` above does -- confirm.
                    if array.is_valid(i) {
                        // append only the `size` bytes of slot `i`; copying the whole
                        // `start..start + len` range here would write it once per
                        // valid slot and corrupt the output
                        let bytes = &values[i * size..(i + 1) * size];
                        values_buffer.extend_from_slice(bytes);
                    } else {
                        // null slot: still reserve `size` bytes to keep slots aligned
                        values_buffer.extend(size);
                    }
                })
            },
        )
    }
}

/// Grows the values buffer (`buffers[0]`) of `mutable` by the space of `len`
/// slots, i.e. by `len * size` bytes, where `size` is the width declared by the
/// `DataType::FixedSizeBinary` of `mutable`.
///
/// # Panics
///
/// Reaches `unreachable!()` when `mutable` is not of type `DataType::FixedSizeBinary`.
pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
    let width = if let DataType::FixedSizeBinary(width) = mutable.data_type {
        width as usize
    } else {
        unreachable!()
    };

    // presumably `extend` pads the buffer with zeroed bytes -- confirm against `MutableBuffer`
    mutable.buffers[0].extend(len * width);
}
37 changes: 31 additions & 6 deletions rust/arrow/src/array/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use crate::{buffer::MutableBuffer, datatypes::DataType, util::bit_util};
use super::{ArrayData, ArrayDataRef};

mod boolean;
mod fixed_binary;
mod list;
mod primitive;
mod structure;
Expand Down Expand Up @@ -219,10 +220,10 @@ fn build_extend(array: &ArrayData) -> Extend {
_ => unreachable!(),
},
DataType::Struct(_) => structure::build_extend(array),
DataType::FixedSizeBinary(_) => fixed_binary::build_extend(array),
DataType::Float16 => unreachable!(),
/*
DataType::Null => {}
DataType::FixedSizeBinary(_) => {}
DataType::FixedSizeList(_, _) => {}
DataType::Struct(_) => {}
DataType::Union(_) => {}
Expand Down Expand Up @@ -269,11 +270,10 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
_ => unreachable!(),
},
DataType::Struct(_) => structure::extend_nulls,
//DataType::Struct(_) => structure::build_extend(array),
DataType::FixedSizeBinary(_) => fixed_binary::extend_nulls,
DataType::Float16 => unreachable!(),
/*
DataType::Null => {}
DataType::FixedSizeBinary(_) => {}
DataType::FixedSizeList(_, _) => {}
DataType::Union(_) => {}
*/
Expand Down Expand Up @@ -351,6 +351,9 @@ impl<'a> MutableArrayData<'a> {
buffer.extend_from_slice(&[0i64].to_byte_slice());
vec![buffer]
}
DataType::FixedSizeBinary(size) => {
vec![MutableBuffer::new(capacity * *size as usize)]
}
DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() {
DataType::UInt8 => vec![MutableBuffer::new(capacity * size_of::<u8>())],
DataType::UInt16 => vec![MutableBuffer::new(capacity * size_of::<u16>())],
Expand Down Expand Up @@ -484,9 +487,10 @@ mod tests {
use super::*;

use crate::array::{
Array, ArrayDataRef, ArrayRef, BooleanArray, DictionaryArray, Int16Array,
Int16Type, Int32Array, Int64Builder, ListBuilder, PrimitiveBuilder, StringArray,
StringDictionaryBuilder, StructArray, UInt8Array,
Array, ArrayDataRef, ArrayRef, BooleanArray, DictionaryArray,
FixedSizeBinaryArray, Int16Array, Int16Type, Int32Array, Int64Builder,
ListBuilder, PrimitiveBuilder, StringArray, StringDictionaryBuilder, StructArray,
UInt8Array,
};
use crate::{array::ListArray, error::Result};

Expand Down Expand Up @@ -842,4 +846,25 @@ mod tests {
.unwrap();
assert_eq!(array, expected)
}

/// Checks that extending from a sliced (offset != 0) `FixedSizeBinaryArray`
/// picks the slots relative to the slice and not to the underlying buffer.
#[test]
fn test_binary_fixed_sized_offsets() {
    let source = FixedSizeBinaryArray::from(vec![vec![0, 0], vec![0, 1], vec![0, 2]]);
    // skipping the first item leaves [[0, 1], [0, 2]] behind an offset of 1
    let sliced = source.data().slice(1, 2);

    let mut mutable = MutableArrayData::new(vec![&sliced], false, 0);

    // take the second slot of the slice first, then the first one
    mutable.extend(0, 1, 2);
    mutable.extend(0, 0, 1);

    let result = FixedSizeBinaryArray::from(Arc::new(mutable.freeze()));

    let expected = FixedSizeBinaryArray::from(vec![vec![0, 2], vec![0, 1]]);
    assert_eq!(result, expected);
}
}

0 comments on commit db94f24

Please sign in to comment.