Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid using Buffer api that accidentally copies data #6039

Merged
merged 1 commit into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1025,13 +1025,13 @@ mod tests {
let value_data = ArrayData::builder(DataType::Int8)
.len(8)
.add_buffer(Buffer::from(
&[10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
[10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
))
.build()
.unwrap();

// Construct a buffer for value offsets, for the nested array:
let keys = Buffer::from(&[2_i16, 3, 4].to_byte_slice());
let keys = Buffer::from([2_i16, 3, 4].to_byte_slice());

// Construct a dictionary array from the above two
let key_type = DataType::Int16;
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ mod tests {
assert_eq!(err.to_string(), "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\"");

// Valid as nulls in child masked by parent
let nulls = NullBuffer::new(BooleanBuffer::new(vec![0b0000101].into(), 0, 3));
let nulls = NullBuffer::new(BooleanBuffer::new(Buffer::from([0b0000101]), 0, 3));
FixedSizeListArray::new(field, 2, values.clone(), Some(nulls));

let field = Arc::new(Field::new("item", DataType::Int64, true));
Expand Down
18 changes: 9 additions & 9 deletions arrow-array/src/array/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -448,20 +448,20 @@ mod tests {
// Construct key and values
let keys_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.build()
.unwrap();
let values_data = ArrayData::builder(DataType::UInt32)
.len(8)
.add_buffer(Buffer::from(
&[0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice(),
[0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice(),
))
.build()
.unwrap();

// Construct a buffer for value offsets, for the nested array:
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

let keys = Arc::new(Field::new("keys", DataType::Int32, false));
let values = Arc::new(Field::new("values", DataType::UInt32, false));
Expand Down Expand Up @@ -493,21 +493,21 @@ mod tests {
// Construct key and values
let key_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.build()
.unwrap();
let value_data = ArrayData::builder(DataType::UInt32)
.len(8)
.add_buffer(Buffer::from(
&[0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
[0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
))
.null_bit_buffer(Some(Buffer::from(&[0b11010110])))
.build()
.unwrap();

// Construct a buffer for value offsets, for the nested array:
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
let values_field = Arc::new(Field::new("values", DataType::UInt32, true));
Expand Down Expand Up @@ -617,18 +617,18 @@ mod tests {
// Construct key and values
let keys_data = ArrayData::builder(DataType::Int32)
.len(5)
.add_buffer(Buffer::from(&[3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from([3, 4, 5, 6, 7].to_byte_slice()))
.build()
.unwrap();
let values_data = ArrayData::builder(DataType::UInt32)
.len(5)
.add_buffer(Buffer::from(&[30u32, 40, 50, 60, 70].to_byte_slice()))
.add_buffer(Buffer::from([30u32, 40, 50, 60, 70].to_byte_slice()))
.build()
.unwrap();

// Construct a buffer for value offsets, for the nested array:
// [[3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 5].to_byte_slice());
let entry_offsets = Buffer::from([0, 3, 5].to_byte_slice());

let keys = Arc::new(Field::new("keys", DataType::Int32, false));
let values = Arc::new(Field::new("values", DataType::UInt32, false));
Expand Down
4 changes: 2 additions & 2 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -549,15 +549,15 @@ mod tests {
let expected_string_data = ArrayData::builder(DataType::Utf8)
.len(4)
.null_bit_buffer(Some(Buffer::from(&[9_u8])))
.add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice()))
.add_buffer(Buffer::from([0, 3, 3, 3, 7].to_byte_slice()))
.add_buffer(Buffer::from(b"joemark"))
.build()
.unwrap();

let expected_int_data = ArrayData::builder(DataType::Int32)
.len(4)
.null_bit_buffer(Some(Buffer::from(&[11_u8])))
.add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice()))
.add_buffer(Buffer::from([1, 2, 0, 4].to_byte_slice()))
.build()
.unwrap();

Expand Down
2 changes: 1 addition & 1 deletion arrow-buffer/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ mod tests {

#[test]
fn test_access_concurrently() {
let buffer = Buffer::from(vec![1, 2, 3, 4, 5]);
let buffer = Buffer::from([1, 2, 3, 4, 5]);
let buffer2 = buffer.clone();
assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());

Expand Down
2 changes: 1 addition & 1 deletion arrow-buffer/src/util/bit_chunk_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ mod tests {
const ALLOC_SIZE: usize = 4 * 1024;
let input = vec![0xFF_u8; ALLOC_SIZE];

let buffer: Buffer = Buffer::from(input);
let buffer: Buffer = Buffer::from_vec(input);

let bitchunks = buffer.bit_chunks(57, ALLOC_SIZE * 8 - 57);

Expand Down
8 changes: 5 additions & 3 deletions arrow-cast/src/base64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
//! [`StringArray`]: arrow_array::StringArray

use arrow_array::{Array, GenericBinaryArray, GenericStringArray, OffsetSizeTrait};
use arrow_buffer::OffsetBuffer;
use arrow_buffer::{Buffer, OffsetBuffer};
use arrow_schema::ArrowError;
use base64::encoded_len;
use base64::engine::Config;
Expand Down Expand Up @@ -50,7 +50,9 @@ pub fn b64_encode<E: Engine, O: OffsetSizeTrait>(
assert_eq!(offset, buffer_len);

// Safety: Base64 is valid UTF-8
unsafe { GenericStringArray::new_unchecked(offsets, buffer.into(), array.nulls().cloned()) }
unsafe {
GenericStringArray::new_unchecked(offsets, Buffer::from_vec(buffer), array.nulls().cloned())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another copy

}
}

/// Base64 decode each element of `array` with the provided [`Engine`]
Expand Down Expand Up @@ -79,7 +81,7 @@ pub fn b64_decode<E: Engine, O: OffsetSizeTrait>(

Ok(GenericBinaryArray::new(
offsets,
buffer.into(),
Buffer::from_vec(buffer),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Saved a copy here

array.nulls().cloned(),
))
}
Expand Down
4 changes: 2 additions & 2 deletions arrow-data/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1959,7 +1959,7 @@ mod tests {
.len(20)
.offset(5)
.add_buffer(b1)
.null_bit_buffer(Some(Buffer::from(vec![
.null_bit_buffer(Some(Buffer::from([
0b01011111, 0b10110101, 0b01100011, 0b00011110,
])))
.build()
Expand Down Expand Up @@ -2164,7 +2164,7 @@ mod tests {

#[test]
fn test_count_nulls() {
let buffer = Buffer::from(vec![0b00010110, 0b10011111]);
let buffer = Buffer::from([0b00010110, 0b10011111]);
let buffer = NullBuffer::new(BooleanBuffer::new(buffer, 0, 16));
let count = count_nulls(Some(&buffer), 0, 16);
assert_eq!(count, 7);
Expand Down
2 changes: 1 addition & 1 deletion arrow-flight/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ pub fn flight_data_to_arrow_batch(
})
.map(|batch| {
reader::read_record_batch(
&Buffer::from(&data.data_body),
&Buffer::from(data.data_body.as_ref()),
batch,
schema,
dictionaries_by_id,
Expand Down
6 changes: 3 additions & 3 deletions arrow-integration-test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,7 @@ pub fn array_from_json(
let list_data = ArrayData::builder(field.data_type().clone())
.len(json_col.count)
.offset(0)
.add_buffer(Buffer::from(&offsets.to_byte_slice()))
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_child_data(child_array.into_data())
.null_bit_buffer(Some(null_buf))
.build()
Expand All @@ -720,7 +720,7 @@ pub fn array_from_json(
let list_data = ArrayData::builder(field.data_type().clone())
.len(json_col.count)
.offset(0)
.add_buffer(Buffer::from(&offsets.to_byte_slice()))
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_child_data(child_array.into_data())
.null_bit_buffer(Some(null_buf))
.build()
Expand Down Expand Up @@ -839,7 +839,7 @@ pub fn array_from_json(
.collect();
let array_data = ArrayData::builder(field.data_type().clone())
.len(json_col.count)
.add_buffer(Buffer::from(&offsets.to_byte_slice()))
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_child_data(child_array.into_data())
.null_bit_buffer(Some(null_buf))
.build()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ async fn receive_batch_flight_data(

while message.header_type() == ipc::MessageHeader::DictionaryBatch {
reader::read_dictionary(
&Buffer::from(&data.data_body),
&Buffer::from(data.data_body.as_ref()),
message
.header_as_dictionary_batch()
.expect("Error parsing dictionary"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ async fn save_uploaded_chunks(

let batch = record_batch_from_message(
message,
&Buffer::from(data.data_body),
&Buffer::from(data.data_body.as_ref()),
schema_ref.clone(),
&dictionaries_by_id,
)
Expand All @@ -375,7 +375,7 @@ async fn save_uploaded_chunks(
ipc::MessageHeader::DictionaryBatch => {
dictionary_from_message(
message,
&Buffer::from(data.data_body),
&Buffer::from(data.data_body.as_ref()),
schema_ref.clone(),
&mut dictionaries_by_id,
)
Expand Down
4 changes: 2 additions & 2 deletions arrow-ipc/src/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ impl CompressionCodec {
} else if let Ok(decompressed_length) = usize::try_from(decompressed_length) {
// decompress data using the codec
let input_data = &input[(LENGTH_OF_PREFIX_DATA as usize)..];
self.decompress(input_data, decompressed_length as _)?
.into()
let v = self.decompress(input_data, decompressed_length as _)?;
Buffer::from_vec(v)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy!

} else {
return Err(ArrowError::IpcError(format!(
"Invalid uncompressed length: {decompressed_length}"
Expand Down
6 changes: 3 additions & 3 deletions arrow-json/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1850,7 +1850,7 @@ mod tests {
let c = ArrayDataBuilder::new(c_field.data_type().clone())
.len(7)
.add_child_data(d.to_data())
.null_bit_buffer(Some(Buffer::from(vec![0b00111011])))
.null_bit_buffer(Some(Buffer::from([0b00111011])))
.build()
.unwrap();
let b = BooleanArray::from(vec![
Expand All @@ -1866,14 +1866,14 @@ mod tests {
.len(7)
.add_child_data(b.to_data())
.add_child_data(c.clone())
.null_bit_buffer(Some(Buffer::from(vec![0b00111111])))
.null_bit_buffer(Some(Buffer::from([0b00111111])))
.build()
.unwrap();
let a_list = ArrayDataBuilder::new(a_field.data_type().clone())
.len(6)
.add_buffer(Buffer::from_slice_ref([0i32, 2, 3, 6, 6, 6, 7]))
.add_child_data(a)
.null_bit_buffer(Some(Buffer::from(vec![0b00110111])))
.null_bit_buffer(Some(Buffer::from([0b00110111])))
.build()
.unwrap();
let expected = make_array(a_list);
Expand Down
18 changes: 9 additions & 9 deletions arrow-json/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -927,12 +927,12 @@ mod tests {

let a_values = StringArray::from(vec!["a", "a1", "b", "c", "d", "e"]);
// list column rows: ["a", "a1"], ["b"], ["c"], ["d"], ["e"]
let a_value_offsets = Buffer::from(&[0, 2, 3, 4, 5, 6].to_byte_slice());
let a_value_offsets = Buffer::from([0, 2, 3, 4, 5, 6].to_byte_slice());
let a_list_data = ArrayData::builder(field_c1.data_type().clone())
.len(5)
.add_buffer(a_value_offsets)
.add_child_data(a_values.into_data())
.null_bit_buffer(Some(Buffer::from(vec![0b00011111])))
.null_bit_buffer(Some(Buffer::from([0b00011111])))
.build()
.unwrap();
let a = ListArray::from(a_list_data);
Expand Down Expand Up @@ -976,17 +976,17 @@ mod tests {
// list column rows: [[1, 2], [3]], [], [[4, 5, 6]]
let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);

let a_value_offsets = Buffer::from(&[0, 2, 3, 6].to_byte_slice());
let a_value_offsets = Buffer::from([0, 2, 3, 6].to_byte_slice());
// Construct a list array from the above two
let a_list_data = ArrayData::builder(list_inner_type.data_type().clone())
.len(3)
.add_buffer(a_value_offsets)
.null_bit_buffer(Some(Buffer::from(vec![0b00000111])))
.null_bit_buffer(Some(Buffer::from([0b00000111])))
.add_child_data(a_values.into_data())
.build()
.unwrap();

let c1_value_offsets = Buffer::from(&[0, 2, 2, 3].to_byte_slice());
let c1_value_offsets = Buffer::from([0, 2, 2, 3].to_byte_slice());
let c1_list_data = ArrayData::builder(field_c1.data_type().clone())
.len(3)
.add_buffer(c1_value_offsets)
Expand Down Expand Up @@ -1058,12 +1058,12 @@ mod tests {
// [{"c11": 1, "c12": {"c121": "e"}}, {"c12": {"c121": "f"}}],
// null,
// [{"c11": 5, "c12": {"c121": "g"}}]
let c1_value_offsets = Buffer::from(&[0, 2, 2, 3].to_byte_slice());
let c1_value_offsets = Buffer::from([0, 2, 2, 3].to_byte_slice());
let c1_list_data = ArrayData::builder(field_c1.data_type().clone())
.len(3)
.add_buffer(c1_value_offsets)
.add_child_data(struct_values.into_data())
.null_bit_buffer(Some(Buffer::from(vec![0b00000101])))
.null_bit_buffer(Some(Buffer::from([0b00000101])))
.build()
.unwrap();
let c1 = ListArray::from(c1_list_data);
Expand Down Expand Up @@ -1225,7 +1225,7 @@ mod tests {
);

// [{"foo": 10}, null, {}, {"bar": 20, "baz": 30, "qux": 40}, {"quux": 50}, {}]
let entry_offsets = Buffer::from(&[0, 1, 1, 1, 4, 5, 5].to_byte_slice());
let entry_offsets = Buffer::from([0, 1, 1, 1, 4, 5, 5].to_byte_slice());
let valid_buffer = Buffer::from([0b00111101]);

let map_data = ArrayData::builder(map_data_type.clone())
Expand Down Expand Up @@ -1408,7 +1408,7 @@ mod tests {
);

// [{"list":[{"int32":1,"utf8":"a"},{"int32":null,"utf8":"b"}]},{"list":null},{"list":[{"int32":5,"utf8":null}]},{"list":null}]
let entry_offsets = Buffer::from(&[0, 2, 2, 3, 3].to_byte_slice());
let entry_offsets = Buffer::from([0, 2, 2, 3, 3].to_byte_slice());
let data = ArrayData::builder(field.data_type().clone())
.len(4)
.add_buffer(entry_offsets)
Expand Down
2 changes: 1 addition & 1 deletion arrow-select/src/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ mod tests {

#[test]
fn test_merge_nulls() {
let buffer = Buffer::from("helloworldbingohelloworld");
let buffer = Buffer::from(b"helloworldbingohelloworld");
let offsets = OffsetBuffer::from_lengths([5, 5, 5, 5, 5]);
let nulls = NullBuffer::from(vec![true, false, true, true, true]);
let values = StringArray::new(offsets, buffer, Some(nulls));
Expand Down
4 changes: 2 additions & 2 deletions arrow-string/src/substring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ mod tests {
}

fn generic_string_with_non_zero_offset<O: OffsetSizeTrait>() {
let values = "hellotherearrow";
let values = b"hellotherearrow";
let offsets = &[
O::zero(),
O::from_usize(5).unwrap(),
Expand Down Expand Up @@ -867,7 +867,7 @@ mod tests {
let data = ArrayData::builder(GenericStringArray::<O>::DATA_TYPE)
.len(2)
.add_buffer(Buffer::from_slice_ref(offsets))
.add_buffer(Buffer::from(values))
.add_buffer(Buffer::from(values.as_bytes()))
.null_bit_buffer(Some(Buffer::from(bitmap)))
.offset(1)
.build()
Expand Down
4 changes: 2 additions & 2 deletions arrow/examples/builders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,13 @@ fn main() {
// buffer.
let value_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.build()
.unwrap();

// Construct a buffer for value offsets, for the nested array:
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let value_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
let value_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

// Construct a list array from the above two
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
Expand Down
2 changes: 1 addition & 1 deletion arrow/examples/tensor_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fn main() -> Result<()> {

// In order to build a tensor from an array, the function to_byte_slice adds the
// required padding to the elements in the array.
let buf = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7, 9, 10].to_byte_slice());
let buf = Buffer::from([0, 1, 2, 3, 4, 5, 6, 7, 9, 10].to_byte_slice());
let tensor = Int32Tensor::try_new(buf, Some(vec![2, 5]), None, None)?;
println!("\nInt32 Tensor");
println!("{tensor:?}");
Expand Down
Loading
Loading