From a3d6ad468e083d968147857d0e4d4fc29e46ce1c Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Wed, 10 Jul 2024 17:15:34 -0400 Subject: [PATCH 1/9] add string view bench --- arrow/benches/row_format.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/arrow/benches/row_format.rs b/arrow/benches/row_format.rs index b5298cbe3679..7ef4a634203e 100644 --- a/arrow/benches/row_format.rs +++ b/arrow/benches/row_format.rs @@ -24,7 +24,7 @@ use arrow::datatypes::{Int64Type, UInt64Type}; use arrow::row::{RowConverter, SortField}; use arrow::util::bench_util::{ create_boolean_array, create_dict_from_values, create_primitive_array, - create_string_array_with_len, create_string_dict_array, + create_string_array_with_len, create_string_dict_array, create_string_view_array_with_len, }; use arrow_array::types::Int32Type; use arrow_array::Array; @@ -87,6 +87,18 @@ fn row_bench(c: &mut Criterion) { let cols = vec![Arc::new(create_string_array_with_len::(4096, 0.5, 100)) as ArrayRef]; do_bench(c, "4096 string(100, 0.5)", cols); + let cols = vec![Arc::new(create_string_view_array_with_len(4096, 0., 10, false)) as ArrayRef]; + do_bench(c, "4096 string view(10, 0)", cols); + + let cols = vec![Arc::new(create_string_view_array_with_len(4096, 0., 30, false)) as ArrayRef]; + do_bench(c, "4096 string view(30, 0)", cols); + + let cols = vec![Arc::new(create_string_view_array_with_len(4096, 0., 100, false)) as ArrayRef]; + do_bench(c, "4096 string view(100, 0)", cols); + + let cols = vec![Arc::new(create_string_view_array_with_len(4096, 0.5, 100, false)) as ArrayRef]; + do_bench(c, "4096 string view(100, 0.5)", cols); + let cols = vec![Arc::new(create_string_dict_array::(4096, 0., 10)) as ArrayRef]; do_bench(c, "4096 string_dictionary(10, 0)", cols); From a9025b6ce4430997155144599bfab833d49d0719 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Thu, 11 Jul 2024 10:24:47 -0400 Subject: [PATCH 2/9] check in new impl --- arrow-row/src/variable.rs | 95 ++++++++++++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 16 deletions(-) diff --git a/arrow-row/src/variable.rs b/arrow-row/src/variable.rs index c5aa7d8ac323..ba0cf6ae00c2 100644 --- a/arrow-row/src/variable.rs +++ b/arrow-row/src/variable.rs @@ -20,8 +20,9 @@ use arrow_array::builder::BufferBuilder; use arrow_array::*; use arrow_buffer::bit_util::ceil; use arrow_buffer::MutableBuffer; -use arrow_data::ArrayDataBuilder; +use arrow_data::{ArrayDataBuilder, ByteView}; use arrow_schema::{DataType, SortOptions}; +use builder::make_view; /// The block size of the variable length encoding pub const BLOCK_SIZE: usize = 32; @@ -243,6 +244,74 @@ pub fn decode_binary( unsafe { GenericBinaryArray::from(builder.build_unchecked()) } } +/// Decodes a binary view array from `rows` with the provided `options` +pub fn decode_binary_view(rows: &mut [&[u8]], options: SortOptions) -> BinaryViewArray { + let len = rows.len(); + + let mut null_count = 0; + + let nulls = MutableBuffer::collect_bool(len, |x| { + let valid = rows[x][0] != null_sentinel(options); + null_count += !valid as usize; + valid + }); + + let values_capacity: usize = rows.iter().map(|row| decoded_len(row, options)).sum(); + let mut values = MutableBuffer::new(values_capacity); + let mut views = BufferBuilder::::new(len); + + for row in rows { + let start_offset = values.len(); + let offset = decode_blocks(row, options, |b| values.extend_from_slice(b)); + if row[0] == null_sentinel(options) { + debug_assert_eq!(offset, 1); + debug_assert_eq!(start_offset, values.len()); + views.append(0); + } else { + let view = make_view( + unsafe { values.get_unchecked(start_offset..) }, + 0, + start_offset as u32, + ); + views.append(view); + } + *row = &row[offset..]; + } + + if options.descending { + values.as_slice_mut().iter_mut().for_each(|o| *o = !*o); + for view in views.as_slice_mut() { + let len = *view as u32; + if len <= 12 { + let mut bytes = view.to_le_bytes(); + bytes + .iter_mut() + .skip(4) + .take(len as usize) + .for_each(|o| *o = !*o); + *view = u128::from_le_bytes(bytes); + } else { + let mut byte_view = ByteView::from(*view); + let mut prefix = byte_view.prefix.to_le_bytes(); + prefix.iter_mut().for_each(|o| *o = !*o); + byte_view.prefix = u32::from_le_bytes(prefix); + *view = byte_view.into(); + } + } + } + + let builder = ArrayDataBuilder::new(DataType::BinaryView) + .len(len) + .null_count(null_count) + .null_bit_buffer(Some(nulls.into())) + .add_buffer(views.finish()) + .add_buffer(values.into()); + + // SAFETY: + // Valid by construction above + unsafe { BinaryViewArray::from(builder.build_unchecked()) } +} + /// Decodes a string array from `rows` with the provided `options` /// /// # Safety @@ -269,16 +338,6 @@ pub unsafe fn decode_string( GenericStringArray::from(builder.build_unchecked()) } -/// Decodes a binary view array from `rows` with the provided `options` -pub fn decode_binary_view(rows: &mut [&[u8]], options: SortOptions) -> BinaryViewArray { - let decoded: GenericBinaryArray = decode_binary(rows, options); - - // Better performance might be to directly build the binary view instead of building to BinaryArray and then casting - // I suspect that the overhead is not a big deal. - // If it is, we can reimplement the `decode_binary_view` function to directly build the StringViewArray - BinaryViewArray::from(&decoded) -} - /// Decodes a string view array from `rows` with the provided `options` /// /// # Safety @@ -289,9 +348,13 @@ pub unsafe fn decode_string_view( options: SortOptions, validate_utf8: bool, ) -> StringViewArray { - let decoded: GenericStringArray = decode_string(rows, options, validate_utf8); - // Better performance might be to directly build the string view instead of building to StringArray and then casting - // I suspect that the overhead is not a big deal. - // If it is, we can reimplement the `decode_string_view` function to directly build the StringViewArray - StringViewArray::from(&decoded) + let decoded = decode_binary_view(rows, options); + return decoded.to_string_view_unchecked(); + // if !validate_utf8 { + // return decoded.to_string_view_unchecked(); + // } + + // decoded + // .to_string_view() + // .expect("Decoding string view encountered invalid utf8!") } From c20455e32631519747890021d6a8da6571ebaaad Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Thu, 11 Jul 2024 11:23:40 -0400 Subject: [PATCH 3/9] add utf8 --- arrow-row/src/variable.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arrow-row/src/variable.rs b/arrow-row/src/variable.rs index ba0cf6ae00c2..0cc89997b928 100644 --- a/arrow-row/src/variable.rs +++ b/arrow-row/src/variable.rs @@ -349,12 +349,11 @@ pub unsafe fn decode_string_view( validate_utf8: bool, ) -> StringViewArray { let decoded = decode_binary_view(rows, options); - return decoded.to_string_view_unchecked(); - // if !validate_utf8 { - // return decoded.to_string_view_unchecked(); - // } - - // decoded - // .to_string_view() - // .expect("Decoding string view encountered invalid utf8!") + if !validate_utf8 { + return decoded.to_string_view_unchecked(); + } + + decoded + .to_string_view() + .expect("Decoding string view encountered invalid utf8!") } From 6451fe0050382f47dfea4d804e2e85ddcbde8a3d Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Thu, 11 Jul 2024 11:34:00 -0400 Subject: [PATCH 4/9] quick utf8 validation --- arrow-row/src/variable.rs | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/arrow-row/src/variable.rs b/arrow-row/src/variable.rs index 0cc89997b928..1b544fef7f43 100644 --- a/arrow-row/src/variable.rs +++ b/arrow-row/src/variable.rs @@ -244,8 +244,11 @@ pub fn decode_binary( unsafe { GenericBinaryArray::from(builder.build_unchecked()) } } -/// Decodes a binary view array from `rows` with the provided `options` -pub fn decode_binary_view(rows: &mut [&[u8]], options: SortOptions) -> BinaryViewArray { +fn decode_binary_view_inner( + rows: &mut [&[u8]], + options: SortOptions, + check_utf8: bool, +) -> BinaryViewArray { let len = rows.len(); let mut null_count = 0; @@ -300,6 +303,12 @@ pub fn decode_binary_view(rows: &mut [&[u8]], options: SortOptions) -> BinaryVie } } + if check_utf8 { + // the values contains all data, no matter if it is short or long + // we can validate utf8 in one go. + std::str::from_utf8(values.as_slice()).unwrap(); + } + let builder = ArrayDataBuilder::new(DataType::BinaryView) .len(len) .null_count(null_count) @@ -312,6 +321,11 @@ pub fn decode_binary_view(rows: &mut [&[u8]], options: SortOptions) -> BinaryVie unsafe { BinaryViewArray::from(builder.build_unchecked()) } } +/// Decodes a binary view array from `rows` with the provided `options` +pub fn decode_binary_view(rows: &mut [&[u8]], options: SortOptions) -> BinaryViewArray { + decode_binary_view_inner(rows, options, false) +} + /// Decodes a string array from `rows` with the provided `options` /// /// # Safety @@ -348,12 +362,6 @@ pub unsafe fn decode_string_view( options: SortOptions, validate_utf8: bool, ) -> StringViewArray { - let decoded = decode_binary_view(rows, options); - if !validate_utf8 { - return decoded.to_string_view_unchecked(); - } - - decoded - .to_string_view() - .expect("Decoding string view encountered invalid utf8!") + let view = decode_binary_view_inner(rows, options, validate_utf8); + view.to_string_view_unchecked() } From 9eb41d38c732b1de87d767bf0bca521cc11f881e Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sat, 13 Jul 2024 11:31:38 -0400 Subject: [PATCH 5/9] Update arrow-row/src/variable.rs Co-authored-by: Andrew Lamb --- arrow-row/src/variable.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arrow-row/src/variable.rs b/arrow-row/src/variable.rs index 1b544fef7f43..5df2dea8b585 100644 --- a/arrow-row/src/variable.rs +++ b/arrow-row/src/variable.rs @@ -260,6 +260,9 @@ fn decode_binary_view_inner( }); let values_capacity: usize = rows.iter().map(|row| decoded_len(row, options)).sum(); + // Reserve the max amount of bytes possible for this buffer. + // However, only strings longer than 12 bytes are copied here + // This means it is less memory efficient but faster to do utf8 validation let mut values = MutableBuffer::new(values_capacity); let mut views = BufferBuilder::::new(len); From e2656c94dd5ff4fb2f486278feb346d44a7f5436 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sat, 13 Jul 2024 12:00:07 -0400 Subject: [PATCH 6/9] address comments --- arrow-row/src/variable.rs | 80 ++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/arrow-row/src/variable.rs b/arrow-row/src/variable.rs index 1b544fef7f43..3ac81947e406 100644 --- a/arrow-row/src/variable.rs +++ b/arrow-row/src/variable.rs @@ -20,7 +20,7 @@ use arrow_array::builder::BufferBuilder; use arrow_array::*; use arrow_buffer::bit_util::ceil; use arrow_buffer::MutableBuffer; -use arrow_data::{ArrayDataBuilder, ByteView}; +use arrow_data::ArrayDataBuilder; use arrow_schema::{DataType, SortOptions}; use builder::make_view; @@ -259,54 +259,56 @@ fn decode_binary_view_inner( valid }); - let values_capacity: usize = rows.iter().map(|row| decoded_len(row, options)).sum(); - let mut values = MutableBuffer::new(values_capacity); + // we create two buffer, the inline buffer is used for quick utf8 validation. + let mut output_buffer_cap = 0; + let mut inline_buffer_cap = 0; + for r in rows.iter() { + let len = decoded_len(r, options); + if len > 12 { + output_buffer_cap += len; + } else { + inline_buffer_cap += len; + } + } + + let mut output_buffer = MutableBuffer::new(output_buffer_cap); + let mut inline_buffer = MutableBuffer::new(inline_buffer_cap); let mut views = BufferBuilder::::new(len); for row in rows { - let start_offset = values.len(); - let offset = decode_blocks(row, options, |b| values.extend_from_slice(b)); + let start_offset = output_buffer.len(); + let offset = decode_blocks(row, options, |b| { + let val = if b.len() <= 12 { + let old_len = inline_buffer.len(); + inline_buffer.extend_from_slice(b); + // Safety: we just extended the buffer with the length of `b` + unsafe { inline_buffer.get_unchecked_mut(old_len..) } + } else { + output_buffer.extend_from_slice(b); + debug_assert_eq!(b, &output_buffer[start_offset..]); + // Safety: we just extended the buffer with the length of `b` + unsafe { output_buffer.get_unchecked_mut(start_offset..) } + }; + if options.descending { + val.iter_mut().for_each(|o| *o = !*o); + } + + let view = make_view(val, 0, start_offset as u32); + views.append(view); + }); if row[0] == null_sentinel(options) { debug_assert_eq!(offset, 1); - debug_assert_eq!(start_offset, values.len()); + debug_assert_eq!(start_offset, output_buffer.len()); views.append(0); - } else { - let view = make_view( - unsafe { values.get_unchecked(start_offset..) }, - 0, - start_offset as u32, - ); - views.append(view); } *row = &row[offset..]; } - if options.descending { - values.as_slice_mut().iter_mut().for_each(|o| *o = !*o); - for view in views.as_slice_mut() { - let len = *view as u32; - if len <= 12 { - let mut bytes = view.to_le_bytes(); - bytes - .iter_mut() - .skip(4) - .take(len as usize) - .for_each(|o| *o = !*o); - *view = u128::from_le_bytes(bytes); - } else { - let mut byte_view = ByteView::from(*view); - let mut prefix = byte_view.prefix.to_le_bytes(); - prefix.iter_mut().for_each(|o| *o = !*o); - byte_view.prefix = u32::from_le_bytes(prefix); - *view = byte_view.into(); - } - } - } - if check_utf8 { - // the values contains all data, no matter if it is short or long - // we can validate utf8 in one go. - std::str::from_utf8(values.as_slice()).unwrap(); + // We validate the utf8 of the output buffer and the inline buffer + // This is much faster than validating each string individually + std::str::from_utf8(output_buffer.as_slice()).unwrap(); + std::str::from_utf8(inline_buffer.as_slice()).unwrap(); } let builder = ArrayDataBuilder::new(DataType::BinaryView) @@ -314,7 +316,7 @@ fn decode_binary_view_inner( .null_count(null_count) .null_bit_buffer(Some(nulls.into())) .add_buffer(views.finish()) - .add_buffer(values.into()); + .add_buffer(output_buffer.into()); // SAFETY: // Valid by construction above From 47adf220f5ca910f6099807967bcd75f9976f8c8 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sat, 13 Jul 2024 12:37:00 -0400 Subject: [PATCH 7/9] update --- arrow-row/src/variable.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arrow-row/src/variable.rs b/arrow-row/src/variable.rs index 3ac81947e406..4cdebe187e64 100644 --- a/arrow-row/src/variable.rs +++ b/arrow-row/src/variable.rs @@ -263,16 +263,16 @@ fn decode_binary_view_inner( let mut output_buffer_cap = 0; let mut inline_buffer_cap = 0; for r in rows.iter() { - let len = decoded_len(r, options); - if len > 12 { - output_buffer_cap += len; + let l = decoded_len(r, options); + if l <= 12 { + inline_buffer_cap += l; } else { - inline_buffer_cap += len; + output_buffer_cap += l; } } - let mut output_buffer = MutableBuffer::new(output_buffer_cap); let mut inline_buffer = MutableBuffer::new(inline_buffer_cap); + let mut output_buffer = MutableBuffer::new(output_buffer_cap); let mut views = BufferBuilder::::new(len); for row in rows { @@ -307,8 +307,8 @@ fn decode_binary_view_inner( if check_utf8 { // We validate the utf8 of the output buffer and the inline buffer // This is much faster than validating each string individually - std::str::from_utf8(output_buffer.as_slice()).unwrap(); std::str::from_utf8(inline_buffer.as_slice()).unwrap(); + std::str::from_utf8(output_buffer.as_slice()).unwrap(); } let builder = ArrayDataBuilder::new(DataType::BinaryView) From 0921f3214c1baad842abdc58d0a285a37ad52a0d Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sat, 13 Jul 2024 13:04:18 -0400 Subject: [PATCH 8/9] Revert "address comments" This reverts commit e2656c94dd5ff4fb2f486278feb346d44a7f5436. --- arrow-row/src/variable.rs | 80 +++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/arrow-row/src/variable.rs b/arrow-row/src/variable.rs index 3ac81947e406..1b544fef7f43 100644 --- a/arrow-row/src/variable.rs +++ b/arrow-row/src/variable.rs @@ -20,7 +20,7 @@ use arrow_array::builder::BufferBuilder; use arrow_array::*; use arrow_buffer::bit_util::ceil; use arrow_buffer::MutableBuffer; -use arrow_data::ArrayDataBuilder; +use arrow_data::{ArrayDataBuilder, ByteView}; use arrow_schema::{DataType, SortOptions}; use builder::make_view; @@ -259,56 +259,54 @@ fn decode_binary_view_inner( valid }); - // we create two buffer, the inline buffer is used for quick utf8 validation. - let mut output_buffer_cap = 0; - let mut inline_buffer_cap = 0; - for r in rows.iter() { - let len = decoded_len(r, options); - if len > 12 { - output_buffer_cap += len; - } else { - inline_buffer_cap += len; - } - } - - let mut output_buffer = MutableBuffer::new(output_buffer_cap); - let mut inline_buffer = MutableBuffer::new(inline_buffer_cap); + let values_capacity: usize = rows.iter().map(|row| decoded_len(row, options)).sum(); + let mut values = MutableBuffer::new(values_capacity); let mut views = BufferBuilder::::new(len); for row in rows { - let start_offset = output_buffer.len(); - let offset = decode_blocks(row, options, |b| { - let val = if b.len() <= 12 { - let old_len = inline_buffer.len(); - inline_buffer.extend_from_slice(b); - // Safety: we just extended the buffer with the length of `b` - unsafe { inline_buffer.get_unchecked_mut(old_len..) } - } else { - output_buffer.extend_from_slice(b); - debug_assert_eq!(b, &output_buffer[start_offset..]); - // Safety: we just extended the buffer with the length of `b` - unsafe { output_buffer.get_unchecked_mut(start_offset..) } - }; - if options.descending { - val.iter_mut().for_each(|o| *o = !*o); - } - - let view = make_view(val, 0, start_offset as u32); - views.append(view); - }); + let start_offset = values.len(); + let offset = decode_blocks(row, options, |b| values.extend_from_slice(b)); if row[0] == null_sentinel(options) { debug_assert_eq!(offset, 1); - debug_assert_eq!(start_offset, output_buffer.len()); + debug_assert_eq!(start_offset, values.len()); views.append(0); + } else { + let view = make_view( + unsafe { values.get_unchecked(start_offset..) }, + 0, + start_offset as u32, + ); + views.append(view); } *row = &row[offset..]; } + if options.descending { + values.as_slice_mut().iter_mut().for_each(|o| *o = !*o); + for view in views.as_slice_mut() { + let len = *view as u32; + if len <= 12 { + let mut bytes = view.to_le_bytes(); + bytes + .iter_mut() + .skip(4) + .take(len as usize) + .for_each(|o| *o = !*o); + *view = u128::from_le_bytes(bytes); + } else { + let mut byte_view = ByteView::from(*view); + let mut prefix = byte_view.prefix.to_le_bytes(); + prefix.iter_mut().for_each(|o| *o = !*o); + byte_view.prefix = u32::from_le_bytes(prefix); + *view = byte_view.into(); + } + } + } + if check_utf8 { - // We validate the utf8 of the output buffer and the inline buffer - // This is much faster than validating each string individually - std::str::from_utf8(output_buffer.as_slice()).unwrap(); - std::str::from_utf8(inline_buffer.as_slice()).unwrap(); + // the values contains all data, no matter if it is short or long + // we can validate utf8 in one go. + std::str::from_utf8(values.as_slice()).unwrap(); } let builder = ArrayDataBuilder::new(DataType::BinaryView) @@ -316,7 +314,7 @@ fn decode_binary_view_inner( .null_count(null_count) .null_bit_buffer(Some(nulls.into())) .add_buffer(views.finish()) - .add_buffer(output_buffer.into()); + .add_buffer(values.into()); // SAFETY: // Valid by construction above From d18283061ce262c5cd6f34312a042a349f6094a3 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sat, 13 Jul 2024 13:41:20 -0400 Subject: [PATCH 9/9] addr comments --- arrow-row/src/variable.rs | 39 +++++++++++-------------------------- arrow/benches/row_format.rs | 4 ++-- 2 files changed, 13 insertions(+), 30 deletions(-) diff --git a/arrow-row/src/variable.rs b/arrow-row/src/variable.rs index 1b544fef7f43..4d4bcddc0807 100644 --- a/arrow-row/src/variable.rs +++ b/arrow-row/src/variable.rs @@ -20,7 +20,7 @@ use arrow_array::builder::BufferBuilder; use arrow_array::*; use arrow_buffer::bit_util::ceil; use arrow_buffer::MutableBuffer; -use arrow_data::{ArrayDataBuilder, ByteView}; +use arrow_data::ArrayDataBuilder; use arrow_schema::{DataType, SortOptions}; use builder::make_view; @@ -153,6 +153,8 @@ fn encode_blocks(out: &mut [u8], val: &[u8]) -> usize { end_offset } +/// Decodes a single block of data +/// The `f` function accepts a slice of the decoded data, it may be called multiple times pub fn decode_blocks(row: &[u8], options: SortOptions, mut f: impl FnMut(&[u8])) -> usize { let (non_empty_sentinel, continuation) = match options.descending { true => (!NON_EMPTY_SENTINEL, !BLOCK_CONTINUATION), @@ -271,36 +273,17 @@ fn decode_binary_view_inner( debug_assert_eq!(start_offset, values.len()); views.append(0); } else { - let view = make_view( - unsafe { values.get_unchecked(start_offset..) }, - 0, - start_offset as u32, - ); - views.append(view); - } - *row = &row[offset..]; - } + // Safety: we just appended the data to the end of the buffer + let val = unsafe { values.get_unchecked_mut(start_offset..) }; - if options.descending { - values.as_slice_mut().iter_mut().for_each(|o| *o = !*o); - for view in views.as_slice_mut() { - let len = *view as u32; - if len <= 12 { - let mut bytes = view.to_le_bytes(); - bytes - .iter_mut() - .skip(4) - .take(len as usize) - .for_each(|o| *o = !*o); - *view = u128::from_le_bytes(bytes); - } else { - let mut byte_view = ByteView::from(*view); - let mut prefix = byte_view.prefix.to_le_bytes(); - prefix.iter_mut().for_each(|o| *o = !*o); - byte_view.prefix = u32::from_le_bytes(prefix); - *view = byte_view.into(); + if options.descending { + val.iter_mut().for_each(|o| *o = !*o); } + + let view = make_view(val, 0, start_offset as u32); + views.append(view); } + *row = &row[offset..]; } if check_utf8 { diff --git a/arrow/benches/row_format.rs b/arrow/benches/row_format.rs index 7ef4a634203e..0fb63b5b3240 100644 --- a/arrow/benches/row_format.rs +++ b/arrow/benches/row_format.rs @@ -93,8 +93,8 @@ fn row_bench(c: &mut Criterion) { let cols = vec![Arc::new(create_string_view_array_with_len(4096, 0., 30, false)) as ArrayRef]; do_bench(c, "4096 string view(30, 0)", cols); - let cols = vec![Arc::new(create_string_view_array_with_len(4096, 0., 100, false)) as ArrayRef]; - do_bench(c, "4096 string view(100, 0)", cols); + let cols = vec![Arc::new(create_string_view_array_with_len(40960, 0., 100, false)) as ArrayRef]; + do_bench(c, "40960 string view(100, 0)", cols); let cols = vec![Arc::new(create_string_view_array_with_len(4096, 0.5, 100, false)) as ArrayRef]; do_bench(c, "4096 string view(100, 0.5)", cols);