Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: refine validation for decimal128 array #2428

Merged
merged 1 commit into from
Aug 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions arrow/benches/decimal_validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@
#[macro_use]
extern crate criterion;

use arrow::array::{Array, Decimal128Array, Decimal256Array, Decimal256Builder};
use arrow::array::{
Array, Decimal128Array, Decimal128Builder, Decimal256Array, Decimal256Builder,
};
use criterion::Criterion;
use num::BigInt;
use rand::Rng;

extern crate arrow;

Expand All @@ -34,7 +38,15 @@ fn validate_decimal256_array(array: Decimal256Array) {
}

fn validate_decimal128_benchmark(c: &mut Criterion) {
let decimal_array = Decimal128Array::from_iter_values(vec![12324; 20000]);
let mut rng = rand::thread_rng();
let size: i128 = 20000;
let mut decimal_builder = Decimal128Builder::new(size as usize, 38, 0);
for _ in 0..size {
decimal_builder
.append_value(rng.gen_range::<i128, _>(0..999999999999))
.unwrap();
}
let decimal_array = decimal_builder.finish();
let data = decimal_array.into_data();
c.bench_function("validate_decimal128_array 20000", |b| {
b.iter(|| {
Expand All @@ -45,13 +57,13 @@ fn validate_decimal128_benchmark(c: &mut Criterion) {
}

fn validate_decimal256_benchmark(c: &mut Criterion) {
let mut decimal_builder = Decimal256Builder::new(20000, 76, 0);
let mut bytes = vec![0; 32];
bytes[0..16].clone_from_slice(&12324_i128.to_le_bytes());
for _ in 0..20000 {
decimal_builder
.append_value(&Decimal256::new(76, 0, &bytes))
.unwrap();
let mut rng = rand::thread_rng();
let size: i128 = 20000;
let mut decimal_builder = Decimal256Builder::new(size as usize, 76, 0);
for _ in 0..size {
let v = rng.gen_range::<i128, _>(0..999999999999999);
let decimal = Decimal256::from_big_int(&BigInt::from(v), 76, 0).unwrap();
decimal_builder.append_value(&decimal).unwrap();
}
let decimal_array256_data = decimal_builder.finish();
let data = decimal_array256_data.into_data();
Expand Down
12 changes: 8 additions & 4 deletions arrow/src/array/array_decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,10 +273,14 @@ impl Decimal128Array {
// Validates decimal values in this array can be properly interpreted
// with the specified precision.
fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
for v in self.iter().flatten() {
validate_decimal_precision(v.as_i128(), precision)?;
}
Ok(())
(0..self.len()).try_for_each(|idx| {
if self.is_valid(idx) {
let decimal = unsafe { self.value_unchecked(idx) };
validate_decimal_precision(decimal.as_i128(), precision)
} else {
Ok(())
}
})
}

/// Returns a Decimal array with the same data as self, with the
Expand Down
27 changes: 4 additions & 23 deletions arrow/src/array/builder/decimal_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,33 +85,14 @@ impl Decimal128Builder {
/// Appends a decimal value into the builder.
#[inline]
pub fn append_value(&mut self, value: impl Into<i128>) -> Result<()> {
let value = if self.value_validation {
validate_decimal_precision(value.into(), self.precision)?
} else {
value.into()
};

let value_as_bytes =
Self::from_i128_to_fixed_size_bytes(value, Self::BYTE_LENGTH as usize)?;
if Self::BYTE_LENGTH != value_as_bytes.len() as i32 {
return Err(ArrowError::InvalidArgumentError(
"Byte slice does not have the same length as Decimal128Builder value lengths".to_string()
));
let value = value.into();
if self.value_validation {
validate_decimal_precision(value, self.precision)?
}
let value_as_bytes: [u8; 16] = value.to_le_bytes();
self.builder.append_value(value_as_bytes.as_slice())
}

pub(crate) fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result<Vec<u8>> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if size > 16 {
return Err(ArrowError::InvalidArgumentError(
"Decimal128Builder only supports values up to 16 bytes.".to_string(),
));
}
let res = v.to_le_bytes();
let start_byte = 16 - size;
Ok(res[start_byte..16].to_vec())
}

/// Append a null value to the array.
#[inline]
pub fn append_null(&mut self) {
Expand Down
6 changes: 1 addition & 5 deletions arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2825,11 +2825,7 @@ mod tests {
let byte_width = 16;
let mut fixed_size_builder =
FixedSizeListBuilder::new(values_builder, byte_width);
let value_as_bytes = Decimal128Builder::from_i128_to_fixed_size_bytes(
123456,
fixed_size_builder.value_length() as usize,
)
.unwrap();
let value_as_bytes = 123456_i128.to_le_bytes();
fixed_size_builder
.values()
.append_slice(value_as_bytes.as_slice());
Expand Down
10 changes: 8 additions & 2 deletions arrow/src/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -776,8 +776,14 @@ fn parse_decimal_with_parameter(s: &str, precision: usize, scale: usize) -> Resu
if negative {
result = result.neg();
}
validate_decimal_precision(result, precision)
.map_err(|e| ArrowError::ParseError(format!("parse decimal overflow: {}", e)))

match validate_decimal_precision(result, precision) {
Ok(_) => Ok(result),
Err(e) => Err(ArrowError::ParseError(format!(
"parse decimal overflow: {}",
e
))),
}
} else {
Err(ArrowError::ParseError(format!(
"can't parse the string value {} to decimal",
Expand Down
4 changes: 2 additions & 2 deletions arrow/src/datatypes/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -989,7 +989,7 @@ pub const DECIMAL_DEFAULT_SCALE: usize = 10;
/// Validates that the specified `i128` value can be properly
/// interpreted as a Decimal number with precision `precision`
#[inline]
pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Result<i128> {
pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Result<()> {
if precision > DECIMAL128_MAX_PRECISION {
return Err(ArrowError::InvalidArgumentError(format!(
"Max precision of a Decimal128 is {}, but got {}",
Expand All @@ -1011,7 +1011,7 @@ pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Resul
value, precision, min
)))
} else {
Ok(value)
Ok(())
}
}

Expand Down