Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support DecimalType in sort and take kernels #1172

Merged
merged 2 commits into from
Jan 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
234 changes: 229 additions & 5 deletions arrow/src/compute/kernels/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ pub fn sort_to_indices(
let (v, n) = partition_validity(values);

Ok(match values.data_type() {
DataType::Decimal(_, _) => sort_decimal(values, v, n, cmp, &options, limit),
DataType::Boolean => sort_boolean(values, v, n, &options, limit),
DataType::Int8 => {
sort_primitive::<Int8Type, _>(values, v, n, cmp, &options, limit)
Expand Down Expand Up @@ -293,7 +294,7 @@ pub fn sort_to_indices(
return Err(ArrowError::ComputeError(format!(
"Sort not supported for list type {:?}",
t
)))
)));
}
},
DataType::LargeList(field) => match field.data_type() {
Expand Down Expand Up @@ -321,7 +322,7 @@ pub fn sort_to_indices(
return Err(ArrowError::ComputeError(format!(
"Sort not supported for list type {:?}",
t
)))
)));
}
},
DataType::FixedSizeList(field, _) => match field.data_type() {
Expand Down Expand Up @@ -349,7 +350,7 @@ pub fn sort_to_indices(
return Err(ArrowError::ComputeError(format!(
"Sort not supported for list type {:?}",
t
)))
)));
}
},
DataType::Dictionary(key_type, value_type)
Expand Down Expand Up @@ -384,7 +385,7 @@ pub fn sort_to_indices(
return Err(ArrowError::ComputeError(format!(
"Sort not supported for dictionary key type {:?}",
t
)))
)));
}
}
}
Expand All @@ -396,7 +397,7 @@ pub fn sort_to_indices(
return Err(ArrowError::ComputeError(format!(
"Sort not supported for data type {:?}",
t
)))
)));
}
})
}
Expand Down Expand Up @@ -509,6 +510,30 @@ fn sort_boolean(
UInt32Array::from(result_data)
}

/// Sorts a decimal array and returns the resulting row order as a `UInt32Array`.
///
/// Each entry of `value_indices` is paired with the `i128` stored at that position, and
/// the pairs are handed to `sort_primitive_inner` together with `null_indices`, the
/// comparator `cmp`, the sort `options` and the optional `limit`.
///
/// NOTE(review): `value_indices` / `null_indices` presumably hold the valid and null row
/// positions produced by the caller's validity partitioning — confirm against the caller.
///
/// # Panics
///
/// Panics if `decimal_values` cannot be downcast to a `DecimalArray`.
fn sort_decimal<F>(
    decimal_values: &ArrayRef,
    value_indices: Vec<u32>,
    null_indices: Vec<u32>,
    cmp: F,
    options: &SortOptions,
    limit: Option<usize>,
) -> UInt32Array
where
    F: Fn(i128, i128) -> std::cmp::Ordering,
{
    // The sort below works on raw i128 values, so the concrete array type is required.
    let array = decimal_values
        .as_any()
        .downcast_ref::<DecimalArray>()
        .expect("Unable to downcast to decimal array");

    // Pair every index with the value it points at, preserving the incoming order.
    let mut indexed_values: Vec<(u32, i128)> = Vec::with_capacity(value_indices.len());
    for idx in value_indices {
        indexed_values.push((idx, array.value(idx as usize)));
    }

    sort_primitive_inner(
        decimal_values,
        null_indices,
        cmp,
        options,
        limit,
        indexed_values,
    )
}

/// Sort primitive values
fn sort_primitive<T, F>(
values: &ArrayRef,
Expand Down Expand Up @@ -1080,6 +1105,49 @@ mod tests {
use std::convert::TryFrom;
use std::sync::Arc;

/// Test helper: builds a `DecimalArray` from `data`, appending a value for every `Some`
/// and a null for every `None`, in order.
///
/// The builder is created with the fixed arguments `(20, 23, 6)`.
/// NOTE(review): presumably capacity, precision and scale — confirm against
/// `DecimalBuilder::new`.
fn create_decimal_array(data: &[Option<i128>]) -> DecimalArray {
    let mut builder = DecimalBuilder::new(20, 23, 6);

    for item in data.iter().copied() {
        match item {
            Some(value) => {
                builder.append_value(value).unwrap();
            }
            None => {
                builder.append_null().unwrap();
            }
        }
    }

    builder.finish()
}

/// Test helper: builds a decimal array from `data`, runs `sort_to_indices` on it with the
/// given `options` and `limit`, and asserts that the returned indices equal `expected_data`.
fn test_sort_to_indices_decimal_array(
    data: Vec<Option<i128>>,
    options: Option<SortOptions>,
    limit: Option<usize>,
    expected_data: Vec<u32>,
) {
    let input: ArrayRef = Arc::new(create_decimal_array(&data));
    let expected = UInt32Array::from(expected_data);

    let actual = sort_to_indices(&input, options, limit).unwrap();

    assert_eq!(actual, expected)
}

/// Test helper: builds a decimal array from `data`, sorts it, and asserts that the result
/// equals the decimal array built from `expected_data`.
///
/// When `limit` is `Some`, `sort_limit` is used; otherwise plain `sort` is used.
fn test_sort_decimal_array(
    data: Vec<Option<i128>>,
    options: Option<SortOptions>,
    limit: Option<usize>,
    expected_data: Vec<Option<i128>>,
) {
    let input: ArrayRef = Arc::new(create_decimal_array(&data));
    let expected: ArrayRef = Arc::new(create_decimal_array(&expected_data));

    let actual = if limit.is_some() {
        sort_limit(&input, options, limit).unwrap()
    } else {
        sort(&input, options).unwrap()
    };

    assert_eq!(&actual, &expected)
}

fn test_sort_to_indices_boolean_arrays(
data: Vec<Option<bool>>,
options: Option<SortOptions>,
Expand Down Expand Up @@ -1659,6 +1727,162 @@ mod tests {
);
}

#[test]
fn test_sort_indices_decimal128() {
    // Every case sorts the same input; only the options, limit and expected indices vary.
    let input = || -> Vec<Option<i128>> {
        vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
    };
    let opts = |descending: bool, nulls_first: bool| {
        Some(SortOptions {
            descending,
            nulls_first,
        })
    };

    // (options, limit, expected indices)
    let cases: Vec<(Option<SortOptions>, Option<usize>, Vec<u32>)> = vec![
        // decimal default
        (None, None, vec![0, 6, 4, 2, 3, 5, 1]),
        // decimal descending
        (opts(true, false), None, vec![1, 5, 3, 2, 4, 6, 0]),
        // decimal null_first and descending
        (opts(true, true), None, vec![6, 0, 1, 5, 3, 2, 4]),
        // decimal null_first
        (opts(false, true), None, vec![0, 6, 4, 2, 3, 5, 1]),
        // limit
        (None, Some(3), vec![0, 6, 4]),
        // limit descending
        (opts(true, false), Some(3), vec![1, 5, 3]),
        // limit descending null_first
        (opts(true, true), Some(3), vec![6, 0, 1]),
        // limit null_first
        (opts(false, true), Some(3), vec![0, 6, 4]),
    ];

    for (options, limit, expected) in cases {
        test_sort_to_indices_decimal_array(input(), options, limit, expected);
    }
}

#[test]
fn test_sort_decimal128() {
    // Every case sorts the same input; only the options, limit and expected values vary.
    let input = || -> Vec<Option<i128>> {
        vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
    };
    let opts = |descending: bool, nulls_first: bool| {
        Some(SortOptions {
            descending,
            nulls_first,
        })
    };

    // (options, limit, expected sorted values)
    let cases: Vec<(Option<SortOptions>, Option<usize>, Vec<Option<i128>>)> = vec![
        // decimal default
        (
            None,
            None,
            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
        ),
        // decimal descending
        (
            opts(true, false),
            None,
            vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
        ),
        // decimal null_first and descending
        (
            opts(true, true),
            None,
            vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
        ),
        // decimal null_first
        (
            opts(false, true),
            None,
            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
        ),
        // limit
        (None, Some(3), vec![None, None, Some(1)]),
        // limit descending
        (opts(true, false), Some(3), vec![Some(5), Some(4), Some(3)]),
        // limit descending null_first
        (opts(true, true), Some(3), vec![None, None, Some(5)]),
        // limit null_first
        (opts(false, true), Some(3), vec![None, None, Some(1)]),
    ];

    for (options, limit, expected) in cases {
        test_sort_decimal_array(input(), options, limit, expected);
    }
}

#[test]
fn test_sort_primitives() {
// default case
Expand Down
Loading