Skip to content

Commit

Permalink
Implement min max support for string/binary view types (#6053)
Browse files Browse the repository at this point in the history
* add

* implement min max support for string/binary view

* update tests
  • Loading branch information
XiangpengHao authored Jul 15, 2024
1 parent 31b8ba0 commit 6d4e2f2
Showing 1 changed file with 142 additions and 46 deletions.
188 changes: 142 additions & 46 deletions arrow-arith/src/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,21 +415,41 @@ pub fn max_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&
min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
}

/// Returns the largest value stored in a binary view array.
///
/// Values are compared as `&[u8]`, i.e. byte-wise lexicographically. The result is
/// `None` when the array holds only nulls.
pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
    min_max_helper::<&[u8], _, _>(array, |lhs, rhs| *rhs > *lhs)
}

/// Returns the smallest value stored in a binary array of any offset size `T`.
///
/// Values are compared as `&[u8]`, i.e. byte-wise lexicographically. The result is
/// `None` when the array holds only nulls.
pub fn min_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
    min_max_helper::<&[u8], _, _>(array, |lhs, rhs| *rhs < *lhs)
}

/// Returns the smallest value stored in a binary view array.
///
/// Values are compared as `&[u8]`, i.e. byte-wise lexicographically. The result is
/// `None` when the array holds only nulls.
pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
    min_max_helper::<&[u8], _, _>(array, |lhs, rhs| *rhs < *lhs)
}

/// Returns the largest value stored in a string array of any offset size `T`.
///
/// Values are compared as `&str`, which orders strings lexicographically by their
/// byte values. The result is `None` when the array is empty or holds only nulls.
pub fn max_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
    min_max_helper::<&str, _, _>(array, |lhs, rhs| *rhs > *lhs)
}

/// Returns the largest value stored in a string view array.
///
/// Values are compared as `&str`, which orders strings lexicographically by their
/// byte values. The result is `None` when the array is empty or holds only nulls.
pub fn max_string_view(array: &StringViewArray) -> Option<&str> {
    min_max_helper::<&str, _, _>(array, |lhs, rhs| *rhs > *lhs)
}

/// Returns the smallest value stored in a string array of any offset size `T`.
///
/// Values are compared as `&str`, which orders strings lexicographically by their
/// byte values. The result is `None` when the array is empty or holds only nulls.
pub fn min_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
    min_max_helper::<&str, _, _>(array, |lhs, rhs| *rhs < *lhs)
}

/// Returns the smallest value stored in a string view array.
///
/// Values are compared as `&str`, which orders strings lexicographically by their
/// byte values. The result is `None` when the array is empty or holds only nulls.
pub fn min_string_view(array: &StringViewArray) -> Option<&str> {
    min_max_helper::<&str, _, _>(array, |lhs, rhs| *rhs < *lhs)
}

/// Returns the sum of values in the array.
///
/// This doesn't detect overflow. Once overflowing, the result will wrap around.
Expand Down Expand Up @@ -1132,61 +1152,137 @@ mod tests {
assert!(max(&a).unwrap().is_nan());
}

#[test]
fn test_binary_min_max_with_nulls() {
let a = BinaryArray::from(vec![
Some("b".as_bytes()),
/// Declares a `#[test]` function called `$name` that checks the binary min/max
/// kernels against a single input.
///
/// `$values` is expanded once per array type, so the expression is evaluated three
/// times: for `BinaryArray`, `LargeBinaryArray` and `BinaryViewArray`. Every array
/// must yield `$want_min` from its min kernel and `$want_max` from its max kernel.
macro_rules! test_binary {
    ($name:ident, $values:expr, $want_min:expr, $want_max:expr) => {
        #[test]
        fn $name() {
            let regular = BinaryArray::from($values);
            assert_eq!($want_min, min_binary(&regular));
            assert_eq!($want_max, max_binary(&regular));

            let large = LargeBinaryArray::from($values);
            assert_eq!($want_min, min_binary(&large));
            assert_eq!($want_max, max_binary(&large));

            let view = BinaryViewArray::from($values);
            assert_eq!($want_min, min_binary_view(&view));
            assert_eq!($want_max, max_binary_view(&view));
        }
    };
}

test_binary!(
test_binary_min_max_with_nulls,
vec![
Some("b01234567890123".as_bytes()), // long bytes
None,
None,
Some(b"a"),
Some(b"c"),
]);
assert_eq!(Some("a".as_bytes()), min_binary(&a));
assert_eq!(Some("c".as_bytes()), max_binary(&a));
}

#[test]
fn test_binary_min_max_no_null() {
let a = BinaryArray::from(vec![Some("b".as_bytes()), Some(b"a"), Some(b"c")]);
assert_eq!(Some("a".as_bytes()), min_binary(&a));
assert_eq!(Some("c".as_bytes()), max_binary(&a));
}
Some(b"abcdedfg0123456"),
],
Some("a".as_bytes()),
Some("c".as_bytes())
);

// No nulls: the minimum is the 20-byte value starting with 'a' (it sorts before
// every value starting with 'b' or 'c'); the maximum is the single byte "c".
test_binary!(
test_binary_min_max_no_null,
vec![
Some("b".as_bytes()),
Some(b"abcdefghijklmnopqrst"), // long bytes
Some(b"c"),
Some(b"b01234567890123"), // long bytes for view types
],
Some("abcdefghijklmnopqrst".as_bytes()),
Some("c".as_bytes())
);

#[test]
fn test_binary_min_max_all_nulls() {
let a = BinaryArray::from(vec![None, None]);
assert_eq!(None, min_binary(&a));
assert_eq!(None, max_binary(&a));
}
// Only nulls: min and max are expected to be `None` for every binary array type.
test_binary!(test_binary_min_max_all_nulls, vec![None, None], None, None);

#[test]
fn test_binary_min_max_1() {
let a = BinaryArray::from(vec![None, None, Some("b".as_bytes()), Some(b"a")]);
assert_eq!(Some("a".as_bytes()), min_binary(&a));
assert_eq!(Some("b".as_bytes()), max_binary(&a));
}

#[test]
fn test_string_min_max_with_nulls() {
let a = StringArray::from(vec![Some("b"), None, None, Some("a"), Some("c")]);
assert_eq!(Some("a"), min_string(&a));
assert_eq!(Some("c"), max_string(&a));
// Nulls mixed with values. The two long values share the prefix "b0123", so the
// maximum is decided at the first differing byte ('x' sorts after '4').
test_binary!(
test_binary_min_max_1,
vec![
None,
Some("b01234567890123435".as_bytes()), // long bytes for view types
None,
Some(b"b0123xxxxxxxxxxx"),
Some(b"a")
],
Some("a".as_bytes()),
Some("b0123xxxxxxxxxxx".as_bytes())
);

/// Declares a `#[test]` function called `$name` that checks the string min/max
/// kernels against a single input.
///
/// `$values` is expanded once per array type, so the expression is evaluated three
/// times: for `StringArray`, `LargeStringArray` and `StringViewArray`. Every array
/// must yield `$want_min` from its min kernel and `$want_max` from its max kernel.
macro_rules! test_string {
    ($name:ident, $values:expr, $want_min:expr, $want_max:expr) => {
        #[test]
        fn $name() {
            let regular = StringArray::from($values);
            assert_eq!($want_min, min_string(&regular));
            assert_eq!($want_max, max_string(&regular));

            let large = LargeStringArray::from($values);
            assert_eq!($want_min, min_string(&large));
            assert_eq!($want_max, max_string(&large));

            let view = StringViewArray::from($values);
            assert_eq!($want_min, min_string_view(&view));
            assert_eq!($want_max, max_string_view(&view));
        }
    };
}

#[test]
fn test_string_min_max_all_nulls() {
let v: Vec<Option<&str>> = vec![None, None];
let a = StringArray::from(v);
assert_eq!(None, min_string(&a));
assert_eq!(None, max_string(&a));
}
// Nulls interleaved with short and long values; the single-character strings "a"
// and "c" are the extremes because every long value starts with 'b'.
test_string!(
test_string_min_max_with_nulls,
vec![
Some("b012345678901234"), // long bytes for view types
None,
None,
Some("a"),
Some("c"),
Some("b0123xxxxxxxxxxx")
],
Some("a"),
Some("c")
);

// No nulls. The two long values share the prefix "b012" and differ at the fifth
// byte ('x' sorts after '3'), which decides the maximum.
test_string!(
test_string_min_max_no_null,
vec![
Some("b"),
Some("b012345678901234"), // long bytes for view types
Some("a"),
Some("b012xxxxxxxxxxxx")
],
Some("a"),
Some("b012xxxxxxxxxxxx")
);

// Only nulls: min and max are expected to be `None` for every string array type.
// The element type is written out explicitly (presumably needed for type
// inference when every element is `None` — confirm).
test_string!(
test_string_min_max_all_nulls,
Vec::<Option<&str>>::from_iter([None, None]),
None,
None
);

#[test]
fn test_string_min_max_1() {
let a = StringArray::from(vec![None, None, Some("b"), Some("a")]);
assert_eq!(Some("a"), min_string(&a));
assert_eq!(Some("b"), max_string(&a));
}
// Nulls mixed with values. The two long values share the prefix "c1234" and
// differ at the next byte ('x' sorts after '5'), which decides the maximum.
test_string!(
test_string_min_max_1,
vec![
None,
Some("c12345678901234"), // long bytes for view types
None,
Some("b"),
Some("c1234xxxxxxxxxx")
],
Some("b"),
Some("c1234xxxxxxxxxx")
);

// Empty input: with no elements at all, min and max are expected to be `None`.
test_string!(
test_string_min_max_empty,
Vec::<Option<&str>>::new(),
None,
None
);

#[test]
fn test_boolean_min_max_empty() {
Expand Down

0 comments on commit 6d4e2f2

Please sign in to comment.