Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support bitwise and as an example #1653

Merged
merged 5 commits into from
Jan 31, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions datafusion/src/logical_plan/operators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ pub enum Operator {
RegexNotMatch,
/// Case insensitive regex not match
RegexNotIMatch,
/// Bitwise and, like `&`
BitwiseAnd,
}

impl fmt::Display for Operator {
Expand All @@ -90,6 +92,7 @@ impl fmt::Display for Operator {
Operator::RegexNotIMatch => "!~*",
Operator::IsDistinctFrom => "IS DISTINCT FROM",
Operator::IsNotDistinctFrom => "IS NOT DISTINCT FROM",
Operator::BitwiseAnd => "&",
};
write!(f, "{}", display)
}
Expand Down
20 changes: 20 additions & 0 deletions datafusion/src/physical_plan/coercion_rule/binary_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pub(crate) fn coerce_types(
) -> Result<DataType> {
// This result MUST be compatible with `binary_coerce`
let result = match op {
Operator::BitwiseAnd => bitwise_coercion(lhs_type, rhs_type),
Operator::And | Operator::Or => match (lhs_type, rhs_type) {
// logical binary boolean operators can only be evaluated in bools
(DataType::Boolean, DataType::Boolean) => Some(DataType::Boolean),
Expand Down Expand Up @@ -72,6 +73,25 @@ pub(crate) fn coerce_types(
}
}

fn bitwise_coercion(left_type: &DataType, right_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;

if !is_numeric(left_type) || !is_numeric(right_type) {
return None;
}
if left_type == right_type && !is_dictionary(left_type) {
return Some(left_type.clone());
}
// TODO support other data type
match (left_type, right_type) {
(Int64, _) | (_, Int64) => Some(Int64),
(Int32, _) | (_, Int32) => Some(Int32),
(Int16, _) | (_, Int16) => Some(Int16),
(Int8, _) | (_, Int8) => Some(Int8),
_ => None,
}
}

fn comparison_eq_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
// can't compare dictionaries directly due to
// https://github.com/apache/arrow-rs/issues/1201
Expand Down
131 changes: 131 additions & 0 deletions datafusion/src/physical_plan/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,100 @@ fn modulus_decimal(left: &DecimalArray, right: &DecimalArray) -> Result<DecimalA
Ok(decimal_builder.finish())
}

/// Applies a bitwise operation element-wise to two arrays of the same
/// integer array type (for example `Int64Array` or `Int32Array`).
///
/// Both `$LEFT` and `$RIGHT` are downcast to `$ARRAY_TYPE` and the downcast
/// is unwrapped, so an array of any other concrete type panics. A result
/// slot is null whenever the corresponding slot of either input is null.
/// The macro evaluates to `Ok(Arc::new(..))` wrapping the collected
/// `$ARRAY_TYPE`.
///
/// The loop runs over `$LEFT.len()` indices; presumably both arrays are
/// expected to have the same length (TODO confirm against callers).
///
/// NOTE(review): `$OP` and `$TYPE` are accepted by the matcher but are not
/// used by the body as written; the `Some(..)` expression hard-codes `&`.
macro_rules! binary_bitwise_array_op {
($LEFT:expr, $RIGHT:expr, $OP:tt, $ARRAY_TYPE:ident, $TYPE:ty) => {{
let len = $LEFT.len();
let left = $LEFT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
let right = $RIGHT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
let result = (0..len)
.into_iter()
.map(|i| {
if left.is_null(i) || right.is_null(i) {
None
} else {
Some(left.value(i) & right.value(i))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As below, I think this is meant to be $OP

Suggested change
Some(left.value(i) & right.value(i))
Some(left.value(i) $OP right.value(i))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed in f81bffb (I was trying to make a follow on PR but then I found the logical conflict so I figured I would fix it directly on this PR).

I hope that is OK with you

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is from my mistake.

}
})
.collect::<$ARRAY_TYPE>();
Ok(Arc::new(result))
}};
}

/// Applies a bitwise operation between every element of an integer array
/// (for example `Int64Array` or `Int32Array`) and a single scalar value.
///
/// `$LEFT` is downcast to `$ARRAY_TYPE` and the downcast is unwrapped, so an
/// array of any other concrete type panics. When the scalar `$RIGHT` reports
/// `is_null()`, the result is an all-null array of the input's data type and
/// length. Otherwise the scalar is converted to `$TYPE` with
/// `try_into().unwrap()` (a failed conversion panics) and combined with each
/// non-null element; null elements stay null. The macro evaluates to
/// `Ok(..)` holding an `ArrayRef`.
///
/// NOTE(review): `$OP` is accepted by the matcher but is not used by the
/// body as written; the `Some(..)` expression hard-codes `&`.
macro_rules! binary_bitwise_array_scalar {
($LEFT:expr, $RIGHT:expr, $OP:tt, $ARRAY_TYPE:ident, $TYPE:ty) => {{
let len = $LEFT.len();
let array = $LEFT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
let scalar = $RIGHT;
if scalar.is_null() {
Ok(new_null_array(array.data_type(), len))
} else {
let right: $TYPE = scalar.try_into().unwrap();
let result = (0..len)
.into_iter()
.map(|i| {
if array.is_null(i) {
None
} else {
Some(array.value(i) & right)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this supposed to be $OP rather than &?

Suggested change
Some(array.value(i) & right)
Some(array.value(i) $OP right)

I don't think it matters for correctness but it may be confusing in the future

}
})
.collect::<$ARRAY_TYPE>();
Ok(Arc::new(result) as ArrayRef)
}
}};
}

/// Computes the element-wise bitwise AND of two arrays.
///
/// The kernel is chosen from the data type of `left`; only `Int8`, `Int16`,
/// `Int32` and `Int64` are handled. Any other data type produces a
/// `DataFusionError::Internal` naming the type and the operator.
fn bitwise_and(left: ArrayRef, right: ArrayRef) -> Result<ArrayRef> {
    match left.data_type() {
        DataType::Int8 => binary_bitwise_array_op!(left, right, &, Int8Array, i8),
        DataType::Int16 => binary_bitwise_array_op!(left, right, &, Int16Array, i16),
        DataType::Int32 => binary_bitwise_array_op!(left, right, &, Int32Array, i32),
        DataType::Int64 => binary_bitwise_array_op!(left, right, &, Int64Array, i64),
        unsupported => Err(DataFusionError::Internal(format!(
            "Data type {:?} not supported for binary operation '{}' on dyn arrays",
            unsupported,
            Operator::BitwiseAnd
        ))),
    }
}

/// Computes the bitwise AND of every element of `array` with `scalar`.
///
/// The kernel is chosen from the data type of `array`; only `Int8`, `Int16`,
/// `Int32` and `Int64` are handled, and any other data type produces a
/// `DataFusionError::Internal`. The outcome is always wrapped in `Some`.
fn bitwise_and_scalar(array: &ArrayRef, scalar: ScalarValue) -> Option<Result<ArrayRef>> {
    Some(match array.data_type() {
        DataType::Int8 => binary_bitwise_array_scalar!(array, scalar, &, Int8Array, i8),
        DataType::Int16 => binary_bitwise_array_scalar!(array, scalar, &, Int16Array, i16),
        DataType::Int32 => binary_bitwise_array_scalar!(array, scalar, &, Int32Array, i32),
        DataType::Int64 => binary_bitwise_array_scalar!(array, scalar, &, Int64Array, i64),
        unsupported => Err(DataFusionError::Internal(format!(
            "Data type {:?} not supported for binary operation '{}' on dyn arrays",
            unsupported,
            Operator::BitwiseAnd
        ))),
    })
}

/// Binary expression
#[derive(Debug)]
pub struct BinaryExpr {
Expand Down Expand Up @@ -811,6 +905,8 @@ pub fn binary_operator_data_type(
| Operator::RegexNotIMatch
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom => Ok(DataType::Boolean),
// bitwise operations return the common coerced type
Operator::BitwiseAnd => Ok(result_type),
// math operations return the same value as the common coerced type
Operator::Plus
| Operator::Minus
Expand Down Expand Up @@ -939,6 +1035,7 @@ impl BinaryExpr {
true,
true
),
Operator::BitwiseAnd => bitwise_and_scalar(array, scalar.clone()),
// if scalar operation is not supported - fallback to array implementation
_ => None,
};
Expand Down Expand Up @@ -1027,6 +1124,7 @@ impl BinaryExpr {
Operator::RegexNotIMatch => {
binary_string_array_flag_op!(left, right, regexp_is_match, true, true)
}
Operator::BitwiseAnd => bitwise_and(left, right),
}
}
}
Expand Down Expand Up @@ -1464,6 +1562,18 @@ mod tests {
DataType::Boolean,
vec![false, false, false, false, true]
);
test_coercion!(
Int16Array,
DataType::Int16,
vec![1i16, 2i16, 3i16],
Int64Array,
DataType::Int64,
vec![10i64, 4i64, 5i64],
Operator::BitwiseAnd,
Int64Array,
DataType::Int64,
vec![0i64, 0i64, 1i64]
);
Ok(())
}

Expand Down Expand Up @@ -2838,4 +2948,25 @@ mod tests {

Ok(())
}

#[test]
fn bitwise_array_test() -> Result<()> {
    // 12 & 1 == 0, a null operand yields null, 11 & 7 == 3.
    let lhs: ArrayRef = Arc::new(Int32Array::from(vec![Some(12), None, Some(11)]));
    let rhs: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(3), Some(7)]));
    let expected = Int32Array::from(vec![Some(0), None, Some(3)]);

    let actual = bitwise_and(lhs, rhs)?;
    assert_eq!(actual.as_ref(), &expected);
    Ok(())
}

#[test]
fn bitwise_scalar_test() -> Result<()> {
    // 12 & 3 == 0, a null element stays null, 11 & 3 == 3.
    let input: ArrayRef = Arc::new(Int32Array::from(vec![Some(12), None, Some(11)]));
    let mask = ScalarValue::from(3i32);
    let expected = Int32Array::from(vec![Some(0), None, Some(3)]);

    let actual = bitwise_and_scalar(&input, mask).unwrap()?;
    assert_eq!(actual.as_ref(), &expected);
    Ok(())
}
}
1 change: 1 addition & 0 deletions datafusion/src/sql/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
BinaryOperator::PGRegexIMatch => Ok(Operator::RegexIMatch),
BinaryOperator::PGRegexNotMatch => Ok(Operator::RegexNotMatch),
BinaryOperator::PGRegexNotIMatch => Ok(Operator::RegexNotIMatch),
BinaryOperator::BitwiseAnd => Ok(Operator::BitwiseAnd),
_ => Err(DataFusionError::NotImplemented(format!(
"Unsupported SQL binary operator {:?}",
op
Expand Down