diff --git a/datafusion/src/logical_plan/operators.rs b/datafusion/src/logical_plan/operators.rs index fdfd3f3ca267..14ccab0537bd 100644 --- a/datafusion/src/logical_plan/operators.rs +++ b/datafusion/src/logical_plan/operators.rs @@ -64,6 +64,8 @@ pub enum Operator { RegexNotMatch, /// Case insensitive regex not match RegexNotIMatch, + /// Bitwise and, like `&` + BitwiseAnd, } impl fmt::Display for Operator { @@ -90,6 +92,7 @@ impl fmt::Display for Operator { Operator::RegexNotIMatch => "!~*", Operator::IsDistinctFrom => "IS DISTINCT FROM", Operator::IsNotDistinctFrom => "IS NOT DISTINCT FROM", + Operator::BitwiseAnd => "&", }; write!(f, "{}", display) } diff --git a/datafusion/src/physical_plan/coercion_rule/binary_rule.rs b/datafusion/src/physical_plan/coercion_rule/binary_rule.rs index 982a4cb1bbc4..426d59f033e9 100644 --- a/datafusion/src/physical_plan/coercion_rule/binary_rule.rs +++ b/datafusion/src/physical_plan/coercion_rule/binary_rule.rs @@ -31,6 +31,7 @@ pub(crate) fn coerce_types( ) -> Result { // This result MUST be compatible with `binary_coerce` let result = match op { + Operator::BitwiseAnd => bitwise_coercion(lhs_type, rhs_type), Operator::And | Operator::Or => match (lhs_type, rhs_type) { // logical binary boolean operators can only be evaluated in bools (DataType::Boolean, DataType::Boolean) => Some(DataType::Boolean), @@ -72,6 +73,25 @@ pub(crate) fn coerce_types( } } +fn bitwise_coercion(left_type: &DataType, right_type: &DataType) -> Option { + use arrow::datatypes::DataType::*; + + if !is_numeric(left_type) || !is_numeric(right_type) { + return None; + } + if left_type == right_type && !is_dictionary(left_type) { + return Some(left_type.clone()); + } + // TODO: support other data types + match (left_type, right_type) { + (Int64, _) | (_, Int64) => Some(Int64), + (Int32, _) | (_, Int32) => Some(Int32), + (Int16, _) | (_, Int16) => Some(Int16), + (Int8, _) | (_, Int8) => Some(Int8), + _ => None, + } +} + fn comparison_eq_coercion(lhs_type: 
&DataType, rhs_type: &DataType) -> Option { // can't compare dictionaries directly due to // https://github.com/apache/arrow-rs/issues/1201 diff --git a/datafusion/src/physical_plan/expressions/binary.rs b/datafusion/src/physical_plan/expressions/binary.rs index 4680dd0a49d9..d1fc3bcdc029 100644 --- a/datafusion/src/physical_plan/expressions/binary.rs +++ b/datafusion/src/physical_plan/expressions/binary.rs @@ -348,6 +348,103 @@ fn modulus_decimal(left: &DecimalArray, right: &DecimalArray) -> Result {{ + let len = $LEFT.len(); + let left = $LEFT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap(); + let right = $RIGHT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap(); + let result = (0..len) + .into_iter() + .map(|i| { + if left.is_null(i) || right.is_null(i) { + None + } else { + Some(left.value(i) $OP right.value(i)) + } + }) + .collect::<$ARRAY_TYPE>(); + Ok(Arc::new(result)) + }}; +} + +/// The binary_bitwise_array_scalar macro only evaluates for integer types +/// like int64, int32. +/// It is used to perform a bitwise operation on an array with a scalar. +macro_rules! 
binary_bitwise_array_scalar { + ($LEFT:expr, $RIGHT:expr, $OP:tt, $ARRAY_TYPE:ident, $TYPE:ty) => {{ + let len = $LEFT.len(); + let array = $LEFT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap(); + let scalar = $RIGHT; + if scalar.is_null() { + Ok(new_null_array(array.data_type(), len)) + } else { + let right: $TYPE = scalar.try_into().unwrap(); + let result = (0..len) + .into_iter() + .map(|i| { + if array.is_null(i) { + None + } else { + Some(array.value(i) $OP right) + } + }) + .collect::<$ARRAY_TYPE>(); + Ok(Arc::new(result) as ArrayRef) + } + }}; +} + +fn bitwise_and(left: ArrayRef, right: ArrayRef) -> Result { + match &left.data_type() { + DataType::Int8 => { + binary_bitwise_array_op!(left, right, &, Int8Array, i8) + } + DataType::Int16 => { + binary_bitwise_array_op!(left, right, &, Int16Array, i16) + } + DataType::Int32 => { + binary_bitwise_array_op!(left, right, &, Int32Array, i32) + } + DataType::Int64 => { + binary_bitwise_array_op!(left, right, &, Int64Array, i64) + } + other => Err(DataFusionError::Internal(format!( + "Data type {:?} not supported for binary operation '{}' on dyn arrays", + other, + Operator::BitwiseAnd + ))), + } +} + +fn bitwise_and_scalar( + array: &dyn Array, + scalar: ScalarValue, +) -> Option> { + let result = match array.data_type() { + DataType::Int8 => { + binary_bitwise_array_scalar!(array, scalar, &, Int8Array, i8) + } + DataType::Int16 => { + binary_bitwise_array_scalar!(array, scalar, &, Int16Array, i16) + } + DataType::Int32 => { + binary_bitwise_array_scalar!(array, scalar, &, Int32Array, i32) + } + DataType::Int64 => { + binary_bitwise_array_scalar!(array, scalar, &, Int64Array, i64) + } + other => Err(DataFusionError::Internal(format!( + "Data type {:?} not supported for binary operation '{}' on dyn arrays", + other, + Operator::BitwiseAnd + ))), + }; + Some(result) +} + /// Binary expression #[derive(Debug)] pub struct BinaryExpr { @@ -880,6 +977,8 @@ pub fn binary_operator_data_type( | Operator::RegexNotIMatch | 
Operator::IsDistinctFrom | Operator::IsNotDistinctFrom => Ok(DataType::Boolean), + // bitwise operations return the common coerced type + Operator::BitwiseAnd => Ok(result_type), // math operations return the same value as the common coerced type Operator::Plus | Operator::Minus @@ -1055,6 +1154,7 @@ impl BinaryExpr { true, true ), + Operator::BitwiseAnd => bitwise_and_scalar(array, scalar.clone()), // if scalar operation is not supported - fallback to array implementation _ => None, }; @@ -1143,6 +1243,7 @@ impl BinaryExpr { Operator::RegexNotIMatch => { binary_string_array_flag_op!(left, right, regexp_is_match, true, true) } + Operator::BitwiseAnd => bitwise_and(left, right), } } } @@ -1580,6 +1681,18 @@ mod tests { DataType::Boolean, vec![false, false, false, false, true] ); + test_coercion!( + Int16Array, + DataType::Int16, + vec![1i16, 2i16, 3i16], + Int64Array, + DataType::Int64, + vec![10i64, 4i64, 5i64], + Operator::BitwiseAnd, + Int64Array, + DataType::Int64, + vec![0i64, 0i64, 1i64] + ); Ok(()) } @@ -2954,4 +3067,25 @@ mod tests { Ok(()) } + + #[test] + fn bitwise_array_test() -> Result<()> { + let left = Arc::new(Int32Array::from(vec![Some(12), None, Some(11)])) as ArrayRef; + let right = + Arc::new(Int32Array::from(vec![Some(1), Some(3), Some(7)])) as ArrayRef; + let result = bitwise_and(left, right)?; + let expected = Int32Array::from(vec![Some(0), None, Some(3)]); + assert_eq!(result.as_ref(), &expected); + Ok(()) + } + + #[test] + fn bitwise_scalar_test() -> Result<()> { + let left = Arc::new(Int32Array::from(vec![Some(12), None, Some(11)])) as ArrayRef; + let right = ScalarValue::from(3i32); + let result = bitwise_and_scalar(&left, right).unwrap()?; + let expected = Int32Array::from(vec![Some(0), None, Some(3)]); + assert_eq!(result.as_ref(), &expected); + Ok(()) + } } diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index a74c44665de1..462977274ecb 100644 --- a/datafusion/src/sql/planner.rs +++ 
b/datafusion/src/sql/planner.rs @@ -1276,6 +1276,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { BinaryOperator::PGRegexIMatch => Ok(Operator::RegexIMatch), BinaryOperator::PGRegexNotMatch => Ok(Operator::RegexNotMatch), BinaryOperator::PGRegexNotIMatch => Ok(Operator::RegexNotIMatch), + BinaryOperator::BitwiseAnd => Ok(Operator::BitwiseAnd), _ => Err(DataFusionError::NotImplemented(format!( "Unsupported SQL binary operator {:?}", op