From fe7b8782f03d699416d54757576cf628e54d50e4 Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Thu, 11 Jul 2024 08:03:54 -0600
Subject: [PATCH] refactor errors

---
 native/core/src/errors.rs                    | 18 ++---
 .../execution/datafusion/expressions/mod.rs  |  6 +-
 .../datafusion/expressions/negative.rs       |  6 +-
 native/spark-expr/src/abs.rs                 |  7 +-
 native/spark-expr/src/error.rs               | 80 +++++++++++++++++++
 native/spark-expr/src/lib.rs                 | 64 +--------------
 6 files changed, 97 insertions(+), 84 deletions(-)
 create mode 100644 native/spark-expr/src/error.rs

diff --git a/native/core/src/errors.rs b/native/core/src/errors.rs
index 03c9fd00f..ff89e77d2 100644
--- a/native/core/src/errors.rs
+++ b/native/core/src/errors.rs
@@ -63,15 +63,10 @@ pub enum CometError {
     #[error("Comet Internal Error: {0}")]
     Internal(String),
 
-    #[error("[NUMERIC_VALUE_OUT_OF_RANGE] {value} cannot be represented as Decimal({precision}, {scale}). If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error, and return NULL instead.")]
-    NumericValueOutOfRange {
-        value: String,
-        precision: u8,
-        scale: i8,
-    },
-
-    #[error("[ARITHMETIC_OVERFLOW] {from_type} overflow. If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
-    ArithmeticOverflow { from_type: String },
+    /// CometError::Spark is typically used in native code to emulate the same errors
+    /// that Spark would return
+    #[error(transparent)]
+    Spark(SparkError),
 
     #[error(transparent)]
     Arrow {
@@ -147,9 +142,6 @@ pub enum CometError {
         msg: String,
         throwable: GlobalRef,
     },
-
-    #[error(transparent)]
-    Spark(SparkError),
 }
 
 pub fn init() {
@@ -222,7 +214,7 @@ impl jni::errors::ToException for CometError {
                 class: "java/lang/NullPointerException".to_string(),
                 msg: self.to_string(),
             },
-            CometError::Spark(_) => Exception {
+            CometError::Spark { .. } => Exception {
                 class: "org/apache/spark/SparkException".to_string(),
                 msg: self.to_string(),
             },
diff --git a/native/core/src/execution/datafusion/expressions/mod.rs b/native/core/src/execution/datafusion/expressions/mod.rs
index c537c06a9..f6fb26b6a 100644
--- a/native/core/src/execution/datafusion/expressions/mod.rs
+++ b/native/core/src/execution/datafusion/expressions/mod.rs
@@ -43,10 +43,10 @@ mod utils;
 pub mod variance;
 pub mod xxhash64;
 
-pub use datafusion_comet_spark_expr::EvalMode;
+pub use datafusion_comet_spark_expr::{EvalMode, SparkError};
 
 fn arithmetic_overflow_error(from_type: &str) -> CometError {
-    CometError::ArithmeticOverflow {
+    CometError::Spark(SparkError::ArithmeticOverflow {
         from_type: from_type.to_string(),
-    }
+    })
 }
diff --git a/native/core/src/execution/datafusion/expressions/negative.rs b/native/core/src/execution/datafusion/expressions/negative.rs
index cd0e9bccf..9e82812be 100644
--- a/native/core/src/execution/datafusion/expressions/negative.rs
+++ b/native/core/src/execution/datafusion/expressions/negative.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use super::arithmetic_overflow_error;
 use crate::errors::CometError;
 use arrow::{compute::kernels::numeric::neg_wrapping, datatypes::IntervalDayTimeType};
 use arrow_array::RecordBatch;
@@ -24,6 +25,7 @@ use datafusion::{
     logical_expr::{interval_arithmetic::Interval, ColumnarValue},
     physical_expr::PhysicalExpr,
 };
+use datafusion_comet_spark_expr::SparkError;
 use datafusion_common::{Result, ScalarValue};
 use datafusion_expr::sort_properties::ExprProperties;
 use datafusion_physical_expr::aggregate::utils::down_cast_any_ref;
@@ -33,8 +35,6 @@ use std::{
     sync::Arc,
 };
 
-use super::arithmetic_overflow_error;
-
 pub fn create_negate_expr(
     expr: Arc<dyn PhysicalExpr>,
     fail_on_error: bool,
@@ -234,7 +234,7 @@ impl PhysicalExpr for NegativeExpr {
                 || child_interval.lower() == &ScalarValue::Int64(Some(i64::MIN))
                 || child_interval.upper() == &ScalarValue::Int64(Some(i64::MIN))
             {
-                return Err(CometError::ArithmeticOverflow {
+                return Err(SparkError::ArithmeticOverflow {
                     from_type: "long".to_string(),
                 }
                 .into());
diff --git a/native/spark-expr/src/abs.rs b/native/spark-expr/src/abs.rs
index 198a96e57..fa25a7775 100644
--- a/native/spark-expr/src/abs.rs
+++ b/native/spark-expr/src/abs.rs
@@ -77,9 +77,10 @@ impl ScalarUDFImpl for Abs {
                 if self.eval_mode == EvalMode::Legacy {
                     Ok(args[0].clone())
                 } else {
-                    Err(DataFusionError::External(Box::new(
-                        SparkError::ArithmeticOverflow(self.data_type_name.clone()),
-                    )))
+                    Err(SparkError::ArithmeticOverflow {
+                        from_type: self.data_type_name.clone(),
+                    }
+                    .into())
                 }
             }
             other => other,
diff --git a/native/spark-expr/src/error.rs b/native/spark-expr/src/error.rs
new file mode 100644
index 000000000..66c082750
--- /dev/null
+++ b/native/spark-expr/src/error.rs
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_schema::ArrowError;
+use datafusion_common::DataFusionError;
+use std::error::Error;
+use std::fmt::{Display, Formatter};
+
+#[derive(Debug)]
+pub enum SparkError {
+    ArithmeticOverflow {
+        from_type: String,
+    },
+    CastInvalidValue {
+        value: String,
+        from_type: String,
+        to_type: String,
+    },
+    CastOverFlow {
+        value: String,
+        from_type: String,
+        to_type: String,
+    },
+    NumericValueOutOfRange {
+        value: String,
+        precision: u8,
+        scale: i8,
+    },
+    Arrow(ArrowError),
+    Internal(String),
+}
+
+pub type SparkResult<T> = Result<T, SparkError>;
+
+impl From<ArrowError> for SparkError {
+    fn from(value: ArrowError) -> Self {
+        SparkError::Arrow(value)
+    }
+}
+
+impl From<SparkError> for DataFusionError {
+    fn from(value: SparkError) -> Self {
+        DataFusionError::External(Box::new(value))
+    }
+}
+
+impl Error for SparkError {}
+
+impl Display for SparkError {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::ArithmeticOverflow { from_type } =>
+                write!(f, "[ARITHMETIC_OVERFLOW] {from_type} overflow. If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error."),
+            Self::CastOverFlow { value, from_type, to_type } => write!(f, "[CAST_OVERFLOW] The value {value} of the type \"{from_type}\" cannot be cast to \"{to_type}\" \
+                due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary \
+                set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error."),
+            Self::CastInvalidValue { value, from_type, to_type } => write!(f, "[CAST_INVALID_INPUT] The value '{value}' of the type \"{from_type}\" cannot be cast to \"{to_type}\" \
+                because it is malformed. Correct the value as per the syntax, or change its target type. \
+                Use `try_cast` to tolerate malformed input and return NULL instead. If necessary \
+                set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error."),
+            Self::NumericValueOutOfRange { value, precision, scale } => write!(f, "[NUMERIC_VALUE_OUT_OF_RANGE] {value} cannot be represented as Decimal({precision}, {scale}). If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error, and return NULL instead."),
+            Self::Arrow(e) => write!(f, "ArrowError: {e}"),
+            Self::Internal(e) => write!(f, "{e}"),
+        }
+    }
+}
diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs
index 817e2704a..93c7f249e 100644
--- a/native/spark-expr/src/lib.rs
+++ b/native/spark-expr/src/lib.rs
@@ -15,16 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_schema::ArrowError;
-use datafusion_common::DataFusionError;
-use std::error::Error;
-use std::fmt::{Display, Formatter};
-
 mod abs;
 pub mod cast;
+mod error;
 mod if_expr;
 
 pub use abs::Abs;
+pub use error::{SparkError, SparkResult};
 pub use if_expr::IfExpr;
 
 /// Spark supports three evaluation modes when evaluating expressions, which affect
@@ -45,60 +42,3 @@ pub enum EvalMode {
     /// failing the entire query.
     Try,
 }
-
-#[derive(Debug)]
-pub enum SparkError {
-    ArithmeticOverflow(String),
-    CastOverFlow {
-        value: String,
-        from_type: String,
-        to_type: String,
-    },
-    CastInvalidValue {
-        value: String,
-        from_type: String,
-        to_type: String,
-    },
-    NumericValueOutOfRange {
-        value: String,
-        precision: u8,
-        scale: i8,
-    },
-    Arrow(ArrowError),
-    Internal(String),
-}
-
-pub type SparkResult<T> = Result<T, SparkError>;
-
-impl From<ArrowError> for SparkError {
-    fn from(value: ArrowError) -> Self {
-        SparkError::Arrow(value)
-    }
-}
-
-impl From<SparkError> for DataFusionError {
-    fn from(value: SparkError) -> Self {
-        DataFusionError::External(Box::new(value))
-    }
-}
-
-impl Error for SparkError {}
-
-impl Display for SparkError {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::ArithmeticOverflow(data_type) =>
-                write!(f, "[ARITHMETIC_OVERFLOW] {} overflow. If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.", data_type),
-            Self::CastOverFlow { value, from_type, to_type } => write!(f, "[CAST_OVERFLOW] The value {value} of the type \"{from_type}\" cannot be cast to \"{to_type}\" \
-                due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary \
-                set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error."),
-            Self::CastInvalidValue { value, from_type, to_type } => write!(f, "[CAST_INVALID_INPUT] The value '{value}' of the type \"{from_type}\" cannot be cast to \"{to_type}\" \
-                because it is malformed. Correct the value as per the syntax, or change its target type. \
-                Use `try_cast` to tolerate malformed input and return NULL instead. If necessary \
-                set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error."),
-            Self::NumericValueOutOfRange { value, precision, scale } => write!(f, "[NUMERIC_VALUE_OUT_OF_RANGE] {value} cannot be represented as Decimal({precision}, {scale}). If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error, and return NULL instead."),
-            Self::Arrow(e) => write!(f, "ArrowError: {e}"),
-            Self::Internal(e) => write!(f, "{e}"),
-        }
-    }
-}
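
A quick sketch of how the relocated error type composes after this refactor;
the two helper functions below are hypothetical illustrations, not code from
the patch. Expression code returns SparkError through SparkResult, and the
From<SparkError> impl in error.rs lifts it into DataFusion's error type at
the crate boundary, which is why call sites above can simply build a variant
and call `.into()` (or use `?`):

    use datafusion_common::DataFusionError;
    use datafusion_comet_spark_expr::{SparkError, SparkResult};

    // Hypothetical helper: checked negation that raises the same ANSI
    // overflow error Spark reports for the `long` type.
    fn checked_negate(v: i64) -> SparkResult<i64> {
        v.checked_neg().ok_or_else(|| SparkError::ArithmeticOverflow {
            from_type: "long".to_string(),
        })
    }

    // Hypothetical boundary function: `?` converts the error automatically
    // via `impl From<SparkError> for DataFusionError` from error.rs.
    fn negate_for_datafusion(v: i64) -> Result<i64, DataFusionError> {
        Ok(checked_negate(v)?)
    }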