From 9d4c8a5ad23434630023333a62e7a2e598bdb780 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 12 Jul 2024 16:13:38 -0600 Subject: [PATCH] remove utils crate and move utils into spark-expr crate (#658) --- native/Cargo.lock | 16 +------- native/Cargo.toml | 3 +- native/core/Cargo.toml | 1 - .../execution/datafusion/expressions/utils.rs | 2 +- native/core/src/execution/mod.rs | 2 +- native/spark-expr/Cargo.toml | 3 +- native/spark-expr/src/cast.rs | 2 +- native/spark-expr/src/if_expr.rs | 2 +- native/spark-expr/src/lib.rs | 3 ++ native/{utils => spark-expr}/src/timezone.rs | 0 .../src/lib.rs => spark-expr/src/utils.rs} | 4 +- native/utils/Cargo.toml | 39 ------------------- native/utils/README.md | 22 ----------- 13 files changed, 13 insertions(+), 86 deletions(-) rename native/{utils => spark-expr}/src/timezone.rs (100%) rename native/{utils/src/lib.rs => spark-expr/src/utils.rs} (99%) delete mode 100644 native/utils/Cargo.toml delete mode 100644 native/utils/README.md diff --git a/native/Cargo.lock b/native/Cargo.lock index f64b7b634..f73f28629 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -866,7 +866,6 @@ dependencies = [ "criterion", "datafusion", "datafusion-comet-spark-expr", - "datafusion-comet-utils", "datafusion-common", "datafusion-expr", "datafusion-physical-expr", @@ -910,29 +909,18 @@ dependencies = [ "arrow-array", "arrow-schema", "chrono", + "chrono-tz 0.8.6", "datafusion", - "datafusion-comet-utils", "datafusion-common", "datafusion-expr", "datafusion-functions", "datafusion-physical-expr", + "datafusion-physical-plan", "num", "regex", "thiserror", ] -[[package]] -name = "datafusion-comet-utils" -version = "0.1.0" -dependencies = [ - "arrow", - "arrow-array", - "arrow-schema", - "chrono", - "chrono-tz 0.8.6", - "datafusion-physical-plan", -] - [[package]] name = "datafusion-common" version = "40.0.0" diff --git a/native/Cargo.toml b/native/Cargo.toml index 09865742b..4f306452a 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -16,7 +16,7 @@ # under the License. [workspace] -members = ["core", "spark-expr", "utils"] +members = ["core", "spark-expr"] resolver = "2" [workspace.package] @@ -47,7 +47,6 @@ datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", r datafusion-physical-expr-common = { git = "https://github.com/apache/datafusion.git", rev = "40.0.0", default-features = false } datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "40.0.0", default-features = false } datafusion-comet-spark-expr = { path = "spark-expr", version = "0.1.0" } -datafusion-comet-utils = { path = "utils", version = "0.1.0" } chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } num = "0.4" diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index 8e02324c6..c252fad6d 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -77,7 +77,6 @@ regex = { workspace = true } crc32fast = "1.3.2" simd-adler32 = "0.3.7" datafusion-comet-spark-expr = { workspace = true } -datafusion-comet-utils = { workspace = true } [build-dependencies] prost-build = "0.9.0" diff --git a/native/core/src/execution/datafusion/expressions/utils.rs b/native/core/src/execution/datafusion/expressions/utils.rs index 04e41e0ba..d253b251f 100644 --- a/native/core/src/execution/datafusion/expressions/utils.rs +++ b/native/core/src/execution/datafusion/expressions/utils.rs @@ -16,4 +16,4 @@ // under the License. // re-export for legacy reasons -pub use datafusion_comet_utils::{array_with_timezone, down_cast_any_ref}; +pub use datafusion_comet_spark_expr::utils::{array_with_timezone, down_cast_any_ref}; diff --git a/native/core/src/execution/mod.rs b/native/core/src/execution/mod.rs index a13a1bc85..cdd429231 100644 --- a/native/core/src/execution/mod.rs +++ b/native/core/src/execution/mod.rs @@ -26,7 +26,7 @@ pub mod operators; pub mod serde; pub mod shuffle; pub(crate) mod sort; -pub use datafusion_comet_utils::timezone; +pub use datafusion_comet_spark_expr::timezone; pub(crate) mod utils; mod memory_pool; diff --git a/native/spark-expr/Cargo.toml b/native/spark-expr/Cargo.toml index 220417fe8..976a1f36f 100644 --- a/native/spark-expr/Cargo.toml +++ b/native/spark-expr/Cargo.toml @@ -36,7 +36,8 @@ datafusion-common = { workspace = true } datafusion-functions = { workspace = true } datafusion-expr = { workspace = true } datafusion-physical-expr = { workspace = true } -datafusion-comet-utils = { workspace = true } +datafusion-physical-plan = { workspace = true } +chrono-tz = { workspace = true } num = { workspace = true } regex = { workspace = true } thiserror = { workspace = true } diff --git a/native/spark-expr/src/cast.rs b/native/spark-expr/src/cast.rs index b9cf2790b..7f53583e8 100644 --- a/native/spark-expr/src/cast.rs +++ b/native/spark-expr/src/cast.rs @@ -55,7 +55,7 @@ use num::{ }; use regex::Regex; -use datafusion_comet_utils::{array_with_timezone, down_cast_any_ref}; +use crate::utils::{array_with_timezone, down_cast_any_ref}; use crate::{EvalMode, SparkError, SparkResult}; diff --git a/native/spark-expr/src/if_expr.rs b/native/spark-expr/src/if_expr.rs index c04494ec4..fa52c5d5b 100644 --- a/native/spark-expr/src/if_expr.rs +++ b/native/spark-expr/src/if_expr.rs @@ -31,7 +31,7 @@ use datafusion::logical_expr::ColumnarValue; use datafusion_common::{cast::as_boolean_array, Result}; use datafusion_physical_expr::PhysicalExpr; -use datafusion_comet_utils::down_cast_any_ref; +use crate::utils::down_cast_any_ref; #[derive(Debug, Hash)] pub struct IfExpr { diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs index 93c7f249e..3c726f52a 100644 --- a/native/spark-expr/src/lib.rs +++ b/native/spark-expr/src/lib.rs @@ -20,6 +20,9 @@ pub mod cast; mod error; mod if_expr; +pub mod timezone; +pub mod utils; + pub use abs::Abs; pub use error::{SparkError, SparkResult}; pub use if_expr::IfExpr; diff --git a/native/utils/src/timezone.rs b/native/spark-expr/src/timezone.rs similarity index 100% rename from native/utils/src/timezone.rs rename to native/spark-expr/src/timezone.rs diff --git a/native/utils/src/lib.rs b/native/spark-expr/src/utils.rs similarity index 99% rename from native/utils/src/lib.rs rename to native/spark-expr/src/utils.rs index 4600abfaf..6945e82b3 100644 --- a/native/utils/src/lib.rs +++ b/native/spark-expr/src/utils.rs @@ -23,14 +23,12 @@ use arrow_schema::{ArrowError, DataType}; use std::any::Any; use std::sync::Arc; +use crate::timezone::Tz; use arrow::{ array::{as_dictionary_array, Array, ArrayRef, PrimitiveArray}, temporal_conversions::as_datetime, }; use chrono::{DateTime, Offset, TimeZone}; -use timezone::Tz; - -pub mod timezone; use datafusion_physical_plan::PhysicalExpr; diff --git a/native/utils/Cargo.toml b/native/utils/Cargo.toml deleted file mode 100644 index f9ae47433..000000000 --- a/native/utils/Cargo.toml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "datafusion-comet-utils" -description = "DataFusion Comet Utilities" -version = { workspace = true } -homepage = { workspace = true } -repository = { workspace = true } -authors = { workspace = true } -readme = { workspace = true } -license = { workspace = true } -edition = { workspace = true } - -[dependencies] -arrow = { workspace = true } -arrow-array = { workspace = true } -arrow-schema = { workspace = true } -chrono = { workspace = true } -chrono-tz = { workspace = true } -datafusion-physical-plan = { workspace = true } - -[lib] -name = "datafusion_comet_utils" -path = "src/lib.rs" diff --git a/native/utils/README.md b/native/utils/README.md deleted file mode 100644 index 513c6245e..000000000 --- a/native/utils/README.md +++ /dev/null @@ -1,22 +0,0 @@ - - -# datafusion-comet-utils - -This crate provides utilities for use in the [Apache DataFusion Comet](https://github.com/apache/datafusion-comet/) project. \ No newline at end of file