From 810cce7e2b11aaf383f34c9997ede787cf71aaef Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 18 Jun 2024 07:19:33 -0400 Subject: [PATCH 1/7] Update `string-view` branch to arrow-rs main (#10966) * Pin to arrow main * Fix clippy with latest arrow * Uncomment test that needs new arrow-rs to work * Update datafusion-cli Cargo.lock * Update Cargo.lock * tapelo --- Cargo.toml | 16 + datafusion-cli/Cargo.lock | 442 ++++++++++++++---- datafusion-cli/Cargo.toml | 15 + datafusion/common/src/scalar/mod.rs | 4 - datafusion/expr/src/type_coercion/binary.rs | 2 +- .../expr/src/type_coercion/functions.rs | 4 +- datafusion/functions/src/datetime/date_bin.rs | 14 +- 7 files changed, 402 insertions(+), 95 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index aa1ba1f214d5..290dd64021b7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -153,3 +153,19 @@ large_futures = "warn" [workspace.lints.rust] unused_imports = "deny" + +## Temporary arrow-rs patch until 52.1.0 is released + +[patch.crates-io] +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = 
"72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index c5b34df4f1cf..b0b41a12328d 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -131,8 +131,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ae9728f104939be6d8d9b368a354b4929b0569160ea1641f0721b55a861ce38" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "arrow-arith", "arrow-array", @@ -152,8 +151,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7029a5b3efbeafbf4a12d12dc16b8f9e9bff20a410b8c25c5d28acc089e1043" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -167,8 +165,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d33238427c60271710695f17742f45b1a5dc5bcfc5c15331c25ddfe7abf70d97" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "ahash", "arrow-buffer", @@ -184,8 +181,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "52.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9b95e825ae838efaf77e366c00d3fc8cca78134c9db497d6bda425f2e7b7c1" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "bytes", "half", @@ -195,8 +191,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cf8385a9d5b5fcde771661dd07652b79b9139fea66193eda6a88664400ccab" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -216,8 +211,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea5068bef430a86690059665e40034625ec323ffa4dd21972048eebb0127adc" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -235,8 +229,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb29be98f987bcf217b070512bb7afba2f65180858bca462edf4a39d84a23e10" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "arrow-buffer", "arrow-schema", @@ -247,8 +240,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffc68f6523970aa6f7ce1dc9a33a7d9284cfb9af77d4ad3e617dbe5d79cc6ec8" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ 
-262,8 +254,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2041380f94bd6437ab648e6c2085a045e45a0c44f91a1b9a4fe3fed3d379bfb1" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -282,8 +273,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb56ed1547004e12203652f12fe12e824161ff9d1e5cf2a7dc4ff02ba94f413" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -297,8 +287,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "575b42f1fc588f2da6977b94a5ca565459f5ab07b60545e17243fb9a7ed6d43e" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "ahash", "arrow-array", @@ -312,14 +301,12 @@ dependencies = [ [[package]] name = "arrow-schema" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32aae6a60458a2389c0da89c9de0b7932427776127da1a738e2efc21d32f3393" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" [[package]] name = "arrow-select" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de36abaef8767b4220d7b4a8c2fe5ffc78b47db81b03d77e2136091c3ba39102" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "ahash", 
"arrow-array", @@ -332,8 +319,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e435ada8409bcafc910bc3e0077f532a4daa20e99060a496685c0e3e53cc2597" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -714,9 +700,9 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.72" +version = "0.3.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" dependencies = [ "addr2line", "cc", @@ -1498,6 +1484,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "displaydoc" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -1911,12 +1908,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", - "futures-core", + "futures-util", "http 1.1.0", "http-body 1.0.0", "pin-project-lite", @@ -1924,9 +1921,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" [[package]] name = "httpdate" @@ -2001,18 
+1998,19 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.26.0" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" +checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" dependencies = [ "futures-util", "http 1.1.0", "hyper 1.3.1", "hyper-util", - "rustls 0.22.4", + "rustls 0.23.10", + "rustls-native-certs 0.7.0", "rustls-pki-types", "tokio", - "tokio-rustls 0.25.0", + "tokio-rustls 0.26.0", "tower-service", ] @@ -2059,14 +2057,134 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + 
"icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f8ac670d7422d7f76b32e17a5db556510825b29ec9154f235977c9caba61036" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "idna" -version = "0.5.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "4716a3a0933a1d01c2f72450e89596eb51dd34ef3c211ccd875acdf1f8fe47ed" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", + "smallvec", + "utf8_iter", ] [[package]] @@ -2278,6 
+2396,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "lock_api" version = "0.4.12" @@ -2326,9 +2450,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "mimalloc" @@ -2483,9 +2607,9 @@ dependencies = [ [[package]] name = "object" -version = "0.35.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" dependencies = [ "memchr", ] @@ -2580,8 +2704,7 @@ dependencies = [ [[package]] name = "parquet" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c3b5322cc1bbf67f11c079c42be41a55949099b78732f7dba9e15edde40eab" +source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" dependencies = [ "ahash", "arrow-array", @@ -2811,6 +2934,53 @@ dependencies = [ "serde", ] +[[package]] +name = "quinn" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4ceeeeabace7857413798eb1ffa1e9c905a9946a57d81fb69b4b71c4d8eb3ad" +dependencies = [ + "bytes", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls 0.23.10", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "quinn-proto" 
+version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddf517c03a109db8100448a4be38d498df8a210a99fe0e1b9eaf39e78c640efe" +dependencies = [ + "bytes", + "rand", + "ring 0.17.8", + "rustc-hash", + "rustls 0.23.10", + "slab", + "thiserror", + "tinyvec", + "tracing", +] + +[[package]] +name = "quinn-udp" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9096629c45860fc7fb143e125eb826b5e721e10be3263160c7d60ca832cf8c46" +dependencies = [ + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.52.0", +] + [[package]] name = "quote" version = "1.0.36" @@ -2862,9 +3032,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" dependencies = [ "bitflags 2.5.0", ] @@ -2917,9 +3087,9 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" +checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" dependencies = [ "base64 0.22.1", "bytes", @@ -2930,7 +3100,7 @@ dependencies = [ "http-body 1.0.0", "http-body-util", "hyper 1.3.1", - "hyper-rustls 0.26.0", + "hyper-rustls 0.27.2", "hyper-util", "ipnet", "js-sys", @@ -2939,7 +3109,8 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.22.4", + "quinn", + "rustls 0.23.10", "rustls-native-certs 0.7.0", "rustls-pemfile 2.1.2", "rustls-pki-types", @@ -2948,7 +3119,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls 0.25.0", + "tokio-rustls 0.26.0", "tokio-util", 
"tower-service", "url", @@ -3027,6 +3198,12 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc_version" version = "0.4.0" @@ -3063,11 +3240,11 @@ dependencies = [ [[package]] name = "rustls" -version = "0.22.4" +version = "0.23.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" +checksum = "05cff451f60db80f490f3c182b77c35260baace73209e9cdbbe526bfe3a4d402" dependencies = [ - "log", + "once_cell", "ring 0.17.8", "rustls-pki-types", "rustls-webpki", @@ -3395,6 +3572,12 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -3484,9 +3667,20 @@ dependencies = [ [[package]] name = "sync_wrapper" -version = "0.1.2" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] [[package]] name = "tempfile" @@ -3591,6 +3785,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -3649,11 +3853,11 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.25.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.22.4", + "rustls 0.23.10", "rustls-pki-types", "tokio", ] @@ -3784,27 +3988,12 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] - [[package]] name = "unicode-segmentation" version = "1.11.0" @@ -3831,9 +4020,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "f7c25da092f0a868cdf09e8674cd3b7ef3a7d92a24253e663a2fb85e2496de56" dependencies = [ "form_urlencoded", "idna", @@ -3846,6 
+4035,18 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -4196,6 +4397,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "xmlparser" version = "0.13.6" @@ -4211,6 +4424,30 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.34" @@ -4231,12 +4468,55 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb2cc8827d6c0994478a15c53f374f46fbd41bea663d809b14744bc42e6b109c" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97cf56601ee5052b4417d90c8755c6683473c926039908196cf35d99f893ebe7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "zstd" version = "0.12.4" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 8f4b3cd81f36..b4883264731e 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -62,3 +62,18 @@ assert_cmd = "2.0" ctor = "0.2.0" predicates = "3.0" rstest = "0.17" + +## Temporary arrow-rs patch until 52.1.0 is released + +[patch.crates-io] +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-data = { git = 
"https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 3daf347ae4ff..96bf4216d9a1 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -5446,16 +5446,12 @@ mod tests { DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), ); - // needs https://github.com/apache/arrow-rs/issues/5893 - /* check_scalar_cast(ScalarValue::Utf8(None), DataType::Utf8View); check_scalar_cast(ScalarValue::from("foo"), DataType::Utf8View); check_scalar_cast( ScalarValue::from("larger than 12 bytes string"), DataType::Utf8View, ); - - */ } // mimics how casting work on scalar values by `casting` `scalar` to `desired_type` diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 615bb3ac568c..d7cb4b1a3ef6 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -1078,7 +1078,7 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Microsecond, (l, r) => { assert_eq!(l, r); - l.clone() + *l } }; diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs 
index 5f060a4a4f16..40693fc03236 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -607,11 +607,11 @@ fn coerced_from<'a>( (Timestamp(unit, Some(tz)), _) if tz.as_ref() == TIMEZONE_WILDCARD => { match type_from { Timestamp(_, Some(from_tz)) => { - Some(Timestamp(unit.clone(), Some(from_tz.clone()))) + Some(Timestamp(*unit, Some(from_tz.clone()))) } Null | Date32 | Utf8 | LargeUtf8 | Timestamp(_, None) => { // In the absence of any other information assume the time zone is "+00" (UTC). - Some(Timestamp(unit.clone(), Some("+00".into()))) + Some(Timestamp(*unit, Some("+00".into()))) } _ => None, } diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index e777e5ea95d0..997f1a36ad04 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -57,35 +57,35 @@ impl DateBinFunc { vec![ Exact(vec![ DataType::Interval(MonthDayNano), - Timestamp(array_type.clone(), None), + Timestamp(array_type, None), Timestamp(Nanosecond, None), ]), Exact(vec![ DataType::Interval(MonthDayNano), - Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())), + Timestamp(array_type, Some(TIMEZONE_WILDCARD.into())), Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())), ]), Exact(vec![ DataType::Interval(DayTime), - Timestamp(array_type.clone(), None), + Timestamp(array_type, None), Timestamp(Nanosecond, None), ]), Exact(vec![ DataType::Interval(DayTime), - Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())), + Timestamp(array_type, Some(TIMEZONE_WILDCARD.into())), Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())), ]), Exact(vec![ DataType::Interval(MonthDayNano), - Timestamp(array_type.clone(), None), + Timestamp(array_type, None), ]), Exact(vec![ DataType::Interval(MonthDayNano), - Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())), + Timestamp(array_type, Some(TIMEZONE_WILDCARD.into())), ]), 
Exact(vec![ DataType::Interval(DayTime), - Timestamp(array_type.clone(), None), + Timestamp(array_type, None), ]), Exact(vec![ DataType::Interval(DayTime), From 507d978a3b2b9fe873239ae2d4640286e423086a Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Wed, 19 Jun 2024 19:38:03 +0800 Subject: [PATCH 2/7] feat: Implement equality = and inequality <> support for StringView (#10985) * feat: Implement equality = and inequality <> support for StringView * chore: Add tests for the StringView * chore * chore: Update tests for NULL * fix: Used build_array_string! * chore: Update string_coercion function to handle Utf8View type in binary.rs * chore: add tests * chore: ci --- Cargo.toml | 24 ++-- datafusion-cli/Cargo.lock | 30 ++--- datafusion-cli/Cargo.toml | 22 ++-- datafusion/common/src/scalar/mod.rs | 2 +- datafusion/expr/src/type_coercion/binary.rs | 1 + .../sqllogictest/test_files/string_view.slt | 113 ++++++++++++++++++ 6 files changed, 153 insertions(+), 39 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/string_view.slt diff --git a/Cargo.toml b/Cargo.toml index 290dd64021b7..be6e0c672f6f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -157,15 +157,15 @@ unused_imports = "deny" ## Temporary arrow-rs patch until 52.1.0 is released [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-schema = { git = 
"https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +parquet = { git = "https://github.com/apache/arrow-rs.git", 
rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index b0b41a12328d..15f7809ee5f5 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -131,7 +131,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "arrow-arith", "arrow-array", @@ -151,7 +151,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "arrow-array", "arrow-buffer", @@ -165,7 +165,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "ahash", "arrow-buffer", @@ -181,7 +181,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "bytes", "half", @@ -191,7 +191,7 @@ dependencies = [ [[package]] name = 
"arrow-cast" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "arrow-array", "arrow-buffer", @@ -211,7 +211,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "arrow-array", "arrow-buffer", @@ -229,7 +229,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "arrow-buffer", "arrow-schema", @@ -240,7 +240,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "arrow-array", "arrow-buffer", @@ -254,7 +254,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = 
[ "arrow-array", "arrow-buffer", @@ -273,7 +273,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "arrow-array", "arrow-buffer", @@ -287,7 +287,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "ahash", "arrow-array", @@ -301,12 +301,12 @@ dependencies = [ [[package]] name = "arrow-schema" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" [[package]] name = "arrow-select" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "ahash", "arrow-array", @@ -319,7 +319,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ 
"arrow-array", "arrow-buffer", @@ -2704,7 +2704,7 @@ dependencies = [ [[package]] name = "parquet" version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=72467c670f8c38130e4743347407f1a542e59e0c#72467c670f8c38130e4743347407f1a542e59e0c" +source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" dependencies = [ "ahash", "arrow-array", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index b4883264731e..0e7b712d8b19 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -66,14 +66,14 @@ rstest = "0.17" ## Temporary arrow-rs patch until 52.1.0 is released [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" } +arrow = { git = 
"https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 96bf4216d9a1..86ac115cca02 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -1570,6 +1570,7 @@ impl ScalarValue { DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16), DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32), DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64), + DataType::Utf8View => build_array_string!(StringViewArray, Utf8View), DataType::Utf8 => build_array_string!(StringArray, Utf8), DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8), DataType::Binary => build_array_string!(BinaryArray, Binary), @@ -1726,7 +1727,6 @@ impl ScalarValue { 
| DataType::Time64(TimeUnit::Millisecond) | DataType::Map(_, _) | DataType::RunEndEncoded(_, _) - | DataType::Utf8View | DataType::BinaryView | DataType::ListView(_) | DataType::LargeListView(_) => { diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index d7cb4b1a3ef6..d57b5228cb74 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -932,6 +932,7 @@ fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option (LargeUtf8, Utf8) => Some(LargeUtf8), (Utf8, LargeUtf8) => Some(LargeUtf8), (LargeUtf8, LargeUtf8) => Some(LargeUtf8), + (Utf8View, Utf8View) | (Utf8View, Utf8) | (Utf8, Utf8View) => Some(Utf8View), _ => None, } } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt new file mode 100644 index 000000000000..3be3c94770db --- /dev/null +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -0,0 +1,113 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +# test StringViewArray with Utf8View columns +statement ok +create table test as values (arrow_cast('Andrew', 'Utf8View'), arrow_cast('X', 'Utf8View')), + (arrow_cast('Xiangpeng', 'Utf8View'), arrow_cast('Xiangpeng', 'Utf8View')), + (arrow_cast('Raphael', 'Utf8View'), arrow_cast('R', 'Utf8View')), + (arrow_cast(NULL, 'Utf8View'), arrow_cast('R', 'Utf8View')); + +query B +select arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'); +---- +false + +query B +select arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); +---- +true + +query B +select arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'); +---- +true + +query B +select arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); +---- +true + +query ?? +select * from test where column1 = column2; +---- +Xiangpeng Xiangpeng + +query ?? +select * from test where column1 <> column2; +---- +Andrew X +Raphael R + +query ?? +select * from test where column1 = arrow_cast('Andrew', 'Utf8View'); +---- +Andrew X + +query ?? +select * from test where column1 = 'Andrew'; +---- +Andrew X + +query ?? +select * from test where column1 <> arrow_cast('Andrew', 'Utf8View'); +---- +Xiangpeng Xiangpeng +Raphael R + +query ?? +select * from test where column1 <> 'Andrew'; +---- +Xiangpeng Xiangpeng +Raphael R + +statement ok +drop table test; + + +# test StringViewArray with Utf8 and Utf8View columns +statement ok +create table test as values ('Andrew', arrow_cast('X', 'Utf8View')), + ('Xiangpeng', arrow_cast('Xiangpeng', 'Utf8View')), + ('Raphael', arrow_cast('R', 'Utf8View')), + (NULL, arrow_cast('R', 'Utf8View')); + +query T? +select * from test where column1 = column2; +---- +Xiangpeng Xiangpeng + +query T? +select * from test where column1 <> column2; +---- +Andrew X +Raphael R + +query T? +select * from test where column1 = arrow_cast('Andrew', 'Utf8View'); +---- +Andrew X + +query T? 
+select * from test where column1 <> arrow_cast('Andrew', 'Utf8View'); +---- +Xiangpeng Xiangpeng +Raphael R + +statement ok +drop table test; From 5b4c365e11f23fdd6f3aaee9c336bdfd8baa30f8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 19 Jun 2024 10:28:59 -0400 Subject: [PATCH 3/7] Add more StringView comparison test coverage (#10997) * Add more StringView comparison test coverage * add reference * Add another test showing casting on columns works correctly --- .../sqllogictest/test_files/string_view.slt | 270 ++++++++++++++---- 1 file changed, 211 insertions(+), 59 deletions(-) diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 3be3c94770db..f8824b23d1b9 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -15,99 +15,251 @@ # specific language governing permissions and limitations # under the License. +######## +## Test setup +######## -# test StringViewArray with Utf8View columns statement ok -create table test as values (arrow_cast('Andrew', 'Utf8View'), arrow_cast('X', 'Utf8View')), - (arrow_cast('Xiangpeng', 'Utf8View'), arrow_cast('Xiangpeng', 'Utf8View')), - (arrow_cast('Raphael', 'Utf8View'), arrow_cast('R', 'Utf8View')), - (arrow_cast(NULL, 'Utf8View'), arrow_cast('R', 'Utf8View')); +create table test_source as values + ('Andrew', 'X'), + ('Xiangpeng', 'Xiangpeng'), + ('Raphael', 'R'), + (NULL, 'R') +; -query B -select arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'); ----- -false +# Table with the different combination of column types +statement ok +create table test as +SELECT + arrow_cast(column1, 'Utf8') as column1_utf8, + arrow_cast(column2, 'Utf8') as column2_utf8, + arrow_cast(column1, 'Utf8View') as column1_utf8view, + arrow_cast(column2, 'Utf8View') as column2_utf8view, + arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict, + arrow_cast(column2, 'Dictionary(Int32, Utf8)') 
as column2_dict +FROM test_source; -query B -select arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); ----- -true +statement ok +drop table test_source -query B -select arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'); ----- -true +######## +## StringView to StringView +######## -query B -select arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); +# StringView scalar to StringView scalar + +query BBBB +select + arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); ---- -true +false true true true + + +# StringView column to StringView column comparison as filters -query ?? -select * from test where column1 = column2; +query TT +select column1_utf8, column2_utf8 from test where column1_utf8view = column2_utf8view; ---- Xiangpeng Xiangpeng -query ?? -select * from test where column1 <> column2; +query TT +select column1_utf8, column2_utf8 from test where column1_utf8view <> column2_utf8view; ---- Andrew X Raphael R -query ?? -select * from test where column1 = arrow_cast('Andrew', 'Utf8View'); +# StringView column to StringView column +query TTBB +select + column1_utf8, column2_utf8, + column1_utf8view = column2_utf8view, + column1_utf8view <> column2_utf8view +from test; ---- -Andrew X +Andrew X false true +Xiangpeng Xiangpeng true false +Raphael R false true +NULL R NULL NULL -query ?? 
-select * from test where column1 = 'Andrew'; +# StringView column to StringView scalar comparison +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = column1_utf8view, + column1_utf8view <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') <> column1_utf8view +from test; ---- -Andrew X +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL -query ?? -select * from test where column1 <> arrow_cast('Andrew', 'Utf8View'); +######## +## StringView to String +######## + +# test StringViewArray with Utf8 columns +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = column2_utf8, + column2_utf8 = column1_utf8view, + column1_utf8view <> column2_utf8, + column2_utf8 <> column1_utf8view +from test; ---- -Xiangpeng Xiangpeng -Raphael R +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL -query ?? 
-select * from test where column1 <> 'Andrew'; +# StringView column to String scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = arrow_cast('Andrew', 'Utf8'), + arrow_cast('Andrew', 'Utf8') = column1_utf8view, + column1_utf8view <> arrow_cast('Andrew', 'Utf8'), + arrow_cast('Andrew', 'Utf8') <> column1_utf8view +from test; ---- -Xiangpeng Xiangpeng -Raphael R +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL -statement ok -drop table test; +# String column to StringView scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8 = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = column1_utf8, + column1_utf8 <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') <> column1_utf8 +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + + +######## +## StringView to Dictionary +######## + +# test StringViewArray with Dictionary columns +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = column2_dict, + column2_dict = column1_utf8view, + column1_utf8view <> column2_dict, + column2_dict <> column1_utf8view +from test; +---- +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# StringView column to Dict scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'), + arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') = column1_utf8view, + column1_utf8view <> arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'), + arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') <> column1_utf8view +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# Dict column 
to StringView scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_dict = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = column1_dict, + column1_dict <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') <> column1_dict +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + + +######## +## Coercion Rules +######## -# test StringViewArray with Utf8 and Utf8View columns statement ok -create table test as values ('Andrew', arrow_cast('X', 'Utf8View')), - ('Xiangpeng', arrow_cast('Xiangpeng', 'Utf8View')), - ('Raphael', arrow_cast('R', 'Utf8View')), - (NULL, arrow_cast('R', 'Utf8View')); +set datafusion.explain.logical_plan_only = true; -query T? -select * from test where column1 = column2; + +# Filter should have a StringView literal and no column cast +query TT +explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew'; ---- -Xiangpeng Xiangpeng +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = Utf8View("Andrew") +03)----TableScan: test projection=[column1_utf8, column1_utf8view] -query T? -select * from test where column1 <> column2; +# reverse order should be the same +query TT +explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view; ---- -Andrew X -Raphael R +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = Utf8View("Andrew") +03)----TableScan: test projection=[column1_utf8, column1_utf8view] -query T? 
-select * from test where column1 = arrow_cast('Andrew', 'Utf8View'); +# should not be casting the column: https://github.com/apache/datafusion/issues/10998 +query TT +explain SELECT column1_utf8 from test where column1_utf8 = arrow_cast('Andrew', 'Utf8View'); ---- -Andrew X +logical_plan +01)Filter: CAST(test.column1_utf8 AS Utf8View) = Utf8View("Andrew") +02)--TableScan: test projection=[column1_utf8] -query T? -select * from test where column1 <> arrow_cast('Andrew', 'Utf8View'); +query TT +explain SELECT column1_utf8 from test where column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'); ---- -Xiangpeng Xiangpeng -Raphael R +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = Utf8View("Andrew") +03)----TableScan: test projection=[column1_utf8, column1_utf8view] + +# compare string / stringview +# Should cast string -> stringview (which is cheap), not stringview -> string (which is not) +query TT +explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8; +---- +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View) +03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view] + +query TT +explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view; +---- +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view +03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view] + statement ok drop table test; From 959856be77ae131e232b40be0ccd2357f4926458 Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Thu, 20 Jun 2024 18:19:39 +0800 Subject: [PATCH 4/7] feat: Implement equality = and inequality <> support for BinaryView (#11004) * feat: Implement equality = and inequality <> support for BinaryView Signed-off-by: Chojan Shang * chore: make fmt happy Signed-off-by: Chojan Shang --------- Signed-off-by: Chojan Shang --- 
datafusion/common/src/scalar/mod.rs | 2 +- datafusion/expr/src/type_coercion/binary.rs | 3 + .../sqllogictest/test_files/binary_view.slt | 154 ++++++++++++++++++ 3 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 datafusion/sqllogictest/test_files/binary_view.slt diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 86ac115cca02..e163fb68db2b 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -1573,6 +1573,7 @@ impl ScalarValue { DataType::Utf8View => build_array_string!(StringViewArray, Utf8View), DataType::Utf8 => build_array_string!(StringArray, Utf8), DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8), + DataType::BinaryView => build_array_string!(BinaryViewArray, BinaryView), DataType::Binary => build_array_string!(BinaryArray, Binary), DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary), DataType::Date32 => build_array_primitive!(Date32Array, Date32), @@ -1727,7 +1728,6 @@ impl ScalarValue { | DataType::Time64(TimeUnit::Millisecond) | DataType::Map(_, _) | DataType::RunEndEncoded(_, _) - | DataType::BinaryView | DataType::ListView(_) | DataType::LargeListView(_) => { return _internal_err!( diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index d57b5228cb74..694e5e13f9e1 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -991,6 +991,9 @@ fn binary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option (Binary | Utf8, Binary) | (Binary, Utf8) => Some(Binary), (LargeBinary | Binary | Utf8 | LargeUtf8, LargeBinary) | (LargeBinary, Binary | Utf8 | LargeUtf8) => Some(LargeBinary), + (BinaryView, BinaryView) | (BinaryView, Binary) | (Binary, BinaryView) => { + Some(BinaryView) + } _ => None, } } diff --git a/datafusion/sqllogictest/test_files/binary_view.slt 
b/datafusion/sqllogictest/test_files/binary_view.slt new file mode 100644 index 000000000000..2728d4803ce7 --- /dev/null +++ b/datafusion/sqllogictest/test_files/binary_view.slt @@ -0,0 +1,154 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +######## +## Test setup +######## + +statement ok +create table test_source as values + ('Andrew', 'X'), + ('Xiangpeng', 'Xiangpeng'), + ('Raphael', 'R'), + (NULL, 'R') +; + +# Table with the different combination of column types +statement ok +CREATE TABLE test AS +SELECT + arrow_cast(column1, 'Utf8') as column1_utf8, + arrow_cast(column2, 'Utf8') as column2_utf8, + arrow_cast(column1, 'Binary') AS column1_binary, + arrow_cast(column2, 'Binary') AS column2_binary, + arrow_cast(arrow_cast(column1, 'Binary'), 'BinaryView') AS column1_binaryview, + arrow_cast(arrow_cast(column2, 'Binary'), 'BinaryView') AS column2_binaryview, + arrow_cast(column1, 'Dictionary(Int32, Binary)') AS column1_dict, + arrow_cast(column2, 'Dictionary(Int32, Binary)') AS column2_dict +FROM test_source; + +statement ok +drop table test_source + +######## +## BinaryView to BinaryView +######## + +# BinaryView scalar to BinaryView scalar + +query BBBB +SELECT + arrow_cast(arrow_cast('NULL', 'Binary'), 'BinaryView') = 
arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison1, + arrow_cast(arrow_cast('NULL', 'Binary'), 'BinaryView') <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison2, + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison3, + arrow_cast(arrow_cast('Xiangpeng', 'Binary'), 'BinaryView') <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') AS comparison4; +---- +false true true true + + +# BinaryView column to BinaryView column comparison as filters + +query TT +select column1_utf8, column2_utf8 from test where column1_binaryview = column2_binaryview; +---- +Xiangpeng Xiangpeng + +query TT +select column1_utf8, column2_utf8 from test where column1_binaryview <> column2_binaryview; +---- +Andrew X +Raphael R + +# BinaryView column to BinaryView column +query TTBB +select + column1_utf8, column2_utf8, + column1_binaryview = column2_binaryview, + column1_binaryview <> column2_binaryview +from test; +---- +Andrew X false true +Xiangpeng Xiangpeng true false +Raphael R false true +NULL R NULL NULL + +# BinaryView column to BinaryView scalar comparison +query TTBBBB +select + column1_utf8, column2_utf8, + column1_binaryview = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = column1_binaryview, + column1_binaryview <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> column1_binaryview +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +######## +## BinaryView to Binary +######## + +# test BinaryViewArray with Binary columns +query TTBBBB +select + column1_utf8, column2_utf8, + column1_binaryview = column2_binary, + column2_binary = column1_binaryview, + column1_binaryview <> column2_binary, + column2_binary <> 
column1_binaryview +from test; +---- +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# BinaryView column to Binary scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_binaryview = arrow_cast('Andrew', 'Binary'), + arrow_cast('Andrew', 'Binary') = column1_binaryview, + column1_binaryview <> arrow_cast('Andrew', 'Binary'), + arrow_cast('Andrew', 'Binary') <> column1_binaryview +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# Binary column to BinaryView scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_binary = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = column1_binary, + column1_binary <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> column1_binary +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +statement ok +drop table test; \ No newline at end of file From 19ed182918b102bf0b67b59e3359adae85839a01 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Fri, 21 Jun 2024 12:18:33 -0400 Subject: [PATCH 5/7] Implement support for LargeString and LargeBinary for StringView and BinaryView (#11034) * implement large binary * add tests for large string * better comments for string coercion --- datafusion/expr/src/type_coercion/binary.rs | 36 +++++++++----- .../sqllogictest/test_files/binary_view.slt | 48 +++++++++++++++++++ .../sqllogictest/test_files/string_view.slt | 47 ++++++++++++++++++ 3 files changed, 119 insertions(+), 12 deletions(-) diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 694e5e13f9e1..36100a0b727c 100644 --- 
a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -922,17 +922,21 @@ fn string_concat_internal_coercion( } } -/// Coercion rules for string types (Utf8/LargeUtf8): If at least one argument is -/// a string type and both arguments can be coerced into a string type, coerce -/// to string type. +/// Coercion rules for string view types (Utf8/LargeUtf8/Utf8View): +/// If at least one argument is a string view, we coerce to string view +/// based on the observation that StringArray to StringViewArray is cheap but not vice versa. +/// +/// Between Utf8 and LargeUtf8, we coerce to LargeUtf8. fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; match (lhs_type, rhs_type) { + // If Utf8View is in any side, we coerce to Utf8View. + (Utf8View, Utf8View | Utf8 | LargeUtf8) | (Utf8 | LargeUtf8, Utf8View) => { + Some(Utf8View) + } + // Then, if LargeUtf8 is in any side, we coerce to LargeUtf8. + (LargeUtf8, Utf8 | LargeUtf8) | (Utf8, LargeUtf8) => Some(LargeUtf8), (Utf8, Utf8) => Some(Utf8), - (LargeUtf8, Utf8) => Some(LargeUtf8), - (Utf8, LargeUtf8) => Some(LargeUtf8), - (LargeUtf8, LargeUtf8) => Some(LargeUtf8), - (Utf8View, Utf8View) | (Utf8View, Utf8) | (Utf8, Utf8View) => Some(Utf8View), _ => None, } } @@ -982,18 +986,26 @@ fn binary_to_string_coercion( } } -/// Coercion rules for binary types (Binary/LargeBinary): If at least one argument is +/// Coercion rules for binary types (Binary/LargeBinary/BinaryView): If at least one argument is /// a binary type and both arguments can be coerced into a binary type, coerce /// to binary type. 
fn binary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> { use arrow::datatypes::DataType::*; match (lhs_type, rhs_type) { - (Binary | Utf8, Binary) | (Binary, Utf8) => Some(Binary), - (LargeBinary | Binary | Utf8 | LargeUtf8, LargeBinary) - | (LargeBinary, Binary | Utf8 | LargeUtf8) => Some(LargeBinary), - (BinaryView, BinaryView) | (BinaryView, Binary) | (Binary, BinaryView) => { + // If BinaryView is on either side, we coerce to BinaryView. + (BinaryView, BinaryView | Binary | LargeBinary | Utf8 | LargeUtf8 | Utf8View) + | (LargeBinary | Binary | Utf8 | LargeUtf8 | Utf8View, BinaryView) => { Some(BinaryView) } + // Prefer LargeBinary over Binary + (LargeBinary | Binary | Utf8 | LargeUtf8 | Utf8View, LargeBinary) + | (LargeBinary, Binary | Utf8 | LargeUtf8 | Utf8View) => Some(LargeBinary), + + // If Utf8View/LargeUtf8 is present, the result needs to be LargeBinary + (Utf8View | LargeUtf8, Binary) | (Binary, Utf8View | LargeUtf8) => { + Some(LargeBinary) + } + (Binary, Utf8) | (Utf8, Binary) => Some(Binary), _ => None, } } diff --git a/datafusion/sqllogictest/test_files/binary_view.slt b/datafusion/sqllogictest/test_files/binary_view.slt index 2728d4803ce7..de0f0bea7ffb 100644 --- a/datafusion/sqllogictest/test_files/binary_view.slt +++ b/datafusion/sqllogictest/test_files/binary_view.slt @@ -35,6 +35,8 @@ SELECT arrow_cast(column2, 'Utf8') as column2_utf8, arrow_cast(column1, 'Binary') AS column1_binary, arrow_cast(column2, 'Binary') AS column2_binary, + arrow_cast(column1, 'LargeBinary') AS column1_large_binary, + arrow_cast(column2, 'LargeBinary') AS column2_large_binary, arrow_cast(arrow_cast(column1, 'Binary'), 'BinaryView') AS column1_binaryview, arrow_cast(arrow_cast(column2, 'Binary'), 'BinaryView') AS column2_binaryview, arrow_cast(column1, 'Dictionary(Int32, Binary)') AS column1_dict, @@ -120,6 +122,21 @@ Xiangpeng Xiangpeng true true false false Raphael R false false true true NULL R NULL NULL NULL NULL +# test BinaryViewArray with LargeBinary columns
+query TTBBBB +select + column1_utf8, column2_utf8, + column1_binaryview = column2_large_binary, + column2_large_binary = column1_binaryview, + column1_binaryview <> column2_large_binary, + column2_large_binary <> column1_binaryview +from test; +---- +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL + # BinaryView column to Binary scalar query TTBBBB select @@ -135,6 +152,21 @@ Xiangpeng Xiangpeng false false true true Raphael R false false true true NULL R NULL NULL NULL NULL +# BinaryView column to LargeBinary scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_binaryview = arrow_cast('Andrew', 'LargeBinary'), + arrow_cast('Andrew', 'LargeBinary') = column1_binaryview, + column1_binaryview <> arrow_cast('Andrew', 'LargeBinary'), + arrow_cast('Andrew', 'LargeBinary') <> column1_binaryview +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + # Binary column to BinaryView scalar query TTBBBB select @@ -150,5 +182,21 @@ Xiangpeng Xiangpeng false false true true Raphael R false false true true NULL R NULL NULL NULL NULL + +# LargeBinary column to BinaryView scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_large_binary = arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') = column1_large_binary, + column1_large_binary <> arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView'), + arrow_cast(arrow_cast('Andrew', 'Binary'), 'BinaryView') <> column1_large_binary +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + statement ok drop table test; \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt 
index f8824b23d1b9..7c9fbf4735fb 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -33,6 +33,8 @@ create table test as SELECT arrow_cast(column1, 'Utf8') as column1_utf8, arrow_cast(column2, 'Utf8') as column2_utf8, + arrow_cast(column1, 'LargeUtf8') as column1_large_utf8, + arrow_cast(column2, 'LargeUtf8') as column2_large_utf8, arrow_cast(column1, 'Utf8View') as column1_utf8view, arrow_cast(column2, 'Utf8View') as column2_utf8view, arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict, @@ -118,6 +120,22 @@ Xiangpeng Xiangpeng true true false false Raphael R false false true true NULL R NULL NULL NULL NULL +# test StringViewArray with LargeUtf8 columns +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = column2_large_utf8, + column2_large_utf8 = column1_utf8view, + column1_utf8view <> column2_large_utf8, + column2_large_utf8 <> column1_utf8view +from test; +---- +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL + + # StringView column to String scalar query TTBBBB select @@ -133,6 +151,21 @@ Xiangpeng Xiangpeng false false true true Raphael R false false true true NULL R NULL NULL NULL NULL +# StringView column to LargeString scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = arrow_cast('Andrew', 'LargeUtf8'), + arrow_cast('Andrew', 'LargeUtf8') = column1_utf8view, + column1_utf8view <> arrow_cast('Andrew', 'LargeUtf8'), + arrow_cast('Andrew', 'LargeUtf8') <> column1_utf8view +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + # String column to StringView scalar query TTBBBB select @@ -148,6 +181,20 @@ Xiangpeng Xiangpeng false false true true Raphael R false false true true NULL R NULL NULL NULL NULL +# LargeString column to StringView scalar 
+query TTBBBB +select + column1_utf8, column2_utf8, + column1_large_utf8 = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = column1_large_utf8, + column1_large_utf8 <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') <> column1_large_utf8 +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL ######## ## StringView to Dictionary From 9e6cd31a1fffe58ec71bcbcd645b6d672b6b7309 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Thu, 27 Jun 2024 01:31:42 +0800 Subject: [PATCH 6/7] Improve filter predicates with `Utf8View` literals (#11043) * refactor: Improve type coercion logic in TypeCoercionRewriter * refactor: Improve type coercion logic in TypeCoercionRewriter * chore * chore: Update test * refactor: Improve type coercion logic in TypeCoercionRewriter * refactor: Remove unused import and update code formatting in unwrap_cast_in_comparison.rs --- .../src/unwrap_cast_in_comparison.rs | 26 +++++++------------ .../sqllogictest/test_files/string_view.slt | 18 +++++++++++-- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index 07a946c1add9..a43c64a813b8 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -33,7 +33,7 @@ use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{internal_err, DFSchema, DFSchemaRef, Result, ScalarValue}; use datafusion_expr::expr::{BinaryExpr, Cast, InList, TryCast}; use datafusion_expr::utils::merge_schema; -use datafusion_expr::{lit, Expr, ExprSchemable, LogicalPlan, Operator}; +use datafusion_expr::{lit, Expr, ExprSchemable, LogicalPlan}; /// [`UnwrapCastInComparison`] attempts to remove casts from /// comparisons to literals ([`ScalarValue`]s) by applying the 
casts @@ -154,7 +154,7 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter { }; is_supported_type(&left_type) && is_supported_type(&right_type) - && is_comparison_op(op) + && op.is_comparison_operator() } => { match (left.as_mut(), right.as_mut()) { @@ -270,18 +270,6 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter { } } -fn is_comparison_op(op: &Operator) -> bool { - matches!( - op, - Operator::Eq - | Operator::NotEq - | Operator::Gt - | Operator::GtEq - | Operator::Lt - | Operator::LtEq - ) -} - /// Returns true if [UnwrapCastExprRewriter] supports this data type fn is_supported_type(data_type: &DataType) -> bool { is_supported_numeric_type(data_type) @@ -308,7 +296,10 @@ fn is_supported_numeric_type(data_type: &DataType) -> bool { /// Returns true if [UnwrapCastExprRewriter] supports casting this value as a string fn is_supported_string_type(data_type: &DataType) -> bool { - matches!(data_type, DataType::Utf8 | DataType::LargeUtf8) + matches!( + data_type, + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View + ) } /// Returns true if [UnwrapCastExprRewriter] supports casting this value as a dictionary @@ -481,12 +472,15 @@ fn try_cast_string_literal( target_type: &DataType, ) -> Option<ScalarValue> { let string_value = match lit_value { - ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) => s.clone(), + ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) | ScalarValue::Utf8View(s) => { + s.clone() + } _ => return None, }; let scalar_value = match target_type { DataType::Utf8 => ScalarValue::Utf8(string_value), DataType::LargeUtf8 => ScalarValue::LargeUtf8(string_value), + DataType::Utf8View => ScalarValue::Utf8View(string_value), _ => return None, }; Some(scalar_value) diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 7c9fbf4735fb..3ba4e271c2f6 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -273,12 +273,18 @@ logical_plan
02)--Filter: test.column1_utf8view = Utf8View("Andrew") 03)----TableScan: test projection=[column1_utf8, column1_utf8view] -# should not be casting the column: https://github.com/apache/datafusion/issues/10998 query TT explain SELECT column1_utf8 from test where column1_utf8 = arrow_cast('Andrew', 'Utf8View'); ---- logical_plan -01)Filter: CAST(test.column1_utf8 AS Utf8View) = Utf8View("Andrew") +01)Filter: test.column1_utf8 = Utf8("Andrew") +02)--TableScan: test projection=[column1_utf8] + +query TT +explain SELECT column1_utf8 from test where arrow_cast('Andrew', 'Utf8View') = column1_utf8; +---- +logical_plan +01)Filter: test.column1_utf8 = Utf8("Andrew") 02)--TableScan: test projection=[column1_utf8] query TT @@ -289,6 +295,14 @@ logical_plan 02)--Filter: test.column1_utf8view = Utf8View("Andrew") 03)----TableScan: test projection=[column1_utf8, column1_utf8view] +query TT +explain SELECT column1_utf8 from test where arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') = column1_utf8view; +---- +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = Utf8View("Andrew") +03)----TableScan: test projection=[column1_utf8, column1_utf8view] + # compare string / stringview # Should cast string -> stringview (which is cheap), not stringview -> string (which is not) query TT From 2f0a7ecedad3805aa948d613a2d96c2744cd560a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 10 Jul 2024 17:03:08 -0400 Subject: [PATCH 7/7] Remove arrow-patch --- Cargo.toml | 16 ------------ datafusion-cli/Cargo.lock | 55 --------------------------------------- datafusion-cli/Cargo.toml | 15 ----------- 3 files changed, 86 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c0cca4c7f572..6dd434abc87c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -153,19 +153,3 @@ large_futures = "warn" [workspace.lints.rust] unused_imports = "deny" - -## Temporary arrow-rs patch until 52.1.0 is released - -[patch.crates-io] -arrow = { git = 
"https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 5f6f6960c6b6..8af42cb43932 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -4344,58 +4344,3 @@ dependencies = [ "cc", "pkg-config", ] - -[[patch.unused]] -name = "arrow" -version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" - -[[patch.unused]] -name = "arrow-array" -version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" - 
-[[patch.unused]] -name = "arrow-buffer" -version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" - -[[patch.unused]] -name = "arrow-cast" -version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" - -[[patch.unused]] -name = "arrow-data" -version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" - -[[patch.unused]] -name = "arrow-ipc" -version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" - -[[patch.unused]] -name = "arrow-ord" -version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" - -[[patch.unused]] -name = "arrow-schema" -version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" - -[[patch.unused]] -name = "arrow-select" -version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" - -[[patch.unused]] -name = "arrow-string" -version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" - -[[patch.unused]] -name = "parquet" -version = "52.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=d0a88c651991b7fc4b970cf94fa77f4ec3def22d#d0a88c651991b7fc4b970cf94fa77f4ec3def22d" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index d0085c228db4..860dc123fa94 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -62,18 
+62,3 @@ assert_cmd = "2.0" ctor = "0.2.0" predicates = "3.0" rstest = "0.17" - -## Temporary arrow-rs patch until 52.1.0 is released - -[patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }