From 22d7d6e33550357b82716c327228b5f2901c146d Mon Sep 17 00:00:00 2001 From: kazdy Date: Sat, 21 Sep 2024 13:12:20 +0200 Subject: [PATCH 1/4] build(deps): Upgrade datafusion from 41.0.0 to 42.0.0 --- python/Cargo.toml | 4 ++-- python/pyproject.toml | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/python/Cargo.toml b/python/Cargo.toml index e992c70..9033667 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -46,5 +46,5 @@ futures = { workspace = true } tokio = { workspace = true } [dependencies.pyo3] -version = "0.21.2" -features = ["extension-module", "abi3", "abi3-py38", "gil-refs", "anyhow"] +version = "0.22.2" +features = ["extension-module", "abi3", "abi3-py39", "anyhow"] diff --git a/python/pyproject.toml b/python/pyproject.toml index 367cf46..9d076da 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -23,19 +23,18 @@ build-backend = "maturin" name = "hudi" description = "Native Python binding for Apache Hudi, based on hudi-rs." urls = { repository = "https://github.com/apache/hudi-rs/tree/main/python/" } -requires-python = ">=3.8" +requires-python = ">=3.9" keywords = ["apachehudi", "hudi", "datalake", "arrow"] license = "Apache License 2.0" classifiers = [ "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12" ] dependencies = [ - "pyarrow>=8", + "pyarrow>=17", "pyarrow-hotfix", ] @@ -52,7 +51,7 @@ dynamic = ["version"] module-name = "hudi._internal" [tool.ruff] -target-version = 'py38' +target-version = 'py39' # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. 
lint.select = [ "E4", From c9d0d04683f02cd62202c74d170781e3b700b3c8 Mon Sep 17 00:00:00 2001 From: kazdy Date: Sat, 21 Sep 2024 13:22:37 +0200 Subject: [PATCH 2/4] build(deps): Upgrade datafusion from 41.0.0 to 42.0.0 --- Cargo.toml | 34 +++++++++++++++++----------------- crates/datafusion/src/lib.rs | 10 +++++----- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a48d003..024ba94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,25 +35,25 @@ repository = "https://github.com/apache/hudi-rs" [workspace.dependencies] # arrow -arrow = { version = "= 52.2.0", features = ["pyarrow"] } -arrow-arith = { version = "= 52.2.0" } -arrow-array = { version = "= 52.2.0" } -arrow-buffer = { version = "= 52.2.0" } -arrow-cast = { version = "= 52.2.0" } -arrow-ipc = { version = "= 52.2.0" } -arrow-json = { version = "= 52.2.0" } -arrow-ord = { version = "= 52.2.0" } -arrow-row = { version = "= 52.2.0" } -arrow-schema = { version = "= 52.2.0", features = ["serde"] } -arrow-select = { version = "= 52.2.0" } -object_store = { version = "= 0.10.2", features = ["aws", "azure", "gcp"] } -parquet = { version = "= 52.2.0", features = ["async", "object_store"] } +arrow = { version = "= 53.0.0", features = ["pyarrow"] } +arrow-arith = { version = "= 53.0.0" } +arrow-array = { version = "= 53.0.0" } +arrow-buffer = { version = "= 53.0.0" } +arrow-cast = { version = "= 53.0.0" } +arrow-ipc = { version = "= 53.0.0" } +arrow-json = { version = "= 53.0.0" } +arrow-ord = { version = "= 53.0.0" } +arrow-row = { version = "= 53.0.0" } +arrow-schema = { version = "= 53.0.0", features = ["serde"] } +arrow-select = { version = "= 53.0.0" } +object_store = { version = "= 0.11.0", features = ["aws", "azure", "gcp"] } +parquet = { version = "= 53.0.0", features = ["async", "object_store"] } # datafusion -datafusion = { version = "= 41.0.0" } -datafusion-expr = { version = "= 41.0.0" } -datafusion-common = { version = "= 41.0.0" } -datafusion-physical-expr = { version 
= "= 41.0.0" } +datafusion = { version = "= 42.0.0" } +datafusion-expr = { version = "= 42.0.0" } +datafusion-common = { version = "= 42.0.0" } +datafusion-physical-expr = { version = "= 42.0.0" } # serde serde = { version = "1.0.203", features = ["derive"] } diff --git a/crates/datafusion/src/lib.rs b/crates/datafusion/src/lib.rs index 8c4168f..3e2fdaf 100644 --- a/crates/datafusion/src/lib.rs +++ b/crates/datafusion/src/lib.rs @@ -178,7 +178,7 @@ mod tests { ) -> SessionContext { let config = SessionConfig::new().set( "datafusion.sql_parser.enable_ident_normalization", - ScalarValue::from(false), + &ScalarValue::from(false), ); let ctx = SessionContext::new_with_config(config); let base_url = test_table.url(); @@ -201,16 +201,16 @@ mod tests { let explaining_rb = explaining_rb.first().unwrap(); let plan = get_str_column(explaining_rb, "plan").join(""); let plan_lines: Vec<&str> = plan.lines().map(str::trim).collect(); - assert!(plan_lines[2].starts_with("SortExec: TopK(fetch=10)")); - assert!(plan_lines[3].starts_with(&format!( + assert!(plan_lines[1].starts_with("SortExec: TopK(fetch=10)")); + assert!(plan_lines[2].starts_with(&format!( "ProjectionExec: expr=[id@0 as id, name@1 as name, isActive@2 as isActive, \ get_field(structField@3, field2) as {}.structField[field2]]", table_name ))); - assert!(plan_lines[5].starts_with( + assert!(plan_lines[4].starts_with( "FilterExec: CAST(id@0 AS Int64) % 2 = 0 AND get_field(structField@3, field2) > 30" )); - assert!(plan_lines[6].contains(&format!("input_partitions={}", planned_input_partitioned))); + assert!(plan_lines[5].contains(&format!("input_partitions={}", planned_input_partitioned))); } async fn verify_data(ctx: &SessionContext, sql: &str, table_name: &str) { From 94b0b69c19b4254be9ad45474af88f79ad45ab7c Mon Sep 17 00:00:00 2001 From: kazdy Date: Sun, 22 Sep 2024 11:37:27 +0200 Subject: [PATCH 3/4] chore: use python 3.9 in ci --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7061dd7..d7e08f6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -77,10 +77,10 @@ jobs: matrix: # TODO: add windows os: [ ubuntu-22.04, macos-14 ] - python-version: [ '3.8', '3.12' ] + python-version: [ '3.9', '3.12' ] exclude: - os: macos-14 - python-version: '3.8' + python-version: '3.9' runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 From 12464895b3e13337d2673d9b06dc290afddf2bac Mon Sep 17 00:00:00 2001 From: kazdy Date: Sun, 22 Sep 2024 19:24:45 +0200 Subject: [PATCH 4/4] build(deps): use pyarrow >=11 --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 9d076da..de68f5f 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -34,7 +34,7 @@ classifiers = [ "Programming Language :: Python :: 3.12" ] dependencies = [ - "pyarrow>=17", + "pyarrow>=11.0.0", "pyarrow-hotfix", ]