Skip to content

Commit

Permalink
Upgrade deps to datafusion 41 (#802)
Browse files Browse the repository at this point in the history
* update datafusion deps to point to github.com/apache/datafusion

Datafusion 41 is not yet released on crates.io.

* update TableProvider::scan

Ref: apache/datafusion#11516

* use SessionStateBuilder

The old constructor is deprecated.

Ref: apache/datafusion#11403

* update AggregateFunction

Upstream Changes:
- The field name was switched from `func_name` to `func`.
- AggregateFunctionDefinition was removed

Ref: apache/datafusion#11803

* update imports in catalog

The Catalog API was extracted to a separate crate.

Ref: apache/datafusion#11516

* use appropriate path for approx_distinct

Ref: apache/datafusion#11644

* migrate AggregateExt to ExprFunctionExt

Also removed `sqlparser` dependency since it's re-exported upstream.

Ref: apache/datafusion#11550

* update regr_count tests for new return type

Ref: apache/datafusion#11731

* migrate from functions-array to functions-nested

The package was renamed upstream.

Ref: apache/datafusion#11602

* cargo fmt

* lock datafusion deps to 41

* remove todo from cargo.toml

All the datafusion dependencies are re-exported, but I still need to figure out *why*.
  • Loading branch information
Michael-J-Ward authored Aug 23, 2024
1 parent 805183b commit 3b5085e
Show file tree
Hide file tree
Showing 10 changed files with 126 additions and 95 deletions.
108 changes: 72 additions & 36 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 7 additions & 8 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ tokio = { version = "1.39", features = ["macros", "rt", "rt-multi-thread", "sync
rand = "0.8"
pyo3 = { version = "0.21", features = ["extension-module", "abi3", "abi3-py38"] }
arrow = { version = "52", feature = ["pyarrow"] }
datafusion = { version = "40.0.0", features = ["pyarrow", "avro", "unicode_expressions"] }
datafusion-common = { version = "40.0.0", features = ["pyarrow"] }
datafusion-expr = "40.0.0"
datafusion-functions-array = "40.0.0"
datafusion-optimizer = "40.0.0"
datafusion-sql = "40.0.0"
datafusion-substrait = { version = "40.0.0", optional = true }
datafusion = { version = "41.0.0", features = ["pyarrow", "avro", "unicode_expressions"] }
datafusion-common = { version = "41.0.0", features = ["pyarrow"] }
datafusion-expr = { version = "41.0.0" }
datafusion-functions-nested = { version = "41.0.0" }
datafusion-optimizer = { version = "41.0.0" }
datafusion-sql = { version = "41.0.0" }
datafusion-substrait = { version = "41.0.0", optional = true }
prost = "0.12" # keep in line with `datafusion-substrait`
prost-types = "0.12" # keep in line with `datafusion-substrait`
uuid = { version = "1.9", features = ["v4"] }
Expand All @@ -56,7 +56,6 @@ parking_lot = "0.12"
regex-syntax = "0.8"
syn = "2.0.68"
url = "2"
sqlparser = "0.47.0"

[build-dependencies]
pyo3-build-config = "0.21"
Expand Down
6 changes: 3 additions & 3 deletions python/datafusion/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ def test_regr_funcs_sql(df):

assert result[0].column(0) == pa.array([None], type=pa.float64())
assert result[0].column(1) == pa.array([None], type=pa.float64())
assert result[0].column(2) == pa.array([1], type=pa.float64())
assert result[0].column(2) == pa.array([1], type=pa.uint64())
assert result[0].column(3) == pa.array([None], type=pa.float64())
assert result[0].column(4) == pa.array([1], type=pa.float64())
assert result[0].column(5) == pa.array([1], type=pa.float64())
Expand Down Expand Up @@ -840,7 +840,7 @@ def test_regr_funcs_sql_2():
# Assertions for SQL results
assert result_sql[0].column(0) == pa.array([2], type=pa.float64())
assert result_sql[0].column(1) == pa.array([0], type=pa.float64())
assert result_sql[0].column(2) == pa.array([3], type=pa.float64()) # todo: i would not expect this to be float
assert result_sql[0].column(2) == pa.array([3], type=pa.uint64())
assert result_sql[0].column(3) == pa.array([1], type=pa.float64())
assert result_sql[0].column(4) == pa.array([2], type=pa.float64())
assert result_sql[0].column(5) == pa.array([4], type=pa.float64())
Expand All @@ -852,7 +852,7 @@ def test_regr_funcs_sql_2():
@pytest.mark.parametrize("func, expected", [
pytest.param(f.regr_slope, pa.array([2], type=pa.float64()), id="regr_slope"),
pytest.param(f.regr_intercept, pa.array([0], type=pa.float64()), id="regr_intercept"),
pytest.param(f.regr_count, pa.array([3], type=pa.float64()), id="regr_count"), # TODO: I would expect this to return an int array
pytest.param(f.regr_count, pa.array([3], type=pa.uint64()), id="regr_count"),
pytest.param(f.regr_r2, pa.array([1], type=pa.float64()), id="regr_r2"),
pytest.param(f.regr_avgx, pa.array([2], type=pa.float64()), id="regr_avgx"),
pytest.param(f.regr_avgy, pa.array([4], type=pa.float64()), id="regr_avgy"),
Expand Down
2 changes: 1 addition & 1 deletion src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::errors::DataFusionError;
use crate::utils::wait_for_future;
use datafusion::{
arrow::pyarrow::ToPyArrow,
catalog::{schema::SchemaProvider, CatalogProvider},
catalog::{CatalogProvider, SchemaProvider},
datasource::{TableProvider, TableType},
};

Expand Down
17 changes: 9 additions & 8 deletions src/common/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
use datafusion::arrow::array::Array;
use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
use datafusion_common::{DataFusionError, ScalarValue};
use datafusion_expr::sqlparser::ast::NullTreatment as DFNullTreatment;
use pyo3::{exceptions::PyValueError, prelude::*};

use crate::errors::py_datafusion_err;
Expand Down Expand Up @@ -775,20 +776,20 @@ pub enum NullTreatment {
RESPECT_NULLS,
}

impl From<NullTreatment> for sqlparser::ast::NullTreatment {
fn from(null_treatment: NullTreatment) -> sqlparser::ast::NullTreatment {
impl From<NullTreatment> for DFNullTreatment {
fn from(null_treatment: NullTreatment) -> DFNullTreatment {
match null_treatment {
NullTreatment::IGNORE_NULLS => sqlparser::ast::NullTreatment::IgnoreNulls,
NullTreatment::RESPECT_NULLS => sqlparser::ast::NullTreatment::RespectNulls,
NullTreatment::IGNORE_NULLS => DFNullTreatment::IgnoreNulls,
NullTreatment::RESPECT_NULLS => DFNullTreatment::RespectNulls,
}
}
}

impl From<sqlparser::ast::NullTreatment> for NullTreatment {
fn from(null_treatment: sqlparser::ast::NullTreatment) -> NullTreatment {
impl From<DFNullTreatment> for NullTreatment {
fn from(null_treatment: DFNullTreatment) -> NullTreatment {
match null_treatment {
sqlparser::ast::NullTreatment::IgnoreNulls => NullTreatment::IGNORE_NULLS,
sqlparser::ast::NullTreatment::RespectNulls => NullTreatment::RESPECT_NULLS,
DFNullTreatment::IgnoreNulls => NullTreatment::IGNORE_NULLS,
DFNullTreatment::RespectNulls => NullTreatment::RESPECT_NULLS,
}
}
}
Loading

0 comments on commit 3b5085e

Please sign in to comment.