From 54135bc6fe87dea21edb8e6c4f78a5d15d0de369 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Wed, 6 Nov 2024 16:56:47 +0100 Subject: [PATCH] refactor(rust): Delegate feature flags for polars-stream (#19659) --- Cargo.lock | 1 - crates/polars-lazy/Cargo.toml | 26 +++++++++++++++---- crates/polars-python/Cargo.toml | 2 +- crates/polars-python/src/functions/lazy.rs | 1 + crates/polars-python/src/functions/misc.rs | 1 + crates/polars-python/src/lazyframe/general.rs | 1 + .../src/lazyframe/visitor/expr_nodes.rs | 2 +- .../src/lazyframe/visitor/nodes.rs | 1 + crates/polars-stream/Cargo.toml | 14 ++++++---- .../polars-stream/src/nodes/io_sinks/mod.rs | 1 + crates/polars-stream/src/nodes/mod.rs | 1 + crates/polars-stream/src/physical_plan/fmt.rs | 8 ++++++ .../src/physical_plan/lower_ir.rs | 2 ++ .../src/physical_plan/to_graph.rs | 2 ++ crates/polars/Cargo.toml | 1 + 15 files changed, 51 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 51d28defc357..5176bd831139 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3191,7 +3191,6 @@ dependencies = [ "polars-ops", "polars-parquet", "polars-plan", - "polars-stream", "polars-time", "polars-utils", "pyo3", diff --git a/crates/polars-lazy/Cargo.toml b/crates/polars-lazy/Cargo.toml index 78f8274fb079..3f8c64dd1970 100644 --- a/crates/polars-lazy/Cargo.toml +++ b/crates/polars-lazy/Cargo.toml @@ -47,6 +47,7 @@ parquet = [ "polars-pipe?/parquet", "polars-expr/parquet", "polars-mem-engine/parquet", + "polars-stream?/parquet", ] async = [ "polars-plan/async", @@ -54,11 +55,26 @@ async = [ "polars-pipe?/async", "polars-mem-engine/async", ] -cloud = ["async", "polars-pipe?/cloud", "polars-plan/cloud", "tokio", "futures", "polars-mem-engine/cloud"] +cloud = [ + "async", + "polars-pipe?/cloud", + "polars-plan/cloud", + "tokio", + "futures", + "polars-mem-engine/cloud", + "polars-stream?/cloud", +] cloud_write = ["cloud"] -ipc = ["polars-io/ipc", "polars-plan/ipc", "polars-pipe?/ipc", "polars-mem-engine/ipc"] -json = ["polars-io/json", "polars-plan/json", "polars-json", "polars-pipe?/json", "polars-mem-engine/json"] -csv = ["polars-io/csv", "polars-plan/csv", "polars-pipe?/csv", "polars-mem-engine/csv"] +ipc = ["polars-io/ipc", "polars-plan/ipc", "polars-pipe?/ipc", "polars-mem-engine/ipc", "polars-stream?/ipc"] +json = [ + "polars-io/json", + "polars-plan/json", + "polars-json", + "polars-pipe?/json", + "polars-mem-engine/json", + "polars-stream?/json", +] +csv = ["polars-io/csv", "polars-plan/csv", "polars-pipe?/csv", "polars-mem-engine/csv", "polars-stream?/csv"] temporal = [ "dtype-datetime", "dtype-date", @@ -223,7 +239,7 @@ string_reverse = ["polars-plan/string_reverse"] string_to_integer = ["polars-plan/string_to_integer"] arg_where = ["polars-plan/arg_where"] search_sorted = ["polars-plan/search_sorted"] -merge_sorted = ["polars-plan/merge_sorted"] +merge_sorted = ["polars-plan/merge_sorted", "polars-stream?/merge_sorted"] meta = ["polars-plan/meta"] pivot = ["polars-core/rows", "polars-ops/pivot", "polars-plan/pivot"] top_k = ["polars-plan/top_k"] diff --git a/crates/polars-python/Cargo.toml b/crates/polars-python/Cargo.toml index 16af7a3071df..1f1624fa7b0f 100644 --- a/crates/polars-python/Cargo.toml +++ b/crates/polars-python/Cargo.toml @@ -22,7 +22,7 @@ polars-time = { workspace = true } polars-utils = { workspace = true } # TODO! remove this once truly activated. This is required to make sdist building work -polars-stream = { workspace = true } +# polars-stream = { workspace = true } ahash = { workspace = true } arboard = { workspace = true, optional = true } diff --git a/crates/polars-python/src/functions/lazy.rs b/crates/polars-python/src/functions/lazy.rs index 24db48144508..d3ebb376d10f 100644 --- a/crates/polars-python/src/functions/lazy.rs +++ b/crates/polars-python/src/functions/lazy.rs @@ -469,6 +469,7 @@ pub fn lit(value: &Bound<'_, PyAny>, allow_object: bool, is_scalar: bool) -> PyR ) })?; match av { + #[cfg(feature = "object")] AnyValue::ObjectOwned(_) => { let s = Python::with_gil(|py| { PySeries::new_object(py, "", vec![ObjectValue::from(value.into_py(py))], false) diff --git a/crates/polars-python/src/functions/misc.rs b/crates/polars-python/src/functions/misc.rs index 2ade770d728e..1df25a222b16 100644 --- a/crates/polars-python/src/functions/misc.rs +++ b/crates/polars-python/src/functions/misc.rs @@ -66,5 +66,6 @@ pub fn register_plugin_function( #[pyfunction] pub fn __register_startup_deps() { + #[cfg(feature = "object")] crate::on_startup::register_startup_deps() } diff --git a/crates/polars-python/src/lazyframe/general.rs b/crates/polars-python/src/lazyframe/general.rs index 13529cfd9d1f..fd89884ece82 100644 --- a/crates/polars-python/src/lazyframe/general.rs +++ b/crates/polars-python/src/lazyframe/general.rs @@ -1130,6 +1130,7 @@ impl PyLazyFrame { ldf.tail(n).into() } + #[cfg(feature = "pivot")] #[pyo3(signature = (on, index, value_name, variable_name))] fn unpivot( &self, diff --git a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs index 06a98e3fe970..e3425b52ccd9 100644 --- a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs @@ -973,6 +973,7 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { StringFunction::ExtractMany { .. } => { return Err(PyNotImplementedError::new_err("extract_many")) }, + #[cfg(feature = "regex")] StringFunction::EscapeRegex => { (PyStringFunction::EscapeRegex.into_py(py),).to_object(py) }, @@ -1221,7 +1222,6 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { FunctionExpr::Mode => ("mode",).to_object(py), FunctionExpr::Skew(bias) => ("skew", bias).to_object(py), FunctionExpr::Kurtosis(fisher, bias) => ("kurtosis", fisher, bias).to_object(py), - #[cfg(feature = "dtype-array")] FunctionExpr::Reshape(_) => return Err(PyNotImplementedError::new_err("reshape")), #[cfg(feature = "repeat_by")] FunctionExpr::RepeatBy => ("repeat_by",).to_object(py), diff --git a/crates/polars-python/src/lazyframe/visitor/nodes.rs b/crates/polars-python/src/lazyframe/visitor/nodes.rs index 28c5e459b1e5..05a56d920719 100644 --- a/crates/polars-python/src/lazyframe/visitor/nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/nodes.rs @@ -584,6 +584,7 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult { columns.iter().map(|s| s.to_string()).collect::>(), ) .to_object(py), + #[cfg(feature = "pivot")] FunctionIR::Unpivot { args, schema: _ } => ( "unpivot", args.index.iter().map(|s| s.as_str()).collect::>(), diff --git a/crates/polars-stream/Cargo.toml b/crates/polars-stream/Cargo.toml index fc130a035140..c40f477ff741 100644 --- a/crates/polars-stream/Cargo.toml +++ b/crates/polars-stream/Cargo.toml @@ -16,7 +16,7 @@ futures = { workspace = true } memmap = { workspace = true } parking_lot = { workspace = true } pin-project-lite = { workspace = true } -polars-io = { workspace = true, features = ["async", "cloud", "aws"] } +polars-io = { workspace = true } polars-utils = { workspace = true } rand = { workspace = true } rayon = { workspace = true } @@ -26,11 +26,10 @@ tokio = { workspace = true } polars-core = { workspace = true } polars-error = { workspace = true } -polars-expr = { workspace = true, features = ["dtype-full"] } -# TODO: feature gate -polars-mem-engine = { workspace = true, features = ["parquet", "csv", "json", "ipc", "cloud", "python", "dtype-categorical", "dtype-i8", "dtype-i16", "dtype-u8", "dtype-u16", "dtype-decimal", "dtype-struct", "object"] } +polars-expr = { workspace = true } +polars-mem-engine = { workspace = true } polars-parquet = { workspace = true } -polars-plan = { workspace = true, features = ["parquet", "csv", "json", "ipc", "cloud", "python", "serde", "dtype-categorical", "dtype-i8", "dtype-i16", "dtype-u8", "dtype-u16", "dtype-decimal", "dtype-struct", "object"] } +polars-plan = { workspace = true } [build-dependencies] version_check = { workspace = true } @@ -41,6 +40,11 @@ bitwise = ["polars-core/bitwise", "polars-plan/bitwise", "polars-expr/bitwise"] merge_sorted = ["polars-plan/merge_sorted"] dynamic_group_by = [] strings = [] +ipc = ["polars-mem-engine/ipc", "polars-plan/ipc"] +parquet = ["polars-mem-engine/parquet", "polars-plan/parquet"] +csv = ["polars-mem-engine/csv", "polars-plan/csv"] +json = ["polars-mem-engine/json", "polars-plan/json"] +cloud = ["polars-mem-engine/cloud", "polars-plan/cloud", "polars-io/cloud"] # We need to specify default features here to match workspace defaults. # Otherwise we get warnings with cargo check/clippy. diff --git a/crates/polars-stream/src/nodes/io_sinks/mod.rs b/crates/polars-stream/src/nodes/io_sinks/mod.rs index ce14ad3b0f7a..cc1682199a2a 100644 --- a/crates/polars-stream/src/nodes/io_sinks/mod.rs +++ b/crates/polars-stream/src/nodes/io_sinks/mod.rs @@ -1 +1,2 @@ +#[cfg(feature = "ipc")] pub mod ipc; diff --git a/crates/polars-stream/src/nodes/mod.rs b/crates/polars-stream/src/nodes/mod.rs index 559e4717c4e9..4fb42daddd6b 100644 --- a/crates/polars-stream/src/nodes/mod.rs +++ b/crates/polars-stream/src/nodes/mod.rs @@ -8,6 +8,7 @@ pub mod io_sinks; pub mod map; pub mod multiplexer; pub mod ordered_union; +#[cfg(feature = "parquet")] pub mod parquet_source; pub mod reduce; pub mod select; diff --git a/crates/polars-stream/src/physical_plan/fmt.rs b/crates/polars-stream/src/physical_plan/fmt.rs index ed0f08a0d48f..e0735144da79 100644 --- a/crates/polars-stream/src/physical_plan/fmt.rs +++ b/crates/polars-stream/src/physical_plan/fmt.rs @@ -99,9 +99,13 @@ fn visualize_plan_rec( PhysNodeKind::FileSink { input, file_type, .. } => match file_type { + #[cfg(feature = "parquet")] FileType::Parquet(_) => ("parquet-sink".to_string(), from_ref(input)), + #[cfg(feature = "ipc")] FileType::Ipc(_) => ("ipc-sink".to_string(), from_ref(input)), + #[cfg(feature = "csv")] FileType::Csv(_) => ("csv-sink".to_string(), from_ref(input)), + #[cfg(feature = "json")] FileType::Json(_) => ("json-sink".to_string(), from_ref(input)), }, PhysNodeKind::InMemoryMap { input, map: _ } => { @@ -140,9 +144,13 @@ fn visualize_plan_rec( file_options, } => { let name = match scan_type { + #[cfg(feature = "parquet")] FileScan::Parquet { .. } => "parquet-source", + #[cfg(feature = "csv")] FileScan::Csv { .. } => "csv-source", + #[cfg(feature = "ipc")] FileScan::Ipc { .. } => "ipc-source", + #[cfg(feature = "json")] FileScan::NDJson { .. } => "ndjson-source", FileScan::Anonymous { .. } => "anonymous-source", }; diff --git a/crates/polars-stream/src/physical_plan/lower_ir.rs b/crates/polars-stream/src/physical_plan/lower_ir.rs index 485bbf03a7fe..d57a8667c479 100644 --- a/crates/polars-stream/src/physical_plan/lower_ir.rs +++ b/crates/polars-stream/src/physical_plan/lower_ir.rs @@ -212,6 +212,7 @@ pub fn lower_ir( let file_type = file_type.clone(); match file_type { + #[cfg(feature = "ipc")] FileType::Ipc(_) => { let phys_input = lower_ir!(*input)?; PhysNodeKind::FileSink { @@ -223,6 +224,7 @@ pub fn lower_ir( _ => todo!(), } }, + #[cfg(feature = "cloud")] SinkType::Cloud { .. } => todo!(), }, diff --git a/crates/polars-stream/src/physical_plan/to_graph.rs b/crates/polars-stream/src/physical_plan/to_graph.rs index d9253e48dfa5..472cf982a253 100644 --- a/crates/polars-stream/src/physical_plan/to_graph.rs +++ b/crates/polars-stream/src/physical_plan/to_graph.rs @@ -213,6 +213,7 @@ fn to_graph_rec<'a>( let input_key = to_graph_rec(*input, ctx)?; match file_type { + #[cfg(feature = "ipc")] FileType::Ipc(ipc_writer_options) => ctx.graph.add_node( nodes::io_sinks::ipc::IpcSinkNode::new(input_schema, path, ipc_writer_options)?, [input_key], @@ -341,6 +342,7 @@ fn to_graph_rec<'a>( use polars_plan::prelude::FileScan; match scan_type { + #[cfg(feature = "parquet")] FileScan::Parquet { options, cloud_options, diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index 685ed71d8306..490be628ff7f 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -311,6 +311,7 @@ dtype-array = [ "polars-core/dtype-array", "polars-lazy?/dtype-array", "polars-ops/dtype-array", + "polars-plan?/dtype-array", ] dtype-i8 = [ "polars-core/dtype-i8",