From 6ed624f5a7f0682311792f5f7d800509c49665d4 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 12 Jun 2024 15:43:24 +0200 Subject: [PATCH] refactor: Refactor parts of IR. (#16899) --- crates/polars-io/src/csv/read/options.rs | 4 ++-- crates/polars-io/src/csv/read/read_impl.rs | 2 +- crates/polars-io/src/ipc/ipc_reader_async.rs | 6 +++--- .../src/physical_plan/executors/python_scan.rs | 3 +-- .../src/physical_plan/executors/scan/ipc.rs | 9 ++------- .../src/physical_plan/executors/scan/mod.rs | 8 +++----- .../src/physical_plan/executors/scan/parquet.rs | 6 +----- crates/polars-lazy/src/physical_plan/planner/lp.rs | 2 +- .../physical_plan/streaming/construct_pipeline.rs | 2 +- crates/polars-lazy/src/tests/optimization_checks.rs | 6 +++--- crates/polars-pipe/src/executors/sources/parquet.rs | 13 +++---------- crates/polars-pipe/src/pipeline/convert.rs | 4 ++-- crates/polars-plan/src/dsl/meta.rs | 2 +- .../polars-plan/src/logical_plan/anonymous_scan.rs | 2 +- crates/polars-plan/src/logical_plan/builder_dsl.rs | 2 +- .../src/logical_plan/conversion/dsl_to_ir.rs | 6 +++--- .../polars-plan/src/logical_plan/conversion/mod.rs | 4 ++-- .../polars-plan/src/logical_plan/{alp => ir}/dot.rs | 10 +++++----- .../src/logical_plan/{alp => ir}/format.rs | 2 +- .../src/logical_plan/{alp => ir}/inputs.rs | 8 +++++--- .../polars-plan/src/logical_plan/{alp => ir}/mod.rs | 4 ++-- .../src/logical_plan/{alp => ir}/schema.rs | 0 .../src/logical_plan/{alp => ir}/tree_format.rs | 6 +++--- crates/polars-plan/src/logical_plan/mod.rs | 12 ++++++------ .../logical_plan/optimizer/cluster_with_columns.rs | 2 +- .../src/logical_plan/optimizer/count_star.rs | 2 +- .../optimizer/predicate_pushdown/mod.rs | 6 +++--- .../optimizer/projection_pushdown/mod.rs | 8 ++++---- .../src/logical_plan/optimizer/stack_opt.rs | 2 +- crates/polars-plan/src/logical_plan/options.rs | 4 ++-- crates/polars-plan/src/logical_plan/visitor/hash.rs | 6 +++--- crates/polars/tests/it/io/csv.rs | 4 +--- py-polars/polars/lazyframe/frame.py | 2 +- py-polars/src/batched_csv.rs | 2 +- py-polars/src/dataframe/io.rs | 2 +- py-polars/src/lazyframe/visitor/nodes.rs | 2 +- 36 files changed, 73 insertions(+), 92 deletions(-) rename crates/polars-plan/src/logical_plan/{alp => ir}/dot.rs (98%) rename crates/polars-plan/src/logical_plan/{alp => ir}/format.rs (99%) rename crates/polars-plan/src/logical_plan/{alp => ir}/inputs.rs (98%) rename crates/polars-plan/src/logical_plan/{alp => ir}/mod.rs (98%) rename crates/polars-plan/src/logical_plan/{alp => ir}/schema.rs (100%) rename crates/polars-plan/src/logical_plan/{alp => ir}/tree_format.rs (99%) diff --git a/crates/polars-io/src/csv/read/options.rs b/crates/polars-io/src/csv/read/options.rs index 3338eec7cd49..97db6df3abf8 100644 --- a/crates/polars-io/src/csv/read/options.rs +++ b/crates/polars-io/src/csv/read/options.rs @@ -21,7 +21,7 @@ pub struct CsvReadOptions { pub n_rows: Option, pub row_index: Option, // Column-wise options - pub columns: Option>>, + pub columns: Option>, pub projection: Option>>, pub schema: Option, pub schema_overwrite: Option, @@ -145,7 +145,7 @@ impl CsvReadOptions { } /// Which columns to select. - pub fn with_columns(mut self, columns: Option>>) -> Self { + pub fn with_columns(mut self, columns: Option>) -> Self { self.columns = columns; self } diff --git a/crates/polars-io/src/csv/read/read_impl.rs b/crates/polars-io/src/csv/read/read_impl.rs index 3fa509189a60..d0248941b8c8 100644 --- a/crates/polars-io/src/csv/read/read_impl.rs +++ b/crates/polars-io/src/csv/read/read_impl.rs @@ -144,7 +144,7 @@ impl<'a> CoreReader<'a> { has_header: bool, ignore_errors: bool, schema: Option, - columns: Option>>, + columns: Option>, encoding: CsvEncoding, mut n_threads: Option, schema_overwrite: Option, diff --git a/crates/polars-io/src/ipc/ipc_reader_async.rs b/crates/polars-io/src/ipc/ipc_reader_async.rs index 896f9223b409..f18046f550d5 100644 --- a/crates/polars-io/src/ipc/ipc_reader_async.rs +++ b/crates/polars-io/src/ipc/ipc_reader_async.rs @@ -25,7 +25,7 @@ pub struct IpcReaderAsync { #[derive(Default, Clone)] pub struct IpcReadOptions { // Names of the columns to include in the output. - projection: Option>, + projection: Option>, // The maximum number of rows to include in the output. row_limit: Option, @@ -38,8 +38,8 @@ pub struct IpcReadOptions { } impl IpcReadOptions { - pub fn with_projection(mut self, indices: impl Into>>) -> Self { - self.projection = indices.into(); + pub fn with_projection(mut self, projection: Option>) -> Self { + self.projection = projection; self } diff --git a/crates/polars-lazy/src/physical_plan/executors/python_scan.rs b/crates/polars-lazy/src/physical_plan/executors/python_scan.rs index d48f7b14fb96..b8dd3e469fc7 100644 --- a/crates/polars-lazy/src/physical_plan/executors/python_scan.rs +++ b/crates/polars-lazy/src/physical_plan/executors/python_scan.rs @@ -26,8 +26,7 @@ impl Executor for PythonScanExec { let python_scan_function = self.options.scan_fn.take().unwrap().0; - let with_columns = - with_columns.map(|mut cols| std::mem::take(Arc::make_mut(&mut cols))); + let with_columns = with_columns.map(|cols| cols.iter().cloned().collect::>()); let out = callable .call1(( diff --git a/crates/polars-lazy/src/physical_plan/executors/scan/ipc.rs b/crates/polars-lazy/src/physical_plan/executors/scan/ipc.rs index 5b8d20e511e0..eaafe10b8c9b 100644 --- a/crates/polars-lazy/src/physical_plan/executors/scan/ipc.rs +++ b/crates/polars-lazy/src/physical_plan/executors/scan/ipc.rs @@ -62,10 +62,7 @@ impl IpcExec { } let projection = materialize_projection( - self.file_options - .with_columns - .as_deref() - .map(|cols| cols.deref()), + self.file_options.with_columns.as_deref(), &self.schema, None, self.file_options.row_index.is_some(), @@ -193,9 +190,7 @@ impl IpcExec { }), ) .with_row_index(this.file_options.row_index.clone()) - .with_projection( - this.file_options.with_columns.as_deref().cloned(), - ), + .with_projection(this.file_options.with_columns.as_ref().cloned()), verbose, ) .await?; diff --git a/crates/polars-lazy/src/physical_plan/executors/scan/mod.rs b/crates/polars-lazy/src/physical_plan/executors/scan/mod.rs index bc57098c0001..c03ad56615e6 100644 --- a/crates/polars-lazy/src/physical_plan/executors/scan/mod.rs +++ b/crates/polars-lazy/src/physical_plan/executors/scan/mod.rs @@ -10,8 +10,6 @@ mod parquet; #[cfg(feature = "ipc")] mod support; use std::mem; -#[cfg(any(feature = "parquet", feature = "ipc", feature = "cse"))] -use std::ops::Deref; #[cfg(feature = "csv")] pub(crate) use csv::CsvExec; @@ -38,7 +36,7 @@ type Predicate = Option>; #[cfg(any(feature = "ipc", feature = "parquet"))] fn prepare_scan_args( predicate: Option>, - with_columns: &mut Option>>, + with_columns: &mut Option>, schema: &mut SchemaRef, has_row_index: bool, hive_partitions: Option<&[Series]>, @@ -47,7 +45,7 @@ fn prepare_scan_args( let schema = mem::take(schema); let projection = materialize_projection( - with_columns.as_deref().map(|cols| cols.deref()), + with_columns.as_deref(), &schema, hive_partitions, has_row_index, @@ -62,7 +60,7 @@ fn prepare_scan_args( pub struct DataFrameExec { pub(crate) df: Arc, pub(crate) selection: Option>, - pub(crate) projection: Option>>, + pub(crate) projection: Option>, pub(crate) predicate_has_windows: bool, } diff --git a/crates/polars-lazy/src/physical_plan/executors/scan/parquet.rs b/crates/polars-lazy/src/physical_plan/executors/scan/parquet.rs index 695cc58c98c0..53d1a8ce285c 100644 --- a/crates/polars-lazy/src/physical_plan/executors/scan/parquet.rs +++ b/crates/polars-lazy/src/physical_plan/executors/scan/parquet.rs @@ -173,11 +173,7 @@ impl ParquetExec { .unwrap_left(); let first_metadata = &self.metadata; let cloud_options = self.cloud_options.as_ref(); - let with_columns = self - .file_options - .with_columns - .as_ref() - .map(|v| v.as_slice()); + let with_columns = self.file_options.with_columns.as_ref().map(|v| v.as_ref()); let mut result = vec![]; let batch_size = get_file_prefetch_size(); diff --git a/crates/polars-lazy/src/physical_plan/planner/lp.rs b/crates/polars-lazy/src/physical_plan/planner/lp.rs index 754b0cf4d2e6..e6e29d35a2af 100644 --- a/crates/polars-lazy/src/physical_plan/planner/lp.rs +++ b/crates/polars-lazy/src/physical_plan/planner/lp.rs @@ -351,7 +351,7 @@ fn create_physical_plan_impl( DataFrameScan { df, projection, - selection: predicate, + filter: predicate, schema, .. } => { diff --git a/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs b/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs index 9b63808129ab..a6e92a6c9b6d 100644 --- a/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs +++ b/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs @@ -242,7 +242,7 @@ fn get_pipeline_node( schema: Arc::new(Schema::new()), output_schema: None, projection: None, - selection: None, + filter: None, }); IR::MapFunction { diff --git a/crates/polars-lazy/src/tests/optimization_checks.rs b/crates/polars-lazy/src/tests/optimization_checks.rs index 6ab02775cc00..5d96890fb66e 100644 --- a/crates/polars-lazy/src/tests/optimization_checks.rs +++ b/crates/polars-lazy/src/tests/optimization_checks.rs @@ -31,7 +31,7 @@ pub(crate) fn predicate_at_scan(q: LazyFrame) -> bool { matches!( lp, DataFrameScan { - selection: Some(_), + filter: Some(_), .. } | Scan { predicate: Some(_), @@ -50,7 +50,7 @@ pub(crate) fn predicate_at_all_scans(q: LazyFrame) -> bool { matches!( lp, DataFrameScan { - selection: Some(_), + filter: Some(_), .. } | Scan { predicate: Some(_), @@ -482,7 +482,7 @@ fn test_with_column_prune() -> PolarsResult<()> { match lp { DataFrameScan { projection, .. } => { let projection = projection.as_ref().unwrap(); - let projection = projection.as_slice(); + let projection = projection.as_ref(); assert_eq!(projection.len(), 1); let name = &projection[0]; assert_eq!(name, "c1"); diff --git a/crates/polars-pipe/src/executors/sources/parquet.rs b/crates/polars-pipe/src/executors/sources/parquet.rs index 79e7380c6291..0f35a5b702f6 100644 --- a/crates/polars-pipe/src/executors/sources/parquet.rs +++ b/crates/polars-pipe/src/executors/sources/parquet.rs @@ -1,5 +1,5 @@ use std::collections::VecDeque; -use std::ops::{Deref, Range}; +use std::ops::Range; use std::path::PathBuf; use std::sync::Arc; @@ -86,10 +86,7 @@ impl ParquetSource { .map(|hive| hive.materialize_partition_columns()); let projection = materialize_projection( - file_options - .with_columns - .as_deref() - .map(|cols| cols.deref()), + file_options.with_columns.as_deref(), &schema, hive_partitions.as_deref(), false, @@ -144,11 +141,7 @@ impl ParquetSource { fn finish_init_reader(&mut self, batched_reader: BatchedParquetReader) -> PolarsResult<()> { if self.processed_paths >= 1 { - let with_columns = self - .file_options - .with_columns - .as_ref() - .map(|v| v.as_slice()); + let with_columns = self.file_options.with_columns.as_ref().map(|v| v.as_ref()); check_projected_arrow_schema( batched_reader.schema().as_ref(), self.file_info diff --git a/crates/polars-pipe/src/pipeline/convert.rs b/crates/polars-pipe/src/pipeline/convert.rs index e7dcba877d7c..bcd47c59d122 100644 --- a/crates/polars-pipe/src/pipeline/convert.rs +++ b/crates/polars-pipe/src/pipeline/convert.rs @@ -54,7 +54,7 @@ where DataFrameScan { df, projection, - selection, + filter: selection, output_schema, .. } => { @@ -68,7 +68,7 @@ where } // projection is free if let Some(projection) = projection { - df = df.select(projection.as_slice())?; + df = df.select(projection.as_ref())?; } } Ok(Box::new(sources::DataFrameSource::from_df(df)) as Box) diff --git a/crates/polars-plan/src/dsl/meta.rs b/crates/polars-plan/src/dsl/meta.rs index e4efaf9f0f60..9b48d4c76f1b 100644 --- a/crates/polars-plan/src/dsl/meta.rs +++ b/crates/polars-plan/src/dsl/meta.rs @@ -2,8 +2,8 @@ use std::fmt::Display; use std::ops::BitAnd; use super::*; -use crate::logical_plan::alp::tree_format::TreeFmtVisitor; use crate::logical_plan::conversion::is_regex_projection; +use crate::logical_plan::ir::tree_format::TreeFmtVisitor; use crate::logical_plan::visitor::{AexprNode, TreeWalker}; /// Specialized expressions for Categorical dtypes. diff --git a/crates/polars-plan/src/logical_plan/anonymous_scan.rs b/crates/polars-plan/src/logical_plan/anonymous_scan.rs index 7943a1c8ebc4..203a17e21403 100644 --- a/crates/polars-plan/src/logical_plan/anonymous_scan.rs +++ b/crates/polars-plan/src/logical_plan/anonymous_scan.rs @@ -8,7 +8,7 @@ use crate::dsl::Expr; pub struct AnonymousScanArgs { pub n_rows: Option, - pub with_columns: Option>>, + pub with_columns: Option>, pub schema: SchemaRef, pub output_schema: Option, pub predicate: Option, diff --git a/crates/polars-plan/src/logical_plan/builder_dsl.rs b/crates/polars-plan/src/logical_plan/builder_dsl.rs index 5dd140f5f835..bb3ef85c666b 100644 --- a/crates/polars-plan/src/logical_plan/builder_dsl.rs +++ b/crates/polars-plan/src/logical_plan/builder_dsl.rs @@ -313,7 +313,7 @@ impl DslBuilder { schema, output_schema: None, projection: None, - selection: None, + filter: None, } .into() } diff --git a/crates/polars-plan/src/logical_plan/conversion/dsl_to_ir.rs b/crates/polars-plan/src/logical_plan/conversion/dsl_to_ir.rs index f5f7bb959e91..651a09369093 100644 --- a/crates/polars-plan/src/logical_plan/conversion/dsl_to_ir.rs +++ b/crates/polars-plan/src/logical_plan/conversion/dsl_to_ir.rs @@ -20,7 +20,7 @@ fn empty_df() -> IR { schema: Arc::new(Default::default()), output_schema: None, projection: None, - selection: None, + filter: None, } } @@ -204,13 +204,13 @@ pub fn to_alp_impl( schema, output_schema, projection, - selection, + filter: selection, } => IR::DataFrameScan { df, schema, output_schema, projection, - selection: selection.map(|expr| to_expr_ir(expr, expr_arena)), + filter: selection.map(|expr| to_expr_ir(expr, expr_arena)), }, DslPlan::Select { expr, diff --git a/crates/polars-plan/src/logical_plan/conversion/mod.rs b/crates/polars-plan/src/logical_plan/conversion/mod.rs index 9eb7d4ec5c19..e9b5664ae019 100644 --- a/crates/polars-plan/src/logical_plan/conversion/mod.rs +++ b/crates/polars-plan/src/logical_plan/conversion/mod.rs @@ -100,13 +100,13 @@ impl IR { schema, output_schema, projection, - selection, + filter: selection, } => DslPlan::DataFrameScan { df, schema, output_schema, projection, - selection: selection.map(|e| e.to_expr(expr_arena)), + filter: selection.map(|e| e.to_expr(expr_arena)), }, IR::Select { expr, diff --git a/crates/polars-plan/src/logical_plan/alp/dot.rs b/crates/polars-plan/src/logical_plan/ir/dot.rs similarity index 98% rename from crates/polars-plan/src/logical_plan/alp/dot.rs rename to crates/polars-plan/src/logical_plan/ir/dot.rs index c50df1c0b982..4c3d7202cb42 100644 --- a/crates/polars-plan/src/logical_plan/alp/dot.rs +++ b/crates/polars-plan/src/logical_plan/ir/dot.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; use super::format::ExprIRSliceDisplay; use crate::constants::UNLIMITED_CACHE; -use crate::prelude::alp::format::ColumnsDisplay; +use crate::prelude::ir::format::ColumnsDisplay; use crate::prelude::*; pub struct IRDotDisplay<'a> { @@ -153,7 +153,7 @@ impl<'a> IRDotDisplay<'a> { #[cfg(feature = "python")] PythonScan { predicate, options } => { let predicate = predicate.as_ref().map(|e| self.display_expr(e)); - let with_columns = NumColumns(options.with_columns.as_ref().map(|s| s.as_slice())); + let with_columns = NumColumns(options.with_columns.as_ref().map(|s| s.as_ref())); let total_columns = options.schema.len(); let predicate = OptionExprIRDisplay(predicate); @@ -228,10 +228,10 @@ impl<'a> IRDotDisplay<'a> { DataFrameScan { schema, projection, - selection, + filter: selection, .. } => { - let num_columns = NumColumns(projection.as_ref().map(|p| p.as_ref().as_ref())); + let num_columns = NumColumns(projection.as_ref().map(|p| p.as_ref())); let selection = selection.as_ref().map(|e| self.display_expr(e)); let selection = OptionExprIRDisplay(selection); let total_columns = schema.len(); @@ -250,7 +250,7 @@ impl<'a> IRDotDisplay<'a> { } => { let name: &str = scan_type.into(); let path = PathsDisplay(paths.as_ref()); - let with_columns = options.with_columns.as_ref().map(|cols| cols.as_slice()); + let with_columns = options.with_columns.as_ref().map(|cols| cols.as_ref()); let with_columns = NumColumns(with_columns); let total_columns = file_info.schema.len(); let predicate = predicate.as_ref().map(|e| self.display_expr(e)); diff --git a/crates/polars-plan/src/logical_plan/alp/format.rs b/crates/polars-plan/src/logical_plan/ir/format.rs similarity index 99% rename from crates/polars-plan/src/logical_plan/alp/format.rs rename to crates/polars-plan/src/logical_plan/ir/format.rs index e78daa1cd3dc..9f215798e8f8 100644 --- a/crates/polars-plan/src/logical_plan/alp/format.rs +++ b/crates/polars-plan/src/logical_plan/ir/format.rs @@ -250,7 +250,7 @@ impl<'a> IRDisplay<'a> { DataFrameScan { schema, projection, - selection, + filter: selection, .. } => { let total_columns = schema.len(); diff --git a/crates/polars-plan/src/logical_plan/alp/inputs.rs b/crates/polars-plan/src/logical_plan/ir/inputs.rs similarity index 98% rename from crates/polars-plan/src/logical_plan/alp/inputs.rs rename to crates/polars-plan/src/logical_plan/ir/inputs.rs index 653bee608962..e3b829bd6505 100644 --- a/crates/polars-plan/src/logical_plan/alp/inputs.rs +++ b/crates/polars-plan/src/logical_plan/ir/inputs.rs @@ -127,7 +127,7 @@ impl IR { schema, output_schema, projection, - selection, + filter: selection, } => { let mut new_selection = None; if selection.is_some() { @@ -139,7 +139,7 @@ impl IR { schema: schema.clone(), output_schema: output_schema.clone(), projection: projection.clone(), - selection: new_selection, + filter: new_selection, } }, MapFunction { function, .. } => MapFunction { @@ -188,7 +188,9 @@ impl IR { container.push(pred.clone()) } }, - DataFrameScan { selection, .. } => { + DataFrameScan { + filter: selection, .. + } => { if let Some(expr) = selection { container.push(expr.clone()) } diff --git a/crates/polars-plan/src/logical_plan/alp/mod.rs b/crates/polars-plan/src/logical_plan/ir/mod.rs similarity index 98% rename from crates/polars-plan/src/logical_plan/alp/mod.rs rename to crates/polars-plan/src/logical_plan/ir/mod.rs index d05ad1199a66..a288c8c7c0ff 100644 --- a/crates/polars-plan/src/logical_plan/alp/mod.rs +++ b/crates/polars-plan/src/logical_plan/ir/mod.rs @@ -62,8 +62,8 @@ pub enum IR { schema: SchemaRef, // schema of the projected file output_schema: Option, - projection: Option>>, - selection: Option, + projection: Option>, + filter: Option, }, // Only selects columns (semantically only has row access). // This is a more restricted operation than `Select`. diff --git a/crates/polars-plan/src/logical_plan/alp/schema.rs b/crates/polars-plan/src/logical_plan/ir/schema.rs similarity index 100% rename from crates/polars-plan/src/logical_plan/alp/schema.rs rename to crates/polars-plan/src/logical_plan/ir/schema.rs diff --git a/crates/polars-plan/src/logical_plan/alp/tree_format.rs b/crates/polars-plan/src/logical_plan/ir/tree_format.rs similarity index 99% rename from crates/polars-plan/src/logical_plan/alp/tree_format.rs rename to crates/polars-plan/src/logical_plan/ir/tree_format.rs index a260d09bf466..ca4bf5b8edd5 100644 --- a/crates/polars-plan/src/logical_plan/alp/tree_format.rs +++ b/crates/polars-plan/src/logical_plan/ir/tree_format.rs @@ -5,9 +5,9 @@ use polars_core::error::*; use regex::Regex; use crate::constants; -use crate::logical_plan::alp::IRPlanRef; +use crate::logical_plan::ir::IRPlanRef; use crate::logical_plan::visitor::{VisitRecursion, Visitor}; -use crate::prelude::alp::format::ColumnsDisplay; +use crate::prelude::ir::format::ColumnsDisplay; use crate::prelude::visitor::AexprNode; use crate::prelude::*; @@ -193,7 +193,7 @@ impl<'a> TreeFmtNode<'a> { DataFrameScan { schema, projection, - selection, + filter: selection, .. } => ND( wh( diff --git a/crates/polars-plan/src/logical_plan/mod.rs b/crates/polars-plan/src/logical_plan/mod.rs index 4c1e49904ce7..080d56ff9d11 100644 --- a/crates/polars-plan/src/logical_plan/mod.rs +++ b/crates/polars-plan/src/logical_plan/mod.rs @@ -9,8 +9,8 @@ use recursive::recursive; use crate::prelude::*; pub(crate) mod aexpr; -pub(crate) mod alp; pub(crate) mod anonymous_scan; +pub(crate) mod ir; mod apply; mod builder_dsl; @@ -33,7 +33,6 @@ mod schema; pub mod visitor; pub use aexpr::*; -pub use alp::*; pub use anonymous_scan::*; pub use apply::*; pub use builder_dsl::*; @@ -42,6 +41,7 @@ pub use conversion::*; pub(crate) use expr_ir::*; pub use file_scan::*; pub use functions::*; +pub use ir::*; pub use iterator::*; pub use lit::*; pub use optimizer::*; @@ -91,8 +91,8 @@ pub enum DslPlan { schema: SchemaRef, // schema of the projected file output_schema: Option, - projection: Option>>, - selection: Option, + projection: Option>, + filter: Option, }, /// Polars' `select` operation, this can mean projection, but also full data access. Select { @@ -188,7 +188,7 @@ impl Clone for DslPlan { Self::Filter { input, predicate } => Self::Filter { input: input.clone(), predicate: predicate.clone() }, Self::Cache { input, id, cache_hits } => Self::Cache { input: input.clone(), id: id.clone(), cache_hits: cache_hits.clone() }, Self::Scan { paths, file_info, predicate, file_options, scan_type } => Self::Scan { paths: paths.clone(), file_info: file_info.clone(), predicate: predicate.clone(), file_options: file_options.clone(), scan_type: scan_type.clone() }, - Self::DataFrameScan { df, schema, output_schema, projection, selection } => Self::DataFrameScan { df: df.clone(), schema: schema.clone(), output_schema: output_schema.clone(), projection: projection.clone(), selection: selection.clone() }, + Self::DataFrameScan { df, schema, output_schema, projection, filter: selection } => Self::DataFrameScan { df: df.clone(), schema: schema.clone(), output_schema: output_schema.clone(), projection: projection.clone(), filter: selection.clone() }, Self::Select { expr, input, options } => Self::Select { expr: expr.clone(), input: input.clone(), options: options.clone() }, Self::GroupBy { input, keys, aggs, apply, maintain_order, options } => Self::GroupBy { input: input.clone(), keys: keys.clone(), aggs: aggs.clone(), apply: apply.clone(), maintain_order: maintain_order.clone(), options: options.clone() }, Self::Join { input_left, input_right, left_on, right_on, options } => Self::Join { input_left: input_left.clone(), input_right: input_right.clone(), left_on: left_on.clone(), right_on: right_on.clone(), options: options.clone() }, @@ -215,7 +215,7 @@ impl Default for DslPlan { schema: Arc::new(schema), output_schema: None, projection: None, - selection: None, + filter: None, } } } diff --git a/crates/polars-plan/src/logical_plan/optimizer/cluster_with_columns.rs b/crates/polars-plan/src/logical_plan/optimizer/cluster_with_columns.rs index 82030b0b3828..2a7548468d2b 100644 --- a/crates/polars-plan/src/logical_plan/optimizer/cluster_with_columns.rs +++ b/crates/polars-plan/src/logical_plan/optimizer/cluster_with_columns.rs @@ -6,7 +6,7 @@ use polars_utils::aliases::{InitHashMaps, PlHashMap}; use polars_utils::arena::{Arena, Node}; use super::aexpr::AExpr; -use super::alp::IR; +use super::ir::IR; use super::{aexpr_to_leaf_names_iter, ColumnName}; type ColumnMap = PlHashMap; diff --git a/crates/polars-plan/src/logical_plan/optimizer/count_star.rs b/crates/polars-plan/src/logical_plan/optimizer/count_star.rs index 421f25bf5136..4d1c3c7073af 100644 --- a/crates/polars-plan/src/logical_plan/optimizer/count_star.rs +++ b/crates/polars-plan/src/logical_plan/optimizer/count_star.rs @@ -26,7 +26,7 @@ impl OptimizationRule for CountStar { schema: Arc::new(Default::default()), output_schema: None, projection: None, - selection: None, + filter: None, }; let placeholder_node = lp_arena.add(placeholder); diff --git a/crates/polars-plan/src/logical_plan/optimizer/predicate_pushdown/mod.rs b/crates/polars-plan/src/logical_plan/optimizer/predicate_pushdown/mod.rs index 37328471b59a..bcba865a9c0d 100644 --- a/crates/polars-plan/src/logical_plan/optimizer/predicate_pushdown/mod.rs +++ b/crates/polars-plan/src/logical_plan/optimizer/predicate_pushdown/mod.rs @@ -308,7 +308,7 @@ impl<'a> PredicatePushDown<'a> { schema, output_schema, projection, - selection, + filter: selection, } => { let selection = predicate_at_scan(acc_predicates, selection, expr_arena); let lp = DataFrameScan { @@ -316,7 +316,7 @@ impl<'a> PredicatePushDown<'a> { schema, output_schema, projection, - selection, + filter: selection, }; Ok(lp) }, @@ -391,7 +391,7 @@ impl<'a> PredicatePushDown<'a> { schema: schema.clone(), output_schema: None, projection: None, - selection: None, + filter: None, }); } else { paths = Arc::from(new_paths) diff --git a/crates/polars-plan/src/logical_plan/optimizer/projection_pushdown/mod.rs b/crates/polars-plan/src/logical_plan/optimizer/projection_pushdown/mod.rs index e6d06b564d26..0a44d989afd8 100644 --- a/crates/polars-plan/src/logical_plan/optimizer/projection_pushdown/mod.rs +++ b/crates/polars-plan/src/logical_plan/optimizer/projection_pushdown/mod.rs @@ -38,7 +38,7 @@ fn get_scan_columns( acc_projections: &mut Vec, expr_arena: &Arena, row_index: Option<&RowIndex>, -) -> Option>> { +) -> Option> { let mut with_columns = None; if !acc_projections.is_empty() { let mut columns = Vec::with_capacity(acc_projections.len()); @@ -55,7 +55,7 @@ fn get_scan_columns( columns.push((*name).to_owned()) } } - with_columns = Some(Arc::new(columns)); + with_columns = Some(Arc::from(columns)); } with_columns } @@ -354,7 +354,7 @@ impl ProjectionPushDown { df, schema, mut output_schema, - selection, + filter: selection, .. } => { let mut projection = None; @@ -372,7 +372,7 @@ impl ProjectionPushDown { schema, output_schema, projection, - selection, + filter: selection, }; Ok(lp) }, diff --git a/crates/polars-plan/src/logical_plan/optimizer/stack_opt.rs b/crates/polars-plan/src/logical_plan/optimizer/stack_opt.rs index 0155bab14134..d197dc263499 100644 --- a/crates/polars-plan/src/logical_plan/optimizer/stack_opt.rs +++ b/crates/polars-plan/src/logical_plan/optimizer/stack_opt.rs @@ -1,7 +1,7 @@ use polars_core::prelude::PolarsResult; use crate::logical_plan::aexpr::AExpr; -use crate::logical_plan::alp::IR; +use crate::logical_plan::ir::IR; use crate::prelude::{Arena, Node}; /// Optimizer that uses a stack and memory arenas in favor of recursion diff --git a/crates/polars-plan/src/logical_plan/options.rs b/crates/polars-plan/src/logical_plan/options.rs index 0d56a16bb8a1..cc1e936cc017 100644 --- a/crates/polars-plan/src/logical_plan/options.rs +++ b/crates/polars-plan/src/logical_plan/options.rs @@ -25,7 +25,7 @@ pub type FileCount = u32; /// Generic options for all file types. pub struct FileScanOptions { pub n_rows: Option, - pub with_columns: Option>>, + pub with_columns: Option>, pub cache: bool, pub row_index: Option, pub rechunk: bool, @@ -204,7 +204,7 @@ pub struct PythonOptions { pub scan_fn: Option, pub schema: SchemaRef, pub output_schema: Option, - pub with_columns: Option>>, + pub with_columns: Option>, pub pyarrow: bool, // a pyarrow predicate python expression // can be evaluated with python.eval diff --git a/crates/polars-plan/src/logical_plan/visitor/hash.rs b/crates/polars-plan/src/logical_plan/visitor/hash.rs index 086f45fdc27a..d040def75435 100644 --- a/crates/polars-plan/src/logical_plan/visitor/hash.rs +++ b/crates/polars-plan/src/logical_plan/visitor/hash.rs @@ -92,7 +92,7 @@ impl Hash for HashableEqLP<'_> { schema: _, output_schema: _, projection, - selection, + filter: selection, } => { (Arc::as_ptr(df) as usize).hash(state); projection.hash(state); @@ -281,14 +281,14 @@ impl HashableEqLP<'_> { schema: _, output_schema: _, projection: pl, - selection: sl, + filter: sl, }, IR::DataFrameScan { df: dfr, schema: _, output_schema: _, projection: pr, - selection: sr, + filter: sr, }, ) => { Arc::as_ptr(dfl) == Arc::as_ptr(dfr) diff --git a/crates/polars/tests/it/io/csv.rs b/crates/polars/tests/it/io/csv.rs index eef7314d797c..22d6d644a6fc 100644 --- a/crates/polars/tests/it/io/csv.rs +++ b/crates/polars/tests/it/io/csv.rs @@ -512,9 +512,7 @@ fn test_empty_bytes_to_dataframe() { let result = CsvReadOptions::default() .with_has_header(false) - .with_columns(Some(Arc::new( - schema.iter_names().map(|s| s.to_string()).collect(), - ))) + .with_columns(Some(schema.iter_names().map(|s| s.to_string()).collect())) .with_schema(Some(Arc::new(schema))) .into_reader_with_file_handle(file) .finish(); diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 9818301c0ee0..9801584e2284 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -612,7 +612,7 @@ def serialize(self, file: IOBase | str | Path | None = None) -> str | None: >>> lf = pl.LazyFrame({"a": [1, 2, 3]}).sum() >>> json = lf.serialize() >>> json - '{"MapFunction":{"input":{"DataFrameScan":{"df":{"columns":[{"name":"a","datatype":"Int64","bit_settings":"","values":[1,2,3]}]},"schema":{"inner":{"a":"Int64"}},"output_schema":null,"projection":null,"selection":null}},"function":{"Stats":"Sum"}}}' + '{"MapFunction":{"input":{"DataFrameScan":{"df":{"columns":[{"name":"a","datatype":"Int64","bit_settings":"","values":[1,2,3]}]},"schema":{"inner":{"a":"Int64"}},"output_schema":null,"projection":null,"filter":null}},"function":{"Stats":"Sum"}}}' The logical plan can later be deserialized back into a LazyFrame. diff --git a/py-polars/src/batched_csv.rs b/py-polars/src/batched_csv.rs index 215be75f6340..350fdf171b75 100644 --- a/py-polars/src/batched_csv.rs +++ b/py-polars/src/batched_csv.rs @@ -101,7 +101,7 @@ impl PyBatchedCsv { .with_projection(projection.map(Arc::new)) .with_rechunk(rechunk) .with_chunk_size(chunk_size) - .with_columns(columns.map(Arc::new)) + .with_columns(columns.map(Arc::from)) .with_n_threads(n_threads) .with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new)) .with_low_memory(low_memory) diff --git a/py-polars/src/dataframe/io.rs b/py-polars/src/dataframe/io.rs index 783274880bcb..6746fb2d37cf 100644 --- a/py-polars/src/dataframe/io.rs +++ b/py-polars/src/dataframe/io.rs @@ -102,7 +102,7 @@ impl PyDataFrame { .with_projection(projection.map(Arc::new)) .with_rechunk(rechunk) .with_chunk_size(chunk_size) - .with_columns(columns.map(Arc::new)) + .with_columns(columns.map(Arc::from)) .with_n_threads(n_threads) .with_schema_overwrite(overwrite_dtype.map(Arc::new)) .with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new)) diff --git a/py-polars/src/lazyframe/visitor/nodes.rs b/py-polars/src/lazyframe/visitor/nodes.rs index 07e4c71947a8..cec4752d2a4b 100644 --- a/py-polars/src/lazyframe/visitor/nodes.rs +++ b/py-polars/src/lazyframe/visitor/nodes.rs @@ -322,7 +322,7 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult { schema: _, output_schema: _, projection, - selection, + filter: selection, } => DataFrameScan { df: PyDataFrame::new((**df).clone()), projection: projection