From 2454e468641d4d98af211c2800c0afec2732385b Mon Sep 17 00:00:00 2001 From: rdettai Date: Wed, 13 Oct 2021 19:07:19 +0200 Subject: [PATCH] Reorganize table providers by table format (#1010) * [feat] stubs for provider re-organization * [feat] implement infer_schema to make test pass * [wip] trying to implement pruned_partition_list * [typo] * [fix] replace enum with trait for extensibility * [fix] add partition cols to infered schema * [feat] forked file format executors avro still missing * [doc] comments about why we are flattening * [test] migrated tests to file formats * [test] improve listing test * [feat] add avro to refactored format providers * [fix] remove try from new when unnecessary * [fix] remove try_ from ListingTable new * [refacto] renamed format module to file_format also removed statistics from the PartitionedFile abstraction * [fix] removed Ballista stubs * [fix] rename create_executor * [feat] added store * [fix] Clippy * [test] improve file_format tests with limit * [fix] limit file system read size * [fix] avoid fetching unnecessary stats after limit * [fix] improve readability * [doc] improve comments * [refacto] keep async reader stub * [doc] cleanup comments * [test] test file listing * [fix] add last_modified back * [refacto] simplify csv reader exec * [refacto] change SizedFile back to FileMeta * [doc] comment clarification * [fix] avoid keeping object store as field * [refacto] grouped params to avoid too_many_arguments * [fix] get_by_uri also returns path * [fix] ListingTable at store level instead of registry * [fix] builder take self and not ref to self * Replace file format providers (#2) * [fix] replace file format providers in datafusion * [lint] clippy * [fix] replace file format providers in ballista * [fix] await in python wrapper * [doc] clearer doc about why sql() is async * [doc] typos and clarity * [fix] missing await after rebase --- README.md | 6 +- .../src/bin/ballista-dataframe.rs | 3 +- ballista-examples/src/bin/ballista-sql.rs | 11 +- ballista/rust/client/README.md | 4 +- ballista/rust/client/src/context.rs | 60 +- ballista/rust/core/Cargo.toml | 1 + ballista/rust/core/proto/ballista.proto | 94 ++- .../core/src/serde/logical_plan/from_proto.rs | 185 ++--- .../rust/core/src/serde/logical_plan/mod.rs | 62 +- .../core/src/serde/logical_plan/to_proto.rs | 139 ++-- ballista/rust/core/src/serde/mod.rs | 14 + .../src/serde/physical_plan/from_proto.rs | 153 ++-- .../core/src/serde/physical_plan/to_proto.rs | 138 ++-- ballista/rust/core/src/utils.rs | 3 +- ballista/rust/scheduler/src/lib.rs | 70 +- ballista/rust/scheduler/src/planner.rs | 30 +- ballista/rust/scheduler/src/test_utils.rs | 6 +- benchmarks/src/bin/nyctaxi.rs | 6 +- benchmarks/src/bin/tpch.rs | 119 +-- datafusion-cli/src/main.rs | 4 +- datafusion-examples/examples/avro_sql.rs | 12 +- datafusion-examples/examples/csv_sql.rs | 11 +- datafusion-examples/examples/dataframe.rs | 3 +- datafusion-examples/examples/flight_server.rs | 18 +- datafusion-examples/examples/parquet_sql.rs | 11 +- datafusion/benches/aggregate_query_sql.rs | 2 +- datafusion/benches/filter_query_sql.rs | 2 +- datafusion/benches/math_query_sql.rs | 2 +- datafusion/benches/sort_limit_query_sql.rs | 19 +- datafusion/benches/window_query_sql.rs | 2 +- datafusion/src/avro_to_arrow/mod.rs | 6 +- datafusion/src/dataframe.rs | 78 +- datafusion/src/datasource/avro.rs | 426 ----------- datafusion/src/datasource/csv.rs | 245 ------- datafusion/src/datasource/file_format/avro.rs | 403 +++++++++++ 
datafusion/src/datasource/file_format/csv.rs | 290 ++++++++ datafusion/src/datasource/file_format/json.rs | 241 +++++++ datafusion/src/datasource/file_format/mod.rs | 85 +++ .../src/datasource/file_format/parquet.rs | 623 ++++++++++++++++ datafusion/src/datasource/json.rs | 184 ----- datafusion/src/datasource/listing.rs | 487 +++++++++++++ datafusion/src/datasource/mod.rs | 235 ++---- .../src/datasource/object_store/local.rs | 64 +- datafusion/src/datasource/object_store/mod.rs | 142 +++- datafusion/src/datasource/parquet.rs | 677 ------------------ datafusion/src/execution/context.rs | 327 ++++++--- datafusion/src/execution/dataframe_impl.rs | 69 +- datafusion/src/execution/mod.rs | 1 + datafusion/src/execution/options.rs | 173 +++++ datafusion/src/lib.rs | 6 +- datafusion/src/logical_plan/builder.rs | 146 +++- datafusion/src/logical_plan/expr.rs | 23 + .../src/physical_optimizer/repartition.rs | 26 +- datafusion/src/physical_plan/avro.rs | 457 ------------ .../src/physical_plan/coalesce_partitions.rs | 18 +- datafusion/src/physical_plan/csv.rs | 534 -------------- .../src/physical_plan/expressions/binary.rs | 2 +- .../src/physical_plan/file_format/avro.rs | 316 ++++++++ .../src/physical_plan/file_format/csv.rs | 330 +++++++++ .../src/physical_plan/file_format/json.rs | 338 +++++++++ .../src/physical_plan/file_format/mod.rs | 28 + .../{ => file_format}/parquet.rs | 225 +++--- datafusion/src/physical_plan/filter.rs | 22 +- datafusion/src/physical_plan/json.rs | 507 ------------- datafusion/src/physical_plan/limit.rs | 17 +- datafusion/src/physical_plan/mod.rs | 10 +- datafusion/src/physical_plan/planner.rs | 105 ++- datafusion/src/physical_plan/projection.rs | 22 +- datafusion/src/physical_plan/sort.rs | 25 +- .../physical_plan/sort_preserving_merge.rs | 52 +- datafusion/src/physical_plan/source.rs | 90 --- datafusion/src/physical_plan/union.rs | 39 +- datafusion/src/physical_plan/windows/mod.rs | 22 +- datafusion/src/prelude.rs | 3 +- datafusion/src/test/mod.rs | 20 +- datafusion/tests/custom_sources.rs | 1 + datafusion/tests/parquet_pruning.rs | 7 +- datafusion/tests/provider_filter_pushdown.rs | 3 +- datafusion/tests/sql.rs | 181 ++--- datafusion/tests/statistics.rs | 15 +- datafusion/tests/user_defined_plan.rs | 8 +- python/src/context.rs | 34 +- 82 files changed, 5116 insertions(+), 4462 deletions(-) delete mode 100644 datafusion/src/datasource/avro.rs delete mode 100644 datafusion/src/datasource/csv.rs create mode 100644 datafusion/src/datasource/file_format/avro.rs create mode 100644 datafusion/src/datasource/file_format/csv.rs create mode 100644 datafusion/src/datasource/file_format/json.rs create mode 100644 datafusion/src/datasource/file_format/mod.rs create mode 100644 datafusion/src/datasource/file_format/parquet.rs delete mode 100644 datafusion/src/datasource/json.rs create mode 100644 datafusion/src/datasource/listing.rs delete mode 100644 datafusion/src/datasource/parquet.rs create mode 100644 datafusion/src/execution/options.rs delete mode 100644 datafusion/src/physical_plan/avro.rs delete mode 100644 datafusion/src/physical_plan/csv.rs create mode 100644 datafusion/src/physical_plan/file_format/avro.rs create mode 100644 datafusion/src/physical_plan/file_format/csv.rs create mode 100644 datafusion/src/physical_plan/file_format/json.rs create mode 100644 datafusion/src/physical_plan/file_format/mod.rs rename datafusion/src/physical_plan/{ => file_format}/parquet.rs (85%) delete mode 100644 datafusion/src/physical_plan/json.rs delete mode 100644 
datafusion/src/physical_plan/source.rs diff --git a/README.md b/README.md index 8b129177deda..458f1974816f 100644 --- a/README.md +++ b/README.md @@ -76,10 +76,10 @@ use datafusion::arrow::record_batch::RecordBatch; async fn main() -> datafusion::error::Result<()> { // register the table let mut ctx = ExecutionContext::new(); - ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?; + ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?; // create a plan to run a SQL query - let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?; + let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?; // execute and print results df.show().await?; @@ -98,7 +98,7 @@ use datafusion::arrow::record_batch::RecordBatch; async fn main() -> datafusion::error::Result<()> { // create the dataframe let mut ctx = ExecutionContext::new(); - let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; let df = df.filter(col("a").lt_eq(col("b")))? .aggregate(vec![col("a")], vec![min(col("b"))])?; diff --git a/ballista-examples/src/bin/ballista-dataframe.rs b/ballista-examples/src/bin/ballista-dataframe.rs index 434ed7bcd899..8399324ad0e2 100644 --- a/ballista-examples/src/bin/ballista-dataframe.rs +++ b/ballista-examples/src/bin/ballista-dataframe.rs @@ -33,7 +33,8 @@ async fn main() -> Result<()> { // define the query using the DataFrame trait let df = ctx - .read_parquet(filename)? + .read_parquet(filename) + .await? .select_columns(&["id", "bool_col", "timestamp_col"])? .filter(col("id").gt(lit(1)))?; diff --git a/ballista-examples/src/bin/ballista-sql.rs b/ballista-examples/src/bin/ballista-sql.rs index 4b303e3ef3d5..3e0df21a73f1 100644 --- a/ballista-examples/src/bin/ballista-sql.rs +++ b/ballista-examples/src/bin/ballista-sql.rs @@ -34,15 +34,18 @@ async fn main() -> Result<()> { "aggregate_test_100", &format!("{}/csv/aggregate_test_100.csv", testdata), CsvReadOptions::new(), - )?; + ) + .await?; // execute the query - let df = ctx.sql( - "SELECT c1, MIN(c12), MAX(c12) \ + let df = ctx + .sql( + "SELECT c1, MIN(c12), MAX(c12) \ FROM aggregate_test_100 \ WHERE c11 > 0.1 AND c11 < 0.9 \ GROUP BY c1", - )?; + ) + .await?; // print the results df.show().await?; diff --git a/ballista/rust/client/README.md b/ballista/rust/client/README.md index 6443dd25a147..08485b6f291d 100644 --- a/ballista/rust/client/README.md +++ b/ballista/rust/client/README.md @@ -104,7 +104,7 @@ async fn main() -> Result<()> { "tripdata", "/path/to/yellow_tripdata_2020-01.csv", CsvReadOptions::new(), - )?; + ).await?; // execute the query let df = ctx.sql( @@ -112,7 +112,7 @@ async fn main() -> Result<()> { FROM tripdata GROUP BY passenger_count ORDER BY passenger_count", - )?; + ).await?; // collect the results and print them to stdout let results = df.collect().await?; diff --git a/ballista/rust/client/src/context.rs b/ballista/rust/client/src/context.rs index c8df5099c2be..e619f12c1eab 100644 --- a/ballista/rust/client/src/context.rs +++ b/ballista/rust/client/src/context.rs @@ -31,8 +31,7 @@ use datafusion::datasource::TableProvider; use datafusion::error::{DataFusionError, Result}; use datafusion::execution::dataframe_impl::DataFrameImpl; use datafusion::logical_plan::LogicalPlan; -use datafusion::physical_plan::avro::AvroReadOptions; -use datafusion::physical_plan::csv::CsvReadOptions; +use datafusion::prelude::{AvroReadOptions, CsvReadOptions}; use 
datafusion::sql::parser::FileType; struct BallistaContextState { @@ -128,11 +127,11 @@ impl BallistaContext { } /// Create a DataFrame representing an Avro table scan - - pub fn read_avro( + /// TODO fetch schema from scheduler instead of resolving locally + pub async fn read_avro( &self, path: &str, - options: AvroReadOptions, + options: AvroReadOptions<'_>, ) -> Result> { // convert to absolute path because the executor likely has a different working directory let path = PathBuf::from(path); @@ -147,13 +146,13 @@ impl BallistaContext { guard.config(), ) }; - let df = ctx.read_avro(path.to_str().unwrap(), options)?; + let df = ctx.read_avro(path.to_str().unwrap(), options).await?; Ok(df) } /// Create a DataFrame representing a Parquet table scan - - pub fn read_parquet(&self, path: &str) -> Result> { + /// TODO fetch schema from scheduler instead of resolving locally + pub async fn read_parquet(&self, path: &str) -> Result> { // convert to absolute path because the executor likely has a different working directory let path = PathBuf::from(path); let path = fs::canonicalize(&path)?; @@ -167,16 +166,16 @@ impl BallistaContext { guard.config(), ) }; - let df = ctx.read_parquet(path.to_str().unwrap())?; + let df = ctx.read_parquet(path.to_str().unwrap()).await?; Ok(df) } /// Create a DataFrame representing a CSV table scan - - pub fn read_csv( + /// TODO fetch schema from scheduler instead of resolving locally + pub async fn read_csv( &self, path: &str, - options: CsvReadOptions, + options: CsvReadOptions<'_>, ) -> Result> { // convert to absolute path because the executor likely has a different working directory let path = PathBuf::from(path); @@ -191,7 +190,7 @@ impl BallistaContext { guard.config(), ) }; - let df = ctx.read_csv(path.to_str().unwrap(), options)?; + let df = ctx.read_csv(path.to_str().unwrap(), options).await?; Ok(df) } @@ -206,39 +205,42 @@ impl BallistaContext { Ok(()) } - pub fn register_csv( + pub async fn register_csv( &self, name: &str, path: &str, - options: CsvReadOptions, + options: CsvReadOptions<'_>, ) -> Result<()> { - match self.read_csv(path, options)?.to_logical_plan() { + match self.read_csv(path, options).await?.to_logical_plan() { LogicalPlan::TableScan { source, .. } => self.register_table(name, source), _ => Err(DataFusionError::Internal("Expected tables scan".to_owned())), } } - pub fn register_parquet(&self, name: &str, path: &str) -> Result<()> { - match self.read_parquet(path)?.to_logical_plan() { + pub async fn register_parquet(&self, name: &str, path: &str) -> Result<()> { + match self.read_parquet(path).await?.to_logical_plan() { LogicalPlan::TableScan { source, .. } => self.register_table(name, source), _ => Err(DataFusionError::Internal("Expected tables scan".to_owned())), } } - pub fn register_avro( + pub async fn register_avro( &self, name: &str, path: &str, - options: AvroReadOptions, + options: AvroReadOptions<'_>, ) -> Result<()> { - match self.read_avro(path, options)?.to_logical_plan() { + match self.read_avro(path, options).await?.to_logical_plan() { LogicalPlan::TableScan { source, .. } => self.register_table(name, source), _ => Err(DataFusionError::Internal("Expected tables scan".to_owned())), } } - /// Create a DataFrame from a SQL statement - pub fn sql(&self, sql: &str) -> Result> { + /// Create a DataFrame from a SQL statement. + /// + /// This method is `async` because queries of type `CREATE EXTERNAL TABLE` + /// might require the schema to be inferred. 
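+    ///
+    /// A minimal sketch of the resulting call chain (the import paths, file
+    /// location, and query text below are illustrative assumptions, not part
+    /// of this patch):
+    ///
+    /// ```no_run
+    /// # use ballista::context::BallistaContext;
+    /// # use ballista_core::config::BallistaConfig;
+    /// # use datafusion::prelude::CsvReadOptions;
+    /// # async fn example() {
+    /// let config = BallistaConfig::new().unwrap();
+    /// let ctx = BallistaContext::standalone(&config, 2).await.unwrap();
+    /// // registration now also awaits, since the schema may be inferred here
+    /// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())
+    ///     .await
+    ///     .unwrap();
+    /// let df = ctx.sql("SELECT COUNT(*) FROM example").await.unwrap();
+    /// df.show().await.unwrap();
+    /// # }
+    /// ```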
+ pub async fn sql(&self, sql: &str) -> Result> { let mut ctx = { let state = self.state.lock().unwrap(); create_df_ctx_with_ballista_query_planner( @@ -275,15 +277,17 @@ impl BallistaContext { CsvReadOptions::new() .schema(&schema.as_ref().to_owned().into()) .has_header(*has_header), - )?; + ) + .await?; Ok(Arc::new(DataFrameImpl::new(ctx.state, &plan))) } FileType::Parquet => { - self.register_parquet(name, location)?; + self.register_parquet(name, location).await?; Ok(Arc::new(DataFrameImpl::new(ctx.state, &plan))) } FileType::Avro => { - self.register_avro(name, location, AvroReadOptions::default())?; + self.register_avro(name, location, AvroReadOptions::default()) + .await?; Ok(Arc::new(DataFrameImpl::new(ctx.state, &plan))) } _ => Err(DataFusionError::NotImplemented(format!( @@ -292,7 +296,7 @@ impl BallistaContext { ))), }, - _ => ctx.sql(sql), + _ => ctx.sql(sql).await, } } } @@ -306,7 +310,7 @@ mod tests { let context = BallistaContext::standalone(&BallistaConfig::new().unwrap(), 1) .await .unwrap(); - let df = context.sql("SELECT 1;").unwrap(); + let df = context.sql("SELECT 1;").await.unwrap(); df.collect().await.unwrap(); } } diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml index 94ee71d6d1c4..ac53aa00e47e 100644 --- a/ballista/rust/core/Cargo.toml +++ b/ballista/rust/core/Cargo.toml @@ -41,6 +41,7 @@ sqlparser = "0.11.0" tokio = "1.0" tonic = "0.5" uuid = { version = "0.8", features = ["v4"] } +chrono = "0.4" arrow-flight = { version = "^5.3" } diff --git a/ballista/rust/core/proto/ballista.proto b/ballista/rust/core/proto/ballista.proto index 9a2ec710411b..33638fdc50d4 100644 --- a/ballista/rust/core/proto/ballista.proto +++ b/ballista/rust/core/proto/ballista.proto @@ -241,8 +241,7 @@ message SortExprNode { // LogicalPlan is a nested type message LogicalPlanNode { oneof LogicalPlanType { - CsvTableScanNode csv_scan = 1; - ParquetTableScanNode parquet_scan = 2; + ListingTableScanNode listing_scan = 1; ProjectionNode projection = 3; SelectionNode selection = 4; LimitNode limit = 5; @@ -256,7 +255,6 @@ message LogicalPlanNode { WindowNode window = 13; AnalyzeNode analyze = 14; CrossJoinNode cross_join = 15; - AvroTableScanNode avro_scan = 16; } } @@ -264,17 +262,6 @@ message ProjectionColumns { repeated string columns = 1; } -message CsvTableScanNode { - string table_name = 1; - string path = 2; - bool has_header = 3; - string delimiter = 4; - string file_extension = 5; - ProjectionColumns projection = 6; - Schema schema = 7; - repeated LogicalExprNode filters = 8; -} - message Statistics { int64 num_rows = 1; int64 total_byte_size = 2; @@ -284,30 +271,36 @@ message Statistics { message PartitionedFile { string path = 1; - Statistics statistics = 2; + uint64 size = 2; + uint64 last_modified_ns = 3; } -message TableDescriptor { - string path = 1; - repeated PartitionedFile partition_files = 2; - Schema schema = 3; +message CsvFormat { + bool has_header = 1; + string delimiter = 2; } -message ParquetTableScanNode { - string table_name = 1; - TableDescriptor table_desc = 2; - ProjectionColumns projection = 3; - repeated LogicalExprNode filters = 4; - uint32 target_partitions = 5; +message ParquetFormat { + bool enable_pruning = 1; } -message AvroTableScanNode { +message AvroFormat {} + +message ListingTableScanNode { string table_name = 1; string path = 2; string file_extension = 3; ProjectionColumns projection = 4; Schema schema = 5; repeated LogicalExprNode filters = 6; + repeated string partitions = 7; + bool collect_stat = 8; + uint32 
target_partitions = 9; + oneof FileFormatType { + CsvFormat csv = 10; + ParquetFormat parquet = 11; + AvroFormat avro = 12; + } } message ProjectionNode { @@ -603,40 +596,42 @@ message FilterExecNode { PhysicalExprNode expr = 2; } -message ParquetPartition { - uint32 index = 1; - repeated PartitionedFile files = 2; +message FilePartition { + repeated PartitionedFile files = 1; +} + +message ScanLimit { + // wrap into a message to make it optional + uint32 limit = 1; } message ParquetScanExecNode { - repeated ParquetPartition partitions = 1; + repeated FilePartition partitions = 1; Schema schema = 2; - repeated uint32 projection = 3; uint32 batch_size = 4; + repeated uint32 projection = 6; + ScanLimit limit = 7; + Statistics statistics = 8; } message CsvScanExecNode { - string path = 1; - repeated uint32 projection = 2; - Schema schema = 3; - string file_extension = 4; - bool has_header = 5; - uint32 batch_size = 6; - string delimiter = 7; - - // partition filenames - repeated string filename = 8; + repeated PartitionedFile files = 1; + Schema schema = 2; + bool has_header = 3; + uint32 batch_size = 4; + string delimiter = 5; + repeated uint32 projection = 6; + ScanLimit limit = 7; + Statistics statistics = 8; } message AvroScanExecNode { - string path = 1; - repeated uint32 projection = 2; - Schema schema = 3; - string file_extension = 4; - uint32 batch_size = 5; - - // partition filenames - repeated string filename = 8; + repeated PartitionedFile files = 1; + Schema schema = 2; + uint32 batch_size = 4; + repeated uint32 projection = 6; + ScanLimit limit = 7; + Statistics statistics = 8; } enum PartitionMode { @@ -951,7 +946,6 @@ message GetFileMetadataParams { message GetFileMetadataResult { Schema schema = 1; - repeated FilePartitionMetadata partitions = 2; } message FilePartitionMetadata { diff --git a/ballista/rust/core/src/serde/logical_plan/from_proto.rs b/ballista/rust/core/src/serde/logical_plan/from_proto.rs index 353be9a59642..07eced784004 100644 --- a/ballista/rust/core/src/serde/logical_plan/from_proto.rs +++ b/ballista/rust/core/src/serde/logical_plan/from_proto.rs @@ -18,11 +18,16 @@ //! Serde code to convert from protocol buffers to Rust data structures. 
use crate::error::BallistaError; -use crate::serde::{from_proto_binary_op, proto_error, protobuf}; +use crate::serde::{from_proto_binary_op, proto_error, protobuf, str_to_byte}; use crate::{convert_box_required, convert_required}; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; -use datafusion::datasource::parquet::{ParquetTable, ParquetTableDescriptor}; -use datafusion::datasource::{PartitionedFile, TableDescriptor}; +use datafusion::datasource::file_format::avro::AvroFormat; +use datafusion::datasource::file_format::csv::CsvFormat; +use datafusion::datasource::file_format::parquet::ParquetFormat; +use datafusion::datasource::file_format::FileFormat; +use datafusion::datasource::listing::{ListingOptions, ListingTable}; +use datafusion::datasource::object_store::local::LocalFileSystem; +use datafusion::datasource::object_store::{FileMeta, SizedFile}; use datafusion::logical_plan::window_frames::{ WindowFrame, WindowFrameBound, WindowFrameUnits, }; @@ -32,10 +37,10 @@ use datafusion::logical_plan::{ LogicalPlan, LogicalPlanBuilder, Operator, }; use datafusion::physical_plan::aggregates::AggregateFunction; -use datafusion::physical_plan::avro::AvroReadOptions; -use datafusion::physical_plan::csv::CsvReadOptions; use datafusion::physical_plan::window_functions::BuiltInWindowFunction; +use datafusion::prelude::*; use datafusion::scalar::ScalarValue; +use protobuf::listing_table_scan_node::FileFormatType; use protobuf::logical_plan_node::LogicalPlanType; use protobuf::{logical_expr_node::ExprType, scalar_type}; use std::{ @@ -116,13 +121,8 @@ impl TryInto for &protobuf::LogicalPlanNode { .build() .map_err(|e| e.into()) } - LogicalPlanType::CsvScan(scan) => { + LogicalPlanType::ListingScan(scan) => { let schema: Schema = convert_required!(scan.schema)?; - let options = CsvReadOptions::new() - .schema(&schema) - .delimiter(scan.delimiter.as_bytes()[0]) - .file_extension(&scan.file_extension) - .has_header(scan.has_header); let mut projection = None; if let Some(columns) = &scan.projection { @@ -134,73 +134,55 @@ impl TryInto for &protobuf::LogicalPlanNode { projection = Some(column_indices); } - LogicalPlanBuilder::scan_csv_with_name( - &scan.path, - options, - projection, - &scan.table_name, - )? - .build() - .map_err(|e| e.into()) - } - LogicalPlanType::ParquetScan(scan) => { - let descriptor: TableDescriptor = convert_required!(scan.table_desc)?; - let projection = match scan.projection.as_ref() { - None => None, - Some(columns) => { - let schema = descriptor.schema.clone(); - let r: Result, _> = columns - .columns - .iter() - .map(|col_name| { - schema.fields().iter().position(|field| field.name() == col_name).ok_or_else(|| { - let column_names: Vec<&String> = schema.fields().iter().map(|f| f.name()).collect(); - proto_error(format!( - "Parquet projection contains column name that is not present in schema. Column name: {}. Schema columns: {:?}", - col_name, column_names - )) - }) - }) - .collect(); - Some(r?) - } - }; + let filters = scan + .filters + .iter() + .map(|e| e.try_into()) + .collect::, _>>()?; - let parquet_table = ParquetTable::try_new_with_desc( - Arc::new(ParquetTableDescriptor { descriptor }), - scan.target_partitions as usize, - true, - )?; - LogicalPlanBuilder::scan( - &scan.table_name, - Arc::new(parquet_table), - projection, - )? 
- .build() - .map_err(|e| e.into()) - } - LogicalPlanType::AvroScan(scan) => { - let schema: Schema = convert_required!(scan.schema)?; - let options = AvroReadOptions { - schema: Some(Arc::new(schema.clone())), - file_extension: &scan.file_extension, + let file_format: Arc = + match scan.file_format_type.as_ref().ok_or_else(|| { + proto_error(format!( + "logical_plan::from_proto() Unsupported file format '{:?}'", + self + )) + })? { + &FileFormatType::Parquet(protobuf::ParquetFormat { + enable_pruning, + }) => Arc::new( + ParquetFormat::default().with_enable_pruning(enable_pruning), + ), + FileFormatType::Csv(protobuf::CsvFormat { + has_header, + delimiter, + }) => Arc::new( + CsvFormat::default() + .with_has_header(*has_header) + .with_delimiter(str_to_byte(delimiter)?), + ), + FileFormatType::Avro(..) => Arc::new(AvroFormat::default()), + }; + + let options = ListingOptions { + file_extension: scan.file_extension.clone(), + format: file_format, + partitions: scan.partitions.clone(), + collect_stat: scan.collect_stat, + target_partitions: scan.target_partitions as usize, }; - let mut projection = None; - if let Some(columns) = &scan.projection { - let column_indices = columns - .columns - .iter() - .map(|name| schema.index_of(name)) - .collect::, _>>()?; - projection = Some(column_indices); - } - - LogicalPlanBuilder::scan_avro_with_name( - &scan.path, + let provider = ListingTable::new( + Arc::new(LocalFileSystem {}), + scan.path.clone(), + Arc::new(schema), options, - projection, + ); + + LogicalPlanBuilder::scan_with_filters( &scan.table_name, + Arc::new(provider), + projection, + filters, )? .build() .map_err(|e| e.into()) @@ -343,61 +325,6 @@ impl TryInto for &protobuf::LogicalPlanNode { } } -impl TryInto for &protobuf::TableDescriptor { - type Error = BallistaError; - - fn try_into(self) -> Result { - let partition_files = self - .partition_files - .iter() - .map(|f| f.try_into()) - .collect::, _>>()?; - let schema = convert_required!(self.schema)?; - Ok(TableDescriptor { - path: self.path.to_owned(), - partition_files, - schema: Arc::new(schema), - }) - } -} - -impl TryInto for &protobuf::PartitionedFile { - type Error = BallistaError; - - fn try_into(self) -> Result { - let statistics = convert_required!(self.statistics)?; - Ok(PartitionedFile { - path: self.path.clone(), - statistics, - }) - } -} - -impl From<&protobuf::ColumnStats> for ColumnStatistics { - fn from(cs: &protobuf::ColumnStats) -> ColumnStatistics { - ColumnStatistics { - null_count: Some(cs.null_count as usize), - max_value: cs.max_value.as_ref().map(|m| m.try_into().unwrap()), - min_value: cs.min_value.as_ref().map(|m| m.try_into().unwrap()), - distinct_count: Some(cs.distinct_count as usize), - } - } -} - -impl TryInto for &protobuf::Statistics { - type Error = BallistaError; - - fn try_into(self) -> Result { - let column_statistics = self.column_stats.iter().map(|s| s.into()).collect(); - Ok(Statistics { - num_rows: Some(self.num_rows as usize), - total_byte_size: Some(self.total_byte_size as usize), - column_statistics: Some(column_statistics), - is_exact: self.is_exact, - }) - } -} - impl From<&protobuf::Column> for Column { fn from(c: &protobuf::Column) -> Column { let c = c.clone(); @@ -1215,7 +1142,7 @@ impl TryInto for &protobuf::Field { } use crate::serde::protobuf::ColumnStats; -use datafusion::physical_plan::{aggregates, windows, ColumnStatistics, Statistics}; +use datafusion::physical_plan::{aggregates, windows}; use datafusion::prelude::{ array, date_part, date_trunc, length, lower, ltrim, md5, 
rtrim, sha224, sha256, sha384, sha512, trim, upper, diff --git a/ballista/rust/core/src/serde/logical_plan/mod.rs b/ballista/rust/core/src/serde/logical_plan/mod.rs index ada3c85de674..09bcf1fd048f 100644 --- a/ballista/rust/core/src/serde/logical_plan/mod.rs +++ b/ballista/rust/core/src/serde/logical_plan/mod.rs @@ -26,16 +26,17 @@ mod roundtrip_tests { use core::panic; use datafusion::{ arrow::datatypes::{DataType, Field, IntervalUnit, Schema, TimeUnit}, + datasource::object_store::local::LocalFileSystem, logical_plan::{ col, Expr, LogicalPlan, LogicalPlanBuilder, Partitioning, ToDFSchema, }, - physical_plan::{csv::CsvReadOptions, functions::BuiltinScalarFunction::Sqrt}, + physical_plan::functions::BuiltinScalarFunction::Sqrt, prelude::*, scalar::ScalarValue, sql::parser::FileType, }; use protobuf::arrow_type; - use std::convert::TryInto; + use std::{convert::TryInto, sync::Arc}; //Given a identity of a LogicalPlan converts it to protobuf and back, using debug formatting to test equality. macro_rules! roundtrip_test { @@ -57,8 +58,8 @@ mod roundtrip_tests { }; } - #[test] - fn roundtrip_repartition() -> Result<()> { + #[tokio::test] + async fn roundtrip_repartition() -> Result<()> { use datafusion::logical_plan::Partitioning; let test_batch_sizes = [usize::MIN, usize::MAX, 43256]; @@ -76,10 +77,13 @@ mod roundtrip_tests { let plan = std::sync::Arc::new( LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), "employee.csv", CsvReadOptions::new().schema(&schema).has_header(true), Some(vec![3, 4]), + 4, ) + .await .and_then(|plan| plan.sort(vec![col("salary")])) .and_then(|plan| plan.build()) .map_err(BallistaError::DataFusionError)?, @@ -665,8 +669,8 @@ mod roundtrip_tests { Ok(()) } - #[test] - fn roundtrip_analyze() -> Result<()> { + #[tokio::test] + async fn roundtrip_analyze() -> Result<()> { let schema = Schema::new(vec![ Field::new("id", DataType::Int32, false), Field::new("first_name", DataType::Utf8, false), @@ -676,20 +680,26 @@ mod roundtrip_tests { ]); let verbose_plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), "employee.csv", CsvReadOptions::new().schema(&schema).has_header(true), Some(vec![3, 4]), + 4, ) + .await .and_then(|plan| plan.sort(vec![col("salary")])) .and_then(|plan| plan.explain(true, true)) .and_then(|plan| plan.build()) .map_err(BallistaError::DataFusionError)?; let plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), "employee.csv", CsvReadOptions::new().schema(&schema).has_header(true), Some(vec![3, 4]), + 4, ) + .await .and_then(|plan| plan.sort(vec![col("salary")])) .and_then(|plan| plan.explain(false, true)) .and_then(|plan| plan.build()) @@ -702,8 +712,8 @@ mod roundtrip_tests { Ok(()) } - #[test] - fn roundtrip_explain() -> Result<()> { + #[tokio::test] + async fn roundtrip_explain() -> Result<()> { let schema = Schema::new(vec![ Field::new("id", DataType::Int32, false), Field::new("first_name", DataType::Utf8, false), @@ -713,20 +723,26 @@ mod roundtrip_tests { ]); let verbose_plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), "employee.csv", CsvReadOptions::new().schema(&schema).has_header(true), Some(vec![3, 4]), + 4, ) + .await .and_then(|plan| plan.sort(vec![col("salary")])) .and_then(|plan| plan.explain(true, false)) .and_then(|plan| plan.build()) .map_err(BallistaError::DataFusionError)?; let plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), "employee.csv", CsvReadOptions::new().schema(&schema).has_header(true), Some(vec![3, 4]), + 4, ) + .await .and_then(|plan| 
plan.sort(vec![col("salary")])) .and_then(|plan| plan.explain(false, false)) .and_then(|plan| plan.build()) @@ -739,8 +755,8 @@ mod roundtrip_tests { Ok(()) } - #[test] - fn roundtrip_join() -> Result<()> { + #[tokio::test] + async fn roundtrip_join() -> Result<()> { let schema = Schema::new(vec![ Field::new("id", DataType::Int32, false), Field::new("first_name", DataType::Utf8, false), @@ -750,18 +766,24 @@ mod roundtrip_tests { ]); let scan_plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), "employee1", CsvReadOptions::new().schema(&schema).has_header(true), Some(vec![0, 3, 4]), - )? + 4, + ) + .await? .build() .map_err(BallistaError::DataFusionError)?; let plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), "employee2", CsvReadOptions::new().schema(&schema).has_header(true), Some(vec![0, 3, 4]), + 4, ) + .await .and_then(|plan| plan.join(&scan_plan, JoinType::Inner, (vec!["id"], vec!["id"]))) .and_then(|plan| plan.build()) .map_err(BallistaError::DataFusionError)?; @@ -770,8 +792,8 @@ mod roundtrip_tests { Ok(()) } - #[test] - fn roundtrip_sort() -> Result<()> { + #[tokio::test] + async fn roundtrip_sort() -> Result<()> { let schema = Schema::new(vec![ Field::new("id", DataType::Int32, false), Field::new("first_name", DataType::Utf8, false), @@ -781,10 +803,13 @@ mod roundtrip_tests { ]); let plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), "employee.csv", CsvReadOptions::new().schema(&schema).has_header(true), Some(vec![3, 4]), + 4, ) + .await .and_then(|plan| plan.sort(vec![col("salary")])) .and_then(|plan| plan.build()) .map_err(BallistaError::DataFusionError)?; @@ -793,8 +818,8 @@ mod roundtrip_tests { Ok(()) } - #[test] - fn roundtrip_empty_relation() -> Result<()> { + #[tokio::test] + async fn roundtrip_empty_relation() -> Result<()> { let plan_false = LogicalPlanBuilder::empty(false) .build() .map_err(BallistaError::DataFusionError)?; @@ -810,8 +835,8 @@ mod roundtrip_tests { Ok(()) } - #[test] - fn roundtrip_logical_plan() -> Result<()> { + #[tokio::test] + async fn roundtrip_logical_plan() -> Result<()> { let schema = Schema::new(vec![ Field::new("id", DataType::Int32, false), Field::new("first_name", DataType::Utf8, false), @@ -821,10 +846,13 @@ mod roundtrip_tests { ]); let plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), "employee.csv", CsvReadOptions::new().schema(&schema).has_header(true), Some(vec![3, 4]), + 4, ) + .await .and_then(|plan| plan.aggregate(vec![col("state")], vec![max(col("salary"))])) .and_then(|plan| plan.build()) .map_err(BallistaError::DataFusionError)?; diff --git a/ballista/rust/core/src/serde/logical_plan/to_proto.rs b/ballista/rust/core/src/serde/logical_plan/to_proto.rs index 402422adb205..ba7daca18a4e 100644 --- a/ballista/rust/core/src/serde/logical_plan/to_proto.rs +++ b/ballista/rust/core/src/serde/logical_plan/to_proto.rs @@ -20,13 +20,18 @@ //! processes. 
use super::super::proto_error; -use crate::serde::{protobuf, BallistaError}; +use crate::serde::{byte_to_string, protobuf, BallistaError}; use datafusion::arrow::datatypes::{ DataType, Field, IntervalUnit, Schema, SchemaRef, TimeUnit, }; -use datafusion::datasource::avro::AvroFile; -use datafusion::datasource::{CsvFile, PartitionedFile, TableDescriptor}; +use datafusion::datasource::file_format::avro::AvroFormat; +use datafusion::datasource::file_format::csv::CsvFormat; +use datafusion::datasource::TableProvider; + +use datafusion::datasource::file_format::parquet::ParquetFormat; +use datafusion::datasource::listing::ListingTable; use datafusion::logical_plan::{ + exprlist_to_fields, window_frames::{WindowFrame, WindowFrameBound, WindowFrameUnits}, Column, Expr, JoinConstraint, JoinType, LogicalPlan, }; @@ -36,7 +41,7 @@ use datafusion::physical_plan::window_functions::{ BuiltInWindowFunction, WindowFunction, }; use datafusion::physical_plan::{ColumnStatistics, Statistics}; -use datafusion::{datasource::parquet::ParquetTable, logical_plan::exprlist_to_fields}; +use protobuf::listing_table_scan_node::FileFormatType; use protobuf::{ arrow_type, logical_expr_node::ExprType, scalar_type, DateUnit, PrimitiveScalarType, ScalarListValue, ScalarType, @@ -256,59 +261,6 @@ impl TryInto for &protobuf::ArrowType { } } -impl From<&ColumnStatistics> for protobuf::ColumnStats { - fn from(cs: &ColumnStatistics) -> protobuf::ColumnStats { - protobuf::ColumnStats { - min_value: cs.min_value.as_ref().map(|m| m.try_into().unwrap()), - max_value: cs.max_value.as_ref().map(|m| m.try_into().unwrap()), - null_count: cs.null_count.map(|n| n as u32).unwrap_or(0), - distinct_count: cs.distinct_count.map(|n| n as u32).unwrap_or(0), - } - } -} - -impl From<&Statistics> for protobuf::Statistics { - fn from(s: &Statistics) -> protobuf::Statistics { - let none_value = -1_i64; - let column_stats = match &s.column_statistics { - None => vec![], - Some(column_stats) => column_stats.iter().map(|s| s.into()).collect(), - }; - protobuf::Statistics { - num_rows: s.num_rows.map(|n| n as i64).unwrap_or(none_value), - total_byte_size: s.total_byte_size.map(|n| n as i64).unwrap_or(none_value), - column_stats, - is_exact: s.is_exact, - } - } -} - -impl From<&PartitionedFile> for protobuf::PartitionedFile { - fn from(pf: &PartitionedFile) -> protobuf::PartitionedFile { - protobuf::PartitionedFile { - path: pf.path.clone(), - statistics: Some((&pf.statistics).into()), - } - } -} - -impl TryFrom for protobuf::TableDescriptor { - type Error = BallistaError; - - fn try_from(desc: TableDescriptor) -> Result { - let partition_files: Vec = - desc.partition_files.iter().map(|pf| pf.into()).collect(); - - let schema: protobuf::Schema = desc.schema.into(); - - Ok(protobuf::TableDescriptor { - path: desc.path, - partition_files, - schema: Some(schema), - }) - } -} - impl TryInto for &Box { type Error = BallistaError; fn try_into(self) -> Result { @@ -748,49 +700,46 @@ impl TryInto for &LogicalPlan { .map(|filter| filter.try_into()) .collect::, _>>()?; - if let Some(parquet) = source.downcast_ref::() { - let table_desc: protobuf::TableDescriptor = - parquet.desc.descriptor.clone().try_into()?; - Ok(protobuf::LogicalPlanNode { - logical_plan_type: Some(LogicalPlanType::ParquetScan( - protobuf::ParquetTableScanNode { - table_name: table_name.to_owned(), - table_desc: Some(table_desc), - projection, - filters, - target_partitions: parquet.get_target_partitions() as u32, - }, - )), - }) - } else if let Some(csv) = source.downcast_ref::() { - let 
delimiter = [csv.delimiter()]; - let delimiter = std::str::from_utf8(&delimiter).map_err(|_| { - BallistaError::General("Invalid CSV delimiter".to_owned()) - })?; + if let Some(listing_table) = source.downcast_ref::() { + let any = listing_table.options().format.as_any(); + let file_format_type = if let Some(parquet) = + any.downcast_ref::() + { + FileFormatType::Parquet(protobuf::ParquetFormat { + enable_pruning: parquet.enable_pruning(), + }) + } else if let Some(csv) = any.downcast_ref::() { + FileFormatType::Csv(protobuf::CsvFormat { + delimiter: byte_to_string(csv.delimiter())?, + has_header: csv.has_header(), + }) + } else if any.is::() { + FileFormatType::Avro(protobuf::AvroFormat {}) + } else { + return Err(proto_error(format!( + "Error converting file format, {:?} is invalid as a datafusion foramt.", + listing_table.options().format + ))); + }; Ok(protobuf::LogicalPlanNode { - logical_plan_type: Some(LogicalPlanType::CsvScan( - protobuf::CsvTableScanNode { + logical_plan_type: Some(LogicalPlanType::ListingScan( + protobuf::ListingTableScanNode { + file_format_type: Some(file_format_type), table_name: table_name.to_owned(), - path: csv.path().to_owned(), - projection, + collect_stat: listing_table.options().collect_stat, + file_extension: listing_table + .options() + .file_extension + .clone(), + partitions: listing_table.options().partitions.clone(), + path: listing_table.path().to_owned(), schema: Some(schema), - has_header: csv.has_header(), - delimiter: delimiter.to_string(), - file_extension: csv.file_extension().to_string(), - filters, - }, - )), - }) - } else if let Some(avro) = source.downcast_ref::() { - Ok(protobuf::LogicalPlanNode { - logical_plan_type: Some(LogicalPlanType::AvroScan( - protobuf::AvroTableScanNode { - table_name: table_name.to_owned(), - path: avro.path().to_owned(), projection, - schema: Some(schema), - file_extension: avro.file_extension().to_string(), filters, + target_partitions: listing_table + .options() + .target_partitions + as u32, }, )), }) diff --git a/ballista/rust/core/src/serde/mod.rs b/ballista/rust/core/src/serde/mod.rs index a4df5a45555d..4a32b24b9531 100644 --- a/ballista/rust/core/src/serde/mod.rs +++ b/ballista/rust/core/src/serde/mod.rs @@ -339,3 +339,17 @@ impl From for protobuf::JoinConstraint { } } } + +fn byte_to_string(b: u8) -> Result { + let b = &[b]; + let b = std::str::from_utf8(b) + .map_err(|_| BallistaError::General("Invalid CSV delimiter".to_owned()))?; + Ok(b.to_owned()) +} + +fn str_to_byte(s: &str) -> Result { + if s.len() != 1 { + return Err(BallistaError::General("Invalid CSV delimiter".to_owned())); + } + Ok(s.as_bytes()[0]) +} diff --git a/ballista/rust/core/src/serde/physical_plan/from_proto.rs b/ballista/rust/core/src/serde/physical_plan/from_proto.rs index 5241e8b2bd5e..680e41980c72 100644 --- a/ballista/rust/core/src/serde/physical_plan/from_proto.rs +++ b/ballista/rust/core/src/serde/physical_plan/from_proto.rs @@ -28,14 +28,16 @@ use crate::execution_plans::{ use crate::serde::protobuf::repartition_exec_node::PartitionMethod; use crate::serde::protobuf::ShuffleReaderPartition; use crate::serde::scheduler::PartitionLocation; -use crate::serde::{from_proto_binary_op, proto_error, protobuf}; +use crate::serde::{from_proto_binary_op, proto_error, protobuf, str_to_byte}; use crate::{convert_box_required, convert_required, into_required}; +use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::catalog::catalog::{ CatalogList, 
CatalogProvider, MemoryCatalogList, MemoryCatalogProvider, }; -use datafusion::datasource::object_store::ObjectStoreRegistry; -use datafusion::datasource::FilePartition; +use datafusion::datasource::object_store::local::LocalFileSystem; +use datafusion::datasource::object_store::{FileMeta, ObjectStoreRegistry, SizedFile}; +use datafusion::datasource::{FilePartition, PartitionedFile}; use datafusion::execution::context::{ ExecutionConfig, ExecutionContextState, ExecutionProps, }; @@ -43,12 +45,11 @@ use datafusion::logical_plan::{ window_frames::WindowFrame, DFSchema, Expr, JoinConstraint, JoinType, }; use datafusion::physical_plan::aggregates::{create_aggregate_expr, AggregateFunction}; -use datafusion::physical_plan::avro::{AvroExec, AvroReadOptions}; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion::physical_plan::file_format::{AvroExec, CsvExec, ParquetExec}; use datafusion::physical_plan::hash_aggregate::{AggregateMode, HashAggregateExec}; use datafusion::physical_plan::hash_join::PartitionMode; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; -use datafusion::physical_plan::parquet::ParquetPartition; use datafusion::physical_plan::planner::DefaultPhysicalPlanner; use datafusion::physical_plan::window_functions::{ BuiltInWindowFunction, WindowFunction, @@ -57,7 +58,6 @@ use datafusion::physical_plan::windows::{create_window_expr, WindowAggExec}; use datafusion::physical_plan::{ coalesce_batches::CoalesceBatchesExec, cross_join::CrossJoinExec, - csv::CsvExec, empty::EmptyExec, expressions::{ col, Avg, BinaryExpr, CaseExpr, CastExpr, Column, InListExpr, IsNotNullExpr, @@ -68,14 +68,13 @@ use datafusion::physical_plan::{ functions::{self, BuiltinScalarFunction, ScalarFunctionExpr}, hash_join::HashJoinExec, limit::{GlobalLimitExec, LocalLimitExec}, - parquet::ParquetExec, projection::ProjectionExec, repartition::RepartitionExec, sort::{SortExec, SortOptions}, Partitioning, }; use datafusion::physical_plan::{ - AggregateExpr, ExecutionPlan, PhysicalExpr, Statistics, WindowExpr, + AggregateExpr, ColumnStatistics, ExecutionPlan, PhysicalExpr, Statistics, WindowExpr, }; use datafusion::prelude::CsvReadOptions; use log::debug; @@ -121,53 +120,64 @@ impl TryInto> for &protobuf::PhysicalPlanNode { } PhysicalPlanType::CsvScan(scan) => { let schema = Arc::new(convert_required!(scan.schema)?); - let options = CsvReadOptions::new() - .has_header(scan.has_header) - .file_extension(&scan.file_extension) - .delimiter(scan.delimiter.as_bytes()[0]) - .schema(&schema); let projection = scan.projection.iter().map(|i| *i as usize).collect(); - Ok(Arc::new(CsvExec::try_new( - &scan.path, - options, + let statistics = convert_required!(scan.statistics)?; + + Ok(Arc::new(CsvExec::new( + Arc::new(LocalFileSystem {}), + scan.files + .iter() + .map(|f| f.try_into()) + .collect::, _>>()?, + statistics, + schema, + scan.has_header, + str_to_byte(&scan.delimiter)?, Some(projection), scan.batch_size as usize, - None, - )?)) + scan.limit.as_ref().map(|sl| sl.limit as usize), + ))) } PhysicalPlanType::ParquetScan(scan) => { - let partitions = scan - .partitions - .iter() - .map(|p| p.try_into()) - .collect::, _>>()?; let schema = Arc::new(convert_required!(scan.schema)?); let projection = scan.projection.iter().map(|i| *i as usize).collect(); + let statistics = convert_required!(scan.statistics)?; + Ok(Arc::new(ParquetExec::new( - partitions, + Arc::new(LocalFileSystem {}), + scan.partitions + .iter() + .map(|p| { + let it = p.files.iter().map(|f| f.try_into()); 
+ it.collect::, _>>() + }) + .collect::>, _>>()?, + statistics, schema, Some(projection), - Statistics::default(), - ExecutionPlanMetricsSet::new(), + // TODO predicate should be de-serialized None, scan.batch_size as usize, - None, + scan.limit.as_ref().map(|sl| sl.limit as usize), ))) } PhysicalPlanType::AvroScan(scan) => { let schema = Arc::new(convert_required!(scan.schema)?); - let options = AvroReadOptions { - schema: Some(schema), - file_extension: &scan.file_extension, - }; let projection = scan.projection.iter().map(|i| *i as usize).collect(); - Ok(Arc::new(AvroExec::try_from_path( - &scan.path, - options, + let statistics = convert_required!(scan.statistics)?; + + Ok(Arc::new(AvroExec::new( + Arc::new(LocalFileSystem {}), + scan.files + .iter() + .map(|f| f.try_into()) + .collect::, _>>()?, + statistics, + schema, Some(projection), scan.batch_size as usize, - None, - )?)) + scan.limit.as_ref().map(|sl| sl.limit as usize), + ))) } PhysicalPlanType::CoalesceBatches(coalesce_batches) => { let input: Arc = @@ -498,23 +508,6 @@ impl TryInto> for &protobuf::PhysicalPlanNode { } } -impl TryInto for &protobuf::ParquetPartition { - type Error = BallistaError; - - fn try_into(self) -> Result { - let files = self - .files - .iter() - .map(|f| f.try_into()) - .collect::, _>>()?; - Ok(ParquetPartition::new( - files, - self.index as usize, - ExecutionPlanMetricsSet::new(), - )) - } -} - impl From<&protobuf::PhysicalColumn> for Column { fn from(c: &protobuf::PhysicalColumn) -> Column { Column::new(&c.name, c.index as usize) @@ -747,3 +740,57 @@ pub fn parse_protobuf_hash_partitioning( None => Ok(None), } } + +impl TryInto for &protobuf::PartitionedFile { + type Error = BallistaError; + + fn try_into(self) -> Result { + Ok(PartitionedFile { + file_meta: FileMeta { + sized_file: SizedFile { + path: self.path.clone(), + size: self.size, + }, + last_modified: if self.last_modified_ns == 0 { + None + } else { + Some(Utc.timestamp_nanos(self.last_modified_ns as i64)) + }, + }, + }) + } +} + +impl From<&protobuf::ColumnStats> for ColumnStatistics { + fn from(cs: &protobuf::ColumnStats) -> ColumnStatistics { + ColumnStatistics { + null_count: Some(cs.null_count as usize), + max_value: cs.max_value.as_ref().map(|m| m.try_into().unwrap()), + min_value: cs.min_value.as_ref().map(|m| m.try_into().unwrap()), + distinct_count: Some(cs.distinct_count as usize), + } + } +} + +impl TryInto for &protobuf::Statistics { + type Error = BallistaError; + + fn try_into(self) -> Result { + let column_statistics = self + .column_stats + .iter() + .map(|s| s.into()) + .collect::>(); + Ok(Statistics { + num_rows: Some(self.num_rows as usize), + total_byte_size: Some(self.total_byte_size as usize), + // No column statistic (None) is encoded with empty array + column_statistics: if column_statistics.is_empty() { + None + } else { + Some(column_statistics) + }, + is_exact: self.is_exact, + }) + } +} diff --git a/ballista/rust/core/src/serde/physical_plan/to_proto.rs b/ballista/rust/core/src/serde/physical_plan/to_proto.rs index 22a49cb881ba..020b6888d8cf 100644 --- a/ballista/rust/core/src/serde/physical_plan/to_proto.rs +++ b/ballista/rust/core/src/serde/physical_plan/to_proto.rs @@ -26,21 +26,27 @@ use std::{ sync::Arc, }; -use datafusion::logical_plan::JoinType; -use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; -use datafusion::physical_plan::cross_join::CrossJoinExec; -use datafusion::physical_plan::csv::CsvExec; -use datafusion::physical_plan::expressions::{ - CaseExpr, InListExpr, IsNotNullExpr, 
IsNullExpr, NegativeExpr, NotExpr, -}; -use datafusion::physical_plan::expressions::{CastExpr, TryCastExpr}; -use datafusion::physical_plan::filter::FilterExec; use datafusion::physical_plan::hash_aggregate::AggregateMode; use datafusion::physical_plan::hash_join::{HashJoinExec, PartitionMode}; use datafusion::physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; -use datafusion::physical_plan::parquet::{ParquetExec, ParquetPartition}; use datafusion::physical_plan::projection::ProjectionExec; use datafusion::physical_plan::sort::SortExec; +use datafusion::physical_plan::{cross_join::CrossJoinExec, ColumnStatistics}; +use datafusion::physical_plan::{ + expressions::{ + CaseExpr, InListExpr, IsNotNullExpr, IsNullExpr, NegativeExpr, NotExpr, + }, + Statistics, +}; +use datafusion::physical_plan::{ + expressions::{CastExpr, TryCastExpr}, + file_format::ParquetExec, +}; +use datafusion::physical_plan::{file_format::AvroExec, filter::FilterExec}; +use datafusion::{ + datasource::PartitionedFile, physical_plan::coalesce_batches::CoalesceBatchesExec, +}; +use datafusion::{logical_plan::JoinType, physical_plan::file_format::CsvExec}; use datafusion::{ physical_plan::expressions::{Count, Literal}, scalar::ScalarValue, @@ -56,13 +62,13 @@ use datafusion::physical_plan::{AggregateExpr, ExecutionPlan, PhysicalExpr}; use datafusion::physical_plan::hash_aggregate::HashAggregateExec; use protobuf::physical_plan_node::PhysicalPlanType; -use crate::execution_plans::{ - ShuffleReaderExec, ShuffleWriterExec, UnresolvedShuffleExec, -}; use crate::serde::protobuf::repartition_exec_node::PartitionMethod; use crate::serde::scheduler::PartitionLocation; use crate::serde::{protobuf, BallistaError}; -use datafusion::physical_plan::avro::AvroExec; +use crate::{ + execution_plans::{ShuffleReaderExec, ShuffleWriterExec, UnresolvedShuffleExec}, + serde::byte_to_string, +}; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion::physical_plan::functions::{BuiltinScalarFunction, ScalarFunctionExpr}; use datafusion::physical_plan::repartition::RepartitionExec; @@ -238,44 +244,54 @@ impl TryInto for Arc { ))), }) } else if let Some(exec) = plan.downcast_ref::() { - let delimiter = [*exec.delimiter().ok_or_else(|| { - BallistaError::General("Delimeter is not set for CsvExec".to_owned()) - })?]; - let delimiter = std::str::from_utf8(&delimiter).map_err(|_| { - BallistaError::General("Invalid CSV delimiter".to_owned()) - })?; - Ok(protobuf::PhysicalPlanNode { physical_plan_type: Some(PhysicalPlanType::CsvScan( protobuf::CsvScanExecNode { - path: exec.path().to_owned(), - filename: exec.filenames().to_vec(), + files: exec + .files() + .iter() + .map(|f| f.into()) + .collect::>(), + statistics: Some((&exec.statistics()).into()), + limit: exec + .limit() + .map(|l| protobuf::ScanLimit { limit: l as u32 }), projection: exec .projection() + .as_ref() .ok_or_else(|| { BallistaError::General( - "projection in CsvExec dosn not exist.".to_owned(), + "projection in CsvExec does not exist.".to_owned(), ) })? 
.iter() .map(|n| *n as u32) .collect(), - file_extension: exec.file_extension().to_owned(), schema: Some(exec.file_schema().as_ref().into()), has_header: exec.has_header(), - delimiter: delimiter.to_string(), + delimiter: byte_to_string(exec.delimiter())?, batch_size: exec.batch_size() as u32, }, )), }) } else if let Some(exec) = plan.downcast_ref::() { - let partitions = exec.partitions().iter().map(|p| p.into()).collect(); + let partitions = exec + .partitions() + .into_iter() + .map(|p| protobuf::FilePartition { + files: p.iter().map(|f| f.into()).collect(), + }) + .collect(); Ok(protobuf::PhysicalPlanNode { physical_plan_type: Some(PhysicalPlanType::ParquetScan( protobuf::ParquetScanExecNode { partitions, - schema: Some(exec.schema.as_ref().into()), + statistics: Some((&exec.statistics()).into()), + limit: exec + .limit() + .map(|l| protobuf::ScanLimit { limit: l as u32 }), + schema: Some(exec.schema().as_ref().into()), projection: exec .projection() .as_ref() @@ -290,19 +306,26 @@ impl TryInto for Arc { Ok(protobuf::PhysicalPlanNode { physical_plan_type: Some(PhysicalPlanType::AvroScan( protobuf::AvroScanExecNode { - path: exec.path().to_owned(), - filename: exec.filenames().to_vec(), + files: exec + .files() + .iter() + .map(|f| f.into()) + .collect::>(), + statistics: Some((&exec.statistics()).into()), + limit: exec + .limit() + .map(|l| protobuf::ScanLimit { limit: l as u32 }), projection: exec .projection() + .as_ref() .ok_or_else(|| { BallistaError::General( - "projection in AvroExec doesn't exist.".to_owned(), + "projection in AvroExec does not exist.".to_owned(), ) })? .iter() .map(|n| *n as u32) .collect(), - file_extension: exec.file_extension().to_owned(), schema: Some(exec.file_schema().as_ref().into()), batch_size: exec.batch_size() as u32, }, @@ -641,16 +664,6 @@ impl TryFrom> for protobuf::PhysicalExprNode { } } -impl From<&ParquetPartition> for protobuf::ParquetPartition { - fn from(p: &ParquetPartition) -> protobuf::ParquetPartition { - let files = p.file_partition.files.iter().map(|f| f.into()).collect(); - protobuf::ParquetPartition { - index: p.file_partition.index as u32, - files, - } - } -} - fn try_parse_when_then_expr( when_expr: &Arc, then_expr: &Arc, @@ -660,3 +673,44 @@ fn try_parse_when_then_expr( then_expr: Some(then_expr.clone().try_into()?), }) } + +impl From<&PartitionedFile> for protobuf::PartitionedFile { + fn from(pf: &PartitionedFile) -> protobuf::PartitionedFile { + protobuf::PartitionedFile { + path: pf.file_meta.path().to_owned(), + size: pf.file_meta.size(), + last_modified_ns: pf + .file_meta + .last_modified + .map(|ts| ts.timestamp_nanos() as u64) + .unwrap_or(0), + } + } +} + +impl From<&ColumnStatistics> for protobuf::ColumnStats { + fn from(cs: &ColumnStatistics) -> protobuf::ColumnStats { + protobuf::ColumnStats { + min_value: cs.min_value.as_ref().map(|m| m.try_into().unwrap()), + max_value: cs.max_value.as_ref().map(|m| m.try_into().unwrap()), + null_count: cs.null_count.map(|n| n as u32).unwrap_or(0), + distinct_count: cs.distinct_count.map(|n| n as u32).unwrap_or(0), + } + } +} + +impl From<&Statistics> for protobuf::Statistics { + fn from(s: &Statistics) -> protobuf::Statistics { + let none_value = -1_i64; + let column_stats = match &s.column_statistics { + None => vec![], + Some(column_stats) => column_stats.iter().map(|s| s.into()).collect(), + }; + protobuf::Statistics { + num_rows: s.num_rows.map(|n| n as i64).unwrap_or(none_value), + total_byte_size: s.total_byte_size.map(|n| n as i64).unwrap_or(none_value), + column_stats, + 
is_exact: s.is_exact, + } + } +} diff --git a/ballista/rust/core/src/utils.rs b/ballista/rust/core/src/utils.rs index fd12eb996785..80391b38355f 100644 --- a/ballista/rust/core/src/utils.rs +++ b/ballista/rust/core/src/utils.rs @@ -52,13 +52,12 @@ use datafusion::physical_optimizer::merge_exec::AddCoalescePartitionsExec; use datafusion::physical_optimizer::optimizer::PhysicalOptimizerRule; use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; -use datafusion::physical_plan::csv::CsvExec; use datafusion::physical_plan::empty::EmptyExec; use datafusion::physical_plan::expressions::{BinaryExpr, Column, Literal}; +use datafusion::physical_plan::file_format::{CsvExec, ParquetExec}; use datafusion::physical_plan::filter::FilterExec; use datafusion::physical_plan::hash_aggregate::HashAggregateExec; use datafusion::physical_plan::hash_join::HashJoinExec; -use datafusion::physical_plan::parquet::ParquetExec; use datafusion::physical_plan::projection::ProjectionExec; use datafusion::physical_plan::sort::SortExec; use datafusion::physical_plan::{ diff --git a/ballista/rust/scheduler/src/lib.rs b/ballista/rust/scheduler/src/lib.rs index 47caf4c21ede..107ea28ff68b 100644 --- a/ballista/rust/scheduler/src/lib.rs +++ b/ballista/rust/scheduler/src/lib.rs @@ -22,6 +22,13 @@ pub mod planner; #[cfg(feature = "sled")] mod standalone; pub mod state; + +use datafusion::datasource::file_format::parquet::ParquetFormat; +use datafusion::datasource::file_format::FileFormat; +use datafusion::datasource::object_store::{local::LocalFileSystem, ObjectStore}; + +use futures::StreamExt; + #[cfg(feature = "sled")] pub use standalone::new_standalone_scheduler; @@ -40,10 +47,10 @@ use std::{fmt, net::IpAddr}; use ballista_core::serde::protobuf::{ execute_query_params::Query, executor_registration::OptionalHost, job_status, scheduler_grpc_server::SchedulerGrpc, task_status, ExecuteQueryParams, - ExecuteQueryResult, FailedJob, FilePartitionMetadata, FileType, - GetFileMetadataParams, GetFileMetadataResult, GetJobStatusParams, GetJobStatusResult, - JobStatus, PartitionId, PollWorkParams, PollWorkResult, QueuedJob, RunningJob, - TaskDefinition, TaskStatus, + ExecuteQueryResult, FailedJob, FileType, GetFileMetadataParams, + GetFileMetadataResult, GetJobStatusParams, GetJobStatusResult, JobStatus, + PartitionId, PollWorkParams, PollWorkResult, QueuedJob, RunningJob, TaskDefinition, + TaskStatus, }; use ballista_core::serde::scheduler::ExecutorMeta; @@ -82,7 +89,6 @@ use self::state::{ConfigBackendClient, SchedulerState}; use ballista_core::config::BallistaConfig; use ballista_core::execution_plans::ShuffleWriterExec; use ballista_core::serde::scheduler::to_proto::hash_partitioning_to_proto; -use datafusion::datasource::parquet::ParquetTableDescriptor; use datafusion::prelude::{ExecutionConfig, ExecutionContext}; use std::time::{Instant, SystemTime, UNIX_EPOCH}; @@ -272,6 +278,10 @@ impl SchedulerGrpc for SchedulerServer { &self, request: Request, ) -> std::result::Result, tonic::Status> { + // TODO support multiple object stores + let obj_store = LocalFileSystem {}; + // TODO shouldn't this take a ListingOption object as input? 
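+        // Remainder of this handler: destructure the request, map the requested
+        // FileType to a `FileFormat` implementation, list the matching files from
+        // the object store, and infer a single schema from their readers.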
+ let GetFileMetadataParams { path, file_type } = request.into_inner(); let file_type: FileType = file_type.try_into().map_err(|e| { @@ -280,34 +290,34 @@ impl SchedulerGrpc for SchedulerServer { tonic::Status::internal(msg) })?; - match file_type { - FileType::Parquet => { - let parquet_desc = ParquetTableDescriptor::new(&path).map_err(|e| { - let msg = format!("Error opening parquet files: {}", e); - error!("{}", msg); - tonic::Status::internal(msg) - })?; - - let partitions = parquet_desc - .descriptor - .partition_files - .iter() - .map(|pf| FilePartitionMetadata { - filename: vec![pf.path.clone()], - }) - .collect(); - - //TODO include statistics and any other info needed to reconstruct ParquetExec - Ok(Response::new(GetFileMetadataResult { - schema: Some(parquet_desc.schema().as_ref().into()), - partitions, - })) - } + let file_format: Arc = match file_type { + FileType::Parquet => Ok(Arc::new(ParquetFormat::default())), //TODO implement for CSV _ => Err(tonic::Status::unimplemented( "get_file_metadata unsupported file type", )), - } + }?; + + let file_metas = obj_store.list_file(&path).await.map_err(|e| { + let msg = format!("Error listing files: {}", e); + error!("{}", msg); + tonic::Status::internal(msg) + })?; + + let obj_readers = file_metas.map(move |f| obj_store.file_reader(f?.sized_file)); + + let schema = file_format + .infer_schema(Box::pin(obj_readers)) + .await + .map_err(|e| { + let msg = format!("Error infering schema: {}", e); + error!("{}", msg); + tonic::Status::internal(msg) + })?; + + Ok(Response::new(GetFileMetadataResult { + schema: Some(schema.as_ref().into()), + })) } async fn execute_query( @@ -343,7 +353,7 @@ impl SchedulerGrpc for SchedulerServer { //TODO we can't just create a new context because we need a context that has // tables registered from previous SQL statements that have been executed let mut ctx = create_datafusion_context(&config); - let df = ctx.sql(&sql).map_err(|e| { + let df = ctx.sql(&sql).await.map_err(|e| { let msg = format!("Error parsing SQL: {}", e); error!("{}", msg); tonic::Status::internal(msg) diff --git a/ballista/rust/scheduler/src/planner.rs b/ballista/rust/scheduler/src/planner.rs index 3d5712ae8a67..3291a62abe64 100644 --- a/ballista/rust/scheduler/src/planner.rs +++ b/ballista/rust/scheduler/src/planner.rs @@ -271,15 +271,17 @@ mod test { #[tokio::test] async fn distributed_hash_aggregate_plan() -> Result<(), BallistaError> { - let mut ctx = datafusion_test_context("testdata")?; + let mut ctx = datafusion_test_context("testdata").await?; // simplified form of TPC-H query 1 - let df = ctx.sql( - "select l_returnflag, sum(l_extendedprice * 1) as sum_disc_price + let df = ctx + .sql( + "select l_returnflag, sum(l_extendedprice * 1) as sum_disc_price from lineitem group by l_returnflag order by l_returnflag", - )?; + ) + .await?; let plan = df.to_logical_plan(); let plan = ctx.optimize(&plan)?; @@ -356,11 +358,12 @@ mod test { #[tokio::test] async fn distributed_join_plan() -> Result<(), BallistaError> { - let mut ctx = datafusion_test_context("testdata")?; + let mut ctx = datafusion_test_context("testdata").await?; // simplified form of TPC-H query 12 - let df = ctx.sql( - "select + let df = ctx + .sql( + "select l_shipmode, sum(case when o_orderpriority = '1-URGENT' @@ -391,7 +394,8 @@ group by order by l_shipmode; ", - )?; + ) + .await?; let plan = df.to_logical_plan(); let plan = ctx.optimize(&plan)?; @@ -529,15 +533,17 @@ order by #[tokio::test] async fn roundtrip_serde_hash_aggregate() -> Result<(), BallistaError> { - let 
mut ctx = datafusion_test_context("testdata")?; + let mut ctx = datafusion_test_context("testdata").await?; // simplified form of TPC-H query 1 - let df = ctx.sql( - "select l_returnflag, sum(l_extendedprice * 1) as sum_disc_price + let df = ctx + .sql( + "select l_returnflag, sum(l_extendedprice * 1) as sum_disc_price from lineitem group by l_returnflag order by l_returnflag", - )?; + ) + .await?; let plan = df.to_logical_plan(); let plan = ctx.optimize(&plan)?; diff --git a/ballista/rust/scheduler/src/test_utils.rs b/ballista/rust/scheduler/src/test_utils.rs index d19730998ec4..b9d7ee42f48b 100644 --- a/ballista/rust/scheduler/src/test_utils.rs +++ b/ballista/rust/scheduler/src/test_utils.rs @@ -19,13 +19,13 @@ use ballista_core::error::Result; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::execution::context::{ExecutionConfig, ExecutionContext}; -use datafusion::physical_plan::csv::CsvReadOptions; +use datafusion::prelude::CsvReadOptions; pub const TPCH_TABLES: &[&str] = &[ "part", "supplier", "partsupp", "customer", "orders", "lineitem", "nation", "region", ]; -pub fn datafusion_test_context(path: &str) -> Result { +pub async fn datafusion_test_context(path: &str) -> Result { let default_shuffle_partitions = 2; let config = ExecutionConfig::new().with_target_partitions(default_shuffle_partitions); @@ -38,7 +38,7 @@ pub fn datafusion_test_context(path: &str) -> Result { .has_header(false) .file_extension(".tbl"); let dir = format!("{}/{}", path, table); - ctx.register_csv(table, &dir, options)?; + ctx.register_csv(table, &dir, options).await?; } Ok(ctx) } diff --git a/benchmarks/src/bin/nyctaxi.rs b/benchmarks/src/bin/nyctaxi.rs index a88494fc8547..59fc69180368 100644 --- a/benchmarks/src/bin/nyctaxi.rs +++ b/benchmarks/src/bin/nyctaxi.rs @@ -29,7 +29,7 @@ use datafusion::error::Result; use datafusion::execution::context::{ExecutionConfig, ExecutionContext}; use datafusion::physical_plan::collect; -use datafusion::physical_plan::csv::CsvReadOptions; +use datafusion::prelude::CsvReadOptions; use structopt::StructOpt; #[cfg(feature = "snmalloc")] @@ -80,9 +80,9 @@ async fn main() -> Result<()> { "csv" => { let schema = nyctaxi_schema(); let options = CsvReadOptions::new().schema(&schema).has_header(true); - ctx.register_csv("tripdata", path, options)? + ctx.register_csv("tripdata", path, options).await? 
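+                // register_csv is async in this API: registration may list files or infer schemas through the object store, hence the await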
} - "parquet" => ctx.register_parquet("tripdata", path)?, + "parquet" => ctx.register_parquet("tripdata", path).await?, other => { println!("Invalid file format '{}'", other); process::exit(-1); diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index 203c186e1ec3..bfe87efb9f74 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -28,11 +28,7 @@ use std::{ use ballista::context::BallistaContext; use ballista::prelude::{BallistaConfig, BALLISTA_DEFAULT_SHUFFLE_PARTITIONS}; -use datafusion::arrow::datatypes::{DataType, Field, Schema}; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::arrow::util::pretty; -use datafusion::datasource::parquet::ParquetTable; -use datafusion::datasource::{CsvFile, MemTable, TableProvider}; +use datafusion::datasource::{MemTable, TableProvider}; use datafusion::error::{DataFusionError, Result}; use datafusion::logical_plan::LogicalPlan; use datafusion::parquet::basic::Compression; @@ -40,6 +36,20 @@ use datafusion::parquet::file::properties::WriterProperties; use datafusion::physical_plan::display::DisplayableExecutionPlan; use datafusion::physical_plan::{collect, displayable}; use datafusion::prelude::*; +use datafusion::{ + arrow::datatypes::{DataType, Field, Schema}, + datasource::file_format::{csv::CsvFormat, FileFormat}, +}; +use datafusion::{ + arrow::record_batch::RecordBatch, datasource::file_format::parquet::ParquetFormat, +}; +use datafusion::{ + arrow::util::pretty, + datasource::{ + listing::{ListingOptions, ListingTable}, + object_store::local::LocalFileSystem, + }, +}; use structopt::StructOpt; @@ -272,6 +282,7 @@ async fn benchmark_ballista(opt: BallistaBenchmarkOpt) -> Result<()> { .has_header(false) .file_extension(".tbl"); ctx.register_csv(table, &path, options) + .await .map_err(|e| DataFusionError::Plan(format!("{:?}", e)))?; } "csv" => { @@ -279,11 +290,13 @@ async fn benchmark_ballista(opt: BallistaBenchmarkOpt) -> Result<()> { let schema = get_schema(table); let options = CsvReadOptions::new().schema(&schema).has_header(true); ctx.register_csv(table, &path, options) + .await .map_err(|e| DataFusionError::Plan(format!("{:?}", e)))?; } "parquet" => { let path = format!("{}/{}", path, table); ctx.register_parquet(table, &path) + .await .map_err(|e| DataFusionError::Plan(format!("{:?}", e)))?; } other => { @@ -301,6 +314,7 @@ async fn benchmark_ballista(opt: BallistaBenchmarkOpt) -> Result<()> { let start = Instant::now(); let df = ctx .sql(&sql) + .await .map_err(|e| DataFusionError::Plan(format!("{:?}", e)))?; let batches = df .collect() @@ -384,7 +398,7 @@ async fn convert_tbl(opt: ConvertOpt) -> Result<()> { let mut ctx = ExecutionContext::with_config(config); // build plan to read the TBL file - let mut csv = ctx.read_csv(&input_path, options)?; + let mut csv = ctx.read_csv(&input_path, options).await?; // optionally, repartition the file if opt.partitions > 1 { @@ -445,40 +459,52 @@ fn get_table( table_format: &str, target_partitions: usize, ) -> Result> { - match table_format { - // dbgen creates .tbl ('|' delimited) files without header - "tbl" => { - let path = format!("{}/{}.tbl", path, table); - let schema = get_schema(table); - let options = CsvReadOptions::new() - .schema(&schema) - .delimiter(b'|') - .has_header(false) - .file_extension(".tbl"); + let (format, path, extension): (Arc, String, &'static str) = + match table_format { + // dbgen creates .tbl ('|' delimited) files without header + "tbl" => { + let path = format!("{}/{}.tbl", path, table); - 
Ok(Arc::new(CsvFile::try_new(&path, options)?)) - } - "csv" => { - let path = format!("{}/{}", path, table); - let schema = get_schema(table); - let options = CsvReadOptions::new().schema(&schema).has_header(true); + let format = CsvFormat::default() + .with_delimiter(b'|') + .with_has_header(false); - Ok(Arc::new(CsvFile::try_new(&path, options)?)) - } - "parquet" => { - let path = format!("{}/{}", path, table); - let schema = get_schema(table); - Ok(Arc::new(ParquetTable::try_new_with_schema( - &path, - schema, - target_partitions, - false, - )?)) - } - other => { - unimplemented!("Invalid file format '{}'", other); - } - } + (Arc::new(format), path, ".tbl") + } + "csv" => { + let path = format!("{}/{}", path, table); + let format = CsvFormat::default() + .with_delimiter(b',') + .with_has_header(true); + + (Arc::new(format), path, ".csv") + } + "parquet" => { + let path = format!("{}/{}", path, table); + let format = ParquetFormat::default().with_enable_pruning(true); + + (Arc::new(format), path, ".parquet") + } + other => { + unimplemented!("Invalid file format '{}'", other); + } + }; + let schema = Arc::new(get_schema(table)); + + let options = ListingOptions { + format, + file_extension: extension.to_owned(), + target_partitions, + collect_stat: true, + partitions: vec![], + }; + + Ok(Arc::new(ListingTable::new( + Arc::new(LocalFileSystem {}), + path, + schema, + options, + ))) } fn get_schema(table: &str) -> Schema { @@ -1002,7 +1028,9 @@ mod tests { .schema(&schema) .delimiter(b'|') .file_extension(".out"); - let df = ctx.read_csv(&format!("{}/answers/q{}.out", path, n), options)?; + let df = ctx + .read_csv(&format!("{}/answers/q{}.out", path, n), options) + .await?; let df = df.select( get_answer_schema(n) .fields() @@ -1081,10 +1109,13 @@ mod tests { .delimiter(b'|') .has_header(false) .file_extension(".tbl"); - let provider = CsvFile::try_new( - &format!("{}/{}.tbl", tpch_data_path, table), - options, - )?; + let listing_options = options.to_listing_options(1); + let provider = ListingTable::new( + Arc::new(LocalFileSystem {}), + format!("{}/{}.tbl", tpch_data_path, table), + Arc::new(schema), + listing_options, + ); ctx.register_table(table, Arc::new(provider))?; } @@ -1105,7 +1136,7 @@ mod tests { assert_eq!( format!("{:?}", plan), format!("{:?}", round_trip), - "opitmized logical plan round trip failed" + "optimized logical plan round trip failed" ); // test physical plan roundtrip diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 4a4588812c24..481448596c23 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -285,8 +285,8 @@ async fn exec_and_print( let now = Instant::now(); let df = match ctx { - Context::Local(datafusion) => datafusion.sql(&sql)?, - Context::Remote(ballista) => ballista.sql(&sql)?, + Context::Local(datafusion) => datafusion.sql(&sql).await?, + Context::Remote(ballista) => ballista.sql(&sql).await?, }; let results = df.collect().await?; diff --git a/datafusion-examples/examples/avro_sql.rs b/datafusion-examples/examples/avro_sql.rs index e9676a05b1fc..f08c12bbb73a 100644 --- a/datafusion-examples/examples/avro_sql.rs +++ b/datafusion-examples/examples/avro_sql.rs @@ -18,7 +18,6 @@ use datafusion::arrow::util::pretty; use datafusion::error::Result; -use datafusion::physical_plan::avro::AvroReadOptions; use datafusion::prelude::*; /// This example demonstrates executing a simple query against an Arrow data source (Avro) and @@ -32,14 +31,17 @@ async fn main() -> Result<()> { // register avro file with the 
execution context let avro_file = &format!("{}/avro/alltypes_plain.avro", testdata); - ctx.register_avro("alltypes_plain", avro_file, AvroReadOptions::default())?; + ctx.register_avro("alltypes_plain", avro_file, AvroReadOptions::default()) + .await?; // execute the query - let df = ctx.sql( - "SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \ + let df = ctx + .sql( + "SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \ FROM alltypes_plain \ WHERE id > 1 AND tinyint_col < double_col", - )?; + ) + .await?; let results = df.collect().await?; // print the results diff --git a/datafusion-examples/examples/csv_sql.rs b/datafusion-examples/examples/csv_sql.rs index a1cdf5d6e0ab..5ad9bd7d4385 100644 --- a/datafusion-examples/examples/csv_sql.rs +++ b/datafusion-examples/examples/csv_sql.rs @@ -32,15 +32,18 @@ async fn main() -> Result<()> { "aggregate_test_100", &format!("{}/csv/aggregate_test_100.csv", testdata), CsvReadOptions::new(), - )?; + ) + .await?; // execute the query - let df = ctx.sql( - "SELECT c1, MIN(c12), MAX(c12) \ + let df = ctx + .sql( + "SELECT c1, MIN(c12), MAX(c12) \ FROM aggregate_test_100 \ WHERE c11 > 0.1 AND c11 < 0.9 \ GROUP BY c1", - )?; + ) + .await?; // print the results df.show().await?; diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs index 013f3224ae59..6fd34610ba5c 100644 --- a/datafusion-examples/examples/dataframe.rs +++ b/datafusion-examples/examples/dataframe.rs @@ -31,7 +31,8 @@ async fn main() -> Result<()> { // define the query using the DataFrame trait let df = ctx - .read_parquet(filename)? + .read_parquet(filename) + .await? .select_columns(&["id", "bool_col", "timestamp_col"])? .filter(col("id").gt(lit(1)))?; diff --git a/datafusion-examples/examples/flight_server.rs b/datafusion-examples/examples/flight_server.rs index 138434ea2482..c26dcce59f69 100644 --- a/datafusion-examples/examples/flight_server.rs +++ b/datafusion-examples/examples/flight_server.rs @@ -16,14 +16,16 @@ // under the License. 
use std::pin::Pin; +use std::sync::Arc; use arrow_flight::SchemaAsIpc; +use datafusion::datasource::file_format::parquet::ParquetFormat; +use datafusion::datasource::listing::ListingOptions; +use datafusion::datasource::object_store::local::LocalFileSystem; use futures::Stream; use tonic::transport::Server; use tonic::{Request, Response, Status, Streaming}; -use datafusion::datasource::parquet::ParquetTable; -use datafusion::datasource::TableProvider; use datafusion::prelude::*; use arrow_flight::{ @@ -65,10 +67,15 @@ impl FlightService for FlightServiceImpl { ) -> Result, Status> { let request = request.into_inner(); - let table = ParquetTable::try_new(&request.path[0], num_cpus::get()).unwrap(); + let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); + + let schema = listing_options + .infer_schema(Arc::new(LocalFileSystem {}), &request.path[0]) + .await + .unwrap(); let options = datafusion::arrow::ipc::writer::IpcWriteOptions::default(); - let schema_result = SchemaAsIpc::new(table.schema().as_ref(), &options).into(); + let schema_result = SchemaAsIpc::new(&schema, &options).into(); Ok(Response::new(schema_result)) } @@ -92,10 +99,11 @@ impl FlightService for FlightServiceImpl { "alltypes_plain", &format!("{}/alltypes_plain.parquet", testdata), ) + .await .map_err(to_tonic_err)?; // create the DataFrame - let df = ctx.sql(sql).map_err(to_tonic_err)?; + let df = ctx.sql(sql).await.map_err(to_tonic_err)?; // execute the query let results = df.collect().await.map_err(to_tonic_err)?; diff --git a/datafusion-examples/examples/parquet_sql.rs b/datafusion-examples/examples/parquet_sql.rs index 2f3ce916f4bf..e74ed39c68ce 100644 --- a/datafusion-examples/examples/parquet_sql.rs +++ b/datafusion-examples/examples/parquet_sql.rs @@ -31,14 +31,17 @@ async fn main() -> Result<()> { ctx.register_parquet( "alltypes_plain", &format!("{}/alltypes_plain.parquet", testdata), - )?; + ) + .await?; // execute the query - let df = ctx.sql( - "SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \ + let df = ctx + .sql( + "SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \ FROM alltypes_plain \ WHERE id > 1 AND tinyint_col < double_col", - )?; + ) + .await?; // print the results df.show().await?; diff --git a/datafusion/benches/aggregate_query_sql.rs b/datafusion/benches/aggregate_query_sql.rs index b8fe06fd9145..dc40c61db41d 100644 --- a/datafusion/benches/aggregate_query_sql.rs +++ b/datafusion/benches/aggregate_query_sql.rs @@ -30,7 +30,7 @@ use tokio::runtime::Runtime; fn query(ctx: Arc>, sql: &str) { let rt = Runtime::new().unwrap(); - let df = ctx.lock().unwrap().sql(sql).unwrap(); + let df = rt.block_on(ctx.lock().unwrap().sql(sql)).unwrap(); criterion::black_box(rt.block_on(df.collect()).unwrap()); } diff --git a/datafusion/benches/filter_query_sql.rs b/datafusion/benches/filter_query_sql.rs index aac7f9624872..c64c52126b0d 100644 --- a/datafusion/benches/filter_query_sql.rs +++ b/datafusion/benches/filter_query_sql.rs @@ -31,7 +31,7 @@ async fn query(ctx: &mut ExecutionContext, sql: &str) { let rt = Runtime::new().unwrap(); // execute the query - let df = ctx.sql(sql).unwrap(); + let df = rt.block_on(ctx.sql(sql)).unwrap(); criterion::black_box(rt.block_on(df.collect()).unwrap()); } diff --git a/datafusion/benches/math_query_sql.rs b/datafusion/benches/math_query_sql.rs index 51e52e8acddb..4f738890460f 100644 --- a/datafusion/benches/math_query_sql.rs +++ b/datafusion/benches/math_query_sql.rs @@ -40,7 +40,7 @@ fn query(ctx: Arc>, sql: &str) { let rt = 
Runtime::new().unwrap(); // execute the query - let df = ctx.lock().unwrap().sql(sql).unwrap(); + let df = rt.block_on(ctx.lock().unwrap().sql(sql)).unwrap(); rt.block_on(df.collect()).unwrap(); } diff --git a/datafusion/benches/sort_limit_query_sql.rs b/datafusion/benches/sort_limit_query_sql.rs index 195bd5cf15c2..f3151d2d7140 100644 --- a/datafusion/benches/sort_limit_query_sql.rs +++ b/datafusion/benches/sort_limit_query_sql.rs @@ -18,6 +18,9 @@ #[macro_use] extern crate criterion; use criterion::Criterion; +use datafusion::datasource::file_format::csv::CsvFormat; +use datafusion::datasource::listing::{ListingOptions, ListingTable}; +use datafusion::datasource::object_store::local::LocalFileSystem; use std::sync::{Arc, Mutex}; @@ -26,7 +29,7 @@ extern crate datafusion; use arrow::datatypes::{DataType, Field, Schema}; -use datafusion::datasource::{CsvFile, CsvReadOptions, MemTable}; +use datafusion::datasource::MemTable; use datafusion::execution::context::ExecutionContext; use tokio::runtime::Runtime; @@ -35,7 +38,7 @@ fn query(ctx: Arc>, sql: &str) { let rt = Runtime::new().unwrap(); // execute the query - let df = ctx.lock().unwrap().sql(sql).unwrap(); + let df = rt.block_on(ctx.lock().unwrap().sql(sql)).unwrap(); rt.block_on(df.collect()).unwrap(); } @@ -60,11 +63,13 @@ fn create_context() -> Arc> { let testdata = datafusion::test_util::arrow_test_data(); // create CSV data source - let csv = CsvFile::try_new( - &format!("{}/csv/aggregate_test_100.csv", testdata), - CsvReadOptions::new().schema(&schema), - ) - .unwrap(); + let listing_options = ListingOptions::new(Arc::new(CsvFormat::default())); + let csv = ListingTable::new( + Arc::new(LocalFileSystem {}), + format!("{}/csv/aggregate_test_100.csv", testdata), + schema, + listing_options, + ); let rt = Runtime::new().unwrap(); diff --git a/datafusion/benches/window_query_sql.rs b/datafusion/benches/window_query_sql.rs index 7c323be2b5ed..bca4a38360fe 100644 --- a/datafusion/benches/window_query_sql.rs +++ b/datafusion/benches/window_query_sql.rs @@ -30,7 +30,7 @@ use tokio::runtime::Runtime; fn query(ctx: Arc>, sql: &str) { let rt = Runtime::new().unwrap(); - let df = ctx.lock().unwrap().sql(sql).unwrap(); + let df = rt.block_on(ctx.lock().unwrap().sql(sql)).unwrap(); criterion::black_box(rt.block_on(df.collect()).unwrap()); } diff --git a/datafusion/src/avro_to_arrow/mod.rs b/datafusion/src/avro_to_arrow/mod.rs index 531b1092e1d6..f30fbdcc0cec 100644 --- a/datafusion/src/avro_to_arrow/mod.rs +++ b/datafusion/src/avro_to_arrow/mod.rs @@ -28,11 +28,11 @@ use crate::arrow::datatypes::Schema; use crate::error::Result; #[cfg(feature = "avro")] pub use reader::{Reader, ReaderBuilder}; -use std::io::{Read, Seek}; +use std::io::Read; #[cfg(feature = "avro")] /// Read Avro schema given a reader -pub fn read_avro_schema_from_reader(reader: &mut R) -> Result { +pub fn read_avro_schema_from_reader(reader: &mut R) -> Result { let avro_reader = avro_rs::Reader::new(reader)?; let schema = avro_reader.writer_schema(); schema::to_arrow_schema(schema) @@ -40,7 +40,7 @@ pub fn read_avro_schema_from_reader(reader: &mut R) -> Result(_: &mut R) -> Result { +pub fn read_avro_schema_from_reader(_: &mut R) -> Result { Err(crate::error::DataFusionError::NotImplemented( "cannot read avro schema without the 'avro' feature enabled".to_string(), )) diff --git a/datafusion/src/dataframe.rs b/datafusion/src/dataframe.rs index 5b157d0ac149..4bfd7206eb3e 100644 --- a/datafusion/src/dataframe.rs +++ b/datafusion/src/dataframe.rs @@ -41,9 +41,10 @@ use 
async_trait::async_trait; /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; -/// # fn main() -> Result<()> { +/// # #[tokio::main] +/// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); -/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; +/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let df = df.filter(col("a").lt_eq(col("b")))? /// .aggregate(vec![col("a")], vec![min(col("b"))])? /// .limit(100)?; @@ -59,9 +60,10 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// # #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let df = df.select_columns(&["a", "b"])?; /// # Ok(()) /// # } @@ -73,9 +75,10 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// # #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let df = df.select(vec![col("a") * col("b"), col("c")])?; /// # Ok(()) /// # } @@ -87,9 +90,10 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// # #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let df = df.filter(col("a").lt_eq(col("b")))?; /// # Ok(()) /// # } @@ -101,9 +105,10 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// # #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// /// // The following use is the equivalent of "SELECT MIN(b) GROUP BY a" /// let _ = df.aggregate(vec![col("a")], vec![min(col("b"))])?; @@ -124,9 +129,10 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// # #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let df = df.limit(100)?; /// # Ok(()) /// # } @@ -138,9 +144,10 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// # #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let df = df.union(df.clone())?; /// # Ok(()) /// # } 
@@ -152,9 +159,10 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// # #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let df = df.union(df.clone())?; /// let df = df.distinct()?; /// # Ok(()) @@ -168,9 +176,10 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// # #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let df = df.sort(vec![col("a").sort(true, true), col("b").sort(false, false)])?; /// # Ok(()) /// # } @@ -185,8 +194,8 @@ pub trait DataFrame: Send + Sync { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let left = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; - /// let right = ctx.read_csv("tests/example.csv", CsvReadOptions::new())? + /// let left = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; + /// let right = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await? /// .select(vec![ /// col("a").alias("a2"), /// col("b").alias("b2"), @@ -211,9 +220,10 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// # #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let df1 = df.repartition(Partitioning::RoundRobinBatch(4))?; /// # Ok(()) /// # } @@ -231,7 +241,7 @@ pub trait DataFrame: Send + Sync { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let batches = df.collect().await?; /// # Ok(()) /// # } @@ -246,7 +256,7 @@ pub trait DataFrame: Send + Sync { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// df.show().await?; /// # Ok(()) /// # } @@ -261,7 +271,7 @@ pub trait DataFrame: Send + Sync { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// df.show_limit(10).await?; /// # Ok(()) /// # } @@ -276,7 +286,7 @@ pub trait DataFrame: Send + Sync { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let stream = df.execute_stream().await?; /// # 
Ok(()) /// # } @@ -292,7 +302,7 @@ pub trait DataFrame: Send + Sync { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let batches = df.collect_partitioned().await?; /// # Ok(()) /// # } @@ -307,7 +317,7 @@ pub trait DataFrame: Send + Sync { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let batches = df.execute_stream_partitioned().await?; /// # Ok(()) /// # } @@ -320,9 +330,10 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// # #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let schema = df.schema(); /// # Ok(()) /// # } @@ -342,7 +353,7 @@ pub trait DataFrame: Send + Sync { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let batches = df.limit(100)?.explain(false, false)?.collect().await?; /// # Ok(()) /// # } @@ -354,9 +365,10 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// # #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); - /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; + /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let f = df.registry(); /// // use f.udf("name", vec![...]) to use the udf /// # Ok(()) diff --git a/datafusion/src/datasource/avro.rs b/datafusion/src/datasource/avro.rs deleted file mode 100644 index ee5cea51d991..000000000000 --- a/datafusion/src/datasource/avro.rs +++ /dev/null @@ -1,426 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Line-delimited Avro data source -//! -//! This data source allows Line-delimited Avro records or files to be used as input for queries. -//! 
- -use std::{ - any::Any, - io::{Read, Seek}, - sync::{Arc, Mutex}, -}; - -use arrow::datatypes::SchemaRef; -use async_trait::async_trait; - -use crate::physical_plan::avro::{AvroExec, AvroReadOptions}; -use crate::{ - datasource::{Source, TableProvider}, - error::{DataFusionError, Result}, - physical_plan::{common, ExecutionPlan}, -}; - -trait SeekRead: Read + Seek {} - -impl SeekRead for T {} - -/// Represents a line-delimited Avro file with a provided schema -pub struct AvroFile { - source: Source>, - schema: SchemaRef, - file_extension: String, -} - -impl AvroFile { - /// Attempt to initialize a `AvroFile` from a path. The schema can be read automatically. - pub fn try_new(path: &str, options: AvroReadOptions) -> Result { - let schema = if let Some(schema) = options.schema { - schema - } else { - let filenames = - common::build_checked_file_list(path, options.file_extension)?; - Arc::new(AvroExec::try_read_schema(&filenames)?) - }; - - Ok(Self { - source: Source::Path(path.to_string()), - schema, - file_extension: options.file_extension.to_string(), - }) - } - - /// Attempt to initialize a `AvroFile` from a reader. The schema MUST be provided in options - pub fn try_new_from_reader( - reader: R, - options: AvroReadOptions, - ) -> Result { - let schema = match options.schema { - Some(s) => s, - None => { - return Err(DataFusionError::Execution( - "Schema must be provided to CsvRead".to_string(), - )); - } - }; - Ok(Self { - source: Source::Reader(Mutex::new(Some(Box::new(reader)))), - schema, - file_extension: String::new(), - }) - } - - /// Attempt to initialize an AvroFile from a reader impls Seek. The schema can be read automatically. - pub fn try_new_from_reader_schema( - mut reader: R, - options: AvroReadOptions, - ) -> Result { - let schema = { - if let Some(schema) = options.schema { - schema - } else { - Arc::new(crate::avro_to_arrow::read_avro_schema_from_reader( - &mut reader, - )?) - } - }; - - Ok(Self { - source: Source::Reader(Mutex::new(Some(Box::new(reader)))), - schema, - file_extension: String::new(), - }) - } - - /// Get the path for Avro file(s) represented by this AvroFile instance - pub fn path(&self) -> &str { - match &self.source { - Source::Reader(_) => "", - Source::Path(path) => path, - } - } - - /// Get the file extension for the Avro file(s) represented by this AvroFile instance - pub fn file_extension(&self) -> &str { - &self.file_extension - } -} - -#[async_trait] -impl TableProvider for AvroFile { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - self.schema.clone() - } - - async fn scan( - &self, - projection: &Option>, - batch_size: usize, - _filters: &[crate::logical_plan::Expr], - limit: Option, - ) -> Result> { - let opts = AvroReadOptions { - schema: Some(self.schema.clone()), - file_extension: self.file_extension.as_str(), - }; - let batch_size = limit - .map(|l| std::cmp::min(l, batch_size)) - .unwrap_or(batch_size); - - let exec = match &self.source { - Source::Reader(maybe_reader) => { - if let Some(rdr) = maybe_reader.lock().unwrap().take() { - AvroExec::try_new_from_reader( - rdr, - opts, - projection.clone(), - batch_size, - limit, - )? - } else { - return Err(DataFusionError::Execution( - "You can only read once if the data comes from a reader" - .to_string(), - )); - } - } - Source::Path(p) => { - AvroExec::try_from_path(p, opts, projection.clone(), batch_size, limit)? 
- } - }; - Ok(Arc::new(exec)) - } -} - -#[cfg(test)] -#[cfg(feature = "avro")] -mod tests { - use arrow::array::{ - BinaryArray, BooleanArray, Float32Array, Float64Array, Int32Array, - TimestampMicrosecondArray, - }; - use arrow::record_batch::RecordBatch; - use futures::StreamExt; - - use super::*; - - #[tokio::test] - async fn read_small_batches() -> Result<()> { - let table = load_table("alltypes_plain.avro")?; - let projection = None; - let exec = table.scan(&projection, 2, &[], None).await?; - let stream = exec.execute(0).await?; - - let _ = stream - .map(|batch| { - let batch = batch.unwrap(); - assert_eq!(11, batch.num_columns()); - assert_eq!(2, batch.num_rows()); - }) - .fold(0, |acc, _| async move { acc + 1i32 }) - .await; - - Ok(()) - } - - #[cfg(feature = "avro")] - #[tokio::test] - async fn read_alltypes_plain_avro() -> Result<()> { - let table = load_table("alltypes_plain.avro")?; - - let x: Vec = table - .schema() - .fields() - .iter() - .map(|f| format!("{}: {:?}", f.name(), f.data_type())) - .collect(); - let y = x.join("\n"); - assert_eq!( - "id: Int32\n\ - bool_col: Boolean\n\ - tinyint_col: Int32\n\ - smallint_col: Int32\n\ - int_col: Int32\n\ - bigint_col: Int64\n\ - float_col: Float32\n\ - double_col: Float64\n\ - date_string_col: Binary\n\ - string_col: Binary\n\ - timestamp_col: Timestamp(Microsecond, None)", - y - ); - - let projection = None; - let batch = get_first_batch(table, &projection).await?; - let expected = vec![ - "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+", - "| id | bool_col | tinyint_col | smallint_col | int_col | bigint_col | float_col | double_col | date_string_col | string_col | timestamp_col |", - "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+", - "| 4 | true | 0 | 0 | 0 | 0 | 0 | 0 | 30332f30312f3039 | 30 | 2009-03-01 00:00:00 |", - "| 5 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30332f30312f3039 | 31 | 2009-03-01 00:01:00 |", - "| 6 | true | 0 | 0 | 0 | 0 | 0 | 0 | 30342f30312f3039 | 30 | 2009-04-01 00:00:00 |", - "| 7 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30342f30312f3039 | 31 | 2009-04-01 00:01:00 |", - "| 2 | true | 0 | 0 | 0 | 0 | 0 | 0 | 30322f30312f3039 | 30 | 2009-02-01 00:00:00 |", - "| 3 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30322f30312f3039 | 31 | 2009-02-01 00:01:00 |", - "| 0 | true | 0 | 0 | 0 | 0 | 0 | 0 | 30312f30312f3039 | 30 | 2009-01-01 00:00:00 |", - "| 1 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30312f30312f3039 | 31 | 2009-01-01 00:01:00 |", - "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+", - ]; - - crate::assert_batches_eq!(expected, &[batch]); - Ok(()) - } - - #[tokio::test] - async fn read_bool_alltypes_plain_avro() -> Result<()> { - let table = load_table("alltypes_plain.avro")?; - let projection = Some(vec![1]); - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec = vec![]; - for i in 0..batch.num_rows() { - values.push(array.value(i)); - } - - assert_eq!( - "[true, false, true, false, true, false, true, false]", - format!("{:?}", values) - ); - - Ok(()) - } - - #[tokio::test] - async fn read_i32_alltypes_plain_avro() -> Result<()> { 
- let table = load_table("alltypes_plain.avro")?; - let projection = Some(vec![0]); - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec = vec![]; - for i in 0..batch.num_rows() { - values.push(array.value(i)); - } - - assert_eq!("[4, 5, 6, 7, 2, 3, 0, 1]", format!("{:?}", values)); - - Ok(()) - } - - #[tokio::test] - async fn read_i96_alltypes_plain_avro() -> Result<()> { - let table = load_table("alltypes_plain.avro")?; - let projection = Some(vec![10]); - let batch = get_first_batch(table, &projection).await?; - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec = vec![]; - for i in 0..batch.num_rows() { - values.push(array.value(i)); - } - - assert_eq!("[1235865600000000, 1235865660000000, 1238544000000000, 1238544060000000, 1233446400000000, 1233446460000000, 1230768000000000, 1230768060000000]", format!("{:?}", values)); - - Ok(()) - } - - #[tokio::test] - async fn read_f32_alltypes_plain_avro() -> Result<()> { - let table = load_table("alltypes_plain.avro")?; - let projection = Some(vec![6]); - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec = vec![]; - for i in 0..batch.num_rows() { - values.push(array.value(i)); - } - - assert_eq!( - "[0.0, 1.1, 0.0, 1.1, 0.0, 1.1, 0.0, 1.1]", - format!("{:?}", values) - ); - - Ok(()) - } - - #[tokio::test] - async fn read_f64_alltypes_plain_avro() -> Result<()> { - let table = load_table("alltypes_plain.avro")?; - let projection = Some(vec![7]); - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec = vec![]; - for i in 0..batch.num_rows() { - values.push(array.value(i)); - } - - assert_eq!( - "[0.0, 10.1, 0.0, 10.1, 0.0, 10.1, 0.0, 10.1]", - format!("{:?}", values) - ); - - Ok(()) - } - - #[tokio::test] - async fn read_binary_alltypes_plain_avro() -> Result<()> { - let table = load_table("alltypes_plain.avro")?; - let projection = Some(vec![9]); - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec<&str> = vec![]; - for i in 0..batch.num_rows() { - values.push(std::str::from_utf8(array.value(i)).unwrap()); - } - - assert_eq!( - "[\"0\", \"1\", \"0\", \"1\", \"0\", \"1\", \"0\", \"1\"]", - format!("{:?}", values) - ); - - Ok(()) - } - - fn load_table(name: &str) -> Result> { - let testdata = crate::test_util::arrow_test_data(); - let filename = format!("{}/avro/{}", testdata, name); - let table = AvroFile::try_new(&filename, AvroReadOptions::default())?; - Ok(Arc::new(table)) - } - - async fn get_first_batch( - table: Arc, - projection: &Option>, - ) -> Result { - let exec = table.scan(projection, 1024, &[], None).await?; - let mut it = exec.execute(0).await?; - it.next() - .await - .expect("should have received at least one batch") - .map_err(|e| e.into()) - } -} diff --git a/datafusion/src/datasource/csv.rs 
b/datafusion/src/datasource/csv.rs deleted file mode 100644 index d47312e8b745..000000000000 --- a/datafusion/src/datasource/csv.rs +++ /dev/null @@ -1,245 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! CSV data source -//! -//! This CSV data source allows CSV files to be used as input for queries. -//! -//! Example: -//! -//! ``` -//! use datafusion::datasource::TableProvider; -//! use datafusion::datasource::csv::{CsvFile, CsvReadOptions}; -//! -//! let testdata = datafusion::test_util::arrow_test_data(); -//! let csvdata = CsvFile::try_new( -//! &format!("{}/csv/aggregate_test_100.csv", testdata), -//! CsvReadOptions::new().delimiter(b'|'), -//! ).unwrap(); -//! let schema = csvdata.schema(); -//! ``` - -use arrow::datatypes::SchemaRef; -use async_trait::async_trait; -use std::any::Any; -use std::io::{Read, Seek}; -use std::string::String; -use std::sync::{Arc, Mutex}; - -use crate::datasource::{Source, TableProvider}; -use crate::error::{DataFusionError, Result}; -use crate::logical_plan::Expr; -use crate::physical_plan::csv::CsvExec; -pub use crate::physical_plan::csv::CsvReadOptions; -use crate::physical_plan::{common, ExecutionPlan}; - -/// Represents a CSV file with a provided schema -pub struct CsvFile { - source: Source, - schema: SchemaRef, - has_header: bool, - delimiter: u8, - file_extension: String, -} - -impl CsvFile { - /// Attempt to initialize a new `CsvFile` from a file path - pub fn try_new(path: impl Into, options: CsvReadOptions) -> Result { - let path = path.into(); - let schema = Arc::new(match options.schema { - Some(s) => s.clone(), - None => { - let filenames = common::build_file_list(&path, options.file_extension)?; - if filenames.is_empty() { - return Err(DataFusionError::Plan(format!( - "No files found at {path} with file extension {file_extension}", - path = path, - file_extension = options.file_extension - ))); - } - CsvExec::try_infer_schema(&filenames, &options)? - } - }); - - Ok(Self { - source: Source::Path(path), - schema, - has_header: options.has_header, - delimiter: options.delimiter, - file_extension: String::from(options.file_extension), - }) - } - - /// Attempt to initialize a `CsvFile` from a reader. The schema MUST be provided in options. - pub fn try_new_from_reader( - reader: R, - options: CsvReadOptions, - ) -> Result { - let schema = Arc::new(match options.schema { - Some(s) => s.clone(), - None => { - return Err(DataFusionError::Execution( - "Schema must be provided to CsvRead".to_string(), - )); - } - }); - - Ok(Self { - source: Source::Reader(Mutex::new(Some(Box::new(reader)))), - schema, - has_header: options.has_header, - delimiter: options.delimiter, - file_extension: String::new(), - }) - } - - /// Attempt to initialize a `CsvRead` from a reader impls `Seek`. 
The schema can be inferred automatically. - pub fn try_new_from_reader_infer_schema( - mut reader: R, - options: CsvReadOptions, - ) -> Result { - let schema = Arc::new(match options.schema { - Some(s) => s.clone(), - None => { - let (schema, _) = arrow::csv::reader::infer_file_schema( - &mut reader, - options.delimiter, - Some(options.schema_infer_max_records), - options.has_header, - )?; - schema - } - }); - - Ok(Self { - source: Source::Reader(Mutex::new(Some(Box::new(reader)))), - schema, - has_header: options.has_header, - delimiter: options.delimiter, - file_extension: String::new(), - }) - } - - /// Get the path for the CSV file(s) represented by this CsvFile instance - pub fn path(&self) -> &str { - match &self.source { - Source::Reader(_) => "", - Source::Path(path) => path, - } - } - - /// Determine whether the CSV file(s) represented by this CsvFile instance have a header row - pub fn has_header(&self) -> bool { - self.has_header - } - - /// Get the delimiter for the CSV file(s) represented by this CsvFile instance - pub fn delimiter(&self) -> u8 { - self.delimiter - } - - /// Get the file extension for the CSV file(s) represented by this CsvFile instance - pub fn file_extension(&self) -> &str { - &self.file_extension - } -} - -#[async_trait] -impl TableProvider for CsvFile { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - self.schema.clone() - } - - async fn scan( - &self, - projection: &Option>, - batch_size: usize, - _filters: &[Expr], - limit: Option, - ) -> Result> { - let opts = CsvReadOptions::new() - .schema(&self.schema) - .has_header(self.has_header) - .delimiter(self.delimiter) - .file_extension(self.file_extension.as_str()); - let batch_size = limit - .map(|l| std::cmp::min(l, batch_size)) - .unwrap_or(batch_size); - - let exec = match &self.source { - Source::Reader(maybe_reader) => { - if let Some(rdr) = maybe_reader.lock().unwrap().take() { - CsvExec::try_new_from_reader( - rdr, - opts, - projection.clone(), - batch_size, - limit, - )? - } else { - return Err(DataFusionError::Execution( - "You can only read once if the data comes from a reader" - .to_string(), - )); - } - } - Source::Path(p) => { - CsvExec::try_new(p, opts, projection.clone(), batch_size, limit)? - } - }; - Ok(Arc::new(exec)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::prelude::*; - - #[tokio::test] - async fn csv_file_from_reader() -> Result<()> { - let testdata = crate::test_util::arrow_test_data(); - let filename = "aggregate_test_100.csv"; - let path = format!("{}/csv/{}", testdata, filename); - let buf = std::fs::read(path).unwrap(); - let rdr = std::io::Cursor::new(buf); - let mut ctx = ExecutionContext::new(); - ctx.register_table( - "aggregate_test", - Arc::new(CsvFile::try_new_from_reader_infer_schema( - rdr, - CsvReadOptions::new(), - )?), - )?; - let df = ctx.sql("select max(c2) from aggregate_test")?; - let batches = df.collect().await?; - assert_eq!( - batches[0] - .column(0) - .as_any() - .downcast_ref::() - .unwrap() - .value(0), - 5 - ); - Ok(()) - } -} diff --git a/datafusion/src/datasource/file_format/avro.rs b/datafusion/src/datasource/file_format/avro.rs new file mode 100644 index 000000000000..7728747b8015 --- /dev/null +++ b/datafusion/src/datasource/file_format/avro.rs @@ -0,0 +1,403 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Apache Avro format abstractions + +use std::any::Any; +use std::sync::Arc; + +use arrow::datatypes::Schema; +use arrow::{self, datatypes::SchemaRef}; +use async_trait::async_trait; +use futures::StreamExt; + +use super::{FileFormat, PhysicalPlanConfig}; +use crate::avro_to_arrow::read_avro_schema_from_reader; +use crate::datasource::object_store::{ObjectReader, ObjectReaderStream}; +use crate::error::Result; +use crate::physical_plan::file_format::AvroExec; +use crate::physical_plan::ExecutionPlan; +use crate::physical_plan::Statistics; + +/// Avro `FileFormat` implementation. +#[derive(Default, Debug)] +pub struct AvroFormat; + +#[async_trait] +impl FileFormat for AvroFormat { + fn as_any(&self) -> &dyn Any { + self + } + + async fn infer_schema(&self, mut readers: ObjectReaderStream) -> Result { + let mut schemas = vec![]; + while let Some(obj_reader) = readers.next().await { + let mut reader = obj_reader?.sync_reader()?; + let schema = read_avro_schema_from_reader(&mut reader)?; + schemas.push(schema); + } + let merged_schema = Schema::try_merge(schemas)?; + Ok(Arc::new(merged_schema)) + } + + async fn infer_stats(&self, _reader: Arc) -> Result { + Ok(Statistics::default()) + } + + async fn create_physical_plan( + &self, + conf: PhysicalPlanConfig, + ) -> Result> { + let exec = AvroExec::new( + conf.object_store, + // flattening this for now because CsvExec does not support partitioning yet + conf.files.into_iter().flatten().collect::>(), + conf.statistics, + conf.schema, + conf.projection, + conf.batch_size, + conf.limit, + ); + Ok(Arc::new(exec)) + } +} + +#[cfg(test)] +#[cfg(feature = "avro")] +mod tests { + use crate::{ + datasource::{ + object_store::local::{ + local_file_meta, local_object_reader, local_object_reader_stream, + LocalFileSystem, + }, + PartitionedFile, + }, + physical_plan::collect, + }; + + use super::*; + use arrow::array::{ + BinaryArray, BooleanArray, Float32Array, Float64Array, Int32Array, + TimestampMicrosecondArray, + }; + use futures::StreamExt; + + #[tokio::test] + async fn read_small_batches() -> Result<()> { + let projection = None; + let exec = get_exec("alltypes_plain.avro", &projection, 2, None).await?; + let stream = exec.execute(0).await?; + + let tt_batches = stream + .map(|batch| { + let batch = batch.unwrap(); + assert_eq!(11, batch.num_columns()); + assert_eq!(2, batch.num_rows()); + }) + .fold(0, |acc, _| async move { acc + 1i32 }) + .await; + + assert_eq!(tt_batches, 4 /* 8/2 */); + + Ok(()) + } + + #[tokio::test] + async fn read_limit() -> Result<()> { + let projection = None; + let exec = get_exec("alltypes_plain.avro", &projection, 1024, Some(1)).await?; + let batches = collect(exec).await?; + assert_eq!(1, batches.len()); + assert_eq!(11, batches[0].num_columns()); + assert_eq!(1, batches[0].num_rows()); + + Ok(()) + } + + #[tokio::test] + async fn read_alltypes_plain_avro() -> Result<()> { + let projection = None; + let exec = 
get_exec("alltypes_plain.avro", &projection, 1024, None).await?; + + let x: Vec = exec + .schema() + .fields() + .iter() + .map(|f| format!("{}: {:?}", f.name(), f.data_type())) + .collect(); + assert_eq!( + vec![ + "id: Int32", + "bool_col: Boolean", + "tinyint_col: Int32", + "smallint_col: Int32", + "int_col: Int32", + "bigint_col: Int64", + "float_col: Float32", + "double_col: Float64", + "date_string_col: Binary", + "string_col: Binary", + "timestamp_col: Timestamp(Microsecond, None)", + ], + x + ); + + let batches = collect(exec).await?; + assert_eq!(batches.len(), 1); + + let expected = vec![ + "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+", + "| id | bool_col | tinyint_col | smallint_col | int_col | bigint_col | float_col | double_col | date_string_col | string_col | timestamp_col |", + "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+", + "| 4 | true | 0 | 0 | 0 | 0 | 0 | 0 | 30332f30312f3039 | 30 | 2009-03-01 00:00:00 |", + "| 5 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30332f30312f3039 | 31 | 2009-03-01 00:01:00 |", + "| 6 | true | 0 | 0 | 0 | 0 | 0 | 0 | 30342f30312f3039 | 30 | 2009-04-01 00:00:00 |", + "| 7 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30342f30312f3039 | 31 | 2009-04-01 00:01:00 |", + "| 2 | true | 0 | 0 | 0 | 0 | 0 | 0 | 30322f30312f3039 | 30 | 2009-02-01 00:00:00 |", + "| 3 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30322f30312f3039 | 31 | 2009-02-01 00:01:00 |", + "| 0 | true | 0 | 0 | 0 | 0 | 0 | 0 | 30312f30312f3039 | 30 | 2009-01-01 00:00:00 |", + "| 1 | false | 1 | 1 | 1 | 10 | 1.1 | 10.1 | 30312f30312f3039 | 31 | 2009-01-01 00:01:00 |", + "+----+----------+-------------+--------------+---------+------------+-----------+------------+------------------+------------+---------------------+", + ]; + + crate::assert_batches_eq!(expected, &batches); + Ok(()) + } + + #[tokio::test] + async fn read_bool_alltypes_plain_avro() -> Result<()> { + let projection = Some(vec![1]); + let exec = get_exec("alltypes_plain.avro", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(batches.len(), 1); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec = vec![]; + for i in 0..batches[0].num_rows() { + values.push(array.value(i)); + } + + assert_eq!( + "[true, false, true, false, true, false, true, false]", + format!("{:?}", values) + ); + + Ok(()) + } + + #[tokio::test] + async fn read_i32_alltypes_plain_avro() -> Result<()> { + let projection = Some(vec![0]); + let exec = get_exec("alltypes_plain.avro", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(batches.len(), 1); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec = vec![]; + for i in 0..batches[0].num_rows() { + values.push(array.value(i)); + } + + assert_eq!("[4, 5, 6, 7, 2, 3, 0, 1]", format!("{:?}", values)); + + Ok(()) + } + + #[tokio::test] + async fn read_i96_alltypes_plain_avro() -> Result<()> { + let projection = Some(vec![10]); + let exec = get_exec("alltypes_plain.avro", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(batches.len(), 1); + 
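+        // the projected column is decoded as microsecond timestamps, so the values asserted below are microseconds since the Unix epoch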
assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec = vec![]; + for i in 0..batches[0].num_rows() { + values.push(array.value(i)); + } + + assert_eq!("[1235865600000000, 1235865660000000, 1238544000000000, 1238544060000000, 1233446400000000, 1233446460000000, 1230768000000000, 1230768060000000]", format!("{:?}", values)); + + Ok(()) + } + + #[tokio::test] + async fn read_f32_alltypes_plain_avro() -> Result<()> { + let projection = Some(vec![6]); + let exec = get_exec("alltypes_plain.avro", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(batches.len(), 1); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec = vec![]; + for i in 0..batches[0].num_rows() { + values.push(array.value(i)); + } + + assert_eq!( + "[0.0, 1.1, 0.0, 1.1, 0.0, 1.1, 0.0, 1.1]", + format!("{:?}", values) + ); + + Ok(()) + } + + #[tokio::test] + async fn read_f64_alltypes_plain_avro() -> Result<()> { + let projection = Some(vec![7]); + let exec = get_exec("alltypes_plain.avro", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(batches.len(), 1); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec = vec![]; + for i in 0..batches[0].num_rows() { + values.push(array.value(i)); + } + + assert_eq!( + "[0.0, 10.1, 0.0, 10.1, 0.0, 10.1, 0.0, 10.1]", + format!("{:?}", values) + ); + + Ok(()) + } + + #[tokio::test] + async fn read_binary_alltypes_plain_avro() -> Result<()> { + let projection = Some(vec![9]); + let exec = get_exec("alltypes_plain.avro", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(batches.len(), 1); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec<&str> = vec![]; + for i in 0..batches[0].num_rows() { + values.push(std::str::from_utf8(array.value(i)).unwrap()); + } + + assert_eq!( + "[\"0\", \"1\", \"0\", \"1\", \"0\", \"1\", \"0\", \"1\"]", + format!("{:?}", values) + ); + + Ok(()) + } + + async fn get_exec( + file_name: &str, + projection: &Option>, + batch_size: usize, + limit: Option, + ) -> Result> { + let testdata = crate::test_util::arrow_test_data(); + let filename = format!("{}/avro/{}", testdata, file_name); + let format = AvroFormat {}; + let schema = format + .infer_schema(local_object_reader_stream(vec![filename.clone()])) + .await + .expect("Schema inference"); + let statistics = format + .infer_stats(local_object_reader(filename.clone())) + .await + .expect("Stats inference"); + let files = vec![vec![PartitionedFile { + file_meta: local_file_meta(filename.to_owned()), + }]]; + let exec = format + .create_physical_plan(PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + schema, + files, + statistics, + projection: projection.clone(), + batch_size, + filters: vec![], + limit, + }) + .await?; + Ok(exec) + } +} + +#[cfg(test)] +#[cfg(not(feature = "avro"))] +mod tests { + use super::*; + + use crate::datasource::object_store::local::local_object_reader_stream; + use crate::error::DataFusionError; + + #[tokio::test] + async fn test() 
-> Result<()> { + let testdata = crate::test_util::arrow_test_data(); + let filename = format!("{}/avro/alltypes_plain.avro", testdata); + let schema_result = AvroFormat {} + .infer_schema(local_object_reader_stream(vec![filename])) + .await; + assert!(matches!( + schema_result, + Err(DataFusionError::NotImplemented(msg)) + if msg == *"cannot read avro schema without the 'avro' feature enabled" + )); + + Ok(()) + } +} diff --git a/datafusion/src/datasource/file_format/csv.rs b/datafusion/src/datasource/file_format/csv.rs new file mode 100644 index 000000000000..4d75c65eddbb --- /dev/null +++ b/datafusion/src/datasource/file_format/csv.rs @@ -0,0 +1,290 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! CSV format abstractions + +use std::any::Any; +use std::sync::Arc; + +use arrow::datatypes::Schema; +use arrow::{self, datatypes::SchemaRef}; +use async_trait::async_trait; +use futures::StreamExt; + +use super::{FileFormat, PhysicalPlanConfig}; +use crate::datasource::object_store::{ObjectReader, ObjectReaderStream}; +use crate::error::Result; +use crate::physical_plan::file_format::CsvExec; +use crate::physical_plan::ExecutionPlan; +use crate::physical_plan::Statistics; + +/// Character Separated Value `FileFormat` implementation. +#[derive(Debug)] +pub struct CsvFormat { + has_header: bool, + delimiter: u8, + schema_infer_max_rec: Option, +} + +impl Default for CsvFormat { + fn default() -> Self { + Self { + schema_infer_max_rec: None, + has_header: true, + delimiter: b',', + } + } +} + +impl CsvFormat { + /// Set a limit in terms of records to scan to infer the schema + /// - default to `None` (no limit) + pub fn with_schema_infer_max_rec(mut self, max_rec: Option) -> Self { + self.schema_infer_max_rec = max_rec; + self + } + + /// Set true to indicate that the first line is a header. + /// - default to true + pub fn with_has_header(mut self, has_header: bool) -> Self { + self.has_header = has_header; + self + } + + /// True if the first line is a header. + pub fn has_header(&self) -> bool { + self.has_header + } + + /// The character separating values within a row. + /// - default to ',' + pub fn with_delimiter(mut self, delimiter: u8) -> Self { + self.delimiter = delimiter; + self + } + + /// The delimiter character. 
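+    ///
+    /// A minimal usage sketch of the builder methods above (illustrative only):
+    ///
+    /// ```ignore
+    /// let format = CsvFormat::default()
+    ///     .with_has_header(false)
+    ///     .with_delimiter(b'|')
+    ///     .with_schema_infer_max_rec(Some(100));
+    /// assert_eq!(format.delimiter(), b'|');
+    /// assert!(!format.has_header());
+    /// ```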
+ pub fn delimiter(&self) -> u8 { + self.delimiter + } +} + +#[async_trait] +impl FileFormat for CsvFormat { + fn as_any(&self) -> &dyn Any { + self + } + + async fn infer_schema(&self, mut readers: ObjectReaderStream) -> Result { + let mut schemas = vec![]; + + let mut records_to_read = self.schema_infer_max_rec.unwrap_or(std::usize::MAX); + + while let Some(obj_reader) = readers.next().await { + let mut reader = obj_reader?.sync_reader()?; + let (schema, records_read) = arrow::csv::reader::infer_reader_schema( + &mut reader, + self.delimiter, + Some(records_to_read), + self.has_header, + )?; + if records_read == 0 { + continue; + } + schemas.push(schema.clone()); + records_to_read -= records_read; + if records_to_read == 0 { + break; + } + } + + let merged_schema = Schema::try_merge(schemas)?; + Ok(Arc::new(merged_schema)) + } + + async fn infer_stats(&self, _reader: Arc) -> Result { + Ok(Statistics::default()) + } + + async fn create_physical_plan( + &self, + conf: PhysicalPlanConfig, + ) -> Result> { + let exec = CsvExec::new( + conf.object_store, + // flattening this for now because CsvExec does not support partitioning yet + conf.files.into_iter().flatten().collect::>(), + conf.statistics, + conf.schema, + self.has_header, + self.delimiter, + conf.projection, + conf.batch_size, + conf.limit, + ); + Ok(Arc::new(exec)) + } +} + +#[cfg(test)] +mod tests { + use arrow::array::StringArray; + + use super::*; + use crate::{ + datasource::{ + file_format::PhysicalPlanConfig, + object_store::local::{ + local_file_meta, local_object_reader, local_object_reader_stream, + LocalFileSystem, + }, + PartitionedFile, + }, + physical_plan::collect, + }; + + #[tokio::test] + async fn read_small_batches() -> Result<()> { + // skip column 9 that overflows the automaticly discovered column type of i64 (u64 would work) + let projection = Some(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12]); + let exec = get_exec("aggregate_test_100.csv", &projection, 2, None).await?; + let stream = exec.execute(0).await?; + + let tt_batches: i32 = stream + .map(|batch| { + let batch = batch.unwrap(); + assert_eq!(12, batch.num_columns()); + assert_eq!(2, batch.num_rows()); + }) + .fold(0, |acc, _| async move { acc + 1i32 }) + .await; + + assert_eq!(tt_batches, 50 /* 100/2 */); + + // test metadata + assert_eq!(exec.statistics().num_rows, None); + assert_eq!(exec.statistics().total_byte_size, None); + + Ok(()) + } + + #[tokio::test] + async fn read_limit() -> Result<()> { + let projection = Some(vec![0, 1, 2, 3]); + let exec = get_exec("aggregate_test_100.csv", &projection, 1024, Some(1)).await?; + let batches = collect(exec).await?; + assert_eq!(1, batches.len()); + assert_eq!(4, batches[0].num_columns()); + assert_eq!(1, batches[0].num_rows()); + + Ok(()) + } + + #[tokio::test] + async fn infer_schema() -> Result<()> { + let projection = None; + let exec = get_exec("aggregate_test_100.csv", &projection, 1024, None).await?; + + let x: Vec = exec + .schema() + .fields() + .iter() + .map(|f| format!("{}: {:?}", f.name(), f.data_type())) + .collect(); + assert_eq!( + vec![ + "c1: Utf8", + "c2: Int64", + "c3: Int64", + "c4: Int64", + "c5: Int64", + "c6: Int64", + "c7: Int64", + "c8: Int64", + "c9: Int64", + "c10: Int64", + "c11: Float64", + "c12: Float64", + "c13: Utf8" + ], + x + ); + + Ok(()) + } + + #[tokio::test] + async fn read_char_column() -> Result<()> { + let projection = Some(vec![0]); + let exec = get_exec("aggregate_test_100.csv", &projection, 1024, None).await?; + + let batches = collect(exec).await.expect("Collect 
batches"); + + assert_eq!(1, batches.len()); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(100, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec<&str> = vec![]; + for i in 0..5 { + values.push(array.value(i)); + } + + assert_eq!(vec!["c", "d", "b", "a", "b"], values); + + Ok(()) + } + + async fn get_exec( + file_name: &str, + projection: &Option>, + batch_size: usize, + limit: Option, + ) -> Result> { + let testdata = crate::test_util::arrow_test_data(); + let filename = format!("{}/csv/{}", testdata, file_name); + let format = CsvFormat::default(); + let schema = format + .infer_schema(local_object_reader_stream(vec![filename.clone()])) + .await + .expect("Schema inference"); + let statistics = format + .infer_stats(local_object_reader(filename.clone())) + .await + .expect("Stats inference"); + let files = vec![vec![PartitionedFile { + file_meta: local_file_meta(filename.to_owned()), + }]]; + let exec = format + .create_physical_plan(PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + schema, + files, + statistics, + projection: projection.clone(), + batch_size, + filters: vec![], + limit, + }) + .await?; + Ok(exec) + } +} diff --git a/datafusion/src/datasource/file_format/json.rs b/datafusion/src/datasource/file_format/json.rs new file mode 100644 index 000000000000..2741da31b921 --- /dev/null +++ b/datafusion/src/datasource/file_format/json.rs @@ -0,0 +1,241 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Line delimited JSON format abstractions + +use std::any::Any; +use std::io::BufReader; +use std::sync::Arc; + +use arrow::datatypes::Schema; +use arrow::datatypes::SchemaRef; +use arrow::json::reader::infer_json_schema_from_iterator; +use arrow::json::reader::ValueIter; +use async_trait::async_trait; +use futures::StreamExt; + +use super::FileFormat; +use super::PhysicalPlanConfig; +use crate::datasource::object_store::{ObjectReader, ObjectReaderStream}; +use crate::error::Result; +use crate::physical_plan::file_format::NdJsonExec; +use crate::physical_plan::ExecutionPlan; +use crate::physical_plan::Statistics; + +/// New line delimited JSON `FileFormat` implementation. 
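+///
+/// A minimal usage sketch (illustrative only, assuming the
+/// `local_object_reader_stream` helper used in the tests below):
+///
+/// ```ignore
+/// let format = JsonFormat::default().with_schema_infer_max_rec(Some(1000));
+/// let schema = format
+///     .infer_schema(local_object_reader_stream(vec!["tests/jsons/2.json".to_owned()]))
+///     .await?;
+/// ```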
+#[derive(Debug)] +pub struct JsonFormat { + schema_infer_max_rec: Option, +} + +impl Default for JsonFormat { + fn default() -> Self { + Self { + schema_infer_max_rec: None, + } + } +} + +impl JsonFormat { + /// Set a limit in terms of records to scan to infer the schema + /// - defaults to `None` (no limit) + pub fn with_schema_infer_max_rec(mut self, max_rec: Option) -> Self { + self.schema_infer_max_rec = max_rec; + self + } +} + +#[async_trait] +impl FileFormat for JsonFormat { + fn as_any(&self) -> &dyn Any { + self + } + + async fn infer_schema(&self, mut readers: ObjectReaderStream) -> Result { + let mut schemas = Vec::new(); + let mut records_to_read = self.schema_infer_max_rec.unwrap_or(usize::MAX); + while let Some(obj_reader) = readers.next().await { + let mut reader = BufReader::new(obj_reader?.sync_reader()?); + let iter = ValueIter::new(&mut reader, None); + let schema = infer_json_schema_from_iterator(iter.take_while(|_| { + let should_take = records_to_read > 0; + records_to_read -= 1; + should_take + }))?; + if records_to_read == 0 { + break; + } + schemas.push(schema); + } + + let schema = Schema::try_merge(schemas)?; + Ok(Arc::new(schema)) + } + + async fn infer_stats(&self, _reader: Arc) -> Result { + Ok(Statistics::default()) + } + + async fn create_physical_plan( + &self, + conf: PhysicalPlanConfig, + ) -> Result> { + let exec = NdJsonExec::new( + conf.object_store, + // flattening this for now because NdJsonExec does not support partitioning yet + conf.files.into_iter().flatten().collect::>(), + conf.statistics, + conf.schema, + conf.projection, + conf.batch_size, + conf.limit, + ); + Ok(Arc::new(exec)) + } +} + +#[cfg(test)] +mod tests { + use arrow::array::Int64Array; + + use super::*; + use crate::{ + datasource::{ + file_format::PhysicalPlanConfig, + object_store::local::{ + local_file_meta, local_object_reader, local_object_reader_stream, + LocalFileSystem, + }, + PartitionedFile, + }, + physical_plan::collect, + }; + + #[tokio::test] + async fn read_small_batches() -> Result<()> { + let projection = None; + let exec = get_exec(&projection, 2, None).await?; + let stream = exec.execute(0).await?; + + let tt_batches: i32 = stream + .map(|batch| { + let batch = batch.unwrap(); + assert_eq!(4, batch.num_columns()); + assert_eq!(2, batch.num_rows()); + }) + .fold(0, |acc, _| async move { acc + 1i32 }) + .await; + + assert_eq!(tt_batches, 6 /* 12/2 */); + + // test metadata + assert_eq!(exec.statistics().num_rows, None); + assert_eq!(exec.statistics().total_byte_size, None); + + Ok(()) + } + + #[tokio::test] + async fn read_limit() -> Result<()> { + let projection = None; + let exec = get_exec(&projection, 1024, Some(1)).await?; + let batches = collect(exec).await?; + assert_eq!(1, batches.len()); + assert_eq!(4, batches[0].num_columns()); + assert_eq!(1, batches[0].num_rows()); + + Ok(()) + } + + #[tokio::test] + async fn infer_schema() -> Result<()> { + let projection = None; + let exec = get_exec(&projection, 1024, None).await?; + + let x: Vec = exec + .schema() + .fields() + .iter() + .map(|f| format!("{}: {:?}", f.name(), f.data_type())) + .collect(); + assert_eq!(vec!["a: Int64", "b: Float64", "c: Boolean", "d: Utf8",], x); + + Ok(()) + } + + #[tokio::test] + async fn read_int_column() -> Result<()> { + let projection = Some(vec![0]); + let exec = get_exec(&projection, 1024, None).await?; + + let batches = collect(exec).await.expect("Collect batches"); + + assert_eq!(1, batches.len()); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(12, 
batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec = vec![]; + for i in 0..batches[0].num_rows() { + values.push(array.value(i)); + } + + assert_eq!( + vec![1, -10, 2, 1, 7, 1, 1, 5, 1, 1, 1, 100000000000000], + values + ); + + Ok(()) + } + + async fn get_exec( + projection: &Option>, + batch_size: usize, + limit: Option, + ) -> Result> { + let filename = "tests/jsons/2.json"; + let format = JsonFormat::default(); + let schema = format + .infer_schema(local_object_reader_stream(vec![filename.to_owned()])) + .await + .expect("Schema inference"); + let statistics = format + .infer_stats(local_object_reader(filename.to_owned())) + .await + .expect("Stats inference"); + let files = vec![vec![PartitionedFile { + file_meta: local_file_meta(filename.to_owned()), + }]]; + let exec = format + .create_physical_plan(PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + schema, + files, + statistics, + projection: projection.clone(), + batch_size, + filters: vec![], + limit, + }) + .await?; + Ok(exec) + } +} diff --git a/datafusion/src/datasource/file_format/mod.rs b/datafusion/src/datasource/file_format/mod.rs new file mode 100644 index 000000000000..d545596f6e5c --- /dev/null +++ b/datafusion/src/datasource/file_format/mod.rs @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Module containing helper methods for the various file formats + +pub mod avro; +pub mod csv; +pub mod json; +pub mod parquet; + +use std::any::Any; +use std::fmt; +use std::sync::Arc; + +use crate::arrow::datatypes::SchemaRef; +use crate::error::Result; +use crate::logical_plan::Expr; +use crate::physical_plan::{ExecutionPlan, Statistics}; + +use async_trait::async_trait; + +use super::object_store::{ObjectReader, ObjectReaderStream, ObjectStore}; +use super::PartitionedFile; + +/// The configurations to be passed when creating a physical plan for +/// a given file format. +pub struct PhysicalPlanConfig { + /// Store from which the `files` should be fetched + pub object_store: Arc, + /// Schema before projection + pub schema: SchemaRef, + /// List of files to be processed, grouped into partitions + pub files: Vec>, + /// Estimated overall statistics of the plan, taking `filters` into account + pub statistics: Statistics, + /// Columns on which to project the data + pub projection: Option>, + /// The maximum number of records per arrow column + pub batch_size: usize, + /// The filters that were pushed down to this execution plan + pub filters: Vec, + /// The minimum number of records required from this source plan + pub limit: Option, +} + +/// This trait abstracts all the file format specific implementations +/// from the `TableProvider`. 
This helps code reuse across +/// providers that support the same file formats. +#[async_trait] +pub trait FileFormat: Send + Sync + fmt::Debug { + /// Returns the table provider as [`Any`](std::any::Any) so that it can be + /// downcast to a specific implementation. + fn as_any(&self) -> &dyn Any; + + /// Infer the common schema of the provided objects. The objects will usually + /// be analysed up to a given number of records or files (as specified in the + /// format config) and then give the estimated common schema. This might fail if + /// the files have schemas that cannot be merged. + async fn infer_schema(&self, readers: ObjectReaderStream) -> Result<SchemaRef>; + + /// Infer the statistics for the provided object. The cost and accuracy of the + /// estimated statistics might vary greatly between file formats. + async fn infer_stats(&self, reader: Arc<dyn ObjectReader>) -> Result<Statistics>; + + /// Take a list of files and convert it to the appropriate executor + /// according to this file format. + async fn create_physical_plan( + &self, + conf: PhysicalPlanConfig, + ) -> Result<Arc<dyn ExecutionPlan>>; +} diff --git a/datafusion/src/datasource/file_format/parquet.rs b/datafusion/src/datasource/file_format/parquet.rs new file mode 100644 index 000000000000..424a2985a3f7 --- /dev/null +++ b/datafusion/src/datasource/file_format/parquet.rs @@ -0,0 +1,623 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Parquet format abstractions + +use std::any::Any; +use std::io::Read; +use std::sync::Arc; + +use arrow::datatypes::Schema; +use arrow::datatypes::SchemaRef; +use async_trait::async_trait; +use futures::stream::StreamExt; +use parquet::arrow::ArrowReader; +use parquet::arrow::ParquetFileArrowReader; +use parquet::errors::ParquetError; +use parquet::errors::Result as ParquetResult; +use parquet::file::reader::ChunkReader; +use parquet::file::reader::Length; +use parquet::file::serialized_reader::SerializedFileReader; +use parquet::file::statistics::Statistics as ParquetStatistics; + +use super::FileFormat; +use super::PhysicalPlanConfig; +use crate::arrow::datatypes::{DataType, Field}; +use crate::datasource::object_store::{ObjectReader, ObjectReaderStream}; +use crate::datasource::{create_max_min_accs, get_col_stats}; +use crate::error::DataFusionError; +use crate::error::Result; +use crate::logical_plan::combine_filters; +use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; +use crate::physical_plan::file_format::ParquetExec; +use crate::physical_plan::ExecutionPlan; +use crate::physical_plan::{Accumulator, Statistics}; +use crate::scalar::ScalarValue; + +/// The default file exetension of parquet files +pub const DEFAULT_PARQUET_EXTENSION: &str = ".parquet"; + +/// The Apache Parquet `FileFormat` implementation +#[derive(Debug)] +pub struct ParquetFormat { + enable_pruning: bool, +} + +impl Default for ParquetFormat { + fn default() -> Self { + Self { + enable_pruning: true, + } + } +} + +impl ParquetFormat { + /// Activate statistics based row group level pruning + /// - defaults to true + pub fn with_enable_pruning(mut self, enable: bool) -> Self { + self.enable_pruning = enable; + self + } + /// Return true if pruning is enabled + pub fn enable_pruning(&self) -> bool { + self.enable_pruning + } +} + +#[async_trait] +impl FileFormat for ParquetFormat { + fn as_any(&self) -> &dyn Any { + self + } + + async fn infer_schema(&self, mut readers: ObjectReaderStream) -> Result { + // We currently get the schema information from the first file rather than do + // schema merging and this is a limitation. + // See https://issues.apache.org/jira/browse/ARROW-11017 + let first_file = readers + .next() + .await + .ok_or_else(|| DataFusionError::Plan("No data file found".to_owned()))??; + let (schema, _) = fetch_metadata(first_file)?; + Ok(Arc::new(schema)) + } + + async fn infer_stats(&self, reader: Arc) -> Result { + let (_, stats) = fetch_metadata(reader)?; + Ok(stats) + } + + async fn create_physical_plan( + &self, + conf: PhysicalPlanConfig, + ) -> Result> { + // If enable pruning then combine the filters to build the predicate. + // If disable pruning then set the predicate to None, thus readers + // will not prune data based on the statistics. 
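+        // As an illustration (assuming the `col`/`lit` expression helpers), filters
+        // such as `[col("a").gt(lit(1)), col("b").lt_eq(lit(5))]` would be conjoined
+        // by `combine_filters` into a single predicate equivalent to
+        // `a > 1 AND b <= 5`, which ParquetExec can then check against row group
+        // statistics to skip row groups that cannot match.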
+ let predicate = if self.enable_pruning { + combine_filters(&conf.filters) + } else { + None + }; + + Ok(Arc::new(ParquetExec::new( + conf.object_store, + conf.files, + conf.statistics, + conf.schema, + conf.projection, + predicate, + conf.batch_size, + conf.limit, + ))) + } +} + +fn summarize_min_max( + max_values: &mut Vec>, + min_values: &mut Vec>, + fields: &[Field], + i: usize, + stat: &ParquetStatistics, +) { + match stat { + ParquetStatistics::Boolean(s) => { + if let DataType::Boolean = fields[i].data_type() { + if s.has_min_max_set() { + if let Some(max_value) = &mut max_values[i] { + match max_value.update(&[ScalarValue::Boolean(Some(*s.max()))]) { + Ok(_) => {} + Err(_) => { + max_values[i] = None; + } + } + } + if let Some(min_value) = &mut min_values[i] { + match min_value.update(&[ScalarValue::Boolean(Some(*s.min()))]) { + Ok(_) => {} + Err(_) => { + min_values[i] = None; + } + } + } + } + } + } + ParquetStatistics::Int32(s) => { + if let DataType::Int32 = fields[i].data_type() { + if s.has_min_max_set() { + if let Some(max_value) = &mut max_values[i] { + match max_value.update(&[ScalarValue::Int32(Some(*s.max()))]) { + Ok(_) => {} + Err(_) => { + max_values[i] = None; + } + } + } + if let Some(min_value) = &mut min_values[i] { + match min_value.update(&[ScalarValue::Int32(Some(*s.min()))]) { + Ok(_) => {} + Err(_) => { + min_values[i] = None; + } + } + } + } + } + } + ParquetStatistics::Int64(s) => { + if let DataType::Int64 = fields[i].data_type() { + if s.has_min_max_set() { + if let Some(max_value) = &mut max_values[i] { + match max_value.update(&[ScalarValue::Int64(Some(*s.max()))]) { + Ok(_) => {} + Err(_) => { + max_values[i] = None; + } + } + } + if let Some(min_value) = &mut min_values[i] { + match min_value.update(&[ScalarValue::Int64(Some(*s.min()))]) { + Ok(_) => {} + Err(_) => { + min_values[i] = None; + } + } + } + } + } + } + ParquetStatistics::Float(s) => { + if let DataType::Float32 = fields[i].data_type() { + if s.has_min_max_set() { + if let Some(max_value) = &mut max_values[i] { + match max_value.update(&[ScalarValue::Float32(Some(*s.max()))]) { + Ok(_) => {} + Err(_) => { + max_values[i] = None; + } + } + } + if let Some(min_value) = &mut min_values[i] { + match min_value.update(&[ScalarValue::Float32(Some(*s.min()))]) { + Ok(_) => {} + Err(_) => { + min_values[i] = None; + } + } + } + } + } + } + ParquetStatistics::Double(s) => { + if let DataType::Float64 = fields[i].data_type() { + if s.has_min_max_set() { + if let Some(max_value) = &mut max_values[i] { + match max_value.update(&[ScalarValue::Float64(Some(*s.max()))]) { + Ok(_) => {} + Err(_) => { + max_values[i] = None; + } + } + } + if let Some(min_value) = &mut min_values[i] { + match min_value.update(&[ScalarValue::Float64(Some(*s.min()))]) { + Ok(_) => {} + Err(_) => { + min_values[i] = None; + } + } + } + } + } + } + _ => {} + } +} + +/// Read and parse the metadata of the Parquet file at location `path` +fn fetch_metadata(object_reader: Arc) -> Result<(Schema, Statistics)> { + let obj_reader = ChunkObjectReader(object_reader); + let file_reader = Arc::new(SerializedFileReader::new(obj_reader)?); + let mut arrow_reader = ParquetFileArrowReader::new(file_reader); + let schema = arrow_reader.get_schema()?; + let num_fields = schema.fields().len(); + let fields = schema.fields().to_vec(); + let meta_data = arrow_reader.get_metadata(); + + let mut num_rows = 0; + let mut total_byte_size = 0; + let mut null_counts = vec![0; num_fields]; + let mut has_statistics = false; + + let (mut max_values, mut 
min_values) = create_max_min_accs(&schema); + + for row_group_meta in meta_data.row_groups() { + num_rows += row_group_meta.num_rows(); + total_byte_size += row_group_meta.total_byte_size(); + + let columns_null_counts = row_group_meta + .columns() + .iter() + .flat_map(|c| c.statistics().map(|stats| stats.null_count())); + + for (i, cnt) in columns_null_counts.enumerate() { + null_counts[i] += cnt as usize + } + + for (i, column) in row_group_meta.columns().iter().enumerate() { + if let Some(stat) = column.statistics() { + has_statistics = true; + summarize_min_max(&mut max_values, &mut min_values, &fields, i, stat) + } + } + } + + let column_stats = if has_statistics { + Some(get_col_stats( + &schema, + null_counts, + &mut max_values, + &mut min_values, + )) + } else { + None + }; + + let statistics = Statistics { + num_rows: Some(num_rows as usize), + total_byte_size: Some(total_byte_size as usize), + column_statistics: column_stats, + is_exact: true, + }; + + Ok((schema, statistics)) +} + +/// A wrapper around the object reader to make it implement `ChunkReader` +pub struct ChunkObjectReader(pub Arc); + +impl Length for ChunkObjectReader { + fn len(&self) -> u64 { + self.0.length() + } +} + +impl ChunkReader for ChunkObjectReader { + type T = Box; + + fn get_read(&self, start: u64, length: usize) -> ParquetResult { + self.0 + .sync_chunk_reader(start, length) + .map_err(|e| ParquetError::ArrowError(e.to_string())) + } +} + +#[cfg(test)] +mod tests { + use crate::{ + datasource::{ + object_store::local::{ + local_file_meta, local_object_reader, local_object_reader_stream, + LocalFileSystem, + }, + PartitionedFile, + }, + physical_plan::collect, + }; + + use super::*; + use arrow::array::{ + BinaryArray, BooleanArray, Float32Array, Float64Array, Int32Array, + TimestampNanosecondArray, + }; + use futures::StreamExt; + + #[tokio::test] + async fn read_small_batches() -> Result<()> { + let projection = None; + let exec = get_exec("alltypes_plain.parquet", &projection, 2, None).await?; + let stream = exec.execute(0).await?; + + let tt_batches = stream + .map(|batch| { + let batch = batch.unwrap(); + assert_eq!(11, batch.num_columns()); + assert_eq!(2, batch.num_rows()); + }) + .fold(0, |acc, _| async move { acc + 1i32 }) + .await; + + assert_eq!(tt_batches, 4 /* 8/2 */); + + // test metadata + assert_eq!(exec.statistics().num_rows, Some(8)); + assert_eq!(exec.statistics().total_byte_size, Some(671)); + + Ok(()) + } + + #[tokio::test] + async fn read_limit() -> Result<()> { + let projection = None; + let exec = get_exec("alltypes_plain.parquet", &projection, 1024, Some(1)).await?; + + // note: even if the limit is set, the executor rounds up to the batch size + assert_eq!(exec.statistics().num_rows, Some(8)); + assert_eq!(exec.statistics().total_byte_size, Some(671)); + assert!(exec.statistics().is_exact); + let batches = collect(exec).await?; + assert_eq!(1, batches.len()); + assert_eq!(11, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + Ok(()) + } + + #[tokio::test] + async fn read_alltypes_plain_parquet() -> Result<()> { + let projection = None; + let exec = get_exec("alltypes_plain.parquet", &projection, 1024, None).await?; + + let x: Vec = exec + .schema() + .fields() + .iter() + .map(|f| format!("{}: {:?}", f.name(), f.data_type())) + .collect(); + let y = x.join("\n"); + assert_eq!( + "id: Int32\n\ + bool_col: Boolean\n\ + tinyint_col: Int32\n\ + smallint_col: Int32\n\ + int_col: Int32\n\ + bigint_col: Int64\n\ + float_col: Float32\n\ + double_col: Float64\n\ + 
date_string_col: Binary\n\ + string_col: Binary\n\ + timestamp_col: Timestamp(Nanosecond, None)", + y + ); + + let batches = collect(exec).await?; + + assert_eq!(1, batches.len()); + assert_eq!(11, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + Ok(()) + } + + #[tokio::test] + async fn read_bool_alltypes_plain_parquet() -> Result<()> { + let projection = Some(vec![1]); + let exec = get_exec("alltypes_plain.parquet", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(1, batches.len()); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec = vec![]; + for i in 0..batches[0].num_rows() { + values.push(array.value(i)); + } + + assert_eq!( + "[true, false, true, false, true, false, true, false]", + format!("{:?}", values) + ); + + Ok(()) + } + + #[tokio::test] + async fn read_i32_alltypes_plain_parquet() -> Result<()> { + let projection = Some(vec![0]); + let exec = get_exec("alltypes_plain.parquet", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(1, batches.len()); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec = vec![]; + for i in 0..batches[0].num_rows() { + values.push(array.value(i)); + } + + assert_eq!("[4, 5, 6, 7, 2, 3, 0, 1]", format!("{:?}", values)); + + Ok(()) + } + + #[tokio::test] + async fn read_i96_alltypes_plain_parquet() -> Result<()> { + let projection = Some(vec![10]); + let exec = get_exec("alltypes_plain.parquet", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(1, batches.len()); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec = vec![]; + for i in 0..batches[0].num_rows() { + values.push(array.value(i)); + } + + assert_eq!("[1235865600000000000, 1235865660000000000, 1238544000000000000, 1238544060000000000, 1233446400000000000, 1233446460000000000, 1230768000000000000, 1230768060000000000]", format!("{:?}", values)); + + Ok(()) + } + + #[tokio::test] + async fn read_f32_alltypes_plain_parquet() -> Result<()> { + let projection = Some(vec![6]); + let exec = get_exec("alltypes_plain.parquet", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(1, batches.len()); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec = vec![]; + for i in 0..batches[0].num_rows() { + values.push(array.value(i)); + } + + assert_eq!( + "[0.0, 1.1, 0.0, 1.1, 0.0, 1.1, 0.0, 1.1]", + format!("{:?}", values) + ); + + Ok(()) + } + + #[tokio::test] + async fn read_f64_alltypes_plain_parquet() -> Result<()> { + let projection = Some(vec![7]); + let exec = get_exec("alltypes_plain.parquet", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(1, batches.len()); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec = vec![]; + for i in 0..batches[0].num_rows() { + values.push(array.value(i)); + } + + assert_eq!( + "[0.0, 10.1, 
0.0, 10.1, 0.0, 10.1, 0.0, 10.1]", + format!("{:?}", values) + ); + + Ok(()) + } + + #[tokio::test] + async fn read_binary_alltypes_plain_parquet() -> Result<()> { + let projection = Some(vec![9]); + let exec = get_exec("alltypes_plain.parquet", &projection, 1024, None).await?; + + let batches = collect(exec).await?; + assert_eq!(1, batches.len()); + assert_eq!(1, batches[0].num_columns()); + assert_eq!(8, batches[0].num_rows()); + + let array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut values: Vec<&str> = vec![]; + for i in 0..batches[0].num_rows() { + values.push(std::str::from_utf8(array.value(i)).unwrap()); + } + + assert_eq!( + "[\"0\", \"1\", \"0\", \"1\", \"0\", \"1\", \"0\", \"1\"]", + format!("{:?}", values) + ); + + Ok(()) + } + + async fn get_exec( + file_name: &str, + projection: &Option>, + batch_size: usize, + limit: Option, + ) -> Result> { + let testdata = crate::test_util::parquet_test_data(); + let filename = format!("{}/{}", testdata, file_name); + let format = ParquetFormat::default(); + let schema = format + .infer_schema(local_object_reader_stream(vec![filename.clone()])) + .await + .expect("Schema inference"); + let statistics = format + .infer_stats(local_object_reader(filename.clone())) + .await + .expect("Stats inference"); + let files = vec![vec![PartitionedFile { + file_meta: local_file_meta(filename.clone()), + }]]; + let exec = format + .create_physical_plan(PhysicalPlanConfig { + object_store: Arc::new(LocalFileSystem {}), + schema, + files, + statistics, + projection: projection.clone(), + batch_size, + filters: vec![], + limit, + }) + .await?; + Ok(exec) + } +} diff --git a/datafusion/src/datasource/json.rs b/datafusion/src/datasource/json.rs deleted file mode 100644 index 1a6ec7af0720..000000000000 --- a/datafusion/src/datasource/json.rs +++ /dev/null @@ -1,184 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Line-delimited JSON data source -//! -//! This data source allows Line-delimited JSON string or files to be used as input for queries. -//! - -use std::{ - any::Any, - io::{BufReader, Read, Seek}, - sync::{Arc, Mutex}, -}; - -use crate::{ - datasource::{Source, TableProvider}, - error::{DataFusionError, Result}, - physical_plan::{ - common, - json::{NdJsonExec, NdJsonReadOptions}, - ExecutionPlan, - }, -}; -use arrow::{datatypes::SchemaRef, json::reader::infer_json_schema_from_seekable}; -use async_trait::async_trait; - -trait SeekRead: Read + Seek {} - -impl SeekRead for T {} - -/// Represents a line-delimited JSON file with a provided schema -pub struct NdJsonFile { - source: Source>, - schema: SchemaRef, - file_extension: String, -} - -impl NdJsonFile { - /// Attempt to initialize a `NdJsonFile` from a path. The schema can be inferred automatically. 
- pub fn try_new(path: &str, options: NdJsonReadOptions) -> Result { - let schema = if let Some(schema) = options.schema { - schema - } else { - let filenames = common::build_file_list(path, options.file_extension)?; - if filenames.is_empty() { - return Err(DataFusionError::Plan(format!( - "No files found at {path} with file extension {file_extension}", - path = path, - file_extension = options.file_extension - ))); - } - - NdJsonExec::try_infer_schema( - filenames, - Some(options.schema_infer_max_records), - )? - .into() - }; - - Ok(Self { - source: Source::Path(path.to_string()), - schema, - file_extension: options.file_extension.to_string(), - }) - } - - /// Attempt to initialize a `NdJsonFile` from a reader impls `Seek`. The schema can be inferred automatically. - pub fn try_new_from_reader( - mut reader: R, - options: NdJsonReadOptions, - ) -> Result { - let schema = if let Some(schema) = options.schema { - schema - } else { - let mut bufr = BufReader::new(reader); - let schema = infer_json_schema_from_seekable( - &mut bufr, - Some(options.schema_infer_max_records), - )? - .into(); - reader = bufr.into_inner(); - schema - }; - Ok(Self { - source: Source::Reader(Mutex::new(Some(Box::new(reader)))), - schema, - file_extension: String::new(), - }) - } -} - -#[async_trait] -impl TableProvider for NdJsonFile { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - self.schema.clone() - } - - async fn scan( - &self, - projection: &Option>, - batch_size: usize, - _filters: &[crate::logical_plan::Expr], - limit: Option, - ) -> Result> { - let opts = NdJsonReadOptions { - schema: Some(self.schema.clone()), - schema_infer_max_records: 0, // schema will always be provided, so it's unnecessary to infer schema - file_extension: self.file_extension.as_str(), - }; - let batch_size = limit - .map(|l| std::cmp::min(l, batch_size)) - .unwrap_or(batch_size); - - let exec = match &self.source { - Source::Reader(maybe_reader) => { - if let Some(rdr) = maybe_reader.lock().unwrap().take() { - NdJsonExec::try_new_from_reader( - rdr, - opts, - projection.clone(), - batch_size, - limit, - )? - } else { - return Err(DataFusionError::Execution( - "You can only read once if the data comes from a reader" - .to_string(), - )); - } - } - Source::Path(p) => { - NdJsonExec::try_new(p, opts, projection.clone(), batch_size, limit)? - } - }; - Ok(Arc::new(exec)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::prelude::*; - const TEST_DATA_BASE: &str = "tests/jsons"; - - #[tokio::test] - async fn csv_file_from_reader() -> Result<()> { - let mut ctx = ExecutionContext::new(); - let path = format!("{}/2.json", TEST_DATA_BASE); - ctx.register_table( - "ndjson", - Arc::new(NdJsonFile::try_new(&path, Default::default())?), - )?; - let df = ctx.sql("select sum(a) from ndjson")?; - let batches = df.collect().await?; - assert_eq!( - batches[0] - .column(0) - .as_any() - .downcast_ref::() - .unwrap() - .value(0), - 100000000000011 - ); - Ok(()) - } -} diff --git a/datafusion/src/datasource/listing.rs b/datafusion/src/datasource/listing.rs new file mode 100644 index 000000000000..585a40ffe05e --- /dev/null +++ b/datafusion/src/datasource/listing.rs @@ -0,0 +1,487 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! A table that uses the `ObjectStore` listing capability +//! to get the list of files to process. + +use std::{any::Any, sync::Arc}; + +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use async_trait::async_trait; +use futures::StreamExt; + +use crate::{ + datasource::PartitionedFile, + error::{DataFusionError, Result}, + logical_plan::Expr, + physical_plan::{ExecutionPlan, Statistics}, +}; + +use super::{ + datasource::TableProviderFilterPushDown, + file_format::{FileFormat, PhysicalPlanConfig}, + get_statistics_with_limit, + object_store::ObjectStore, + PartitionedFileStream, TableProvider, +}; + +/// Options for creating a `ListingTable` +pub struct ListingOptions { + /// A suffix on which files should be filtered (leave empty to + /// keep all files on the path) + pub file_extension: String, + /// The file format + pub format: Arc, + /// The expected partition column names in the folder structure. + /// For example `Vec["a", "b"]` means that the two first levels of + /// partitioning expected should be named "a" and "b": + /// - If there is a third level of partitioning it will be ignored. + /// - Files that don't follow this partitioning will be ignored. + /// Note that only `DataType::Utf8` is supported for the column type. + /// TODO implement case where partitions.len() > 0 + pub partitions: Vec, + /// Set true to try to guess statistics from the files. + /// This can add a lot of overhead as it will usually require files + /// to be opened and at least partially parsed. + pub collect_stat: bool, + /// Group files to avoid that the number of partitions exceeds + /// this limit + pub target_partitions: usize, +} + +impl ListingOptions { + /// Creates an options instance with the given format + /// Default values: + /// - no file extension filter + /// - no input partition to discover + /// - one target partition + /// - no stat collection + pub fn new(format: Arc) -> Self { + Self { + file_extension: String::new(), + format, + partitions: vec![], + collect_stat: false, + target_partitions: 1, + } + } + + /// Infer the schema of the files at the given path on the provided object store. + /// The inferred schema should include the partitioning columns. + /// + /// This method will not be called by the table itself but before creating it. + /// This way when creating the logical plan we can decide to resolve the schema + /// locally or ask a remote service to do it (e.g a scheduler). + pub async fn infer_schema<'a>( + &'a self, + object_store: Arc, + path: &'a str, + ) -> Result { + let file_stream = object_store + .list_file_with_suffix(path, &self.file_extension) + .await? 
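+            // each FileMeta produced by the listing is mapped to an ObjectReader so
+            // that the format implementation can open the object and read just enough
+            // of it (footer metadata or a sample of records) to infer its schema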
+ .map(move |file_meta| object_store.file_reader(file_meta?.sized_file)); + let file_schema = self.format.infer_schema(Box::pin(file_stream)).await?; + // Add the partition columns to the file schema + let mut fields = file_schema.fields().clone(); + for part in &self.partitions { + fields.push(Field::new(part, DataType::Utf8, false)); + } + Ok(Arc::new(Schema::new(fields))) + } +} + +/// An implementation of `TableProvider` that uses the object store +/// or file system listing capability to get the list of files. +pub struct ListingTable { + object_store: Arc, + path: String, + schema: SchemaRef, + options: ListingOptions, +} + +impl ListingTable { + /// Create new table that lists the FS to get the files to scan. + pub fn new( + object_store: Arc, + path: String, + // the schema must be resolved before creating the table + schema: SchemaRef, + options: ListingOptions, + ) -> Self { + Self { + object_store, + path, + schema, + options, + } + } + + /// Get object store ref + pub fn object_store(&self) -> &Arc { + &self.object_store + } + /// Get path ref + pub fn path(&self) -> &str { + &self.path + } + /// Get options ref + pub fn options(&self) -> &ListingOptions { + &self.options + } +} + +#[async_trait] +impl TableProvider for ListingTable { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + Arc::clone(&self.schema) + } + + async fn scan( + &self, + projection: &Option>, + batch_size: usize, + filters: &[Expr], + limit: Option, + ) -> Result> { + // TODO object_store_registry should be provided as param here + let (partitioned_file_lists, statistics) = self + .list_files_for_scan( + Arc::clone(&self.object_store), + &self.path, + filters, + limit, + ) + .await?; + // create the execution plan + self.options + .format + .create_physical_plan(PhysicalPlanConfig { + object_store: Arc::clone(&self.object_store), + schema: self.schema(), + files: partitioned_file_lists, + statistics, + projection: projection.clone(), + batch_size, + filters: filters.to_vec(), + limit, + }) + .await + } + + fn supports_filter_pushdown( + &self, + _filter: &Expr, + ) -> Result { + Ok(TableProviderFilterPushDown::Inexact) + } +} + +impl ListingTable { + async fn list_files_for_scan<'a>( + &'a self, + object_store: Arc, + path: &'a str, + filters: &'a [Expr], + limit: Option, + ) -> Result<(Vec>, Statistics)> { + // list files (with partitions) + let file_list = pruned_partition_list( + object_store.as_ref(), + path, + filters, + &self.options.file_extension, + &self.options.partitions, + ) + .await?; + + // collect the statistics if required by the config + let files = file_list.then(move |part_file| { + let object_store = object_store.clone(); + async move { + let part_file = part_file?; + let statistics = if self.options.collect_stat { + let object_reader = object_store + .file_reader(part_file.file_meta.sized_file.clone())?; + self.options.format.infer_stats(object_reader).await? 
+ } else { + Statistics::default() + }; + Ok((part_file, statistics)) as Result<(PartitionedFile, Statistics)> + } + }); + + let (files, statistics) = + get_statistics_with_limit(files, self.schema(), limit).await?; + + if files.is_empty() { + return Err(DataFusionError::Plan(format!( + "No files found at {} with file extension {}", + self.path, self.options.file_extension, + ))); + } + + Ok(( + split_files(files, self.options.target_partitions), + statistics, + )) + } +} + +/// Discover the partitions on the given path and prune out files +/// relative to irrelevant partitions using `filters` expressions +async fn pruned_partition_list( + store: &dyn ObjectStore, + path: &str, + _filters: &[Expr], + file_extension: &str, + partition_names: &[String], +) -> Result { + if partition_names.is_empty() { + Ok(Box::pin( + store + .list_file_with_suffix(path, file_extension) + .await? + .map(|f| Ok(PartitionedFile { file_meta: f? })), + )) + } else { + todo!("use filters to prune partitions") + } +} + +fn split_files( + partitioned_files: Vec, + n: usize, +) -> Vec> { + let mut chunk_size = partitioned_files.len() / n; + if partitioned_files.len() % n > 0 { + chunk_size += 1; + } + partitioned_files + .chunks(chunk_size) + .map(|c| c.to_vec()) + .collect() +} + +#[cfg(test)] +mod tests { + use std::io::Read; + + use futures::AsyncRead; + + use crate::datasource::{ + file_format::{avro::AvroFormat, parquet::ParquetFormat}, + object_store::{ + local::LocalFileSystem, FileMeta, FileMetaStream, ListEntryStream, + ObjectReader, ObjectStore, SizedFile, + }, + }; + + use super::*; + + #[test] + fn test_split_files() { + let new_partitioned_file = |path: &str| PartitionedFile { + file_meta: FileMeta { + sized_file: SizedFile { + path: path.to_owned(), + size: 10, + }, + last_modified: None, + }, + }; + let files = vec![ + new_partitioned_file("a"), + new_partitioned_file("b"), + new_partitioned_file("c"), + new_partitioned_file("d"), + new_partitioned_file("e"), + ]; + + let chunks = split_files(files.clone(), 1); + assert_eq!(1, chunks.len()); + assert_eq!(5, chunks[0].len()); + + let chunks = split_files(files.clone(), 2); + assert_eq!(2, chunks.len()); + assert_eq!(3, chunks[0].len()); + assert_eq!(2, chunks[1].len()); + + let chunks = split_files(files.clone(), 5); + assert_eq!(5, chunks.len()); + assert_eq!(1, chunks[0].len()); + assert_eq!(1, chunks[1].len()); + assert_eq!(1, chunks[2].len()); + assert_eq!(1, chunks[3].len()); + assert_eq!(1, chunks[4].len()); + + let chunks = split_files(files, 123); + assert_eq!(5, chunks.len()); + assert_eq!(1, chunks[0].len()); + assert_eq!(1, chunks[1].len()); + assert_eq!(1, chunks[2].len()); + assert_eq!(1, chunks[3].len()); + assert_eq!(1, chunks[4].len()); + } + + #[tokio::test] + async fn read_single_file() -> Result<()> { + let table = load_table("alltypes_plain.parquet").await?; + let projection = None; + let exec = table + .scan(&projection, 1024, &[], None) + .await + .expect("Scan table"); + + assert_eq!(exec.children().len(), 0); + assert_eq!(exec.output_partitioning().partition_count(), 1); + + // test metadata + assert_eq!(exec.statistics().num_rows, Some(8)); + assert_eq!(exec.statistics().total_byte_size, Some(671)); + + Ok(()) + } + + #[tokio::test] + async fn file_listings() -> Result<()> { + assert_partitioning(5, 12, 5).await?; + assert_partitioning(4, 4, 4).await?; + assert_partitioning(5, 2, 2).await?; + assert_partitioning(0, 2, 0).await.expect_err("no files"); + Ok(()) + } + + async fn load_table(name: &str) -> Result> { + let testdata = 
crate::test_util::parquet_test_data(); + let filename = format!("{}/{}", testdata, name); + let opt = ListingOptions { + file_extension: "parquet".to_owned(), + format: Arc::new(ParquetFormat::default()), + partitions: vec![], + target_partitions: 2, + collect_stat: true, + }; + // here we resolve the schema locally + let schema = opt + .infer_schema(Arc::new(LocalFileSystem {}), &filename) + .await + .expect("Infer schema"); + let table = + ListingTable::new(Arc::new(LocalFileSystem {}), filename, schema, opt); + Ok(Arc::new(table)) + } + + async fn assert_partitioning( + files_in_folder: usize, + target_partitions: usize, + output_partitioning: usize, + ) -> Result<()> { + let mock_store: Arc = + Arc::new(MockObjectStore { files_in_folder }); + + let format = AvroFormat {}; + + let opt = ListingOptions { + file_extension: "".to_owned(), + format: Arc::new(format), + partitions: vec![], + target_partitions, + collect_stat: true, + }; + + let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); + + let table = ListingTable::new( + Arc::clone(&mock_store), + "bucket/key-prefix".to_owned(), + Arc::new(schema), + opt, + ); + + let (file_list, _) = table + .list_files_for_scan(mock_store, "bucket/key-prefix", &[], None) + .await?; + + assert_eq!(file_list.len(), output_partitioning); + + Ok(()) + } + + #[derive(Debug)] + struct MockObjectStore { + pub files_in_folder: usize, + } + + #[async_trait] + impl ObjectStore for MockObjectStore { + async fn list_file(&self, prefix: &str) -> Result { + let prefix = prefix.to_owned(); + let files = (0..self.files_in_folder).map(move |i| { + Ok(FileMeta { + sized_file: SizedFile { + path: format!("{}file{}", prefix, i), + size: 100, + }, + last_modified: None, + }) + }); + Ok(Box::pin(futures::stream::iter(files))) + } + + async fn list_dir( + &self, + _prefix: &str, + _delimiter: Option, + ) -> Result { + unimplemented!() + } + + fn file_reader(&self, _file: SizedFile) -> Result> { + Ok(Arc::new(MockObjectReader {})) + } + } + + struct MockObjectReader {} + + #[async_trait] + impl ObjectReader for MockObjectReader { + async fn chunk_reader( + &self, + _start: u64, + _length: usize, + ) -> Result> { + unimplemented!() + } + + fn sync_chunk_reader( + &self, + _start: u64, + _length: usize, + ) -> Result> { + unimplemented!() + } + + fn length(&self) -> u64 { + unimplemented!() + } + } +} diff --git a/datafusion/src/datasource/mod.rs b/datafusion/src/datasource/mod.rs index ab70014a4367..b607469bff00 100644 --- a/datafusion/src/datasource/mod.rs +++ b/datafusion/src/datasource/mod.rs @@ -17,175 +17,35 @@ //! 
DataFusion data sources -pub mod avro; -pub mod csv; pub mod datasource; pub mod empty; -pub mod json; +pub mod file_format; +pub mod listing; pub mod memory; pub mod object_store; -pub mod parquet; -pub use self::csv::{CsvFile, CsvReadOptions}; +use futures::Stream; + pub use self::datasource::{TableProvider, TableType}; pub use self::memory::MemTable; +use self::object_store::{FileMeta, SizedFile}; use crate::arrow::datatypes::{Schema, SchemaRef}; -use crate::error::{DataFusionError, Result}; -use crate::physical_plan::common::build_file_list; +use crate::error::Result; use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; use crate::physical_plan::{Accumulator, ColumnStatistics, Statistics}; -use std::sync::Arc; - -/// Source for table input data -pub(crate) enum Source> { - /// Path to a single file or a directory containing one of more files - Path(String), - - /// Read data from a reader - Reader(std::sync::Mutex>), -} - -#[derive(Debug, Clone)] -/// A single file that should be read, along with its schema, statistics -/// and partition column values that need to be appended to each row. -pub struct PartitionedFile { - /// Path for the file (e.g. URL, filesystem path, etc) - pub path: String, - /// Statistics of the file - pub statistics: Statistics, - // Values of partition columns to be appended to each row - // pub partition_value: Option>, - // We may include row group range here for a more fine-grained parallel execution -} - -impl From for PartitionedFile { - fn from(path: String) -> Self { - Self { - path, - statistics: Default::default(), - } - } -} - -impl std::fmt::Display for PartitionedFile { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", self.path) - } -} - -#[derive(Debug, Clone)] -/// A collection of files that should be read in a single task -pub struct FilePartition { - /// The index of the partition among all partitions - pub index: usize, - /// The contained files of the partition - pub files: Vec, -} - -impl std::fmt::Display for FilePartition { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - let files: Vec = self.files.iter().map(|f| f.to_string()).collect(); - write!(f, "{}", files.join(", ")) - } -} - -#[derive(Debug, Clone)] -/// All source files with same schema exists in a path -pub struct TableDescriptor { - /// root path of the table - pub path: String, - /// All source files in the path - pub partition_files: Vec, - /// The schema of the files - pub schema: SchemaRef, -} - -/// Returned partitioned file with its schema -pub struct FileAndSchema { - file: PartitionedFile, - schema: Schema, -} - -/// Builder for ['TableDescriptor'] inside given path -pub trait TableDescriptorBuilder { - /// Construct a ['TableDescriptor'] from the provided path - fn build_table_desc( - path: &str, - ext: &str, - provided_schema: Option, - collect_statistics: bool, - ) -> Result { - let filenames = build_file_list(path, ext)?; - if filenames.is_empty() { - return Err(DataFusionError::Plan(format!( - "No file (with .{} extension) found at path {}", - ext, path - ))); - } - - // build a list of partitions with statistics and gather all unique schemas - // used in this data set - let mut schemas: Vec = vec![]; - let mut contains_file = false; - - let partitioned_files = filenames - .iter() - .map(|file_path| { - contains_file = true; - let result = if collect_statistics { - let FileAndSchema {file, schema} = Self::file_meta(file_path)?; - if schemas.is_empty() { - schemas.push(schema); - } else if 
schema.fields() != schemas[0].fields() { - // we currently get the schema information from the first file rather than do - // schema merging and this is a limitation. - // See https://issues.apache.org/jira/browse/ARROW-11017 - return Err(DataFusionError::Plan(format!( - "The file {} have different schema from the first file and DataFusion does \ - not yet support schema merging", - file_path - ))); - } - file - } else { - PartitionedFile { - path: file_path.to_owned(), - statistics: Statistics::default(), - } - }; - - Ok(result) - }).collect::>>(); - - if !contains_file { - return Err(DataFusionError::Plan(format!( - "No file (with .{} extension) found at path {}", - ext, path - ))); - } - - let result_schema = provided_schema.unwrap_or_else(|| schemas.pop().unwrap()); - - Ok(TableDescriptor { - path: path.to_string(), - partition_files: partitioned_files?, - schema: Arc::new(result_schema), - }) - } - - /// Get all metadata for a source file, including schema, statistics, partitions, etc. - fn file_meta(path: &str) -> Result; -} +use futures::StreamExt; +use std::pin::Pin; /// Get all files as well as the summary statistic /// if the optional `limit` is provided, includes only sufficient files /// needed to read up to `limit` number of rows -pub fn get_statistics_with_limit( - table_desc: &TableDescriptor, +/// TODO fix case where `num_rows` and `total_byte_size` are not defined (stat should be None instead of Some(0)) +pub async fn get_statistics_with_limit( + all_files: impl Stream>, + schema: SchemaRef, limit: Option, -) -> (Vec, Statistics) { - let mut all_files = table_desc.partition_files.clone(); - let schema = table_desc.schema.clone(); +) -> Result<(Vec, Statistics)> { + let mut result_files = vec![]; let mut total_byte_size = 0; let mut null_counts = vec![0; schema.fields().len()]; @@ -193,11 +53,12 @@ pub fn get_statistics_with_limit( let (mut max_values, mut min_values) = create_max_min_accs(&schema); let mut num_rows = 0; - let mut num_files = 0; let mut is_exact = true; - for file in &all_files { - num_files += 1; - let file_stats = &file.statistics; + // fusing the stream allows us to call next safely even once it is finished + let mut all_files = Box::pin(all_files.fuse()); + while let Some(res) = all_files.next().await { + let (file, file_stats) = res?; + result_files.push(file); is_exact &= file_stats.is_exact; num_rows += file_stats.num_rows.unwrap_or(0); total_byte_size += file_stats.total_byte_size.unwrap_or(0); @@ -233,9 +94,11 @@ pub fn get_statistics_with_limit( break; } } - if num_files < all_files.len() { + // if we still have files in the stream, it means that the limit kicked + // in and that the statistic could have been different if we processed + // the files in a different order. + if all_files.next().await.is_some() { is_exact = false; - all_files.truncate(num_files); } let column_stats = if has_statistics { @@ -255,7 +118,57 @@ pub fn get_statistics_with_limit( column_statistics: column_stats, is_exact, }; - (all_files, statistics) + + Ok((result_files, statistics)) +} + +#[derive(Debug, Clone)] +/// A single file that should be read, along with its schema, statistics +/// and partition column values that need to be appended to each row. +pub struct PartitionedFile { + /// Path for the file (e.g. 
URL, filesystem path, etc) + pub file_meta: FileMeta, + // Values of partition columns to be appended to each row + // pub partition_value: Option>, + // We may include row group range here for a more fine-grained parallel execution +} + +impl PartitionedFile { + /// Create a simple file without metadata or partition + pub fn new(path: String, size: u64) -> Self { + Self { + file_meta: FileMeta { + sized_file: SizedFile { path, size }, + last_modified: None, + }, + } + } +} + +/// Stream of files get listed from object store +pub type PartitionedFileStream = + Pin> + Send + Sync + 'static>>; + +impl std::fmt::Display for PartitionedFile { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.file_meta) + } +} + +#[derive(Debug, Clone)] +/// A collection of files that should be read in a single task +pub struct FilePartition { + /// The index of the partition among all partitions + pub index: usize, + /// The contained files of the partition + pub files: Vec, +} + +impl std::fmt::Display for FilePartition { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let files: Vec = self.files.iter().map(|f| f.to_string()).collect(); + write!(f, "{}", files.join(", ")) + } } fn create_max_min_accs( diff --git a/datafusion/src/datasource/object_store/local.rs b/datafusion/src/datasource/object_store/local.rs index 2b27f6c8f993..4f4dbefbca49 100644 --- a/datafusion/src/datasource/object_store/local.rs +++ b/datafusion/src/datasource/object_store/local.rs @@ -17,7 +17,8 @@ //! Object store that represents the Local File System. -use std::fs::Metadata; +use std::fs::{self, File, Metadata}; +use std::io::{Read, Seek, SeekFrom}; use std::sync::Arc; use async_trait::async_trait; @@ -29,6 +30,8 @@ use crate::datasource::object_store::{ use crate::error::DataFusionError; use crate::error::Result; +use super::{ObjectReaderStream, SizedFile}; + #[derive(Debug)] /// Local File System as Object Store. pub struct LocalFileSystem; @@ -47,17 +50,17 @@ impl ObjectStore for LocalFileSystem { todo!() } - fn file_reader(&self, file: FileMeta) -> Result> { + fn file_reader(&self, file: SizedFile) -> Result> { Ok(Arc::new(LocalFileReader::new(file)?)) } } struct LocalFileReader { - file: FileMeta, + file: SizedFile, } impl LocalFileReader { - fn new(file: FileMeta) -> Result { + fn new(file: SizedFile) -> Result { Ok(Self { file }) } } @@ -68,8 +71,22 @@ impl ObjectReader for LocalFileReader { &self, _start: u64, _length: usize, - ) -> Result> { - todo!() + ) -> Result> { + todo!( + "implement once async file readers are available (arrow-rs#78, arrow-rs#111)" + ) + } + + fn sync_chunk_reader( + &self, + start: u64, + length: usize, + ) -> Result> { + // A new file descriptor is opened for each chunk reader. + // This okay because chunks are usually fairly large. 
+ let mut file = File::open(&self.file.path)?; + file.seek(SeekFrom::Start(start))?; + Ok(Box::new(file.take(length as u64))) } fn length(&self) -> u64 { @@ -80,9 +97,11 @@ impl ObjectReader for LocalFileReader { async fn list_all(prefix: String) -> Result { fn get_meta(path: String, metadata: Metadata) -> FileMeta { FileMeta { - path, + sized_file: SizedFile { + path, + size: metadata.len(), + }, last_modified: metadata.modified().map(chrono::DateTime::from).ok(), - size: metadata.len(), } } @@ -133,6 +152,31 @@ async fn list_all(prefix: String) -> Result { } } +/// Create a stream of `ObjectReader` by converting each file in the `files` vector +/// into instances of `LocalFileReader` +pub fn local_object_reader_stream(files: Vec) -> ObjectReaderStream { + Box::pin(futures::stream::iter(files).map(|f| Ok(local_object_reader(f)))) +} + +/// Helper method to convert a file location to a `LocalFileReader` +pub fn local_object_reader(file: String) -> Arc { + LocalFileSystem + .file_reader(local_file_meta(file).sized_file) + .expect("File not found") +} + +/// Helper method to fetch the file size and date at given path and create a `FileMeta` +pub fn local_file_meta(file: String) -> FileMeta { + let metadata = fs::metadata(&file).expect("Local file metadata"); + FileMeta { + sized_file: SizedFile { + size: metadata.len(), + path: file, + }, + last_modified: metadata.modified().map(chrono::DateTime::from).ok(), + } +} + #[cfg(test)] mod tests { use super::*; @@ -163,8 +207,8 @@ mod tests { let mut files = list_all(tmp.path().to_str().unwrap().to_string()).await?; while let Some(file) = files.next().await { let file = file?; - assert_eq!(file.size, 0); - all_files.insert(file.path); + assert_eq!(file.size(), 0); + all_files.insert(file.path().to_owned()); } assert_eq!(all_files.len(), 3); diff --git a/datafusion/src/datasource/object_store/mod.rs b/datafusion/src/datasource/object_store/mod.rs index fd25fd43a2e7..61bc47dc462c 100644 --- a/datafusion/src/datasource/object_store/mod.rs +++ b/datafusion/src/datasource/object_store/mod.rs @@ -20,57 +20,110 @@ pub mod local; use std::collections::HashMap; -use std::fmt::Debug; +use std::fmt::{self, Debug}; +use std::io::Read; use std::pin::Pin; use std::sync::{Arc, RwLock}; use async_trait::async_trait; -use futures::{AsyncRead, Stream}; +use chrono::{DateTime, Utc}; +use futures::{AsyncRead, Stream, StreamExt}; use local::LocalFileSystem; use crate::error::{DataFusionError, Result}; -use chrono::Utc; -/// Object Reader for one file in a object store +/// Object Reader for one file in an object store. +/// +/// Note that the dynamic dispatch on the reader might +/// have some performance impacts. 
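As an aside for readers of this hunk, the following sketch (an editorial illustration, not part of the patch) shows how the new `SizedFile` / `sync_chunk_reader` plumbing could be driven through the local store. The `Read + Send + Sync` bounds on the returned reader are assumed here, since the generic parameters are not visible in this rendering, and the file path is hypothetical.

use std::io::Read;

use datafusion::datasource::object_store::local::{local_file_meta, LocalFileSystem};
use datafusion::datasource::object_store::{ObjectReader, ObjectStore};
use datafusion::error::Result;

/// Read the first `length` bytes of a local file through the object store API.
fn read_prefix(path: &str, length: usize) -> Result<Vec<u8>> {
    // Resolve path and size metadata, then open a reader on the store.
    let meta = local_file_meta(path.to_owned());
    let reader = LocalFileSystem.file_reader(meta.sized_file)?;
    // Each chunk reader opens its own descriptor, bounded to `length` bytes.
    let mut chunk = reader.sync_chunk_reader(0, length)?;
    let mut buf = Vec::new();
    chunk.read_to_end(&mut buf)?;
    Ok(buf)
}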
#[async_trait] -pub trait ObjectReader { +pub trait ObjectReader: Send + Sync { /// Get reader for a part [start, start + length] in the file asynchronously async fn chunk_reader(&self, start: u64, length: usize) - -> Result>; + -> Result>; - /// Get length for the file + /// Get reader for a part [start, start + length] in the file + fn sync_chunk_reader( + &self, + start: u64, + length: usize, + ) -> Result>; + + /// Get reader for the entire file + fn sync_reader(&self) -> Result> { + self.sync_chunk_reader(0, self.length() as usize) + } + + /// Get the size of the file fn length(&self) -> u64; } -/// Represents a file or a prefix that may require further resolution +/// Represents a specific file or a prefix (folder) that may +/// require further resolution #[derive(Debug)] pub enum ListEntry { - /// File metadata + /// Specific file with metadata FileMeta(FileMeta), /// Prefix to be further resolved during partition discovery Prefix(String), } -/// File meta we got from object store -#[derive(Debug)] -pub struct FileMeta { - /// Path of the file +/// The path and size of the file. +#[derive(Debug, Clone)] +pub struct SizedFile { + /// Path of the file. It is relative to the current object + /// store (it does not specify the `xx://` scheme). pub path: String, - /// Last time the file was modified in UTC - pub last_modified: Option>, /// File size in total pub size: u64, } -/// Stream of files get listed from object store +/// Description of a file as returned by the listing command of a +/// given object store. The resulting path is relative to the +/// object store that generated it. +#[derive(Debug, Clone)] +pub struct FileMeta { + /// The path and size of the file. + pub sized_file: SizedFile, + /// The last modification time of the file according to the + /// object store metadata. This information might be used by + /// catalog systems like Delta Lake for time travel (see + /// https://github.com/delta-io/delta/issues/192) + pub last_modified: Option>, +} + +impl FileMeta { + /// The path that describes this file. It is relative to the + /// associated object store. + pub fn path(&self) -> &str { + &self.sized_file.path + } + + /// The size of the file. + pub fn size(&self) -> u64 { + self.sized_file.size + } +} + +impl std::fmt::Display for FileMeta { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} (size: {})", self.path(), self.size()) + } +} + +/// Stream of files listed from object store pub type FileMetaStream = Pin> + Send + Sync + 'static>>; -/// Stream of list entries get from object store +/// Stream of list entries obtained from object store pub type ListEntryStream = Pin> + Send + Sync + 'static>>; +/// Stream readers opened on a given object store +pub type ObjectReaderStream = + Pin>> + Send + Sync + 'static>>; + /// A ObjectStore abstracts access to an underlying file/object storage. /// It maps strings (e.g. 
URLs, filesystem paths, etc) to sources of bytes #[async_trait] @@ -78,6 +131,23 @@ pub trait ObjectStore: Sync + Send + Debug { /// Returns all the files in path `prefix` async fn list_file(&self, prefix: &str) -> Result; + /// Calls `list_file` with a suffix filter + async fn list_file_with_suffix( + &self, + prefix: &str, + suffix: &str, + ) -> Result { + let file_stream = self.list_file(prefix).await?; + let suffix = suffix.to_owned(); + Ok(Box::pin(file_stream.filter(move |fr| { + let has_suffix = match fr { + Ok(f) => f.path().ends_with(&suffix), + Err(_) => true, + }; + async move { has_suffix } + }))) + } + /// Returns all the files in `prefix` if the `prefix` is already a leaf dir, /// or all paths between the `prefix` and the first occurrence of the `delimiter` if it is provided. async fn list_dir( @@ -87,7 +157,7 @@ pub trait ObjectStore: Sync + Send + Debug { ) -> Result; /// Get object reader for one file - fn file_reader(&self, file: FileMeta) -> Result>; + fn file_reader(&self, file: SizedFile) -> Result>; } static LOCAL_SCHEME: &str = "file"; @@ -100,6 +170,22 @@ pub struct ObjectStoreRegistry { pub object_stores: RwLock>>, } +impl fmt::Debug for ObjectStoreRegistry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ObjectStoreRegistry") + .field( + "schemes", + &self + .object_stores + .read() + .unwrap() + .keys() + .collect::>(), + ) + .finish() + } +} + impl ObjectStoreRegistry { /// Create the registry that object stores can registered into. /// ['LocalFileSystem'] store is registered in by default to support read local files natively. @@ -130,12 +216,17 @@ impl ObjectStoreRegistry { } /// Get a suitable store for the URI based on it's scheme. For example: - /// URI with scheme file or no schema will return the default LocalFS store, - /// URI with scheme s3 will return the S3 store if it's registered. - pub fn get_by_uri(&self, uri: &str) -> Result> { - if let Some((scheme, _)) = uri.split_once(':') { + /// - URI with scheme `file://` or no schema will return the default LocalFS store + /// - URI with scheme `s3://` will return the S3 store if it's registered + /// Returns a tuple with the store and the path of the file in that store + /// (URI=scheme://path). + pub fn get_by_uri<'a>( + &self, + uri: &'a str, + ) -> Result<(Arc, &'a str)> { + if let Some((scheme, path)) = uri.split_once("://") { let stores = self.object_stores.read().unwrap(); - stores + let store = stores .get(&*scheme.to_lowercase()) .map(Clone::clone) .ok_or_else(|| { @@ -143,9 +234,10 @@ impl ObjectStoreRegistry { "No suitable object store found for {}", scheme )) - }) + })?; + Ok((store, path)) } else { - Ok(Arc::new(LocalFileSystem)) + Ok((Arc::new(LocalFileSystem), uri)) } } } diff --git a/datafusion/src/datasource/parquet.rs b/datafusion/src/datasource/parquet.rs deleted file mode 100644 index d044ed94d59d..000000000000 --- a/datafusion/src/datasource/parquet.rs +++ /dev/null @@ -1,677 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Parquet data source - -use std::any::Any; -use std::fs::File; -use std::sync::Arc; - -use async_trait::async_trait; -use parquet::arrow::ArrowReader; -use parquet::arrow::ParquetFileArrowReader; -use parquet::file::serialized_reader::SerializedFileReader; -use parquet::file::statistics::Statistics as ParquetStatistics; - -use super::datasource::TableProviderFilterPushDown; -use crate::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use crate::datasource::{ - create_max_min_accs, get_col_stats, FileAndSchema, PartitionedFile, TableDescriptor, - TableDescriptorBuilder, TableProvider, -}; -use crate::error::Result; -use crate::logical_plan::{combine_filters, Expr}; -use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; -use crate::physical_plan::parquet::ParquetExec; -use crate::physical_plan::{Accumulator, ExecutionPlan, Statistics}; -use crate::scalar::ScalarValue; - -/// Table-based representation of a `ParquetFile`. -pub struct ParquetTable { - /// Descriptor of the table, including schema, files, etc. - pub desc: Arc, - target_partitions: usize, - enable_pruning: bool, -} - -impl ParquetTable { - /// Attempt to initialize a new `ParquetTable` from a file path. - pub fn try_new(path: impl Into, target_partitions: usize) -> Result { - let path = path.into(); - let table_desc = ParquetTableDescriptor::new(path.as_str()); - Ok(Self { - desc: Arc::new(table_desc?), - target_partitions, - enable_pruning: true, - }) - } - - /// Attempt to initialize a new `ParquetTable` from a file path and known schema. - /// If collect_statistics is `false`, doesn't read files until necessary by scan - pub fn try_new_with_schema( - path: impl Into, - schema: Schema, - target_partitions: usize, - collect_statistics: bool, - ) -> Result { - let path = path.into(); - let table_desc = ParquetTableDescriptor::new_with_schema( - path.as_str(), - Some(schema), - collect_statistics, - ); - Ok(Self { - desc: Arc::new(table_desc?), - target_partitions, - enable_pruning: true, - }) - } - - /// Attempt to initialize a new `ParquetTable` from a table descriptor. - pub fn try_new_with_desc( - desc: Arc, - target_partitions: usize, - enable_pruning: bool, - ) -> Result { - Ok(Self { - desc, - target_partitions, - enable_pruning, - }) - } - - /// Get the path for the Parquet file(s) represented by this ParquetTable instance - pub fn path(&self) -> &str { - &self.desc.descriptor.path - } - - /// Get parquet pruning option - pub fn get_enable_pruning(&self) -> bool { - self.enable_pruning - } - - /// Set parquet pruning option - pub fn with_enable_pruning(mut self, enable_pruning: bool) -> Self { - self.enable_pruning = enable_pruning; - self - } - - /// Get Target partitions - pub fn get_target_partitions(&self) -> usize { - self.target_partitions - } -} - -#[async_trait] -impl TableProvider for ParquetTable { - fn as_any(&self) -> &dyn Any { - self - } - - /// Get the schema for this parquet file. 
- fn schema(&self) -> SchemaRef { - self.desc.schema() - } - - fn supports_filter_pushdown( - &self, - _filter: &Expr, - ) -> Result { - Ok(TableProviderFilterPushDown::Inexact) - } - - /// Scan the file(s), using the provided projection, and return one BatchIterator per - /// partition. - async fn scan( - &self, - projection: &Option>, - batch_size: usize, - filters: &[Expr], - limit: Option, - ) -> Result> { - // If enable pruning then combine the filters to build the predicate. - // If disable pruning then set the predicate to None, thus readers - // will not prune data based on the statistics. - let predicate = if self.enable_pruning { - combine_filters(filters) - } else { - None - }; - Ok(Arc::new(ParquetExec::try_new( - self.desc.clone(), - projection.clone(), - predicate, - limit - .map(|l| std::cmp::min(l, batch_size)) - .unwrap_or(batch_size), - self.target_partitions, - limit, - )?)) - } -} - -#[derive(Debug, Clone)] -/// Descriptor for a parquet root path -pub struct ParquetTableDescriptor { - /// metadata for files inside the root path - pub descriptor: TableDescriptor, -} - -impl ParquetTableDescriptor { - /// Construct a new parquet descriptor for a root path - pub fn new(root_path: &str) -> Result { - let table_desc = Self::build_table_desc(root_path, "parquet", None, true); - Ok(Self { - descriptor: table_desc?, - }) - } - - /// Construct a new parquet descriptor for a root path with known schema - pub fn new_with_schema( - root_path: &str, - schema: Option, - collect_statistics: bool, - ) -> Result { - let table_desc = - Self::build_table_desc(root_path, "parquet", schema, collect_statistics); - Ok(Self { - descriptor: table_desc?, - }) - } - - /// Get file schema for all parquet files - pub fn schema(&self) -> SchemaRef { - self.descriptor.schema.clone() - } - - fn summarize_min_max( - max_values: &mut Vec>, - min_values: &mut Vec>, - fields: &[Field], - i: usize, - stat: &ParquetStatistics, - ) { - match stat { - ParquetStatistics::Boolean(s) => { - if let DataType::Boolean = fields[i].data_type() { - if s.has_min_max_set() { - if let Some(max_value) = &mut max_values[i] { - match max_value - .update(&[ScalarValue::Boolean(Some(*s.max()))]) - { - Ok(_) => {} - Err(_) => { - max_values[i] = None; - } - } - } - if let Some(min_value) = &mut min_values[i] { - match min_value - .update(&[ScalarValue::Boolean(Some(*s.min()))]) - { - Ok(_) => {} - Err(_) => { - min_values[i] = None; - } - } - } - } - } - } - ParquetStatistics::Int32(s) => { - if let DataType::Int32 = fields[i].data_type() { - if s.has_min_max_set() { - if let Some(max_value) = &mut max_values[i] { - match max_value.update(&[ScalarValue::Int32(Some(*s.max()))]) - { - Ok(_) => {} - Err(_) => { - max_values[i] = None; - } - } - } - if let Some(min_value) = &mut min_values[i] { - match min_value.update(&[ScalarValue::Int32(Some(*s.min()))]) - { - Ok(_) => {} - Err(_) => { - min_values[i] = None; - } - } - } - } - } - } - ParquetStatistics::Int64(s) => { - if let DataType::Int64 = fields[i].data_type() { - if s.has_min_max_set() { - if let Some(max_value) = &mut max_values[i] { - match max_value.update(&[ScalarValue::Int64(Some(*s.max()))]) - { - Ok(_) => {} - Err(_) => { - max_values[i] = None; - } - } - } - if let Some(min_value) = &mut min_values[i] { - match min_value.update(&[ScalarValue::Int64(Some(*s.min()))]) - { - Ok(_) => {} - Err(_) => { - min_values[i] = None; - } - } - } - } - } - } - ParquetStatistics::Float(s) => { - if let DataType::Float32 = fields[i].data_type() { - if s.has_min_max_set() { - if 
let Some(max_value) = &mut max_values[i] { - match max_value - .update(&[ScalarValue::Float32(Some(*s.max()))]) - { - Ok(_) => {} - Err(_) => { - max_values[i] = None; - } - } - } - if let Some(min_value) = &mut min_values[i] { - match min_value - .update(&[ScalarValue::Float32(Some(*s.min()))]) - { - Ok(_) => {} - Err(_) => { - min_values[i] = None; - } - } - } - } - } - } - ParquetStatistics::Double(s) => { - if let DataType::Float64 = fields[i].data_type() { - if s.has_min_max_set() { - if let Some(max_value) = &mut max_values[i] { - match max_value - .update(&[ScalarValue::Float64(Some(*s.max()))]) - { - Ok(_) => {} - Err(_) => { - max_values[i] = None; - } - } - } - if let Some(min_value) = &mut min_values[i] { - match min_value - .update(&[ScalarValue::Float64(Some(*s.min()))]) - { - Ok(_) => {} - Err(_) => { - min_values[i] = None; - } - } - } - } - } - } - _ => {} - } - } -} - -impl TableDescriptorBuilder for ParquetTableDescriptor { - fn file_meta(path: &str) -> Result { - let file = File::open(path)?; - let file_reader = Arc::new(SerializedFileReader::new(file)?); - let mut arrow_reader = ParquetFileArrowReader::new(file_reader); - let path = path.to_string(); - let schema = arrow_reader.get_schema()?; - let num_fields = schema.fields().len(); - let fields = schema.fields().to_vec(); - let meta_data = arrow_reader.get_metadata(); - - let mut num_rows = 0; - let mut total_byte_size = 0; - let mut null_counts = vec![0; num_fields]; - let mut has_statistics = false; - - let (mut max_values, mut min_values) = create_max_min_accs(&schema); - - for row_group_meta in meta_data.row_groups() { - num_rows += row_group_meta.num_rows(); - total_byte_size += row_group_meta.total_byte_size(); - - let columns_null_counts = row_group_meta - .columns() - .iter() - .flat_map(|c| c.statistics().map(|stats| stats.null_count())); - - for (i, cnt) in columns_null_counts.enumerate() { - null_counts[i] += cnt as usize - } - - for (i, column) in row_group_meta.columns().iter().enumerate() { - if let Some(stat) = column.statistics() { - has_statistics = true; - ParquetTableDescriptor::summarize_min_max( - &mut max_values, - &mut min_values, - &fields, - i, - stat, - ) - } - } - } - - let column_stats = if has_statistics { - Some(get_col_stats( - &schema, - null_counts, - &mut max_values, - &mut min_values, - )) - } else { - None - }; - - let statistics = Statistics { - num_rows: Some(num_rows as usize), - total_byte_size: Some(total_byte_size as usize), - column_statistics: column_stats, - is_exact: true, - }; - - Ok(FileAndSchema { - file: PartitionedFile { path, statistics }, - schema, - }) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use arrow::array::{ - BinaryArray, BooleanArray, Float32Array, Float64Array, Int32Array, - TimestampNanosecondArray, - }; - use arrow::record_batch::RecordBatch; - use futures::StreamExt; - - #[tokio::test] - async fn read_small_batches() -> Result<()> { - let table = load_table("alltypes_plain.parquet")?; - let projection = None; - let exec = table.scan(&projection, 2, &[], None).await?; - let stream = exec.execute(0).await?; - - let _ = stream - .map(|batch| { - let batch = batch.unwrap(); - assert_eq!(11, batch.num_columns()); - assert_eq!(2, batch.num_rows()); - }) - .fold(0, |acc, _| async move { acc + 1i32 }) - .await; - - // test metadata - assert_eq!(exec.statistics().num_rows, Some(8)); - assert_eq!(exec.statistics().total_byte_size, Some(671)); - - Ok(()) - } - - #[tokio::test] - async fn read_alltypes_plain_parquet() -> Result<()> { - let table = 
load_table("alltypes_plain.parquet")?; - - let x: Vec = table - .schema() - .fields() - .iter() - .map(|f| format!("{}: {:?}", f.name(), f.data_type())) - .collect(); - let y = x.join("\n"); - assert_eq!( - "id: Int32\n\ - bool_col: Boolean\n\ - tinyint_col: Int32\n\ - smallint_col: Int32\n\ - int_col: Int32\n\ - bigint_col: Int64\n\ - float_col: Float32\n\ - double_col: Float64\n\ - date_string_col: Binary\n\ - string_col: Binary\n\ - timestamp_col: Timestamp(Nanosecond, None)", - y - ); - - let projection = None; - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(11, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - Ok(()) - } - - #[tokio::test] - async fn read_bool_alltypes_plain_parquet() -> Result<()> { - let table = load_table("alltypes_plain.parquet")?; - let projection = Some(vec![1]); - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec = vec![]; - for i in 0..batch.num_rows() { - values.push(array.value(i)); - } - - assert_eq!( - "[true, false, true, false, true, false, true, false]", - format!("{:?}", values) - ); - - Ok(()) - } - - #[tokio::test] - async fn read_i32_alltypes_plain_parquet() -> Result<()> { - let table = load_table("alltypes_plain.parquet")?; - let projection = Some(vec![0]); - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec = vec![]; - for i in 0..batch.num_rows() { - values.push(array.value(i)); - } - - assert_eq!("[4, 5, 6, 7, 2, 3, 0, 1]", format!("{:?}", values)); - - Ok(()) - } - - #[tokio::test] - async fn read_i96_alltypes_plain_parquet() -> Result<()> { - let table = load_table("alltypes_plain.parquet")?; - let projection = Some(vec![10]); - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec = vec![]; - for i in 0..batch.num_rows() { - values.push(array.value(i)); - } - - assert_eq!("[1235865600000000000, 1235865660000000000, 1238544000000000000, 1238544060000000000, 1233446400000000000, 1233446460000000000, 1230768000000000000, 1230768060000000000]", format!("{:?}", values)); - - Ok(()) - } - - #[tokio::test] - async fn read_f32_alltypes_plain_parquet() -> Result<()> { - let table = load_table("alltypes_plain.parquet")?; - let projection = Some(vec![6]); - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec = vec![]; - for i in 0..batch.num_rows() { - values.push(array.value(i)); - } - - assert_eq!( - "[0.0, 1.1, 0.0, 1.1, 0.0, 1.1, 0.0, 1.1]", - format!("{:?}", values) - ); - - Ok(()) - } - - #[tokio::test] - async fn read_f64_alltypes_plain_parquet() -> Result<()> { - let table = load_table("alltypes_plain.parquet")?; - let projection = Some(vec![7]); - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec = vec![]; - for i in 
0..batch.num_rows() { - values.push(array.value(i)); - } - - assert_eq!( - "[0.0, 10.1, 0.0, 10.1, 0.0, 10.1, 0.0, 10.1]", - format!("{:?}", values) - ); - - Ok(()) - } - - #[tokio::test] - async fn read_binary_alltypes_plain_parquet() -> Result<()> { - let table = load_table("alltypes_plain.parquet")?; - let projection = Some(vec![9]); - let batch = get_first_batch(table, &projection).await?; - - assert_eq!(1, batch.num_columns()); - assert_eq!(8, batch.num_rows()); - - let array = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - let mut values: Vec<&str> = vec![]; - for i in 0..batch.num_rows() { - values.push(std::str::from_utf8(array.value(i)).unwrap()); - } - - assert_eq!( - "[\"0\", \"1\", \"0\", \"1\", \"0\", \"1\", \"0\", \"1\"]", - format!("{:?}", values) - ); - - Ok(()) - } - - fn load_table(name: &str) -> Result> { - let testdata = crate::test_util::parquet_test_data(); - let filename = format!("{}/{}", testdata, name); - let table = ParquetTable::try_new(&filename, 2)?; - Ok(Arc::new(table)) - } - - async fn get_first_batch( - table: Arc, - projection: &Option>, - ) -> Result { - let exec = table.scan(projection, 1024, &[], None).await?; - let mut it = exec.execute(0).await?; - it.next() - .await - .expect("should have received at least one batch") - .map_err(|e| e.into()) - } - - #[test] - fn combine_zero_filters() { - let result = combine_filters(&[]); - assert_eq!(result, None); - } - - #[test] - fn combine_one_filter() { - use crate::logical_plan::{binary_expr, col, lit, Operator}; - let filter = binary_expr(col("c1"), Operator::Lt, lit(1)); - let result = combine_filters(&[filter.clone()]); - assert_eq!(result, Some(filter)); - } - - #[test] - fn combine_multiple_filters() { - use crate::logical_plan::{and, binary_expr, col, lit, Operator}; - let filter1 = binary_expr(col("c1"), Operator::Lt, lit(1)); - let filter2 = binary_expr(col("c2"), Operator::Lt, lit(2)); - let filter3 = binary_expr(col("c3"), Operator::Lt, lit(3)); - let result = - combine_filters(&[filter1.clone(), filter2.clone(), filter3.clone()]); - assert_eq!(result, Some(and(and(filter1, filter2), filter3))); - } -} diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 7272e57c5693..23667f5a6ec8 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -21,6 +21,13 @@ use crate::{ catalog::{CatalogList, MemoryCatalogList}, information_schema::CatalogWithInformationSchema, }, + datasource::file_format::{ + avro::AvroFormat, + csv::CsvFormat, + parquet::{ParquetFormat, DEFAULT_PARQUET_EXTENSION}, + FileFormat, + }, + datasource::listing::{ListingOptions, ListingTable}, logical_plan::{PlanType, ToStringifiedPlan}, optimizer::eliminate_limit::EliminateLimit, physical_optimizer::{ @@ -41,16 +48,14 @@ use std::{ use futures::{StreamExt, TryStreamExt}; use tokio::task::{self, JoinHandle}; -use arrow::csv; +use arrow::{csv, datatypes::SchemaRef}; use crate::catalog::{ catalog::{CatalogProvider, MemoryCatalogProvider}, schema::{MemorySchemaProvider, SchemaProvider}, ResolvedTableReference, TableReference, }; -use crate::datasource::csv::CsvFile; use crate::datasource::object_store::{ObjectStore, ObjectStoreRegistry}; -use crate::datasource::parquet::ParquetTable; use crate::datasource::TableProvider; use crate::error::{DataFusionError, Result}; use crate::execution::dataframe_impl::DataFrameImpl; @@ -68,9 +73,6 @@ use crate::physical_optimizer::coalesce_batches::CoalesceBatches; use 
crate::physical_optimizer::merge_exec::AddCoalescePartitionsExec; use crate::physical_optimizer::repartition::Repartition; -use crate::datasource::avro::AvroFile; -use crate::physical_plan::avro::AvroReadOptions; -use crate::physical_plan::csv::CsvReadOptions; use crate::physical_plan::planner::DefaultPhysicalPlanner; use crate::physical_plan::udf::ScalarUDF; use crate::physical_plan::ExecutionPlan; @@ -86,6 +88,8 @@ use chrono::{DateTime, Utc}; use parquet::arrow::ArrowWriter; use parquet::file::properties::WriterProperties; +use super::options::{AvroReadOptions, CsvReadOptions}; + /// ExecutionContext is the main interface for executing queries with DataFusion. The context /// provides the following functionality: /// @@ -100,9 +104,10 @@ use parquet::file::properties::WriterProperties; /// ``` /// use datafusion::prelude::*; /// # use datafusion::error::Result; -/// # fn main() -> Result<()> { +/// # #[tokio::main] +/// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); -/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; +/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; /// let df = df.filter(col("a").lt_eq(col("b")))? /// .aggregate(vec![col("a")], vec![min(col("b"))])? /// .limit(100)?; @@ -117,10 +122,11 @@ use parquet::file::properties::WriterProperties; /// use datafusion::prelude::*; /// /// # use datafusion::error::Result; -/// # fn main() -> Result<()> { +/// # #[tokio::main] +/// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); -/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?; -/// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?; +/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?; +/// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?; /// # Ok(()) /// # } /// ``` @@ -175,7 +181,10 @@ impl ExecutionContext { } /// Creates a dataframe that will execute a SQL query. - pub fn sql(&mut self, sql: &str) -> Result> { + /// + /// This method is `async` because queries of type `CREATE EXTERNAL TABLE` + /// might require the schema to be inferred. 
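To illustrate the behaviour described above (an editorial sketch, not part of the patch): a `CREATE EXTERNAL TABLE` statement without an explicit column list now resolves its schema from the files while the returned future is awaited. The statement text and path below are assumed example values.

use datafusion::error::Result;
use datafusion::prelude::*;

async fn create_external_table(ctx: &mut ExecutionContext) -> Result<()> {
    // Schema inference against the object store happens during this `.await`.
    ctx.sql(
        "CREATE EXTERNAL TABLE example \
         STORED AS CSV WITH HEADER ROW \
         LOCATION 'tests/example.csv'",
    )
    .await?;
    Ok(())
}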
+ pub async fn sql(&mut self, sql: &str) -> Result> { let plan = self.create_logical_plan(sql)?; match plan { LogicalPlan::CreateExternalTable { @@ -184,32 +193,49 @@ impl ExecutionContext { ref location, ref file_type, ref has_header, - } => match file_type { - FileType::CSV => { - let mut options = CsvReadOptions::new().has_header(*has_header); - let tmp_schema = schema.as_ref().to_owned().into(); - if !schema.fields().is_empty() { - options = options.schema(&tmp_schema); + } => { + let file_format = match file_type { + FileType::CSV => { + Ok(Arc::new(CsvFormat::default().with_has_header(*has_header)) + as Arc) } - self.register_csv(name, location, options)?; - let plan = LogicalPlanBuilder::empty(false).build()?; - Ok(Arc::new(DataFrameImpl::new(self.state.clone(), &plan))) - } - FileType::Parquet => { - self.register_parquet(name, location)?; - let plan = LogicalPlanBuilder::empty(false).build()?; - Ok(Arc::new(DataFrameImpl::new(self.state.clone(), &plan))) - } - FileType::Avro => { - self.register_avro(name, location, AvroReadOptions::default())?; - let plan = LogicalPlanBuilder::empty(false).build()?; - Ok(Arc::new(DataFrameImpl::new(self.state.clone(), &plan))) - } - _ => Err(DataFusionError::NotImplemented(format!( - "Unsupported file type {:?}.", - file_type - ))), - }, + FileType::Parquet => { + Ok(Arc::new(ParquetFormat::default()) as Arc) + } + FileType::Avro => { + Ok(Arc::new(AvroFormat::default()) as Arc) + } + _ => Err(DataFusionError::NotImplemented(format!( + "Unsupported file type {:?}.", + file_type + ))), + }?; + + let options = ListingOptions { + format: file_format, + collect_stat: false, + file_extension: String::new(), + target_partitions: self + .state + .lock() + .unwrap() + .config + .target_partitions, + partitions: vec![], + }; + + // TODO make schema in CreateExternalTable optional instead of empty + let provided_schema = if schema.fields().is_empty() { + None + } else { + Some(Arc::new(schema.as_ref().to_owned().into())) + }; + + self.register_listing_table(name, location, options, provided_schema) + .await?; + let plan = LogicalPlanBuilder::empty(false).build()?; + Ok(Arc::new(DataFrameImpl::new(self.state.clone(), &plan))) + } plan => Ok(Arc::new(DataFrameImpl::new( self.state.clone(), @@ -281,42 +307,67 @@ impl ExecutionContext { /// Creates a DataFrame for reading an Avro data source. - pub fn read_avro( + pub async fn read_avro( &mut self, - filename: impl Into, - options: AvroReadOptions, + uri: impl Into, + options: AvroReadOptions<'_>, ) -> Result> { + let uri: String = uri.into(); + let (object_store, path) = self.object_store(&uri)?; Ok(Arc::new(DataFrameImpl::new( self.state.clone(), - &LogicalPlanBuilder::scan_avro(filename, options, None)?.build()?, + &LogicalPlanBuilder::scan_avro( + object_store, + path, + options, + None, + self.state.lock().unwrap().config.target_partitions, + ) + .await? + .build()?, ))) } /// Creates a DataFrame for reading a CSV data source. - pub fn read_csv( + pub async fn read_csv( &mut self, - filename: impl Into, - options: CsvReadOptions, + uri: impl Into, + options: CsvReadOptions<'_>, ) -> Result> { + let uri: String = uri.into(); + let (object_store, path) = self.object_store(&uri)?; Ok(Arc::new(DataFrameImpl::new( self.state.clone(), - &LogicalPlanBuilder::scan_csv(filename, options, None)?.build()?, + &LogicalPlanBuilder::scan_csv( + object_store, + path, + options, + None, + self.state.lock().unwrap().config.target_partitions, + ) + .await? 
+ .build()?, ))) } /// Creates a DataFrame for reading a Parquet data source. - pub fn read_parquet( + pub async fn read_parquet( &mut self, - filename: impl Into, + uri: impl Into, ) -> Result> { + let uri: String = uri.into(); + let (object_store, path) = self.object_store(&uri)?; + let logical_plan = LogicalPlanBuilder::scan_parquet( + object_store, + path, + None, + self.state.lock().unwrap().config.target_partitions, + ) + .await? + .build()?; Ok(Arc::new(DataFrameImpl::new( self.state.clone(), - &LogicalPlanBuilder::scan_parquet( - filename, - None, - self.state.lock().unwrap().config.target_partitions, - )? - .build()?, + &logical_plan, ))) } @@ -331,39 +382,88 @@ impl ExecutionContext { ))) } + /// Registers a table that uses the listing feature of the object store to + /// find the files to be processed + /// This is async because it might need to resolve the schema. + pub async fn register_listing_table<'a>( + &'a mut self, + name: &'a str, + uri: &'a str, + options: ListingOptions, + provided_schema: Option, + ) -> Result<()> { + let (object_store, path) = self.object_store(uri)?; + let resolved_schema = match provided_schema { + None => { + options + .infer_schema(Arc::clone(&object_store), path) + .await? + } + Some(s) => s, + }; + let table = + ListingTable::new(object_store, path.to_owned(), resolved_schema, options); + self.register_table(name, Arc::new(table))?; + Ok(()) + } + /// Registers a CSV data source so that it can be referenced from SQL statements /// executed against this context. - pub fn register_csv( + pub async fn register_csv( &mut self, name: &str, - filename: &str, - options: CsvReadOptions, + uri: &str, + options: CsvReadOptions<'_>, ) -> Result<()> { - self.register_table(name, Arc::new(CsvFile::try_new(filename, options)?))?; + let listing_options = options + .to_listing_options(self.state.lock().unwrap().config.target_partitions); + + self.register_listing_table( + name, + uri, + listing_options, + options.schema.map(|s| Arc::new(s.to_owned())), + ) + .await?; + Ok(()) } /// Registers a Parquet data source so that it can be referenced from SQL statements /// executed against this context. - pub fn register_parquet(&mut self, name: &str, filename: &str) -> Result<()> { - let table = { + pub async fn register_parquet(&mut self, name: &str, uri: &str) -> Result<()> { + let (target_partitions, enable_pruning) = { let m = self.state.lock().unwrap(); - ParquetTable::try_new(filename, m.config.target_partitions)? - .with_enable_pruning(m.config.parquet_pruning) + (m.config.target_partitions, m.config.parquet_pruning) }; - self.register_table(name, Arc::new(table))?; + let file_format = ParquetFormat::default().with_enable_pruning(enable_pruning); + + let listing_options = ListingOptions { + format: Arc::new(file_format), + collect_stat: true, + file_extension: DEFAULT_PARQUET_EXTENSION.to_owned(), + target_partitions, + partitions: vec![], + }; + + self.register_listing_table(name, uri, listing_options, None) + .await?; Ok(()) } /// Registers an Avro data source so that it can be referenced from SQL statements /// executed against this context. 
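For readers following the new registration path, here is a sketch (an editorial illustration, not part of the patch) that builds `ListingOptions` by hand and calls `register_listing_table` directly, using the field names shown in the hunks above; the table name, directory URI, extension and partition count are arbitrary example values.

use std::sync::Arc;

use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::ListingOptions;
use datafusion::error::Result;
use datafusion::prelude::ExecutionContext;

async fn register_parquet_dir(ctx: &mut ExecutionContext) -> Result<()> {
    let options = ListingOptions {
        format: Arc::new(ParquetFormat::default()),
        file_extension: ".parquet".to_owned(),
        partitions: vec![],
        target_partitions: 8,
        collect_stat: true,
    };
    // Passing `None` lets the table infer its schema from the listed files.
    ctx.register_listing_table("my_table", "file:///data/parquet_dir", options, None)
        .await
}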
- pub fn register_avro( + pub async fn register_avro( &mut self, name: &str, - filename: &str, - options: AvroReadOptions, + uri: &str, + options: AvroReadOptions<'_>, ) -> Result<()> { - self.register_table(name, Arc::new(AvroFile::try_new(filename, options)?))?; + let listing_options = options + .to_listing_options(self.state.lock().unwrap().config.target_partitions); + + self.register_listing_table(name, uri, listing_options, options.schema) + .await?; Ok(()) } @@ -417,8 +517,15 @@ impl ExecutionContext { } /// Retrieves a `ObjectStore` instance by scheme - pub fn object_store(&self, scheme: &str) -> Option> { - self.state.lock().unwrap().object_store_registry.get(scheme) + pub fn object_store<'a>( + &self, + uri: &'a str, + ) -> Result<(Arc, &'a str)> { + self.state + .lock() + .unwrap() + .object_store_registry + .get_by_uri(uri) } /// Registers a table using a custom `TableProvider` so that @@ -1176,7 +1283,7 @@ mod tests { async fn create_variable_expr() -> Result<()> { let tmp_dir = TempDir::new()?; let partition_count = 4; - let mut ctx = create_ctx(&tmp_dir, partition_count)?; + let mut ctx = create_ctx(&tmp_dir, partition_count).await?; let variable_provider = test::variable::SystemVar::new(); ctx.register_variable(VarType::System, Arc::new(variable_provider)); @@ -1205,7 +1312,7 @@ mod tests { async fn register_deregister() -> Result<()> { let tmp_dir = TempDir::new()?; let partition_count = 4; - let mut ctx = create_ctx(&tmp_dir, partition_count)?; + let mut ctx = create_ctx(&tmp_dir, partition_count).await?; let provider = test::create_table_dual(); ctx.register_table("dual", provider)?; @@ -1220,7 +1327,7 @@ mod tests { async fn parallel_query_with_filter() -> Result<()> { let tmp_dir = TempDir::new()?; let partition_count = 4; - let ctx = create_ctx(&tmp_dir, partition_count)?; + let ctx = create_ctx(&tmp_dir, partition_count).await?; let logical_plan = ctx.create_logical_plan("SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3")?; @@ -1275,7 +1382,7 @@ mod tests { async fn projection_on_table_scan() -> Result<()> { let tmp_dir = TempDir::new()?; let partition_count = 4; - let ctx = create_ctx(&tmp_dir, partition_count)?; + let ctx = create_ctx(&tmp_dir, partition_count).await?; let table = ctx.table("test")?; let logical_plan = LogicalPlanBuilder::from(table.to_logical_plan()) @@ -1316,7 +1423,7 @@ mod tests { #[tokio::test] async fn preserve_nullability_on_projection() -> Result<()> { let tmp_dir = TempDir::new()?; - let ctx = create_ctx(&tmp_dir, 1)?; + let ctx = create_ctx(&tmp_dir, 1).await?; let schema: Schema = ctx.table("test").unwrap().schema().clone().into(); assert!(!schema.field_with_name("c1")?.is_nullable()); @@ -1910,7 +2017,7 @@ mod tests { #[tokio::test] async fn aggregate_timestamps_sum() -> Result<()> { let tmp_dir = TempDir::new()?; - let mut ctx = create_ctx(&tmp_dir, 1)?; + let mut ctx = create_ctx(&tmp_dir, 1).await?; ctx.register_table("t", test::table_with_timestamps()) .unwrap(); @@ -1929,7 +2036,7 @@ mod tests { #[tokio::test] async fn aggregate_timestamps_count() -> Result<()> { let tmp_dir = TempDir::new()?; - let mut ctx = create_ctx(&tmp_dir, 1)?; + let mut ctx = create_ctx(&tmp_dir, 1).await?; ctx.register_table("t", test::table_with_timestamps()) .unwrap(); @@ -1955,7 +2062,7 @@ mod tests { #[tokio::test] async fn aggregate_timestamps_min() -> Result<()> { let tmp_dir = TempDir::new()?; - let mut ctx = create_ctx(&tmp_dir, 1)?; + let mut ctx = create_ctx(&tmp_dir, 1).await?; ctx.register_table("t", test::table_with_timestamps()) .unwrap(); @@ 
-1981,7 +2088,7 @@ mod tests { #[tokio::test] async fn aggregate_timestamps_max() -> Result<()> { let tmp_dir = TempDir::new()?; - let mut ctx = create_ctx(&tmp_dir, 1)?; + let mut ctx = create_ctx(&tmp_dir, 1).await?; ctx.register_table("t", test::table_with_timestamps()) .unwrap(); @@ -2007,7 +2114,7 @@ mod tests { #[tokio::test] async fn aggregate_timestamps_avg() -> Result<()> { let tmp_dir = TempDir::new()?; - let mut ctx = create_ctx(&tmp_dir, 1)?; + let mut ctx = create_ctx(&tmp_dir, 1).await?; ctx.register_table("t", test::table_with_timestamps()) .unwrap(); @@ -2063,7 +2170,7 @@ mod tests { #[tokio::test] async fn join_timestamp() -> Result<()> { let tmp_dir = TempDir::new()?; - let mut ctx = create_ctx(&tmp_dir, 1)?; + let mut ctx = create_ctx(&tmp_dir, 1).await?; ctx.register_table("t", test::table_with_timestamps()) .unwrap(); @@ -2190,7 +2297,8 @@ mod tests { "test", tmp_dir.path().to_str().unwrap(), CsvReadOptions::new().schema(&schema).has_header(false), - )?; + ) + .await?; let results = plan_and_collect( &mut ctx, @@ -2420,7 +2528,8 @@ mod tests { "test", tmp_dir.path().to_str().unwrap(), CsvReadOptions::new().schema(&schema).has_header(false), - )?; + ) + .await?; let results = plan_and_collect( &mut ctx, @@ -2507,7 +2616,7 @@ mod tests { #[tokio::test] async fn aggregate_with_alias() -> Result<()> { let tmp_dir = TempDir::new()?; - let ctx = create_ctx(&tmp_dir, 1)?; + let ctx = create_ctx(&tmp_dir, 1).await?; let schema = Arc::new(Schema::new(vec![ Field::new("c1", DataType::Utf8, false), @@ -2533,7 +2642,7 @@ mod tests { #[tokio::test] async fn limit() -> Result<()> { let tmp_dir = TempDir::new()?; - let mut ctx = create_ctx(&tmp_dir, 1)?; + let mut ctx = create_ctx(&tmp_dir, 1).await?; ctx.register_table("t", test::table_with_sequence(1, 1000).unwrap()) .unwrap(); @@ -2573,7 +2682,7 @@ mod tests { #[tokio::test] async fn limit_multi_partitions() -> Result<()> { let tmp_dir = TempDir::new()?; - let mut ctx = create_ctx(&tmp_dir, 1)?; + let mut ctx = create_ctx(&tmp_dir, 1).await?; let partitions = vec![ vec![test::make_partition(0)], @@ -2841,7 +2950,7 @@ mod tests { async fn write_csv_results() -> Result<()> { // create partitioned input file and context let tmp_dir = TempDir::new()?; - let mut ctx = create_ctx(&tmp_dir, 4)?; + let mut ctx = create_ctx(&tmp_dir, 4).await?; // execute a simple query and write the results to CSV let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out"; @@ -2857,8 +2966,10 @@ mod tests { // register each partition as well as the top level dir let csv_read_option = CsvReadOptions::new().schema(&schema); - ctx.register_csv("part0", &format!("{}/part-0.csv", out_dir), csv_read_option)?; - ctx.register_csv("allparts", &out_dir, csv_read_option)?; + ctx.register_csv("part0", &format!("{}/part-0.csv", out_dir), csv_read_option) + .await?; + ctx.register_csv("allparts", &out_dir, csv_read_option) + .await?; let part0 = plan_and_collect(&mut ctx, "SELECT c1, c2 FROM part0").await?; let allparts = plan_and_collect(&mut ctx, "SELECT c1, c2 FROM allparts").await?; @@ -2876,7 +2987,7 @@ mod tests { async fn write_parquet_results() -> Result<()> { // create partitioned input file and context let tmp_dir = TempDir::new()?; - let mut ctx = create_ctx(&tmp_dir, 4)?; + let mut ctx = create_ctx(&tmp_dir, 4).await?; // execute a simple query and write the results to CSV let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out"; @@ -2886,11 +2997,15 @@ mod tests { let mut ctx = ExecutionContext::new(); // register each partition as 
well as the top level dir - ctx.register_parquet("part0", &format!("{}/part-0.parquet", out_dir))?; - ctx.register_parquet("part1", &format!("{}/part-1.parquet", out_dir))?; - ctx.register_parquet("part2", &format!("{}/part-2.parquet", out_dir))?; - ctx.register_parquet("part3", &format!("{}/part-3.parquet", out_dir))?; - ctx.register_parquet("allparts", &out_dir)?; + ctx.register_parquet("part0", &format!("{}/part-0.parquet", out_dir)) + .await?; + ctx.register_parquet("part1", &format!("{}/part-1.parquet", out_dir)) + .await?; + ctx.register_parquet("part2", &format!("{}/part-2.parquet", out_dir)) + .await?; + ctx.register_parquet("part3", &format!("{}/part-3.parquet", out_dir)) + .await?; + ctx.register_parquet("allparts", &out_dir).await?; let part0 = plan_and_collect(&mut ctx, "SELECT c1, c2 FROM part0").await?; let allparts = plan_and_collect(&mut ctx, "SELECT c1, c2 FROM allparts").await?; @@ -2919,7 +3034,8 @@ mod tests { CsvReadOptions::new() .schema(&schema) .file_extension(file_extension), - )?; + ) + .await?; let results = plan_and_collect(&mut ctx, "SELECT SUM(c1), SUM(c2), COUNT(*) FROM test") .await?; @@ -2943,7 +3059,7 @@ mod tests { // environment. Usecase is for concurrent planing. let tmp_dir = TempDir::new()?; let partition_count = 4; - let ctx = Arc::new(Mutex::new(create_ctx(&tmp_dir, partition_count)?)); + let ctx = Arc::new(Mutex::new(create_ctx(&tmp_dir, partition_count).await?)); let threads: Vec>> = (0..2) .map(|_| ctx.clone()) @@ -2972,7 +3088,9 @@ mod tests { let opt_plan1 = ctx.optimize(&plan1)?; - let plan2 = ctx.sql("SELECT * FROM (SELECT 1) AS one WHERE TRUE AND TRUE")?; + let plan2 = ctx + .sql("SELECT * FROM (SELECT 1) AS one WHERE TRUE AND TRUE") + .await?; assert_eq!( format!("{:?}", opt_plan1), @@ -3174,7 +3292,7 @@ mod tests { ExecutionConfig::new().with_query_planner(Arc::new(MyQueryPlanner {})), ); - let df = ctx.sql("SELECT 1")?; + let df = ctx.sql("SELECT 1").await?; df.collect().await.expect_err("query not supported"); Ok(()) } @@ -3662,7 +3780,7 @@ mod tests { )); assert!(matches!( - ctx.sql("select * from datafusion.public.test"), + ctx.sql("select * from datafusion.public.test").await, Err(DataFusionError::Plan(_)) )); @@ -3876,6 +3994,7 @@ mod tests { let mut ctx = ExecutionContext::new(); let df = ctx .read_parquet(table_dir.to_str().unwrap().to_string()) + .await .unwrap(); let result = df.collect().await.unwrap(); @@ -3928,13 +4047,13 @@ mod tests { ctx: &mut ExecutionContext, sql: &str, ) -> Result> { - ctx.sql(sql)?.collect().await + ctx.sql(sql).await?.collect().await } /// Execute SQL and return results async fn execute(sql: &str, partition_count: usize) -> Result> { let tmp_dir = TempDir::new()?; - let mut ctx = create_ctx(&tmp_dir, partition_count)?; + let mut ctx = create_ctx(&tmp_dir, partition_count).await?; plan_and_collect(&mut ctx, sql).await } @@ -3994,7 +4113,10 @@ mod tests { } /// Generate a partitioned CSV file and register it with an execution context - fn create_ctx(tmp_dir: &TempDir, partition_count: usize) -> Result { + async fn create_ctx( + tmp_dir: &TempDir, + partition_count: usize, + ) -> Result { let mut ctx = ExecutionContext::with_config( ExecutionConfig::new().with_target_partitions(8), ); @@ -4006,7 +4128,8 @@ mod tests { "test", tmp_dir.path().to_str().unwrap(), CsvReadOptions::new().schema(&schema), - )?; + ) + .await?; Ok(ctx) } diff --git a/datafusion/src/execution/dataframe_impl.rs b/datafusion/src/execution/dataframe_impl.rs index 9971955a6db0..18a558ef7114 100644 --- 
a/datafusion/src/execution/dataframe_impl.rs +++ b/datafusion/src/execution/dataframe_impl.rs @@ -230,22 +230,23 @@ mod tests { use std::vec; use super::*; + use crate::execution::options::CsvReadOptions; use crate::logical_plan::*; use crate::physical_plan::functions::Volatility; + use crate::physical_plan::ColumnarValue; use crate::{assert_batches_sorted_eq, execution::context::ExecutionContext}; - use crate::{datasource::csv::CsvReadOptions, physical_plan::ColumnarValue}; use crate::{physical_plan::functions::ScalarFunctionImplementation, test}; use arrow::datatypes::DataType; - #[test] - fn select_columns() -> Result<()> { + #[tokio::test] + async fn select_columns() -> Result<()> { // build plan using Table API - let t = test_table()?; + let t = test_table().await?; let t2 = t.select_columns(&["c1", "c2", "c11"])?; let plan = t2.to_logical_plan(); // build query using SQL - let sql_plan = create_plan("SELECT c1, c2, c11 FROM aggregate_test_100")?; + let sql_plan = create_plan("SELECT c1, c2, c11 FROM aggregate_test_100").await?; // the two plans should be identical assert_same_plan(&plan, &sql_plan); @@ -253,15 +254,15 @@ mod tests { Ok(()) } - #[test] - fn select_expr() -> Result<()> { + #[tokio::test] + async fn select_expr() -> Result<()> { // build plan using Table API - let t = test_table()?; + let t = test_table().await?; let t2 = t.select(vec![col("c1"), col("c2"), col("c11")])?; let plan = t2.to_logical_plan(); // build query using SQL - let sql_plan = create_plan("SELECT c1, c2, c11 FROM aggregate_test_100")?; + let sql_plan = create_plan("SELECT c1, c2, c11 FROM aggregate_test_100").await?; // the two plans should be identical assert_same_plan(&plan, &sql_plan); @@ -272,7 +273,7 @@ mod tests { #[tokio::test] async fn aggregate() -> Result<()> { // build plan using DataFrame API - let df = test_table()?; + let df = test_table().await?; let group_expr = vec![col("c1")]; let aggr_expr = vec![ min(col("c12")), @@ -305,8 +306,10 @@ mod tests { #[tokio::test] async fn join() -> Result<()> { - let left = test_table()?.select_columns(&["c1", "c2"])?; - let right = test_table_with_name("c2")?.select_columns(&["c1", "c3"])?; + let left = test_table().await?.select_columns(&["c1", "c2"])?; + let right = test_table_with_name("c2") + .await? + .select_columns(&["c1", "c3"])?; let left_rows = left.collect().await?; let right_rows = right.collect().await?; let join = left.join(right, JoinType::Inner, &["c1"], &["c1"])?; @@ -317,16 +320,16 @@ mod tests { Ok(()) } - #[test] - fn limit() -> Result<()> { + #[tokio::test] + async fn limit() -> Result<()> { // build query using Table API - let t = test_table()?; + let t = test_table().await?; let t2 = t.select_columns(&["c1", "c2", "c11"])?.limit(10)?; let plan = t2.to_logical_plan(); // build query using SQL let sql_plan = - create_plan("SELECT c1, c2, c11 FROM aggregate_test_100 LIMIT 10")?; + create_plan("SELECT c1, c2, c11 FROM aggregate_test_100 LIMIT 10").await?; // the two plans should be identical assert_same_plan(&plan, &sql_plan); @@ -334,10 +337,10 @@ mod tests { Ok(()) } - #[test] - fn explain() -> Result<()> { + #[tokio::test] + async fn explain() -> Result<()> { // build query using Table API - let df = test_table()?; + let df = test_table().await?; let df = df .select_columns(&["c1", "c2", "c11"])? .limit(10)? 
@@ -346,7 +349,8 @@ mod tests { // build query using SQL let sql_plan = - create_plan("EXPLAIN SELECT c1, c2, c11 FROM aggregate_test_100 LIMIT 10")?; + create_plan("EXPLAIN SELECT c1, c2, c11 FROM aggregate_test_100 LIMIT 10") + .await?; // the two plans should be identical assert_same_plan(&plan, &sql_plan); @@ -354,10 +358,10 @@ mod tests { Ok(()) } - #[test] - fn registry() -> Result<()> { + #[tokio::test] + async fn registry() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx, "aggregate_test_100")?; + register_aggregate_csv(&mut ctx, "aggregate_test_100").await?; // declare the udf let my_fn: ScalarFunctionImplementation = @@ -392,7 +396,7 @@ mod tests { #[tokio::test] async fn sendable() { - let df = test_table().unwrap(); + let df = test_table().await.unwrap(); // dataframes should be sendable between threads/tasks let task = tokio::task::spawn(async move { df.select_columns(&["c1"]) @@ -407,23 +411,23 @@ mod tests { } /// Create a logical plan from a SQL query - fn create_plan(sql: &str) -> Result { + async fn create_plan(sql: &str) -> Result { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx, "aggregate_test_100")?; + register_aggregate_csv(&mut ctx, "aggregate_test_100").await?; ctx.create_logical_plan(sql) } - fn test_table_with_name(name: &str) -> Result> { + async fn test_table_with_name(name: &str) -> Result> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx, name)?; + register_aggregate_csv(&mut ctx, name).await?; ctx.table(name) } - fn test_table() -> Result> { - test_table_with_name("aggregate_test_100") + async fn test_table() -> Result> { + test_table_with_name("aggregate_test_100").await } - fn register_aggregate_csv( + async fn register_aggregate_csv( ctx: &mut ExecutionContext, table_name: &str, ) -> Result<()> { @@ -433,7 +437,8 @@ mod tests { table_name, &format!("{}/csv/aggregate_test_100.csv", testdata), CsvReadOptions::new().schema(schema.as_ref()), - )?; + ) + .await?; Ok(()) } } diff --git a/datafusion/src/execution/mod.rs b/datafusion/src/execution/mod.rs index ff44dd43f834..e353a3160b8d 100644 --- a/datafusion/src/execution/mod.rs +++ b/datafusion/src/execution/mod.rs @@ -19,3 +19,4 @@ pub mod context; pub mod dataframe_impl; +pub mod options; diff --git a/datafusion/src/execution/options.rs b/datafusion/src/execution/options.rs new file mode 100644 index 000000000000..f0ed6f24c325 --- /dev/null +++ b/datafusion/src/execution/options.rs @@ -0,0 +1,173 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
User facing options for the file formats readers + +use std::sync::Arc; + +use arrow::datatypes::{Schema, SchemaRef}; + +use crate::datasource::{ + file_format::{avro::AvroFormat, csv::CsvFormat}, + listing::ListingOptions, +}; + +/// CSV file read option +#[derive(Copy, Clone)] +pub struct CsvReadOptions<'a> { + /// Does the CSV file have a header? + /// + /// If schema inference is run on a file with no headers, default column names + /// are created. + pub has_header: bool, + /// An optional column delimiter. Defaults to `b','`. + pub delimiter: u8, + /// An optional schema representing the CSV files. If None, CSV reader will try to infer it + /// based on data in file. + pub schema: Option<&'a Schema>, + /// Max number of rows to read from CSV files for schema inference if needed. Defaults to 1000. + pub schema_infer_max_records: usize, + /// File extension; only files with this extension are selected for data input. + /// Defaults to ".csv". + pub file_extension: &'a str, +} + +impl<'a> CsvReadOptions<'a> { + /// Create a CSV read option with default presets + pub fn new() -> Self { + Self { + has_header: true, + schema: None, + schema_infer_max_records: 1000, + delimiter: b',', + file_extension: ".csv", + } + } + + /// Configure has_header setting + pub fn has_header(mut self, has_header: bool) -> Self { + self.has_header = has_header; + self + } + + /// Specify delimiter to use for CSV read + pub fn delimiter(mut self, delimiter: u8) -> Self { + self.delimiter = delimiter; + self + } + + /// Specify the file extension for CSV file selection + pub fn file_extension(mut self, file_extension: &'a str) -> Self { + self.file_extension = file_extension; + self + } + + /// Configure delimiter setting with Option, None value will be ignored + pub fn delimiter_option(mut self, delimiter: Option) -> Self { + if let Some(d) = delimiter { + self.delimiter = d; + } + self + } + + /// Specify schema to use for CSV read + pub fn schema(mut self, schema: &'a Schema) -> Self { + self.schema = Some(schema); + self + } + + /// Configure number of max records to read for schema inference + pub fn schema_infer_max_records(mut self, max_records: usize) -> Self { + self.schema_infer_max_records = max_records; + self + } + + /// Helper to convert these user facing options to `ListingTable` options + pub fn to_listing_options(&self, target_partitions: usize) -> ListingOptions { + let file_format = CsvFormat::default() + .with_has_header(self.has_header) + .with_delimiter(self.delimiter) + .with_schema_infer_max_rec(Some(self.schema_infer_max_records)); + + ListingOptions { + format: Arc::new(file_format), + collect_stat: false, + file_extension: self.file_extension.to_owned(), + target_partitions, + partitions: vec![], + } + } +} + +/// Avro read options +#[derive(Clone)] +pub struct AvroReadOptions<'a> { + /// The data source schema. + pub schema: Option, + + /// File extension; only files with this extension are selected for data input. + /// Defaults to ".avro". 
+ pub file_extension: &'a str, +} + +impl<'a> Default for AvroReadOptions<'a> { + fn default() -> Self { + Self { + schema: None, + file_extension: ".avro", + } + } +} + +impl<'a> AvroReadOptions<'a> { + /// Helper to convert these user facing options to `ListingTable` options + pub fn to_listing_options(&self, target_partitions: usize) -> ListingOptions { + let file_format = AvroFormat::default(); + + ListingOptions { + format: Arc::new(file_format), + collect_stat: false, + file_extension: self.file_extension.to_owned(), + target_partitions, + partitions: vec![], + } + } +} + +/// Line-delimited JSON read options +#[derive(Clone)] +pub struct NdJsonReadOptions<'a> { + /// The data source schema. + pub schema: Option, + + /// Max number of rows to read from CSV files for schema inference if needed. Defaults to 1000. + pub schema_infer_max_records: usize, + + /// File extension; only files with this extension are selected for data input. + /// Defaults to ".json". + pub file_extension: &'a str, +} + +impl<'a> Default for NdJsonReadOptions<'a> { + fn default() -> Self { + Self { + schema: None, + schema_infer_max_records: 1000, + file_extension: ".json", + } + } +} diff --git a/datafusion/src/lib.rs b/datafusion/src/lib.rs index adaca114d2d6..a4a5a88d16b5 100644 --- a/datafusion/src/lib.rs +++ b/datafusion/src/lib.rs @@ -46,7 +46,7 @@ //! let mut ctx = ExecutionContext::new(); //! //! // create the dataframe -//! let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; +//! let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; //! //! // create a plan //! let df = df.filter(col("a").lt_eq(col("b")))? @@ -83,10 +83,10 @@ //! # async fn main() -> Result<()> { //! let mut ctx = ExecutionContext::new(); //! -//! ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?; +//! ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?; //! //! // create a plan -//! let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?; +//! let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?; //! //! // execute the plan //! 
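// An illustrative sketch (not a hunk of this patch) of how the read options above
// feed the new listing-based provider: `to_listing_options` captures the
// format-specific settings, the schema is inferred through an ObjectStore, and a
// ListingTable ties the three together. The path and partition count are
// assumptions for illustration; CsvReadOptions comes from execution::options,
// ListingTable/ListingOptions from datasource::listing, and LocalFileSystem from
// datasource::object_store::local.
async fn sketch_csv_listing_table() -> Result<Arc<ListingTable>> {
    let object_store: Arc<dyn ObjectStore> = Arc::new(LocalFileSystem {});
    let options = CsvReadOptions::new().has_header(true).delimiter(b',');
    let listing_options = options.to_listing_options(8 /* target_partitions */);
    let path = "tests/example.csv".to_string();
    // no schema was supplied, so infer it from the files behind `path`
    let schema = listing_options
        .infer_schema(Arc::clone(&object_store), &path)
        .await?;
    Ok(Arc::new(ListingTable::new(
        object_store,
        path,
        schema,
        listing_options,
    )))
}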
let results: Vec = df.collect().await?; diff --git a/datafusion/src/logical_plan/builder.rs b/datafusion/src/logical_plan/builder.rs index d4f941a996d2..3a1d12735658 100644 --- a/datafusion/src/logical_plan/builder.rs +++ b/datafusion/src/logical_plan/builder.rs @@ -27,21 +27,23 @@ use arrow::{ record_batch::RecordBatch, }; -use crate::error::{DataFusionError, Result}; -use crate::{datasource::TableProvider, logical_plan::plan::ToStringifiedPlan}; -use crate::{ - datasource::{empty::EmptyTable, parquet::ParquetTable, CsvFile, MemTable}, - prelude::CsvReadOptions, +use crate::datasource::{ + empty::EmptyTable, + file_format::parquet::{ParquetFormat, DEFAULT_PARQUET_EXTENSION}, + listing::{ListingOptions, ListingTable}, + object_store::ObjectStore, + MemTable, TableProvider, }; +use crate::error::{DataFusionError, Result}; +use crate::logical_plan::plan::ToStringifiedPlan; +use crate::prelude::*; use super::dfschema::ToDFSchema; use super::{exprlist_to_fields, Expr, JoinConstraint, JoinType, LogicalPlan, PlanType}; -use crate::datasource::avro::AvroFile; use crate::logical_plan::{ columnize_expr, normalize_col, normalize_cols, Column, DFField, DFSchema, DFSchemaRef, Partitioning, }; -use crate::physical_plan::avro::AvroReadOptions; /// Default table name for unnamed table pub const UNNAMED_TABLE: &str = "?table?"; @@ -120,66 +122,146 @@ impl LogicalPlanBuilder { } /// Scan a CSV data source - pub fn scan_csv( + pub async fn scan_csv( + object_store: Arc, path: impl Into, - options: CsvReadOptions, + options: CsvReadOptions<'_>, projection: Option>, + target_partitions: usize, ) -> Result { let path = path.into(); - Self::scan_csv_with_name(path.clone(), options, projection, path) + Self::scan_csv_with_name( + object_store, + path.clone(), + options, + projection, + path, + target_partitions, + ) + .await } /// Scan a CSV data source and register it with a given table name - pub fn scan_csv_with_name( + pub async fn scan_csv_with_name( + object_store: Arc, path: impl Into, - options: CsvReadOptions, + options: CsvReadOptions<'_>, projection: Option>, table_name: impl Into, + target_partitions: usize, ) -> Result { - let provider = Arc::new(CsvFile::try_new(path, options)?); - Self::scan(table_name, provider, projection) + let listing_options = options.to_listing_options(target_partitions); + + let path: String = path.into(); + + let resolved_schema = match options.schema { + Some(s) => Arc::new(s.to_owned()), + None => { + listing_options + .infer_schema(Arc::clone(&object_store), &path) + .await? 
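                    // note: with no user-supplied schema, the format's inference runs
                    // over the files resolved from `path` on the object store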
+ } + }; + let provider = + ListingTable::new(object_store, path, resolved_schema, listing_options); + + Self::scan(table_name, Arc::new(provider), projection) } /// Scan a Parquet data source - pub fn scan_parquet( + pub async fn scan_parquet( + object_store: Arc, path: impl Into, projection: Option>, target_partitions: usize, ) -> Result { let path = path.into(); - Self::scan_parquet_with_name(path.clone(), projection, target_partitions, path) + Self::scan_parquet_with_name( + object_store, + path.clone(), + projection, + target_partitions, + path, + ) + .await } /// Scan a Parquet data source and register it with a given table name - pub fn scan_parquet_with_name( + pub async fn scan_parquet_with_name( + object_store: Arc, path: impl Into, projection: Option>, target_partitions: usize, table_name: impl Into, ) -> Result { - let provider = Arc::new(ParquetTable::try_new(path, target_partitions)?); - Self::scan(table_name, provider, projection) + // TODO remove hard coded enable_pruning + let file_format = ParquetFormat::default().with_enable_pruning(true); + + let listing_options = ListingOptions { + format: Arc::new(file_format), + collect_stat: true, + file_extension: DEFAULT_PARQUET_EXTENSION.to_owned(), + target_partitions, + partitions: vec![], + }; + + let path: String = path.into(); + + // with parquet we resolve the schema in all cases + let resolved_schema = listing_options + .infer_schema(Arc::clone(&object_store), &path) + .await?; + + let provider = + ListingTable::new(object_store, path, resolved_schema, listing_options); + Self::scan(table_name, Arc::new(provider), projection) } /// Scan an Avro data source - pub fn scan_avro( + pub async fn scan_avro( + object_store: Arc, path: impl Into, - options: AvroReadOptions, + options: AvroReadOptions<'_>, projection: Option>, + target_partitions: usize, ) -> Result { let path = path.into(); - Self::scan_avro_with_name(path.clone(), options, projection, path) + Self::scan_avro_with_name( + object_store, + path.clone(), + options, + projection, + path, + target_partitions, + ) + .await } /// Scan an Avro data source and register it with a given table name - pub fn scan_avro_with_name( + pub async fn scan_avro_with_name( + object_store: Arc, path: impl Into, - options: AvroReadOptions, + options: AvroReadOptions<'_>, projection: Option>, table_name: impl Into, + target_partitions: usize, ) -> Result { - let provider = Arc::new(AvroFile::try_new(&path.into(), options)?); - Self::scan(table_name, provider, projection) + let listing_options = options.to_listing_options(target_partitions); + + let path: String = path.into(); + + let resolved_schema = match options.schema { + Some(s) => s, + None => { + listing_options + .infer_schema(Arc::clone(&object_store), &path) + .await? 
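// An illustrative sketch of calling the builder entry points defined above: every
// scan_* constructor is now async and takes an ObjectStore plus a target partition
// count. The paths and the partition count are assumptions for illustration;
// `LocalFileSystem` comes from datasource::object_store::local.
async fn sketch_builder_scans() -> Result<LogicalPlan> {
    let store: Arc<dyn ObjectStore> = Arc::new(LocalFileSystem {});

    // CSV: the schema comes from CsvReadOptions or is inferred from the files
    let plan = LogicalPlanBuilder::scan_csv(
        Arc::clone(&store),
        "tests/example.csv",
        CsvReadOptions::new(),
        None, // projection
        8,    // target_partitions
    )
    .await?
    .build()?;

    // Parquet: the schema is always resolved from the file metadata
    let _parquet_plan =
        LogicalPlanBuilder::scan_parquet(store, "tests/example.parquet", None, 8)
            .await?
            .build()?;

    Ok(plan)
}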
+ } + }; + let provider = + ListingTable::new(object_store, path, resolved_schema, listing_options); + + Self::scan(table_name, Arc::new(provider), projection) } /// Scan an empty data source, mainly used in tests @@ -198,6 +280,16 @@ impl LogicalPlanBuilder { table_name: impl Into, provider: Arc, projection: Option>, + ) -> Result { + Self::scan_with_filters(table_name, provider, projection, vec![]) + } + + /// Convert a table provider into a builder with a TableScan + pub fn scan_with_filters( + table_name: impl Into, + provider: Arc, + projection: Option>, + filters: Vec, ) -> Result { let table_name = table_name.into(); @@ -229,7 +321,7 @@ impl LogicalPlanBuilder { source: provider, projected_schema: Arc::new(projected_schema), projection, - filters: vec![], + filters, limit: None, }; diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs index 8ef69e9b0cfe..d50d5331ff1a 100644 --- a/datafusion/src/logical_plan/expr.rs +++ b/datafusion/src/logical_plan/expr.rs @@ -2221,4 +2221,27 @@ mod tests { assert!(exp2 < exp3); assert!(exp3 > exp2); } + + #[test] + fn combine_zero_filters() { + let result = combine_filters(&[]); + assert_eq!(result, None); + } + + #[test] + fn combine_one_filter() { + let filter = binary_expr(col("c1"), Operator::Lt, lit(1)); + let result = combine_filters(&[filter.clone()]); + assert_eq!(result, Some(filter)); + } + + #[test] + fn combine_multiple_filters() { + let filter1 = binary_expr(col("c1"), Operator::Lt, lit(1)); + let filter2 = binary_expr(col("c2"), Operator::Lt, lit(2)); + let filter3 = binary_expr(col("c3"), Operator::Lt, lit(3)); + let result = + combine_filters(&[filter1.clone(), filter2.clone(), filter3.clone()]); + assert_eq!(result, Some(and(and(filter1, filter2), filter3))); + } } diff --git a/datafusion/src/physical_optimizer/repartition.rs b/datafusion/src/physical_optimizer/repartition.rs index 61266e442c98..af47d86f65bc 100644 --- a/datafusion/src/physical_optimizer/repartition.rs +++ b/datafusion/src/physical_optimizer/repartition.rs @@ -109,28 +109,23 @@ mod tests { use arrow::datatypes::Schema; use super::*; + use crate::datasource::object_store::local::LocalFileSystem; use crate::datasource::PartitionedFile; - use crate::physical_plan::metrics::ExecutionPlanMetricsSet; - use crate::physical_plan::parquet::{ParquetExec, ParquetPartition}; + use crate::physical_plan::file_format::ParquetExec; use crate::physical_plan::projection::ProjectionExec; use crate::physical_plan::Statistics; #[test] fn added_repartition_to_single_partition() -> Result<()> { let schema = Arc::new(Schema::empty()); - let metrics = ExecutionPlanMetricsSet::new(); let parquet_project = ProjectionExec::try_new( vec![], Arc::new(ParquetExec::new( - vec![ParquetPartition::new( - vec![PartitionedFile::from("x".to_string())], - 0, - metrics.clone(), - )], + Arc::new(LocalFileSystem {}), + vec![vec![PartitionedFile::new("x".to_string(), 100)]], + Statistics::default(), schema, None, - Statistics::default(), - metrics, None, 2048, None, @@ -157,21 +152,16 @@ mod tests { #[test] fn repartition_deepest_node() -> Result<()> { let schema = Arc::new(Schema::empty()); - let metrics = ExecutionPlanMetricsSet::new(); let parquet_project = ProjectionExec::try_new( vec![], Arc::new(ProjectionExec::try_new( vec![], Arc::new(ParquetExec::new( - vec![ParquetPartition::new( - vec![PartitionedFile::from("x".to_string())], - 0, - metrics.clone(), - )], + Arc::new(LocalFileSystem {}), + vec![vec![PartitionedFile::new("x".to_string(), 100)]], + 
Statistics::default(), schema, None, - Statistics::default(), - metrics, None, 2048, None, diff --git a/datafusion/src/physical_plan/avro.rs b/datafusion/src/physical_plan/avro.rs deleted file mode 100644 index 3f0b007b26c0..000000000000 --- a/datafusion/src/physical_plan/avro.rs +++ /dev/null @@ -1,457 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Execution plan for reading line-delimited Avro files -#[cfg(feature = "avro")] -use super::RecordBatchStream; -use super::{common, source::Source, ExecutionPlan, Partitioning}; -use crate::avro_to_arrow::read_avro_schema_from_reader; -use crate::error::{DataFusionError, Result}; -use crate::physical_plan::{DisplayFormatType, Statistics}; -use arrow::datatypes::{Schema, SchemaRef}; -#[cfg(feature = "avro")] -use arrow::{error::Result as ArrowResult, record_batch::RecordBatch}; -use async_trait::async_trait; -#[cfg(feature = "avro")] -use futures::Stream; -use std::fs::File; -use std::{any::Any, io::Seek}; -use std::{ - io::Read, - sync::{Arc, Mutex}, -}; -#[cfg(feature = "avro")] -use std::{ - pin::Pin, - task::{Context, Poll}, -}; - -/// Line-delimited Avro read options -#[derive(Clone)] -pub struct AvroReadOptions<'a> { - /// The data source schema. - pub schema: Option, - - /// File extension; only files with this extension are selected for data input. - /// Defaults to ".avro". 
- pub file_extension: &'a str, -} - -impl<'a> Default for AvroReadOptions<'a> { - fn default() -> Self { - Self { - schema: None, - file_extension: ".avro", - } - } -} - -trait SeekRead: Read + Seek {} - -impl SeekRead for T {} -/// Execution plan for scanning Avro data source -#[derive(Debug)] -pub struct AvroExec { - source: Source>, - schema: SchemaRef, - projection: Option>, - projected_schema: SchemaRef, - file_extension: String, - batch_size: usize, - limit: Option, -} - -impl AvroExec { - /// Create a new execution plan for reading from a path - pub fn try_from_path( - path: &str, - options: AvroReadOptions, - projection: Option>, - batch_size: usize, - limit: Option, - ) -> Result { - let file_extension = options.file_extension.to_string(); - - let filenames = common::build_file_list(path, &file_extension)?; - - if filenames.is_empty() { - return Err(DataFusionError::Execution(format!( - "No files found at {path} with file extension {file_extension}", - path = path, - file_extension = file_extension.as_str() - ))); - } - - let schema = match options.schema { - Some(s) => s, - None => Arc::new(AvroExec::try_read_schema(filenames.as_slice())?), - }; - - let projected_schema = match &projection { - None => schema.clone(), - Some(p) => Arc::new(Schema::new( - p.iter().map(|i| schema.field(*i).clone()).collect(), - )), - }; - - Ok(Self { - source: Source::PartitionedFiles { - path: path.to_string(), - filenames, - }, - schema, - projected_schema, - file_extension, - projection, - batch_size, - limit, - }) - } - /// Create a new execution plan for reading from a reader - pub fn try_new_from_reader( - reader: impl Read + Seek + Send + Sync + 'static, - options: AvroReadOptions, - projection: Option>, - batch_size: usize, - limit: Option, - ) -> Result { - let schema = match options.schema { - Some(s) => s, - None => { - return Err(DataFusionError::Execution( - "The schema must be provided in options when reading from a reader" - .to_string(), - )); - } - }; - - let projected_schema = match &projection { - None => schema.clone(), - Some(p) => Arc::new(Schema::new( - p.iter().map(|i| schema.field(*i).clone()).collect(), - )), - }; - - Ok(Self { - source: Source::Reader(Mutex::new(Some(Box::new(reader)))), - schema, - file_extension: String::new(), - projection, - projected_schema, - batch_size, - limit, - }) - } - - /// Path to directory containing partitioned CSV files with the same schema - pub fn path(&self) -> &str { - self.source.path() - } - - /// The individual files under path - pub fn filenames(&self) -> &[String] { - self.source.filenames() - } - - /// File extension - pub fn file_extension(&self) -> &str { - &self.file_extension - } - - /// Get the schema of the avro file - pub fn file_schema(&self) -> SchemaRef { - self.schema.clone() - } - - /// Optional projection for which columns to load - pub fn projection(&self) -> Option<&Vec> { - self.projection.as_ref() - } - - /// Batch size - pub fn batch_size(&self) -> usize { - self.batch_size - } - - /// Limit - pub fn limit(&self) -> Option { - self.limit - } - - /// Read schema for given Avro dataset - pub fn try_read_schema(filenames: &[String]) -> Result { - let mut schemas = Vec::new(); - for filename in filenames { - let mut file = File::open(filename)?; - let schema = read_avro_schema_from_reader(&mut file)?; - schemas.push(schema); - } - - Ok(Schema::try_merge(schemas)?) 
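    // (the equivalent per-file schema read and merge now lives behind
    //  AvroFormat::infer_schema in datasource/file_format/avro.rs)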
- } -} - -#[async_trait] -impl ExecutionPlan for AvroExec { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - self.projected_schema.clone() - } - - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(match &self.source { - Source::PartitionedFiles { filenames, .. } => filenames.len(), - Source::Reader(_) => 1, - }) - } - - fn children(&self) -> Vec> { - Vec::new() - } - - fn with_new_children( - &self, - children: Vec>, - ) -> Result> { - if !children.is_empty() { - Err(DataFusionError::Internal(format!( - "Children cannot be replaced in {:?}", - self - ))) - } else if let Source::PartitionedFiles { filenames, path } = &self.source { - Ok(Arc::new(Self { - source: Source::PartitionedFiles { - filenames: filenames.clone(), - path: path.clone(), - }, - schema: self.schema.clone(), - projection: self.projection.clone(), - projected_schema: self.projected_schema.clone(), - batch_size: self.batch_size, - limit: self.limit, - file_extension: self.file_extension.clone(), - })) - } else { - Err(DataFusionError::Internal( - "AvroExec with reader source cannot be used with `with_new_children`" - .to_string(), - )) - } - } - - #[cfg(not(feature = "avro"))] - async fn execute( - &self, - _partition: usize, - ) -> Result { - Err(DataFusionError::NotImplemented( - "Cannot execute avro plan without avro feature enabled".to_string(), - )) - } - - #[cfg(feature = "avro")] - async fn execute( - &self, - partition: usize, - ) -> Result { - let mut builder = crate::avro_to_arrow::ReaderBuilder::new() - .with_schema(self.schema.clone()) - .with_batch_size(self.batch_size); - if let Some(proj) = &self.projection { - builder = builder.with_projection( - proj.iter() - .map(|col_idx| self.schema.field(*col_idx).name()) - .cloned() - .collect(), - ); - } - match &self.source { - Source::PartitionedFiles { filenames, .. 
} => { - let file = File::open(&filenames[partition])?; - - Ok(Box::pin(AvroStream::new(builder.build(file)?, self.limit))) - } - Source::Reader(rdr) => { - if partition != 0 { - Err(DataFusionError::Internal( - "Only partition 0 is valid when Avro comes from a reader" - .to_string(), - )) - } else if let Some(rdr) = rdr.lock().unwrap().take() { - Ok(Box::pin(AvroStream::new(builder.build(rdr)?, self.limit))) - } else { - Err(DataFusionError::Execution( - "Error reading Avro: Data can only be read a single time when the source is a reader" - .to_string(), - )) - } - } - } - } - - fn fmt_as( - &self, - t: DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - match t { - DisplayFormatType::Default => { - write!( - f, - "AvroExec: source={}, batch_size={}, limit={:?}", - self.source, self.batch_size, self.limit - ) - } - } - } - - fn statistics(&self) -> Statistics { - Statistics::default() - } -} - -#[cfg(feature = "avro")] -struct AvroStream<'a, R: Read> { - reader: crate::avro_to_arrow::Reader<'a, R>, - remain: Option, -} - -#[cfg(feature = "avro")] -impl<'a, R: Read> AvroStream<'a, R> { - fn new(reader: crate::avro_to_arrow::Reader<'a, R>, limit: Option) -> Self { - Self { - reader, - remain: limit, - } - } -} - -#[cfg(feature = "avro")] -impl Stream for AvroStream<'_, R> { - type Item = ArrowResult; - - fn poll_next( - mut self: Pin<&mut Self>, - _cx: &mut Context<'_>, - ) -> Poll> { - if let Some(remain) = self.remain.as_mut() { - if *remain < 1 { - return Poll::Ready(None); - } - } - - Poll::Ready(match self.reader.next() { - Ok(Some(item)) => { - if let Some(remain) = self.remain.as_mut() { - if *remain >= item.num_rows() { - *remain -= item.num_rows(); - Some(Ok(item)) - } else { - let len = *remain; - *remain = 0; - Some(Ok(RecordBatch::try_new( - item.schema(), - item.columns() - .iter() - .map(|column| column.slice(0, len)) - .collect(), - )?)) - } - } else { - Some(Ok(item)) - } - } - Ok(None) => None, - Err(err) => Some(Err(err)), - }) - } -} - -#[cfg(feature = "avro")] -impl RecordBatchStream for AvroStream<'_, R> { - fn schema(&self) -> SchemaRef { - self.reader.schema() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - #[cfg(feature = "avro")] - async fn test() -> Result<()> { - use futures::StreamExt; - - let testdata = crate::test_util::arrow_test_data(); - let filename = format!("{}/avro/alltypes_plain.avro", testdata); - let avro_exec = AvroExec::try_from_path( - &filename, - AvroReadOptions::default(), - Some(vec![0, 1, 2]), - 1024, - None, - )?; - assert_eq!(avro_exec.output_partitioning().partition_count(), 1); - - let mut results = avro_exec.execute(0).await?; - let batch = results.next().await.unwrap()?; - - assert_eq!(8, batch.num_rows()); - assert_eq!(3, batch.num_columns()); - - let schema = batch.schema(); - let field_names: Vec<&str> = - schema.fields().iter().map(|f| f.name().as_str()).collect(); - assert_eq!(vec!["id", "bool_col", "tinyint_col"], field_names); - - let batch = results.next().await; - assert!(batch.is_none()); - - let batch = results.next().await; - assert!(batch.is_none()); - - let batch = results.next().await; - assert!(batch.is_none()); - - Ok(()) - } - - #[tokio::test] - #[cfg(not(feature = "avro"))] - async fn test() -> Result<()> { - let testdata = crate::test_util::arrow_test_data(); - let filename = format!("{}/avro/alltypes_plain.avro", testdata); - let avro_exec = AvroExec::try_from_path( - &filename, - AvroReadOptions::default(), - Some(vec![0, 1, 2]), - 1024, - None, - ); - 
assert!(matches!( - avro_exec, - Err(DataFusionError::NotImplemented(msg)) - if msg == *"cannot read avro schema without the 'avro' feature enabled" - )); - - Ok(()) - } -} diff --git a/datafusion/src/physical_plan/coalesce_partitions.rs b/datafusion/src/physical_plan/coalesce_partitions.rs index 329edcb31d65..a1068386f0d2 100644 --- a/datafusion/src/physical_plan/coalesce_partitions.rs +++ b/datafusion/src/physical_plan/coalesce_partitions.rs @@ -195,8 +195,9 @@ impl RecordBatchStream for MergeStream { mod tests { use super::*; + use crate::datasource::object_store::local::LocalFileSystem; use crate::physical_plan::common; - use crate::physical_plan::csv::{CsvExec, CsvReadOptions}; + use crate::physical_plan::file_format::CsvExec; use crate::test; #[tokio::test] @@ -204,16 +205,19 @@ mod tests { let schema = test::aggr_test_schema(); let num_partitions = 4; - let path = + let (_, files) = test::create_partitioned_csv("aggregate_test_100.csv", num_partitions)?; - - let csv = CsvExec::try_new( - &path, - CsvReadOptions::new().schema(&schema), + let csv = CsvExec::new( + Arc::new(LocalFileSystem {}), + files, + Statistics::default(), + schema, + true, + b',', None, 1024, None, - )?; + ); // input should have 4 partitions assert_eq!(csv.output_partitioning().partition_count(), num_partitions); diff --git a/datafusion/src/physical_plan/csv.rs b/datafusion/src/physical_plan/csv.rs deleted file mode 100644 index 35bd2247bfbc..000000000000 --- a/datafusion/src/physical_plan/csv.rs +++ /dev/null @@ -1,534 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Execution plan for reading CSV files - -use crate::error::{DataFusionError, Result}; -use crate::physical_plan::ExecutionPlan; -use crate::physical_plan::{common, source::Source, Partitioning}; -use arrow::csv; -use arrow::datatypes::{Schema, SchemaRef}; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; -use futures::Stream; -use std::any::Any; -use std::fs::File; -use std::io::Read; -use std::pin::Pin; -use std::sync::Arc; -use std::sync::Mutex; -use std::task::{Context, Poll}; - -use super::{ - DisplayFormatType, RecordBatchStream, SendableRecordBatchStream, Statistics, -}; -use async_trait::async_trait; - -/// CSV file read option -#[derive(Copy, Clone)] -pub struct CsvReadOptions<'a> { - /// Does the CSV file have a header? - /// - /// If schema inference is run on a file with no headers, default column names - /// are created. - pub has_header: bool, - /// An optional column delimiter. Defaults to `b','`. - pub delimiter: u8, - /// An optional schema representing the CSV files. If None, CSV reader will try to infer it - /// based on data in file. 
- pub schema: Option<&'a Schema>, - /// Max number of rows to read from CSV files for schema inference if needed. Defaults to 1000. - pub schema_infer_max_records: usize, - /// File extension; only files with this extension are selected for data input. - /// Defaults to ".csv". - pub file_extension: &'a str, -} - -impl<'a> CsvReadOptions<'a> { - /// Create a CSV read option with default presets - pub fn new() -> Self { - Self { - has_header: true, - schema: None, - schema_infer_max_records: 1000, - delimiter: b',', - file_extension: ".csv", - } - } - - /// Configure has_header setting - pub fn has_header(mut self, has_header: bool) -> Self { - self.has_header = has_header; - self - } - - /// Specify delimiter to use for CSV read - pub fn delimiter(mut self, delimiter: u8) -> Self { - self.delimiter = delimiter; - self - } - - /// Specify the file extension for CSV file selection - pub fn file_extension(mut self, file_extension: &'a str) -> Self { - self.file_extension = file_extension; - self - } - - /// Configure delimiter setting with Option, None value will be ignored - pub fn delimiter_option(mut self, delimiter: Option) -> Self { - if let Some(d) = delimiter { - self.delimiter = d; - } - self - } - - /// Specify schema to use for CSV read - pub fn schema(mut self, schema: &'a Schema) -> Self { - self.schema = Some(schema); - self - } - - /// Configure number of max records to read for schema inference - pub fn schema_infer_max_records(mut self, max_records: usize) -> Self { - self.schema_infer_max_records = max_records; - self - } -} - -/// Execution plan for scanning a CSV file -#[derive(Debug, Clone)] -pub struct CsvExec { - /// Where the data comes from. - source: Source, - /// Schema representing the CSV file - schema: SchemaRef, - /// Does the CSV file have a header? - has_header: bool, - /// An optional column delimiter. Defaults to `b','` - delimiter: Option, - /// File extension - file_extension: String, - /// Optional projection for which columns to load - projection: Option>, - /// Schema after the projection has been applied - projected_schema: SchemaRef, - /// Batch size - batch_size: usize, - /// Limit in nr. 
of rows - limit: Option, -} - -impl CsvExec { - /// Create a new execution plan for reading a set of CSV files - pub fn try_new( - path: &str, - options: CsvReadOptions, - projection: Option>, - batch_size: usize, - limit: Option, - ) -> Result { - let file_extension = String::from(options.file_extension); - - let filenames = common::build_file_list(path, file_extension.as_str())?; - if filenames.is_empty() { - return Err(DataFusionError::Execution(format!( - "No files found at {path} with file extension {file_extension}", - path = path, - file_extension = file_extension.as_str() - ))); - } - - let schema = match options.schema { - Some(s) => s.clone(), - None => CsvExec::try_infer_schema(&filenames, &options)?, - }; - - let projected_schema = match &projection { - None => schema.clone(), - Some(p) => Schema::new(p.iter().map(|i| schema.field(*i).clone()).collect()), - }; - - Ok(Self { - source: Source::PartitionedFiles { - path: path.to_string(), - filenames, - }, - schema: Arc::new(schema), - has_header: options.has_header, - delimiter: Some(options.delimiter), - file_extension, - projection, - projected_schema: Arc::new(projected_schema), - batch_size, - limit, - }) - } - /// Create a new execution plan for reading from a reader - pub fn try_new_from_reader( - reader: impl Read + Send + Sync + 'static, - options: CsvReadOptions, - projection: Option>, - batch_size: usize, - limit: Option, - ) -> Result { - let schema = match options.schema { - Some(s) => s.clone(), - None => { - return Err(DataFusionError::Execution( - "The schema must be provided in options when reading from a reader" - .to_string(), - )); - } - }; - - let projected_schema = match &projection { - None => schema.clone(), - Some(p) => Schema::new(p.iter().map(|i| schema.field(*i).clone()).collect()), - }; - - Ok(Self { - source: Source::Reader(Mutex::new(Some(Box::new(reader)))), - schema: Arc::new(schema), - has_header: options.has_header, - delimiter: Some(options.delimiter), - file_extension: String::new(), - projection, - projected_schema: Arc::new(projected_schema), - batch_size, - limit, - }) - } - - /// Path to directory containing partitioned CSV files with the same schema - pub fn path(&self) -> &str { - self.source.path() - } - - /// The individual files under path - pub fn filenames(&self) -> &[String] { - self.source.filenames() - } - - /// Does the CSV file have a header? - pub fn has_header(&self) -> bool { - self.has_header - } - - /// An optional column delimiter. Defaults to `b','` - pub fn delimiter(&self) -> Option<&u8> { - self.delimiter.as_ref() - } - - /// File extension - pub fn file_extension(&self) -> &str { - &self.file_extension - } - - /// Get the schema of the CSV file - pub fn file_schema(&self) -> SchemaRef { - self.schema.clone() - } - - /// Optional projection for which columns to load - pub fn projection(&self) -> Option<&Vec> { - self.projection.as_ref() - } - - /// Batch size - pub fn batch_size(&self) -> usize { - self.batch_size - } - - /// Limit - pub fn limit(&self) -> Option { - self.limit - } - - /// Infer schema for given CSV dataset - pub fn try_infer_schema( - filenames: &[String], - options: &CsvReadOptions, - ) -> Result { - Ok(csv::infer_schema_from_files( - filenames, - options.delimiter, - Some(options.schema_infer_max_records), - options.has_header, - )?) 
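    // (schema inference for CSV now runs through CsvFormat::infer_schema in
    //  datasource/file_format/csv.rs, reading through the ObjectStore abstraction)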
- } -} - -#[async_trait] -impl ExecutionPlan for CsvExec { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - - /// Get the schema for this execution plan - fn schema(&self) -> SchemaRef { - self.projected_schema.clone() - } - - /// Get the output partitioning of this plan - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(match &self.source { - Source::PartitionedFiles { filenames, .. } => filenames.len(), - Source::Reader(_) => 1, - }) - } - - fn children(&self) -> Vec> { - // this is a leaf node and has no children - vec![] - } - - fn with_new_children( - &self, - children: Vec>, - ) -> Result> { - if children.is_empty() { - Ok(Arc::new(self.clone())) - } else { - Err(DataFusionError::Internal(format!( - "Children cannot be replaced in {:?}", - self - ))) - } - } - - async fn execute(&self, partition: usize) -> Result { - match &self.source { - Source::PartitionedFiles { filenames, .. } => { - Ok(Box::pin(CsvStream::try_new( - &filenames[partition], - self.schema.clone(), - self.has_header, - self.delimiter, - &self.projection, - self.batch_size, - self.limit, - )?)) - } - Source::Reader(rdr) => { - if partition != 0 { - Err(DataFusionError::Internal( - "Only partition 0 is valid when CSV comes from a reader" - .to_string(), - )) - } else if let Some(rdr) = rdr.lock().unwrap().take() { - Ok(Box::pin(CsvStream::try_new_from_reader( - rdr, - self.schema.clone(), - self.has_header, - self.delimiter, - &self.projection, - self.batch_size, - self.limit, - )?)) - } else { - Err(DataFusionError::Execution( - "Error reading CSV: Data can only be read a single time when the source is a reader" - .to_string(), - )) - } - } - } - } - - fn fmt_as( - &self, - t: DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - match t { - DisplayFormatType::Default => { - write!( - f, - "CsvExec: source={}, has_header={}", - self.source, self.has_header - ) - } - } - } - - fn statistics(&self) -> Statistics { - // TODO stats: handle statistics - Statistics::default() - } -} - -/// Iterator over batches -struct CsvStream { - /// Arrow CSV reader - reader: csv::Reader, -} -impl CsvStream { - /// Create an iterator for a CSV file - pub fn try_new( - filename: &str, - schema: SchemaRef, - has_header: bool, - delimiter: Option, - projection: &Option>, - batch_size: usize, - limit: Option, - ) -> Result { - let file = File::open(filename)?; - Self::try_new_from_reader( - file, schema, has_header, delimiter, projection, batch_size, limit, - ) - } -} -impl CsvStream { - /// Create an iterator for a reader - pub fn try_new_from_reader( - reader: R, - schema: SchemaRef, - has_header: bool, - delimiter: Option, - projection: &Option>, - batch_size: usize, - limit: Option, - ) -> Result> { - let start_line = if has_header { 1 } else { 0 }; - let bounds = limit.map(|x| (0, x + start_line)); - - let reader = csv::Reader::new( - reader, - schema, - has_header, - delimiter, - batch_size, - bounds, - projection.clone(), - ); - - Ok(Self { reader }) - } -} - -impl Stream for CsvStream { - type Item = ArrowResult; - - fn poll_next( - mut self: Pin<&mut Self>, - _: &mut Context<'_>, - ) -> Poll> { - Poll::Ready(self.reader.next()) - } -} - -impl RecordBatchStream for CsvStream { - /// Get the schema - fn schema(&self) -> SchemaRef { - self.reader.schema() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::test::aggr_test_schema; - use futures::StreamExt; - - #[tokio::test] - async fn 
csv_exec_with_projection() -> Result<()> { - let schema = aggr_test_schema(); - let testdata = crate::test_util::arrow_test_data(); - let filename = "aggregate_test_100.csv"; - let path = format!("{}/csv/{}", testdata, filename); - let csv = CsvExec::try_new( - &path, - CsvReadOptions::new().schema(&schema), - Some(vec![0, 2, 4]), - 1024, - None, - )?; - assert_eq!(13, csv.schema.fields().len()); - assert_eq!(3, csv.projected_schema.fields().len()); - assert_eq!(13, csv.file_schema().fields().len()); - assert_eq!(3, csv.schema().fields().len()); - let mut stream = csv.execute(0).await?; - let batch = stream.next().await.unwrap()?; - assert_eq!(3, batch.num_columns()); - let batch_schema = batch.schema(); - assert_eq!(3, batch_schema.fields().len()); - assert_eq!("c1", batch_schema.field(0).name()); - assert_eq!("c3", batch_schema.field(1).name()); - assert_eq!("c5", batch_schema.field(2).name()); - Ok(()) - } - - #[tokio::test] - async fn csv_exec_without_projection() -> Result<()> { - let schema = aggr_test_schema(); - let testdata = crate::test_util::arrow_test_data(); - let filename = "aggregate_test_100.csv"; - let path = format!("{}/csv/{}", testdata, filename); - let csv = CsvExec::try_new( - &path, - CsvReadOptions::new().schema(&schema), - None, - 1024, - None, - )?; - assert_eq!(13, csv.schema.fields().len()); - assert_eq!(13, csv.projected_schema.fields().len()); - assert_eq!(13, csv.file_schema().fields().len()); - assert_eq!(13, csv.schema().fields().len()); - let mut it = csv.execute(0).await?; - let batch = it.next().await.unwrap()?; - assert_eq!(13, batch.num_columns()); - let batch_schema = batch.schema(); - assert_eq!(13, batch_schema.fields().len()); - assert_eq!("c1", batch_schema.field(0).name()); - assert_eq!("c2", batch_schema.field(1).name()); - assert_eq!("c3", batch_schema.field(2).name()); - Ok(()) - } - - #[tokio::test] - async fn csv_exec_with_reader() -> Result<()> { - let schema = aggr_test_schema(); - let testdata = crate::test_util::arrow_test_data(); - let filename = "aggregate_test_100.csv"; - let path = format!("{}/csv/{}", testdata, filename); - let buf = std::fs::read(path).unwrap(); - let rdr = std::io::Cursor::new(buf); - let csv = CsvExec::try_new_from_reader( - rdr, - CsvReadOptions::new().schema(&schema), - Some(vec![0, 2, 4]), - 1024, - None, - )?; - assert_eq!(13, csv.schema.fields().len()); - assert_eq!(3, csv.projected_schema.fields().len()); - assert_eq!(13, csv.file_schema().fields().len()); - assert_eq!(3, csv.schema().fields().len()); - let mut stream = csv.execute(0).await?; - let batch = stream.next().await.unwrap()?; - assert_eq!(3, batch.num_columns()); - let batch_schema = batch.schema(); - assert_eq!(3, batch_schema.fields().len()); - assert_eq!("c1", batch_schema.field(0).name()); - assert_eq!("c3", batch_schema.field(1).name()); - assert_eq!("c5", batch_schema.field(2).name()); - Ok(()) - } -} diff --git a/datafusion/src/physical_plan/expressions/binary.rs b/datafusion/src/physical_plan/expressions/binary.rs index d58b2ed207a1..5838239eec8c 100644 --- a/datafusion/src/physical_plan/expressions/binary.rs +++ b/datafusion/src/physical_plan/expressions/binary.rs @@ -484,7 +484,7 @@ pub fn binary_operator_data_type( rhs_type: &DataType, ) -> Result { // validate that it is possible to perform the operation on incoming types. 
- // (or the return datatype cannot be infered) + // (or the return datatype cannot be inferred) let common_type = common_binary_type(lhs_type, op, rhs_type)?; match op { diff --git a/datafusion/src/physical_plan/file_format/avro.rs b/datafusion/src/physical_plan/file_format/avro.rs new file mode 100644 index 000000000000..0a57f8b386e0 --- /dev/null +++ b/datafusion/src/physical_plan/file_format/avro.rs @@ -0,0 +1,316 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Execution plan for reading line-delimited Avro files +use crate::datasource::object_store::ObjectStore; +use crate::datasource::PartitionedFile; +use crate::error::{DataFusionError, Result}; +#[cfg(feature = "avro")] +use crate::physical_plan::RecordBatchStream; +use crate::physical_plan::{ + DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics, +}; +use arrow::datatypes::{Schema, SchemaRef}; +#[cfg(feature = "avro")] +use arrow::{error::Result as ArrowResult, record_batch::RecordBatch}; +use async_trait::async_trait; +#[cfg(feature = "avro")] +use futures::Stream; +use std::any::Any; +use std::sync::Arc; +#[cfg(feature = "avro")] +use std::{ + io::Read, + pin::Pin, + task::{Context, Poll}, +}; + +/// Execution plan for scanning Avro data source +#[derive(Debug, Clone)] +pub struct AvroExec { + object_store: Arc, + files: Vec, + statistics: Statistics, + schema: SchemaRef, + projection: Option>, + projected_schema: SchemaRef, + batch_size: usize, + limit: Option, +} + +impl AvroExec { + /// Create a new JSON reader execution plan provided file list and schema + /// TODO: support partitiond file list (Vec>) + pub fn new( + object_store: Arc, + files: Vec, + statistics: Statistics, + schema: SchemaRef, + projection: Option>, + batch_size: usize, + limit: Option, + ) -> Self { + let projected_schema = match &projection { + None => Arc::clone(&schema), + Some(p) => Arc::new(Schema::new( + p.iter().map(|i| schema.field(*i).clone()).collect(), + )), + }; + + Self { + object_store, + files, + statistics, + schema, + projection, + projected_schema, + batch_size, + limit, + } + } + /// List of data files + pub fn files(&self) -> &[PartitionedFile] { + &self.files + } + /// The schema before projection + pub fn file_schema(&self) -> &SchemaRef { + &self.schema + } + /// Optional projection for which columns to load + pub fn projection(&self) -> &Option> { + &self.projection + } + /// Batch size + pub fn batch_size(&self) -> usize { + self.batch_size + } + /// Limit in nr. 
of rows + pub fn limit(&self) -> Option { + self.limit + } +} + +#[async_trait] +impl ExecutionPlan for AvroExec { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.projected_schema.clone() + } + + fn output_partitioning(&self) -> Partitioning { + Partitioning::UnknownPartitioning(self.files.len()) + } + + fn children(&self) -> Vec> { + Vec::new() + } + + fn with_new_children( + &self, + children: Vec>, + ) -> Result> { + if children.is_empty() { + Ok(Arc::new(self.clone())) + } else { + Err(DataFusionError::Internal(format!( + "Children cannot be replaced in {:?}", + self + ))) + } + } + + #[cfg(not(feature = "avro"))] + async fn execute(&self, _partition: usize) -> Result { + Err(DataFusionError::NotImplemented( + "Cannot execute avro plan without avro feature enabled".to_string(), + )) + } + + #[cfg(feature = "avro")] + async fn execute(&self, partition: usize) -> Result { + let file = self + .object_store + .file_reader(self.files[partition].file_meta.sized_file.clone())? + .sync_reader()?; + + let proj = self.projection.as_ref().map(|p| { + p.iter() + .map(|col_idx| self.schema.field(*col_idx).name()) + .cloned() + .collect() + }); + + let avro_reader = crate::avro_to_arrow::Reader::try_new( + file, + self.schema(), + self.batch_size, + proj, + )?; + + Ok(Box::pin(AvroStream::new(avro_reader, self.limit))) + } + + fn fmt_as( + &self, + t: DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + match t { + DisplayFormatType::Default => { + write!( + f, + "AvroExec: files=[{}], batch_size={}, limit={:?}", + self.files + .iter() + .map(|f| f.file_meta.path()) + .collect::>() + .join(", "), + self.batch_size, + self.limit, + ) + } + } + } + + fn statistics(&self) -> Statistics { + self.statistics.clone() + } +} + +#[cfg(feature = "avro")] +struct AvroStream<'a, R: Read> { + reader: crate::avro_to_arrow::Reader<'a, R>, + remain: Option, +} + +#[cfg(feature = "avro")] +impl<'a, R: Read> AvroStream<'a, R> { + fn new(reader: crate::avro_to_arrow::Reader<'a, R>, limit: Option) -> Self { + Self { + reader, + remain: limit, + } + } +} + +#[cfg(feature = "avro")] +impl Stream for AvroStream<'_, R> { + type Item = ArrowResult; + + fn poll_next( + mut self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll> { + if let Some(remain) = self.remain.as_mut() { + if *remain < 1 { + return Poll::Ready(None); + } + } + + Poll::Ready(match self.reader.next() { + Ok(Some(item)) => { + if let Some(remain) = self.remain.as_mut() { + if *remain >= item.num_rows() { + *remain -= item.num_rows(); + Some(Ok(item)) + } else { + let len = *remain; + *remain = 0; + Some(Ok(RecordBatch::try_new( + item.schema(), + item.columns() + .iter() + .map(|column| column.slice(0, len)) + .collect(), + )?)) + } + } else { + Some(Ok(item)) + } + } + Ok(None) => None, + Err(err) => Some(Err(err)), + }) + } +} + +#[cfg(feature = "avro")] +impl RecordBatchStream for AvroStream<'_, R> { + fn schema(&self) -> SchemaRef { + self.reader.schema() + } +} + +#[cfg(test)] +#[cfg(feature = "avro")] +mod tests { + + use crate::datasource::object_store::local::{ + local_file_meta, local_object_reader_stream, LocalFileSystem, + }; + + use super::*; + + #[tokio::test] + async fn test() -> Result<()> { + use futures::StreamExt; + + use crate::datasource::file_format::{avro::AvroFormat, FileFormat}; + + let testdata = crate::test_util::arrow_test_data(); + let filename = format!("{}/avro/alltypes_plain.avro", testdata); + let avro_exec = AvroExec::new( + Arc::new(LocalFileSystem {}), + 
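            // a single PartitionedFile built from the local file's metadata; in normal
            // use these file groups come from the ListingTable when it lists the store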
vec![PartitionedFile { + file_meta: local_file_meta(filename.clone()), + }], + Statistics::default(), + AvroFormat {} + .infer_schema(local_object_reader_stream(vec![filename])) + .await?, + Some(vec![0, 1, 2]), + 1024, + None, + ); + assert_eq!(avro_exec.output_partitioning().partition_count(), 1); + + let mut results = avro_exec.execute(0).await?; + let batch = results.next().await.unwrap()?; + + assert_eq!(8, batch.num_rows()); + assert_eq!(3, batch.num_columns()); + + let schema = batch.schema(); + let field_names: Vec<&str> = + schema.fields().iter().map(|f| f.name().as_str()).collect(); + assert_eq!(vec!["id", "bool_col", "tinyint_col"], field_names); + + let batch = results.next().await; + assert!(batch.is_none()); + + let batch = results.next().await; + assert!(batch.is_none()); + + let batch = results.next().await; + assert!(batch.is_none()); + + Ok(()) + } +} diff --git a/datafusion/src/physical_plan/file_format/csv.rs b/datafusion/src/physical_plan/file_format/csv.rs new file mode 100644 index 000000000000..329dd6bae133 --- /dev/null +++ b/datafusion/src/physical_plan/file_format/csv.rs @@ -0,0 +1,330 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Execution plan for reading CSV files + +use crate::datasource::object_store::ObjectStore; +use crate::datasource::PartitionedFile; +use crate::error::{DataFusionError, Result}; +use crate::physical_plan::{ + DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, + SendableRecordBatchStream, Statistics, +}; +use arrow::csv; +use arrow::datatypes::{Schema, SchemaRef}; +use arrow::error::Result as ArrowResult; +use arrow::record_batch::RecordBatch; +use futures::Stream; +use std::any::Any; +use std::io::Read; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use async_trait::async_trait; + +/// Execution plan for scanning a CSV file +#[derive(Debug, Clone)] +pub struct CsvExec { + object_store: Arc, + files: Vec, + /// Schema representing the CSV file + schema: SchemaRef, + /// Schema after the projection has been applied + projected_schema: SchemaRef, + statistics: Statistics, + has_header: bool, + delimiter: u8, + projection: Option>, + batch_size: usize, + limit: Option, +} + +impl CsvExec { + /// Create a new CSV reader execution plan provided file list and schema + /// TODO: support partitiond file list (Vec>) + #[allow(clippy::too_many_arguments)] + pub fn new( + object_store: Arc, + files: Vec, + statistics: Statistics, + schema: SchemaRef, + has_header: bool, + delimiter: u8, + projection: Option>, + batch_size: usize, + limit: Option, + ) -> Self { + let projected_schema = match &projection { + None => Arc::clone(&schema), + Some(p) => Arc::new(Schema::new( + p.iter().map(|i| schema.field(*i).clone()).collect(), + )), + }; + + Self { + object_store, + files, + schema, + statistics, + has_header, + delimiter, + projection, + projected_schema, + batch_size, + limit, + } + } + + /// List of data files + pub fn files(&self) -> &[PartitionedFile] { + &self.files + } + /// The schema before projection + pub fn file_schema(&self) -> &SchemaRef { + &self.schema + } + /// true if the first line of each file is a header + pub fn has_header(&self) -> bool { + self.has_header + } + /// A column delimiter + pub fn delimiter(&self) -> u8 { + self.delimiter + } + /// Optional projection for which columns to load + pub fn projection(&self) -> &Option> { + &self.projection + } + /// Batch size + pub fn batch_size(&self) -> usize { + self.batch_size + } + /// Limit in nr. of rows + pub fn limit(&self) -> Option { + self.limit + } +} + +#[async_trait] +impl ExecutionPlan for CsvExec { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + /// Get the schema for this execution plan + fn schema(&self) -> SchemaRef { + self.projected_schema.clone() + } + + /// Get the output partitioning of this plan + fn output_partitioning(&self) -> Partitioning { + Partitioning::UnknownPartitioning(self.files.len()) + } + + fn children(&self) -> Vec> { + // this is a leaf node and has no children + vec![] + } + + fn with_new_children( + &self, + children: Vec>, + ) -> Result> { + if children.is_empty() { + Ok(Arc::new(self.clone())) + } else { + Err(DataFusionError::Internal(format!( + "Children cannot be replaced in {:?}", + self + ))) + } + } + + async fn execute(&self, partition: usize) -> Result { + let file = self + .object_store + .file_reader(self.files[partition].file_meta.sized_file.clone())? 
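            // the object store resolves this file's SizedFile to a reader;
            // sync_reader() yields a blocking std::io::Read for arrow's csv::Reader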
+ .sync_reader()?; + + Ok(Box::pin(CsvStream::try_new_from_reader( + file, + self.schema.clone(), + self.has_header, + self.delimiter, + &self.projection, + self.batch_size, + self.limit, + )?)) + } + + fn fmt_as( + &self, + t: DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + match t { + DisplayFormatType::Default => { + write!( + f, + "CsvExec: files=[{}], has_header={}, batch_size={}, limit={:?}", + self.files + .iter() + .map(|f| f.file_meta.path()) + .collect::>() + .join(", "), + self.has_header, + self.batch_size, + self.limit, + ) + } + } + } + + fn statistics(&self) -> Statistics { + self.statistics.clone() + } +} + +/// Iterator over batches +struct CsvStream { + /// Arrow CSV reader + reader: csv::Reader, +} + +impl CsvStream { + /// Create an iterator for a reader + pub fn try_new_from_reader( + reader: R, + schema: SchemaRef, + has_header: bool, + delimiter: u8, + projection: &Option>, + batch_size: usize, + limit: Option, + ) -> Result> { + let start_line = if has_header { 1 } else { 0 }; + let bounds = limit.map(|x| (0, x + start_line)); + + let reader = csv::Reader::new( + reader, + schema, + has_header, + Some(delimiter), + batch_size, + bounds, + projection.clone(), + ); + + Ok(Self { reader }) + } +} + +impl Stream for CsvStream { + type Item = ArrowResult; + + fn poll_next( + mut self: Pin<&mut Self>, + _: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(self.reader.next()) + } +} + +impl RecordBatchStream for CsvStream { + /// Get the schema + fn schema(&self) -> SchemaRef { + self.reader.schema() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + datasource::object_store::local::{local_file_meta, LocalFileSystem}, + test::aggr_test_schema, + }; + use futures::StreamExt; + + #[tokio::test] + async fn csv_exec_with_projection() -> Result<()> { + let schema = aggr_test_schema(); + let testdata = crate::test_util::arrow_test_data(); + let filename = "aggregate_test_100.csv"; + let path = format!("{}/csv/{}", testdata, filename); + let csv = CsvExec::new( + Arc::new(LocalFileSystem {}), + vec![PartitionedFile { + file_meta: local_file_meta(path), + }], + Statistics::default(), + schema, + true, + b',', + Some(vec![0, 2, 4]), + 1024, + None, + ); + assert_eq!(13, csv.schema.fields().len()); + assert_eq!(3, csv.projected_schema.fields().len()); + assert_eq!(3, csv.schema().fields().len()); + let mut stream = csv.execute(0).await?; + let batch = stream.next().await.unwrap()?; + assert_eq!(3, batch.num_columns()); + let batch_schema = batch.schema(); + assert_eq!(3, batch_schema.fields().len()); + assert_eq!("c1", batch_schema.field(0).name()); + assert_eq!("c3", batch_schema.field(1).name()); + assert_eq!("c5", batch_schema.field(2).name()); + Ok(()) + } + + #[tokio::test] + async fn csv_exec_without_projection() -> Result<()> { + let schema = aggr_test_schema(); + let testdata = crate::test_util::arrow_test_data(); + let filename = "aggregate_test_100.csv"; + let path = format!("{}/csv/{}", testdata, filename); + let csv = CsvExec::new( + Arc::new(LocalFileSystem {}), + vec![PartitionedFile { + file_meta: local_file_meta(path), + }], + Statistics::default(), + schema, + true, + b',', + None, + 1024, + None, + ); + assert_eq!(13, csv.schema.fields().len()); + assert_eq!(13, csv.projected_schema.fields().len()); + assert_eq!(13, csv.schema().fields().len()); + let mut it = csv.execute(0).await?; + let batch = it.next().await.unwrap()?; + assert_eq!(13, batch.num_columns()); + let batch_schema = batch.schema(); + assert_eq!(13, 
batch_schema.fields().len()); + assert_eq!("c1", batch_schema.field(0).name()); + assert_eq!("c2", batch_schema.field(1).name()); + assert_eq!("c3", batch_schema.field(2).name()); + Ok(()) + } +} diff --git a/datafusion/src/physical_plan/file_format/json.rs b/datafusion/src/physical_plan/file_format/json.rs new file mode 100644 index 000000000000..068e53a60e0c --- /dev/null +++ b/datafusion/src/physical_plan/file_format/json.rs @@ -0,0 +1,338 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Execution plan for reading line-delimited JSON files +use async_trait::async_trait; +use futures::Stream; + +use crate::datasource::object_store::ObjectStore; +use crate::datasource::PartitionedFile; +use crate::error::{DataFusionError, Result}; +use crate::physical_plan::{ + DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, + SendableRecordBatchStream, Statistics, +}; +use arrow::{ + datatypes::{Schema, SchemaRef}, + error::Result as ArrowResult, + json, + record_batch::RecordBatch, +}; +use std::any::Any; +use std::{ + io::Read, + pin::Pin, + sync::Arc, + task::{Context, Poll}, +}; + +/// Execution plan for scanning NdJson data source +#[derive(Debug, Clone)] +pub struct NdJsonExec { + object_store: Arc, + files: Vec, + statistics: Statistics, + schema: SchemaRef, + projection: Option>, + projected_schema: SchemaRef, + batch_size: usize, + limit: Option, +} + +impl NdJsonExec { + /// Create a new JSON reader execution plan provided file list and schema + /// TODO: support partitiond file list (Vec>) + pub fn new( + object_store: Arc, + files: Vec, + statistics: Statistics, + schema: SchemaRef, + projection: Option>, + batch_size: usize, + limit: Option, + ) -> Self { + let projected_schema = match &projection { + None => Arc::clone(&schema), + Some(p) => Arc::new(Schema::new( + p.iter().map(|i| schema.field(*i).clone()).collect(), + )), + }; + + Self { + object_store, + files, + statistics, + schema, + projection, + projected_schema, + batch_size, + limit, + } + } +} + +#[async_trait] +impl ExecutionPlan for NdJsonExec { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.projected_schema.clone() + } + + fn output_partitioning(&self) -> Partitioning { + Partitioning::UnknownPartitioning(self.files.len()) + } + + fn children(&self) -> Vec> { + Vec::new() + } + + fn with_new_children( + &self, + children: Vec>, + ) -> Result> { + if children.is_empty() { + Ok(Arc::new(self.clone()) as Arc) + } else { + Err(DataFusionError::Internal(format!( + "Children cannot be replaced in {:?}", + self + ))) + } + } + + async fn execute(&self, partition: usize) -> Result { + let proj = self.projection.as_ref().map(|p| { + p.iter() + .map(|col_idx| self.schema.field(*col_idx).name()) + .cloned() + .collect() + 
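                // arrow's json::Reader takes projected column *names*, so the
                // index-based projection is translated to field names here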
}); + + let file = self + .object_store + .file_reader(self.files[partition].file_meta.sized_file.clone())? + .sync_reader()?; + + let json_reader = json::Reader::new(file, self.schema(), self.batch_size, proj); + + Ok(Box::pin(NdJsonStream::new(json_reader, self.limit))) + } + + fn fmt_as( + &self, + t: DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + match t { + DisplayFormatType::Default => { + write!( + f, + "JsonExec: batch_size={}, limit={:?}, files=[{}]", + self.batch_size, + self.limit, + self.files + .iter() + .map(|f| f.file_meta.path()) + .collect::>() + .join(", ") + ) + } + } + } + + fn statistics(&self) -> Statistics { + self.statistics.clone() + } +} + +struct NdJsonStream { + reader: json::Reader, + remain: Option, +} + +impl NdJsonStream { + fn new(reader: json::Reader, limit: Option) -> Self { + Self { + reader, + remain: limit, + } + } +} + +impl Stream for NdJsonStream { + type Item = ArrowResult; + + fn poll_next( + mut self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll> { + if let Some(remain) = self.remain.as_mut() { + if *remain < 1 { + return Poll::Ready(None); + } + } + + Poll::Ready(match self.reader.next() { + Ok(Some(item)) => { + if let Some(remain) = self.remain.as_mut() { + if *remain >= item.num_rows() { + *remain -= item.num_rows(); + Some(Ok(item)) + } else { + let len = *remain; + *remain = 0; + Some(Ok(RecordBatch::try_new( + item.schema(), + item.columns() + .iter() + .map(|column| column.slice(0, len)) + .collect(), + )?)) + } + } else { + Some(Ok(item)) + } + } + Ok(None) => None, + Err(err) => Some(Err(err)), + }) + } +} + +impl RecordBatchStream for NdJsonStream { + fn schema(&self) -> SchemaRef { + self.reader.schema() + } +} + +#[cfg(test)] +mod tests { + use futures::StreamExt; + + use crate::datasource::{ + file_format::{json::JsonFormat, FileFormat}, + object_store::local::{ + local_file_meta, local_object_reader_stream, LocalFileSystem, + }, + }; + + use super::*; + + const TEST_DATA_BASE: &str = "tests/jsons"; + + async fn infer_schema(path: String) -> Result { + JsonFormat::default() + .infer_schema(local_object_reader_stream(vec![path])) + .await + } + + #[tokio::test] + async fn nd_json_exec_file_without_projection() -> Result<()> { + use arrow::datatypes::DataType; + let path = format!("{}/1.json", TEST_DATA_BASE); + let exec = NdJsonExec::new( + Arc::new(LocalFileSystem {}), + vec![PartitionedFile { + file_meta: local_file_meta(path.clone()), + }], + Default::default(), + infer_schema(path).await?, + None, + 1024, + Some(3), + ); + + // TODO: this is not where schema inference should be tested + + let inferred_schema = exec.schema(); + assert_eq!(inferred_schema.fields().len(), 4); + + // a,b,c,d should be inferred + inferred_schema.field_with_name("a").unwrap(); + inferred_schema.field_with_name("b").unwrap(); + inferred_schema.field_with_name("c").unwrap(); + inferred_schema.field_with_name("d").unwrap(); + + assert_eq!( + inferred_schema.field_with_name("a").unwrap().data_type(), + &DataType::Int64 + ); + assert!(matches!( + inferred_schema.field_with_name("b").unwrap().data_type(), + DataType::List(_) + )); + assert_eq!( + inferred_schema.field_with_name("d").unwrap().data_type(), + &DataType::Utf8 + ); + + let mut it = exec.execute(0).await?; + let batch = it.next().await.unwrap()?; + + assert_eq!(batch.num_rows(), 3); + let values = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(values.value(0), 1); + assert_eq!(values.value(1), -10); + assert_eq!(values.value(2), 
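The `limit` is enforced by the stream rather than by the JSON reader: `NdJsonStream` keeps a remaining-row budget and slices the batch that crosses it. A self-contained sketch of that bookkeeping, written around a hypothetical helper rather than the stream itself:

```rust
use std::sync::Arc;

use arrow::array::Int64Array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::Result as ArrowResult;
use arrow::record_batch::RecordBatch;

/// Sketch of the limit bookkeeping used by `NdJsonStream`: pass whole batches
/// through while the budget allows, slice the batch that crosses the limit,
/// and return `None` once the budget is exhausted.
fn apply_limit(batch: RecordBatch, remain: &mut usize) -> ArrowResult<Option<RecordBatch>> {
    if *remain == 0 {
        return Ok(None);
    }
    if batch.num_rows() <= *remain {
        *remain -= batch.num_rows();
        Ok(Some(batch))
    } else {
        let len = *remain;
        *remain = 0;
        Ok(Some(RecordBatch::try_new(
            batch.schema(),
            batch.columns().iter().map(|c| c.slice(0, len)).collect(),
        )?))
    }
}

fn main() -> ArrowResult<()> {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, false)]));
    let batch = RecordBatch::try_new(
        schema,
        vec![Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5]))],
    )?;
    let mut remain = 3;
    let limited = apply_limit(batch, &mut remain)?.unwrap();
    assert_eq!(limited.num_rows(), 3);
    assert_eq!(remain, 0);
    Ok(())
}
```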
2); + + Ok(()) + } + + #[tokio::test] + async fn nd_json_exec_file_projection() -> Result<()> { + let path = format!("{}/1.json", TEST_DATA_BASE); + let exec = NdJsonExec::new( + Arc::new(LocalFileSystem {}), + vec![PartitionedFile { + file_meta: local_file_meta(path.clone()), + }], + Default::default(), + infer_schema(path).await?, + Some(vec![0, 2]), + 1024, + None, + ); + let inferred_schema = exec.schema(); + assert_eq!(inferred_schema.fields().len(), 2); + + inferred_schema.field_with_name("a").unwrap(); + inferred_schema.field_with_name("b").unwrap_err(); + inferred_schema.field_with_name("c").unwrap(); + inferred_schema.field_with_name("d").unwrap_err(); + + let mut it = exec.execute(0).await?; + let batch = it.next().await.unwrap()?; + + assert_eq!(batch.num_rows(), 4); + let values = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(values.value(0), 1); + assert_eq!(values.value(1), -10); + assert_eq!(values.value(2), 2); + Ok(()) + } +} diff --git a/datafusion/src/physical_plan/file_format/mod.rs b/datafusion/src/physical_plan/file_format/mod.rs new file mode 100644 index 000000000000..aa9359c30da4 --- /dev/null +++ b/datafusion/src/physical_plan/file_format/mod.rs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Execution plans that read file formats + +mod avro; +mod csv; +mod json; +mod parquet; + +pub use self::parquet::ParquetExec; +pub use avro::AvroExec; +pub use csv::CsvExec; +pub use json::NdJsonExec; diff --git a/datafusion/src/physical_plan/parquet.rs b/datafusion/src/physical_plan/file_format/parquet.rs similarity index 85% rename from datafusion/src/physical_plan/parquet.rs rename to datafusion/src/physical_plan/file_format/parquet.rs index f4ac4c8fddaf..77eed01f893c 100644 --- a/datafusion/src/physical_plan/parquet.rs +++ b/datafusion/src/physical_plan/file_format/parquet.rs @@ -18,22 +18,24 @@ //! 
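With the new `physical_plan::file_format` module in place, the old per-format modules go away and imports move accordingly. A small before/after sketch of the import paths, assuming crate-internal use:

```rust
// Old paths, removed by this patch:
// use crate::physical_plan::csv::CsvExec;
// use crate::physical_plan::json::NdJsonExec;
// use crate::physical_plan::parquet::ParquetExec;

// New paths: every file-based scan operator is re-exported from `file_format`.
use crate::physical_plan::file_format::{AvroExec, CsvExec, NdJsonExec, ParquetExec};
```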
Execution plan for reading Parquet files use std::fmt; -use std::fs::File; use std::sync::Arc; use std::{any::Any, convert::TryInto}; +use crate::datasource::file_format::parquet::ChunkObjectReader; +use crate::datasource::object_store::ObjectStore; use crate::{ error::{DataFusionError, Result}, logical_plan::{Column, Expr}, physical_optimizer::pruning::{PruningPredicate, PruningStatistics}, physical_plan::{ + metrics::{self, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}, + stream::RecordBatchReceiverStream, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, + Statistics, }, scalar::ScalarValue, }; -use super::Statistics; - use arrow::{ array::ArrayRef, datatypes::{Schema, SchemaRef}, @@ -57,18 +59,16 @@ use tokio::{ use async_trait::async_trait; -use super::metrics::{self, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; -use super::stream::RecordBatchReceiverStream; -use crate::datasource::parquet::ParquetTableDescriptor; -use crate::datasource::{get_statistics_with_limit, FilePartition, PartitionedFile}; +use crate::datasource::{FilePartition, PartitionedFile}; /// Execution plan for scanning one or more Parquet partitions #[derive(Debug, Clone)] pub struct ParquetExec { + object_store: Arc, /// Parquet partitions to read - pub partitions: Vec, + partitions: Vec, /// Schema after projection is applied - pub schema: SchemaRef, + schema: SchemaRef, /// Projection for which columns to load projection: Vec, /// Batch size @@ -110,54 +110,29 @@ struct ParquetFileMetrics { } impl ParquetExec { - /// Create a new Parquet reader execution plan based on the specified Parquet filename or - /// directory containing Parquet files - pub fn try_from_path( - path: &str, - projection: Option>, - predicate: Option, - batch_size: usize, - target_partitions: usize, - limit: Option, - ) -> Result { - // build a list of filenames from the specified path, which could be a single file or - // a directory containing one or more parquet files - let table_desc = ParquetTableDescriptor::new(path)?; - Self::try_new( - Arc::new(table_desc), - projection, - predicate, - batch_size, - target_partitions, - limit, - ) - } - - /// Create a new Parquet reader execution plan with root descriptor, provided partitions and schema - pub fn try_new( - desc: Arc, + /// Create a new Parquet reader execution plan provided file list and schema. + /// Even if `limit` is set, ParquetExec rounds up the number of records to the next `batch_size`. 
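A sketch of the resulting construction path, written in the style of the updated test further below. The file name is a placeholder and the snippet assumes the crate-internal helpers the tests use; the point is that the caller now lists and groups the files, resolves the schema, and hands both to `ParquetExec::new` together with an `ObjectStore`, instead of passing a path:

```rust
use std::sync::Arc;

use crate::datasource::file_format::{parquet::ParquetFormat, FileFormat};
use crate::datasource::object_store::local::{
    local_file_meta, local_object_reader_stream, LocalFileSystem,
};
use crate::datasource::PartitionedFile;
use crate::error::Result;
use crate::physical_plan::file_format::ParquetExec;
use crate::physical_plan::Statistics;

/// Illustrative only: build a ParquetExec over a single local file.
async fn example_parquet_exec(filename: String) -> Result<ParquetExec> {
    // Schema inference now goes through the file format abstraction.
    let schema = ParquetFormat::default()
        .infer_schema(local_object_reader_stream(vec![filename.clone()]))
        .await?;
    Ok(ParquetExec::new(
        Arc::new(LocalFileSystem {}),
        // One inner vector per output partition; here a single group of one file.
        vec![vec![PartitionedFile {
            file_meta: local_file_meta(filename),
        }]],
        Statistics::default(),
        schema,
        None, // projection: read all columns
        None, // no pruning predicate
        8192, // batch size
        None, // no limit
    ))
}
```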
+ #[allow(clippy::too_many_arguments)] + pub fn new( + object_store: Arc, + files: Vec>, + statistics: Statistics, + schema: SchemaRef, projection: Option>, predicate: Option, batch_size: usize, - target_partitions: usize, limit: Option, - ) -> Result { + ) -> Self { debug!("Creating ParquetExec, desc: {:?}, projection {:?}, predicate: {:?}, limit: {:?}", - desc, projection, predicate, limit); + files, projection, predicate, limit); let metrics = ExecutionPlanMetricsSet::new(); - let (all_files, statistics) = get_statistics_with_limit(&desc.descriptor, limit); - let schema = desc.schema(); - - let mut partitions = Vec::with_capacity(target_partitions); - let chunked_files = split_files(&all_files, target_partitions); - for (index, group) in chunked_files.iter().enumerate() { - partitions.push(ParquetPartition::new( - Vec::from(*group), - index, - metrics.clone(), - )); - } + + let partitions = files + .into_iter() + .enumerate() + .map(|(i, f)| ParquetPartition::new(f, i, metrics.clone())) + .collect::>(); let metrics = ExecutionPlanMetricsSet::new(); let predicate_creation_errors = @@ -177,35 +152,32 @@ impl ParquetExec { } }); - Ok(Self::new( + let projection = match projection { + Some(p) => p, + None => (0..schema.fields().len()).collect(), + }; + + let (projected_schema, projected_statistics) = + Self::project(&projection, schema, statistics); + + Self { + object_store, partitions, - schema, + schema: projected_schema, projection, - statistics, metrics, predicate_builder, batch_size, + statistics: projected_statistics, limit, - )) + } } - /// Create a new Parquet reader execution plan with provided partitions and schema - #[allow(clippy::too_many_arguments)] - pub fn new( - partitions: Vec, + fn project( + projection: &[usize], schema: SchemaRef, - projection: Option>, statistics: Statistics, - metrics: ExecutionPlanMetricsSet, - predicate_builder: Option, - batch_size: usize, - limit: Option, - ) -> Self { - let projection = match projection { - Some(p) => p, - None => (0..schema.fields().len()).collect(), - }; - + ) -> (SchemaRef, Statistics) { let projected_schema = Schema::new( projection .iter() @@ -215,7 +187,7 @@ impl ParquetExec { let new_column_statistics = statistics.column_statistics.map(|stats| { let mut projected_stats = Vec::with_capacity(projection.len()); - for proj in &projection { + for proj in projection { projected_stats.push(stats[*proj].clone()); } projected_stats @@ -228,32 +200,29 @@ impl ParquetExec { is_exact: statistics.is_exact, }; - Self { - partitions, - schema: Arc::new(projected_schema), - projection, - metrics, - predicate_builder, - batch_size, - statistics, - limit, - } + (Arc::new(projected_schema), statistics) } - /// Parquet partitions to read - pub fn partitions(&self) -> &[ParquetPartition] { - &self.partitions + /// List of data files + pub fn partitions(&self) -> Vec<&[PartitionedFile]> { + self.partitions + .iter() + .map(|fp| fp.file_partition.files.as_slice()) + .collect() } - - /// Projection for which columns to load + /// Optional projection for which columns to load pub fn projection(&self) -> &[usize] { &self.projection } - /// Batch size pub fn batch_size(&self) -> usize { self.batch_size } + + /// Limit in nr. 
of rows + pub fn limit(&self) -> Option { + self.limit + } } impl ParquetPartition { @@ -341,9 +310,11 @@ impl ExecutionPlan for ParquetExec { let predicate_builder = self.predicate_builder.clone(); let batch_size = self.batch_size; let limit = self.limit; + let object_store = Arc::clone(&self.object_store); task::spawn_blocking(move || { if let Err(e) = read_partition( + object_store.as_ref(), partition_index, partition, metrics, @@ -520,6 +491,7 @@ fn build_row_group_predicate( #[allow(clippy::too_many_arguments)] fn read_partition( + object_store: &dyn ObjectStore, partition_index: usize, partition: ParquetPartition, metrics: ExecutionPlanMetricsSet, @@ -532,10 +504,15 @@ fn read_partition( let mut total_rows = 0; let all_files = partition.file_partition.files; 'outer: for partitioned_file in all_files { - let file_metrics = - ParquetFileMetrics::new(partition_index, &*partitioned_file.path, &metrics); - let file = File::open(partitioned_file.path.as_str())?; - let mut file_reader = SerializedFileReader::new(file)?; + let file_metrics = ParquetFileMetrics::new( + partition_index, + &*partitioned_file.file_meta.path(), + &metrics, + ); + let object_reader = + object_store.file_reader(partitioned_file.file_meta.sized_file.clone())?; + let mut file_reader = + SerializedFileReader::new(ChunkObjectReader(object_reader))?; if let Some(predicate_builder) = predicate_builder { let row_group_predicate = build_row_group_predicate( predicate_builder, @@ -582,19 +559,15 @@ fn read_partition( Ok(()) } -fn split_files( - partitioned_files: &[PartitionedFile], - n: usize, -) -> Vec<&[PartitionedFile]> { - let mut chunk_size = partitioned_files.len() / n; - if partitioned_files.len() % n > 0 { - chunk_size += 1; - } - partitioned_files.chunks(chunk_size).collect() -} - #[cfg(test)] mod tests { + use crate::datasource::{ + file_format::{parquet::ParquetFormat, FileFormat}, + object_store::local::{ + local_file_meta, local_object_reader_stream, LocalFileSystem, + }, + }; + use super::*; use arrow::datatypes::{DataType, Field}; use futures::StreamExt; @@ -604,54 +577,24 @@ mod tests { schema::types::SchemaDescPtr, }; - #[test] - fn test_split_files() { - let files = vec![ - PartitionedFile::from("a".to_string()), - PartitionedFile::from("b".to_string()), - PartitionedFile::from("c".to_string()), - PartitionedFile::from("d".to_string()), - PartitionedFile::from("e".to_string()), - ]; - - let chunks = split_files(&files, 1); - assert_eq!(1, chunks.len()); - assert_eq!(5, chunks[0].len()); - - let chunks = split_files(&files, 2); - assert_eq!(2, chunks.len()); - assert_eq!(3, chunks[0].len()); - assert_eq!(2, chunks[1].len()); - - let chunks = split_files(&files, 5); - assert_eq!(5, chunks.len()); - assert_eq!(1, chunks[0].len()); - assert_eq!(1, chunks[1].len()); - assert_eq!(1, chunks[2].len()); - assert_eq!(1, chunks[3].len()); - assert_eq!(1, chunks[4].len()); - - let chunks = split_files(&files, 123); - assert_eq!(5, chunks.len()); - assert_eq!(1, chunks[0].len()); - assert_eq!(1, chunks[1].len()); - assert_eq!(1, chunks[2].len()); - assert_eq!(1, chunks[3].len()); - assert_eq!(1, chunks[4].len()); - } - #[tokio::test] async fn test() -> Result<()> { let testdata = crate::test_util::parquet_test_data(); let filename = format!("{}/alltypes_plain.parquet", testdata); - let parquet_exec = ParquetExec::try_from_path( - &filename, + let parquet_exec = ParquetExec::new( + Arc::new(LocalFileSystem {}), + vec![vec![PartitionedFile { + file_meta: local_file_meta(filename.clone()), + }]], + Statistics::default(), 
+ ParquetFormat::default() + .infer_schema(local_object_reader_stream(vec![filename])) + .await?, Some(vec![0, 1, 2]), None, 1024, - 4, None, - )?; + ); assert_eq!(parquet_exec.output_partitioning().partition_count(), 1); let mut results = parquet_exec.execute(0).await?; diff --git a/datafusion/src/physical_plan/filter.rs b/datafusion/src/physical_plan/filter.rs index 8acfd1b92e6b..79b5ebc508f5 100644 --- a/datafusion/src/physical_plan/filter.rs +++ b/datafusion/src/physical_plan/filter.rs @@ -222,11 +222,12 @@ impl RecordBatchStream for FilterExecStream { mod tests { use super::*; - use crate::physical_plan::csv::{CsvExec, CsvReadOptions}; + use crate::datasource::object_store::local::LocalFileSystem; use crate::physical_plan::expressions::*; + use crate::physical_plan::file_format::CsvExec; use crate::physical_plan::ExecutionPlan; use crate::scalar::ScalarValue; - use crate::test; + use crate::test::{self, aggr_test_schema}; use crate::{logical_plan::Operator, physical_plan::collect}; use std::iter::Iterator; @@ -235,15 +236,20 @@ mod tests { let schema = test::aggr_test_schema(); let partitions = 4; - let path = test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; - - let csv = CsvExec::try_new( - &path, - CsvReadOptions::new().schema(&schema), + let (_, files) = + test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; + + let csv = CsvExec::new( + Arc::new(LocalFileSystem {}), + files, + Statistics::default(), + aggr_test_schema(), + true, + b',', None, 1024, None, - )?; + ); let predicate: Arc = binary( binary( diff --git a/datafusion/src/physical_plan/json.rs b/datafusion/src/physical_plan/json.rs deleted file mode 100644 index 675d88ec3bfa..000000000000 --- a/datafusion/src/physical_plan/json.rs +++ /dev/null @@ -1,507 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Execution plan for reading line-delimited JSON files -use async_trait::async_trait; -use futures::Stream; - -use super::DisplayFormatType; -use super::{ - common, source::Source, ExecutionPlan, Partitioning, RecordBatchStream, Statistics, -}; -use crate::error::{DataFusionError, Result}; -use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter}; -use arrow::{ - datatypes::{Schema, SchemaRef}, - error::Result as ArrowResult, - json, - record_batch::RecordBatch, -}; -use std::fs::File; -use std::{any::Any, io::Seek}; -use std::{ - io::{BufReader, Read}, - pin::Pin, - sync::{Arc, Mutex}, - task::{Context, Poll}, -}; - -/// Line-delimited JSON read options -#[derive(Clone)] -pub struct NdJsonReadOptions<'a> { - /// The data source schema. - pub schema: Option, - - /// Max number of rows to read from CSV files for schema inference if needed. Defaults to 1000. 
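For reference, the `CsvExec` construction used in the updated `filter` test above follows the same pattern: object store, pre-listed file groups, caller-supplied statistics and schema, and the CSV options (header flag, delimiter) passed inline. A sketch in isolation, using the crate-internal test helpers and a hypothetical wrapper function:

```rust
use std::sync::Arc;

use crate::datasource::object_store::local::LocalFileSystem;
use crate::error::Result;
use crate::physical_plan::file_format::CsvExec;
use crate::physical_plan::Statistics;
use crate::test::{self, aggr_test_schema};

/// Illustrative only: build a CsvExec over the partitioned test file.
fn example_csv_exec() -> Result<CsvExec> {
    let (_, files) = test::create_partitioned_csv("aggregate_test_100.csv", 4)?;
    Ok(CsvExec::new(
        Arc::new(LocalFileSystem {}), // object store to read from
        files,                        // pre-listed, pre-grouped files
        Statistics::default(),        // statistics supplied by the caller
        aggr_test_schema(),           // schema resolved ahead of time
        true,                         // has_header
        b',',                         // delimiter
        None,                         // projection: all columns
        1024,                         // batch size
        None,                         // limit
    ))
}
```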
- pub schema_infer_max_records: usize, - - /// File extension; only files with this extension are selected for data input. - /// Defaults to ".json". - pub file_extension: &'a str, -} - -impl<'a> Default for NdJsonReadOptions<'a> { - fn default() -> Self { - Self { - schema: None, - schema_infer_max_records: 1000, - file_extension: ".json", - } - } -} - -trait SeekRead: Read + Seek {} - -impl SeekRead for T {} -/// Execution plan for scanning NdJson data source -#[derive(Debug)] -pub struct NdJsonExec { - source: Source>, - schema: SchemaRef, - projection: Option>, - projected_schema: SchemaRef, - file_extension: String, - batch_size: usize, - limit: Option, -} - -impl NdJsonExec { - /// Create a new execution plan for reading from a path - pub fn try_new( - path: &str, - options: NdJsonReadOptions, - projection: Option>, - batch_size: usize, - limit: Option, - ) -> Result { - let file_extension = options.file_extension.to_string(); - - let filenames = common::build_file_list(path, &file_extension)?; - - if filenames.is_empty() { - return Err(DataFusionError::Execution(format!( - "No files found at {path} with file extension {file_extension}", - path = path, - file_extension = file_extension.as_str() - ))); - } - - let schema = match options.schema { - Some(s) => s, - None => Arc::new(NdJsonExec::try_infer_schema( - filenames.clone(), - Some(options.schema_infer_max_records), - )?), - }; - - let projected_schema = match &projection { - None => schema.clone(), - Some(p) => Arc::new(Schema::new( - p.iter().map(|i| schema.field(*i).clone()).collect(), - )), - }; - - Ok(Self { - source: Source::PartitionedFiles { - path: path.to_string(), - filenames, - }, - schema, - file_extension, - projection, - projected_schema, - batch_size, - limit, - }) - } - /// Create a new execution plan for reading from a reader - pub fn try_new_from_reader( - reader: impl Read + Seek + Send + Sync + 'static, - options: NdJsonReadOptions, - projection: Option>, - batch_size: usize, - limit: Option, - ) -> Result { - let schema = match options.schema { - Some(s) => s, - None => { - return Err(DataFusionError::Execution( - "The schema must be provided in options when reading from a reader" - .to_string(), - )); - } - }; - - let projected_schema = match &projection { - None => schema.clone(), - Some(p) => Arc::new(Schema::new( - p.iter().map(|i| schema.field(*i).clone()).collect(), - )), - }; - - Ok(Self { - source: Source::Reader(Mutex::new(Some(Box::new(reader)))), - schema, - file_extension: String::new(), - projection, - projected_schema, - batch_size, - limit, - }) - } - - /// Path to directory containing partitioned CSV files with the same schema - pub fn path(&self) -> &str { - self.source.path() - } - - /// The individual files under path - pub fn filenames(&self) -> &[String] { - self.source.filenames() - } - - /// File extension - pub fn file_extension(&self) -> &str { - &self.file_extension - } - - /// Get the schema of the CSV file - pub fn file_schema(&self) -> SchemaRef { - self.schema.clone() - } - - /// Optional projection for which columns to load - pub fn projection(&self) -> Option<&Vec> { - self.projection.as_ref() - } - - /// Batch size - pub fn batch_size(&self) -> usize { - self.batch_size - } - - /// Limit - pub fn limit(&self) -> Option { - self.limit - } - - /// Infer schema for given CSV dataset - pub fn try_infer_schema( - mut filenames: Vec, - max_records: Option, - ) -> Result { - let mut schemas = Vec::new(); - let mut records_to_read = max_records.unwrap_or(usize::MAX); - while 
records_to_read > 0 && !filenames.is_empty() { - let file = File::open(filenames.pop().unwrap())?; - let mut reader = BufReader::new(file); - let iter = ValueIter::new(&mut reader, None); - let schema = infer_json_schema_from_iterator(iter.take_while(|_| { - let should_take = records_to_read > 0; - records_to_read -= 1; - should_take - }))?; - schemas.push(schema); - } - - Ok(Schema::try_merge(schemas)?) - } -} - -#[async_trait] -impl ExecutionPlan for NdJsonExec { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - self.projected_schema.clone() - } - - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(match &self.source { - Source::PartitionedFiles { filenames, .. } => filenames.len(), - Source::Reader(_) => 1, - }) - } - - fn children(&self) -> Vec> { - Vec::new() - } - - fn with_new_children( - &self, - children: Vec>, - ) -> Result> { - if !children.is_empty() { - Err(DataFusionError::Internal(format!( - "Children cannot be replaced in {:?}", - self - ))) - } else if let Source::PartitionedFiles { filenames, path } = &self.source { - Ok(Arc::new(Self { - source: Source::PartitionedFiles { - filenames: filenames.clone(), - path: path.clone(), - }, - schema: self.schema.clone(), - projection: self.projection.clone(), - projected_schema: self.projected_schema.clone(), - batch_size: self.batch_size, - limit: self.limit, - file_extension: self.file_extension.clone(), - })) - } else { - Err(DataFusionError::Internal( - "NdJsonExec with reader source cannot be used with `with_new_children`" - .to_string(), - )) - } - } - - async fn execute( - &self, - partition: usize, - ) -> Result { - let mut builder = json::ReaderBuilder::new() - .with_schema(self.schema.clone()) - .with_batch_size(self.batch_size); - if let Some(proj) = &self.projection { - builder = builder.with_projection( - proj.iter() - .map(|col_idx| self.schema.field(*col_idx).name()) - .cloned() - .collect(), - ); - } - match &self.source { - Source::PartitionedFiles { filenames, .. 
} => { - let file = File::open(&filenames[partition])?; - - Ok(Box::pin(NdJsonStream::new( - builder.build(file)?, - self.limit, - ))) - } - Source::Reader(rdr) => { - if partition != 0 { - Err(DataFusionError::Internal( - "Only partition 0 is valid when CSV comes from a reader" - .to_string(), - )) - } else if let Some(rdr) = rdr.lock().unwrap().take() { - Ok(Box::pin(NdJsonStream::new(builder.build(rdr)?, self.limit))) - } else { - Err(DataFusionError::Execution( - "Error reading CSV: Data can only be read a single time when the source is a reader" - .to_string(), - )) - } - } - } - } - - fn fmt_as( - &self, - t: DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - match t { - DisplayFormatType::Default => { - write!(f, "NdJsonExec: source={:?}", self.source) - } - } - } - - fn statistics(&self) -> Statistics { - // TODO stats: handle statistics - Statistics::default() - } -} - -struct NdJsonStream { - reader: json::Reader, - remain: Option, -} - -impl NdJsonStream { - fn new(reader: json::Reader, limit: Option) -> Self { - Self { - reader, - remain: limit, - } - } -} - -impl Stream for NdJsonStream { - type Item = ArrowResult; - - fn poll_next( - mut self: Pin<&mut Self>, - _cx: &mut Context<'_>, - ) -> Poll> { - if let Some(remain) = self.remain.as_mut() { - if *remain < 1 { - return Poll::Ready(None); - } - } - - Poll::Ready(match self.reader.next() { - Ok(Some(item)) => { - if let Some(remain) = self.remain.as_mut() { - if *remain >= item.num_rows() { - *remain -= item.num_rows(); - Some(Ok(item)) - } else { - let len = *remain; - *remain = 0; - Some(Ok(RecordBatch::try_new( - item.schema(), - item.columns() - .iter() - .map(|column| column.slice(0, len)) - .collect(), - )?)) - } - } else { - Some(Ok(item)) - } - } - Ok(None) => None, - Err(err) => Some(Err(err)), - }) - } -} - -impl RecordBatchStream for NdJsonStream { - fn schema(&self) -> SchemaRef { - self.reader.schema() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use futures::StreamExt; - - const TEST_DATA_BASE: &str = "tests/jsons"; - - #[tokio::test] - async fn nd_json_exec_file_without_projection() -> Result<()> { - use arrow::datatypes::DataType; - let path = format!("{}/1.json", TEST_DATA_BASE); - let exec = NdJsonExec::try_new(&path, Default::default(), None, 1024, Some(3))?; - let inferred_schema = exec.schema(); - assert_eq!(inferred_schema.fields().len(), 4); - - // a,b,c,d should be inferred - inferred_schema.field_with_name("a").unwrap(); - inferred_schema.field_with_name("b").unwrap(); - inferred_schema.field_with_name("c").unwrap(); - inferred_schema.field_with_name("d").unwrap(); - - assert_eq!( - inferred_schema.field_with_name("a").unwrap().data_type(), - &DataType::Int64 - ); - assert!(matches!( - inferred_schema.field_with_name("b").unwrap().data_type(), - DataType::List(_) - )); - assert_eq!( - inferred_schema.field_with_name("d").unwrap().data_type(), - &DataType::Utf8 - ); - - let mut it = exec.execute(0).await?; - let batch = it.next().await.unwrap()?; - - assert_eq!(batch.num_rows(), 3); - let values = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(values.value(0), 1); - assert_eq!(values.value(1), -10); - assert_eq!(values.value(2), 2); - - Ok(()) - } - - #[tokio::test] - async fn nd_json_exec_file_projection() -> Result<()> { - let path = format!("{}/1.json", TEST_DATA_BASE); - let exec = - NdJsonExec::try_new(&path, Default::default(), Some(vec![0, 2]), 1024, None)?; - let inferred_schema = exec.schema(); - 
assert_eq!(inferred_schema.fields().len(), 2); - - inferred_schema.field_with_name("a").unwrap(); - inferred_schema.field_with_name("b").unwrap_err(); - inferred_schema.field_with_name("c").unwrap(); - inferred_schema.field_with_name("d").unwrap_err(); - - let mut it = exec.execute(0).await?; - let batch = it.next().await.unwrap()?; - - assert_eq!(batch.num_rows(), 4); - let values = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(values.value(0), 1); - assert_eq!(values.value(1), -10); - assert_eq!(values.value(2), 2); - Ok(()) - } - - #[tokio::test] - async fn nd_json_exec_from_reader() -> Result<()> { - let content = r#"{"a":"aaa", "b":[2.0, 1.3, -6.1], "c":[false, true], "d":"4"} -{"a":"bbb", "b":[2.0, 1.3, -6.1], "c":[true, true], "d":"4"}"#; - let cur = std::io::Cursor::new(content); - let mut bufrdr = std::io::BufReader::new(cur); - let schema = - arrow::json::reader::infer_json_schema_from_seekable(&mut bufrdr, None)?; - let exec = NdJsonExec::try_new_from_reader( - bufrdr, - NdJsonReadOptions { - schema: Some(Arc::new(schema)), - ..Default::default() - }, - None, - 1024, - Some(1), - )?; - - let mut it = exec.execute(0).await?; - let batch = it.next().await.unwrap()?; - - assert_eq!(batch.num_rows(), 1); - - let values = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(values.value(0), "aaa"); - - Ok(()) - } -} diff --git a/datafusion/src/physical_plan/limit.rs b/datafusion/src/physical_plan/limit.rs index ccd719f32468..bd48e4d2e5d4 100644 --- a/datafusion/src/physical_plan/limit.rs +++ b/datafusion/src/physical_plan/limit.rs @@ -384,9 +384,10 @@ mod tests { use common::collect; use super::*; + use crate::datasource::object_store::local::LocalFileSystem; use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; use crate::physical_plan::common; - use crate::physical_plan::csv::{CsvExec, CsvReadOptions}; + use crate::physical_plan::file_format::CsvExec; use crate::test; #[tokio::test] @@ -394,16 +395,20 @@ mod tests { let schema = test::aggr_test_schema(); let num_partitions = 4; - let path = + let (_, files) = test::create_partitioned_csv("aggregate_test_100.csv", num_partitions)?; - let csv = CsvExec::try_new( - &path, - CsvReadOptions::new().schema(&schema), + let csv = CsvExec::new( + Arc::new(LocalFileSystem {}), + files, + Statistics::default(), + schema, + true, + b',', None, 1024, None, - )?; + ); // input should have 4 partitions assert_eq!(csv.output_partitioning().partition_count(), num_partitions); diff --git a/datafusion/src/physical_plan/mod.rs b/datafusion/src/physical_plan/mod.rs index fef2af58b99d..3accaadce607 100644 --- a/datafusion/src/physical_plan/mod.rs +++ b/datafusion/src/physical_plan/mod.rs @@ -201,7 +201,7 @@ pub trait ExecutionPlan: Debug + Send + Sync { /// let mut ctx = ExecutionContext::with_config(config); /// /// // register the a table -/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).unwrap(); +/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await.unwrap(); /// /// // create a plan to run a SQL query /// let plan = ctx @@ -218,7 +218,7 @@ pub trait ExecutionPlan: Debug + Send + Sync { /// \n CoalesceBatchesExec: target_batch_size=4096\ /// \n FilterExec: a@0 < 5\ /// \n RepartitionExec: partitioning=RoundRobinBatch(3)\ -/// \n CsvExec: source=Path(tests/example.csv: [tests/example.csv]), has_header=true", +/// \n CsvExec: files=[tests/example.csv], has_header=true, batch_size=8192, limit=None", /// plan_string.trim()); /// 
} /// ``` @@ -606,20 +606,19 @@ pub trait Accumulator: Send + Sync + Debug { pub mod aggregates; pub mod analyze; pub mod array_expressions; -pub mod avro; pub mod coalesce_batches; pub mod coalesce_partitions; pub mod common; pub mod cross_join; #[cfg(feature = "crypto_expressions")] pub mod crypto_expressions; -pub mod csv; pub mod datetime_expressions; pub mod display; pub mod distinct_expressions; pub mod empty; pub mod explain; pub mod expressions; +pub mod file_format; pub mod filter; pub mod functions; pub mod hash_aggregate; @@ -627,12 +626,10 @@ pub mod hash_join; pub mod hash_utils; pub(crate) mod hyperloglog; pub mod join_utils; -pub mod json; pub mod limit; pub mod math_expressions; pub mod memory; pub mod metrics; -pub mod parquet; pub mod planner; pub mod projection; #[cfg(feature = "regex_expressions")] @@ -640,7 +637,6 @@ pub mod regex_expressions; pub mod repartition; pub mod sort; pub mod sort_preserving_merge; -pub mod source; pub mod stream; pub mod string_expressions; pub mod type_coercion; diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs index 06f3a1ddd961..be8c588bfda5 100644 --- a/datafusion/src/physical_plan/planner.rs +++ b/datafusion/src/physical_plan/planner.rs @@ -1397,9 +1397,11 @@ fn tuple_err(value: (Result, Result)) -> Result<(T, R)> { #[cfg(test)] mod tests { use super::*; + use crate::datasource::object_store::local::LocalFileSystem; + use crate::execution::options::CsvReadOptions; use crate::logical_plan::{DFField, DFSchema, DFSchemaRef}; use crate::physical_plan::{ - csv::CsvReadOptions, expressions, DisplayFormatType, Partitioning, Statistics, + expressions, DisplayFormatType, Partitioning, Statistics, }; use crate::scalar::ScalarValue; use crate::{ @@ -1429,14 +1431,21 @@ mod tests { let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); - let logical_plan = LogicalPlanBuilder::scan_csv(path, options, None)? - // filter clause needs the type coercion rule applied - .filter(col("c7").lt(lit(5_u8)))? - .project(vec![col("c1"), col("c2")])? - .aggregate(vec![col("c1")], vec![sum(col("c2"))])? - .sort(vec![col("c1").sort(true, true)])? - .limit(10)? - .build()?; + let logical_plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), + path, + options, + None, + 1, + ) + .await? + // filter clause needs the type coercion rule applied + .filter(col("c7").lt(lit(5_u8)))? + .project(vec![col("c1"), col("c2")])? + .aggregate(vec![col("c1")], vec![sum(col("c2"))])? + .sort(vec![col("c1").sort(true, true)])? + .limit(10)? + .build()?; let plan = plan(&logical_plan).await?; @@ -1474,9 +1483,16 @@ mod tests { let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); - let logical_plan = LogicalPlanBuilder::scan_csv(path, options, None)? - .filter(col("c7").lt(col("c12")))? - .build()?; + let logical_plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), + path, + options, + None, + 1, + ) + .await? + .filter(col("c7").lt(col("c12")))? + .build()?; let plan = plan(&logical_plan).await?; @@ -1511,8 +1527,15 @@ mod tests { col("c1").like(col("c2")), ]; for case in cases { - let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)? - .project(vec![case.clone()]); + let logical_plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), + &path, + options, + None, + 1, + ) + .await? 
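Because file listing and schema inference now go through the object store, `LogicalPlanBuilder::scan_csv` becomes async and takes the store plus a target partition count, as the planner tests above show. A sketch of the new call shape; the path, options, and column name are placeholders mirroring those tests:

```rust
use std::sync::Arc;

use crate::datasource::object_store::local::LocalFileSystem;
use crate::error::Result;
use crate::execution::options::CsvReadOptions;
use crate::logical_plan::{col, lit, LogicalPlan, LogicalPlanBuilder};

/// Illustrative only: scan a CSV path and apply a filter.
async fn scan_example(path: &str) -> Result<LogicalPlan> {
    LogicalPlanBuilder::scan_csv(
        Arc::new(LocalFileSystem {}),
        path,
        CsvReadOptions::new().schema_infer_max_records(100),
        None, // projection
        1,    // target partitions
    )
    .await?
    .filter(col("c7").lt(lit(5_u8)))?
    .build()
}
```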
+ .project(vec![case.clone()]); let message = format!( "Expression {:?} expected to error due to impossible coercion", case @@ -1604,11 +1627,18 @@ mod tests { Expr::Literal(ScalarValue::Utf8(Some("a".to_string()))), Expr::Literal(ScalarValue::Int64(Some(1))), ]; - let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)? - // filter clause needs the type coercion rule applied - .filter(col("c12").lt(lit(0.05)))? - .project(vec![col("c1").in_list(list, false)])? - .build()?; + let logical_plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), + &path, + options, + None, + 1, + ) + .await? + // filter clause needs the type coercion rule applied + .filter(col("c12").lt(lit(0.05)))? + .project(vec![col("c1").in_list(list, false)])? + .build()?; let execution_plan = plan(&logical_plan).await?; // verify that the plan correctly adds cast from Int64(1) to Utf8 let expected = "InListExpr { expr: Column { name: \"c1\", index: 0 }, list: [Literal { value: Utf8(\"a\") }, CastExpr { expr: Literal { value: Int64(1) }, cast_type: Utf8, cast_options: CastOptions { safe: false } }], negated: false }"; @@ -1619,11 +1649,18 @@ mod tests { Expr::Literal(ScalarValue::Boolean(Some(true))), Expr::Literal(ScalarValue::Utf8(Some("a".to_string()))), ]; - let logical_plan = LogicalPlanBuilder::scan_csv(path, options, None)? - // filter clause needs the type coercion rule applied - .filter(col("c12").lt(lit(0.05)))? - .project(vec![col("c12").lt_eq(lit(0.025)).in_list(list, false)])? - .build()?; + let logical_plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), + &path, + options, + None, + 1, + ) + .await? + // filter clause needs the type coercion rule applied + .filter(col("c12").lt(lit(0.05)))? + .project(vec![col("c12").lt_eq(lit(0.025)).in_list(list, false)])? + .build()?; let execution_plan = plan(&logical_plan).await; let expected_error = "Unsupported CAST from Utf8 to Boolean"; @@ -1647,11 +1684,14 @@ mod tests { let options = CsvReadOptions::new().schema_infer_max_records(100); let logical_plan = LogicalPlanBuilder::scan_csv_with_name( - path, + Arc::new(LocalFileSystem {}), + &path, options, None, "aggregate_test_100", - )? + 1, + ) + .await? .aggregate(vec![col("c1")], vec![sum(col("c2"))])? .build()?; @@ -1677,9 +1717,16 @@ mod tests { let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); - let logical_plan = LogicalPlanBuilder::scan_csv(path, options, None)? - .aggregate(vec![col("c1")], vec![sum(col("c2"))])? - .build()?; + let logical_plan = LogicalPlanBuilder::scan_csv( + Arc::new(LocalFileSystem {}), + &path, + options, + None, + 1, + ) + .await? + .aggregate(vec![col("c1")], vec![sum(col("c2"))])? 
+ .build()?; let execution_plan = plan(&logical_plan).await?; let formatted = format!("{:?}", execution_plan); diff --git a/datafusion/src/physical_plan/projection.rs b/datafusion/src/physical_plan/projection.rs index f24726123f9d..794d9a2ec68e 100644 --- a/datafusion/src/physical_plan/projection.rs +++ b/datafusion/src/physical_plan/projection.rs @@ -259,10 +259,11 @@ impl RecordBatchStream for ProjectionStream { mod tests { use super::*; - use crate::physical_plan::csv::{CsvExec, CsvReadOptions}; + use crate::datasource::object_store::local::LocalFileSystem; use crate::physical_plan::expressions::{self, col}; + use crate::physical_plan::file_format::CsvExec; use crate::scalar::ScalarValue; - use crate::test; + use crate::test::{self, aggr_test_schema}; use futures::future; #[tokio::test] @@ -270,15 +271,20 @@ mod tests { let schema = test::aggr_test_schema(); let partitions = 4; - let path = test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; - - let csv = CsvExec::try_new( - &path, - CsvReadOptions::new().schema(&schema), + let (_, files) = + test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; + + let csv = CsvExec::new( + Arc::new(LocalFileSystem {}), + files, + Statistics::default(), + aggr_test_schema(), + true, + b',', None, 1024, None, - )?; + ); // pick column c1 and name it column c1 in the output schema let projection = ProjectionExec::try_new( diff --git a/datafusion/src/physical_plan/sort.rs b/datafusion/src/physical_plan/sort.rs index 303255651194..68a42585b6c7 100644 --- a/datafusion/src/physical_plan/sort.rs +++ b/datafusion/src/physical_plan/sort.rs @@ -318,15 +318,12 @@ mod tests { use std::sync::Weak; use super::*; + use crate::datasource::object_store::local::LocalFileSystem; use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; use crate::physical_plan::expressions::col; use crate::physical_plan::memory::MemoryExec; - use crate::physical_plan::{ - collect, - csv::{CsvExec, CsvReadOptions}, - }; - use crate::test; - use crate::test::exec::BlockingExec; + use crate::physical_plan::{collect, file_format::CsvExec}; + use crate::test::{self, aggr_test_schema, exec::BlockingExec}; use arrow::array::*; use arrow::datatypes::*; use futures::FutureExt; @@ -335,14 +332,20 @@ mod tests { async fn test_sort() -> Result<()> { let schema = test::aggr_test_schema(); let partitions = 4; - let path = test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; - let csv = CsvExec::try_new( - &path, - CsvReadOptions::new().schema(&schema), + let (_, files) = + test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; + + let csv = CsvExec::new( + Arc::new(LocalFileSystem {}), + files, + Statistics::default(), + aggr_test_schema(), + true, + b',', None, 1024, None, - )?; + ); let sort_exec = Arc::new(SortExec::try_new( vec![ diff --git a/datafusion/src/physical_plan/sort_preserving_merge.rs b/datafusion/src/physical_plan/sort_preserving_merge.rs index f63695057a7d..f65faccc3be9 100644 --- a/datafusion/src/physical_plan/sort_preserving_merge.rs +++ b/datafusion/src/physical_plan/sort_preserving_merge.rs @@ -642,15 +642,15 @@ impl RecordBatchStream for SortPreservingMergeStream { #[cfg(test)] mod tests { + use crate::datasource::object_store::local::LocalFileSystem; use crate::physical_plan::metrics::MetricValue; use std::iter::FromIterator; use crate::arrow::array::{Int32Array, StringArray, TimestampNanosecondArray}; use crate::assert_batches_eq; - use crate::datasource::CsvReadOptions; use 
crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; - use crate::physical_plan::csv::CsvExec; use crate::physical_plan::expressions::col; + use crate::physical_plan::file_format::CsvExec; use crate::physical_plan::memory::MemoryExec; use crate::physical_plan::sort::SortExec; use crate::physical_plan::{collect, common}; @@ -914,18 +914,20 @@ mod tests { async fn test_partition_sort() { let schema = test::aggr_test_schema(); let partitions = 4; - let path = + let (_, files) = test::create_partitioned_csv("aggregate_test_100.csv", partitions).unwrap(); - let csv = Arc::new( - CsvExec::try_new( - &path, - CsvReadOptions::new().schema(&schema), - None, - 1024, - None, - ) - .unwrap(), - ); + + let csv = Arc::new(CsvExec::new( + Arc::new(LocalFileSystem {}), + files, + Statistics::default(), + Arc::clone(&schema), + true, + b',', + None, + 1024, + None, + )); let sort = vec![ PhysicalSortExpr { @@ -984,18 +986,20 @@ mod tests { ) -> Arc { let schema = test::aggr_test_schema(); let partitions = 4; - let path = + let (_, files) = test::create_partitioned_csv("aggregate_test_100.csv", partitions).unwrap(); - let csv = Arc::new( - CsvExec::try_new( - &path, - CsvReadOptions::new().schema(&schema), - None, - 1024, - None, - ) - .unwrap(), - ); + + let csv = Arc::new(CsvExec::new( + Arc::new(LocalFileSystem {}), + files, + Statistics::default(), + schema, + true, + b',', + None, + 1024, + None, + )); let sorted = basic_sort(csv, sort).await; let split: Vec<_> = sizes.iter().map(|x| split_batch(&sorted, *x)).collect(); diff --git a/datafusion/src/physical_plan/source.rs b/datafusion/src/physical_plan/source.rs deleted file mode 100644 index 32fa9c37c8a2..000000000000 --- a/datafusion/src/physical_plan/source.rs +++ /dev/null @@ -1,90 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Contains a `Source` enum represents where the data comes from. - -use std::{io::Read, sync::Mutex}; - -/// Source represents where the data comes from. 
-pub(crate) enum Source> { - /// The data comes from partitioned files - PartitionedFiles { - /// Path to directory containing partitioned files with the same schema - path: String, - /// The individual files under path - filenames: Vec, - }, - - /// The data comes from anything impl Read trait - Reader(Mutex>), -} - -impl std::fmt::Debug for Source { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Source::PartitionedFiles { path, filenames } => f - .debug_struct("PartitionedFiles") - .field("path", path) - .field("filenames", filenames) - .finish()?, - Source::Reader(_) => f.write_str("Reader")?, - }; - Ok(()) - } -} -impl std::fmt::Display for Source { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Source::PartitionedFiles { path, filenames } => { - write!(f, "Path({}: [{}])", path, filenames.join(",")) - } - Source::Reader(_) => { - write!(f, "Reader(...)") - } - } - } -} - -impl Clone for Source { - fn clone(&self) -> Self { - match self { - Source::PartitionedFiles { path, filenames } => Self::PartitionedFiles { - path: path.clone(), - filenames: filenames.clone(), - }, - Source::Reader(_) => Self::Reader(Mutex::new(None)), - } - } -} - -impl Source { - /// Path to directory containing partitioned files with the same schema - pub fn path(&self) -> &str { - match self { - Source::PartitionedFiles { path, .. } => path.as_str(), - Source::Reader(_) => "", - } - } - - /// The individual files under path - pub fn filenames(&self) -> &[String] { - match self { - Source::PartitionedFiles { filenames, .. } => filenames, - Source::Reader(_) => &[], - } - } -} diff --git a/datafusion/src/physical_plan/union.rs b/datafusion/src/physical_plan/union.rs index a2f5952b8090..43e23850b19e 100644 --- a/datafusion/src/physical_plan/union.rs +++ b/datafusion/src/physical_plan/union.rs @@ -218,12 +218,10 @@ fn stats_union(mut left: Statistics, right: Statistics) -> Statistics { #[cfg(test)] mod tests { use super::*; + use crate::datasource::object_store::{local::LocalFileSystem, ObjectStore}; use crate::test; use crate::{ - physical_plan::{ - collect, - csv::{CsvExec, CsvReadOptions}, - }, + physical_plan::{collect, file_format::CsvExec}, scalar::ScalarValue, }; use arrow::record_batch::RecordBatch; @@ -231,26 +229,35 @@ mod tests { #[tokio::test] async fn test_union_partitions() -> Result<()> { let schema = test::aggr_test_schema(); + let fs: Arc = Arc::new(LocalFileSystem {}); // Create csv's with different partitioning - let path = test::create_partitioned_csv("aggregate_test_100.csv", 4)?; - let path2 = test::create_partitioned_csv("aggregate_test_100.csv", 5)?; - - let csv = CsvExec::try_new( - &path, - CsvReadOptions::new().schema(&schema), + let (_, files) = test::create_partitioned_csv("aggregate_test_100.csv", 4)?; + let (_, files2) = test::create_partitioned_csv("aggregate_test_100.csv", 5)?; + + let csv = CsvExec::new( + Arc::clone(&fs), + files, + Statistics::default(), + Arc::clone(&schema), + true, + b',', None, 1024, None, - )?; - - let csv2 = CsvExec::try_new( - &path2, - CsvReadOptions::new().schema(&schema), + ); + + let csv2 = CsvExec::new( + Arc::clone(&fs), + files2, + Statistics::default(), + schema, + true, + b',', None, 1024, None, - )?; + ); let union_exec = Arc::new(UnionExec::new(vec![Arc::new(csv), Arc::new(csv2)])); diff --git a/datafusion/src/physical_plan/windows/mod.rs b/datafusion/src/physical_plan/windows/mod.rs index 0f6d9105fae2..3aa67cf28ba4 100644 --- 
a/datafusion/src/physical_plan/windows/mod.rs +++ b/datafusion/src/physical_plan/windows/mod.rs @@ -175,25 +175,31 @@ pub(crate) fn find_ranges_in_range<'a>( #[cfg(test)] mod tests { use super::*; + use crate::datasource::object_store::local::LocalFileSystem; use crate::physical_plan::aggregates::AggregateFunction; - use crate::physical_plan::collect; - use crate::physical_plan::csv::{CsvExec, CsvReadOptions}; use crate::physical_plan::expressions::col; - use crate::test; + use crate::physical_plan::file_format::CsvExec; + use crate::physical_plan::{collect, Statistics}; + use crate::test::{self, aggr_test_schema}; use arrow::array::*; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; fn create_test_schema(partitions: usize) -> Result<(Arc, SchemaRef)> { let schema = test::aggr_test_schema(); - let path = test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; - let csv = CsvExec::try_new( - &path, - CsvReadOptions::new().schema(&schema), + let (_, files) = + test::create_partitioned_csv("aggregate_test_100.csv", partitions)?; + let csv = CsvExec::new( + Arc::new(LocalFileSystem {}), + files, + Statistics::default(), + aggr_test_schema(), + true, + b',', None, 1024, None, - )?; + ); let input = Arc::new(csv); Ok((input, schema)) diff --git a/datafusion/src/prelude.rs b/datafusion/src/prelude.rs index 02b9d4f3419e..8e47ed60ea2b 100644 --- a/datafusion/src/prelude.rs +++ b/datafusion/src/prelude.rs @@ -27,6 +27,8 @@ pub use crate::dataframe::DataFrame; pub use crate::execution::context::{ExecutionConfig, ExecutionContext}; +pub use crate::execution::options::AvroReadOptions; +pub use crate::execution::options::{CsvReadOptions, NdJsonReadOptions}; pub use crate::logical_plan::{ array, ascii, avg, bit_length, btrim, character_length, chr, col, concat, concat_ws, count, create_udf, date_part, date_trunc, digest, in_list, initcap, left, length, @@ -35,4 +37,3 @@ pub use crate::logical_plan::{ split_part, starts_with, strpos, substr, sum, to_hex, translate, trim, upper, Column, JoinType, Partitioning, }; -pub use crate::physical_plan::csv::CsvReadOptions; diff --git a/datafusion/src/test/mod.rs b/datafusion/src/test/mod.rs index e9a33745eeeb..917e7b1b3a50 100644 --- a/datafusion/src/test/mod.rs +++ b/datafusion/src/test/mod.rs @@ -17,7 +17,8 @@ //! 
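Since the read options now live in `execution::options` and are re-exported from the prelude, downstream code that only imports the prelude keeps compiling. A small sketch; the builder methods shown are assumptions based on the pre-existing options API:

```rust
use datafusion::prelude::*;

// Illustrative only: the three read-options types reachable via the prelude.
fn _read_options() {
    let _csv = CsvReadOptions::new().has_header(true).delimiter(b'|');
    let _json = NdJsonReadOptions::default();
    let _avro = AvroReadOptions::default();
}
```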
Common unit test utility methods -use crate::datasource::{MemTable, TableProvider}; +use crate::datasource::object_store::local::local_file_meta; +use crate::datasource::{MemTable, PartitionedFile, TableProvider}; use crate::error::Result; use crate::logical_plan::{LogicalPlan, LogicalPlanBuilder}; use array::{ @@ -51,19 +52,24 @@ pub fn create_table_dual() -> Arc { } /// Generated partitioned copy of a CSV file -pub fn create_partitioned_csv(filename: &str, partitions: usize) -> Result { +pub fn create_partitioned_csv( + filename: &str, + partitions: usize, +) -> Result<(String, Vec)> { let testdata = crate::test_util::arrow_test_data(); let path = format!("{}/csv/{}", testdata, filename); let tmp_dir = TempDir::new()?; let mut writers = vec![]; + let mut files = vec![]; for i in 0..partitions { let filename = format!("partition-{}.csv", i); let filename = tmp_dir.path().join(&filename); let writer = BufWriter::new(File::create(&filename).unwrap()); writers.push(writer); + files.push(filename); } let f = File::open(&path)?; @@ -88,7 +94,15 @@ pub fn create_partitioned_csv(filename: &str, partitions: usize) -> Result TestOutput { println!("Planning sql {}", sql); - let logical_plan = self.ctx.sql(sql).expect("planning").to_logical_plan(); + let logical_plan = self.ctx.sql(sql).await.expect("planning").to_logical_plan(); self.run_test(logical_plan, sql).await } @@ -523,6 +523,7 @@ impl ContextWithParquet { let input = self .ctx .sql("SELECT * from t") + .await .expect("planning") .collect() .await diff --git a/datafusion/tests/provider_filter_pushdown.rs b/datafusion/tests/provider_filter_pushdown.rs index 653b96c39320..f1655c5267b3 100644 --- a/datafusion/tests/provider_filter_pushdown.rs +++ b/datafusion/tests/provider_filter_pushdown.rs @@ -173,7 +173,8 @@ async fn assert_provider_row_count(value: i64, expected_count: u64) -> Result<() ctx.register_table("data", Arc::new(provider))?; let sql_results = ctx - .sql(&format!("select count(*) from data where flag = {}", value))? + .sql(&format!("select count(*) from data where flag = {}", value)) + .await? 
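The test churn above all comes from the same API shift: registration and SQL planning now list files and infer schemas through the object store, so `register_csv`, `register_parquet`, and `sql` are async. A sketch of the resulting call pattern; the path and query are placeholders:

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

/// Illustrative only: register a CSV file and run a query with the async API.
async fn query_example() -> Result<()> {
    let mut ctx = ExecutionContext::new();
    ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())
        .await?;
    let df = ctx.sql("SELECT * FROM example LIMIT 10").await?;
    let _batches = df.collect().await?;
    Ok(())
}
```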
.collect() .await?; diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 6c85f35d6855..f52920575afb 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -35,17 +35,12 @@ use arrow::{ use datafusion::assert_batches_eq; use datafusion::assert_batches_sorted_eq; use datafusion::logical_plan::LogicalPlan; -#[cfg(feature = "avro")] -use datafusion::physical_plan::avro::AvroReadOptions; use datafusion::physical_plan::functions::Volatility; use datafusion::physical_plan::metrics::MetricValue; use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::ExecutionPlanVisitor; use datafusion::prelude::*; -use datafusion::{ - datasource::{csv::CsvReadOptions, MemTable}, - physical_plan::collect, -}; +use datafusion::{datasource::MemTable, physical_plan::collect}; use datafusion::{ error::{DataFusionError, Result}, physical_plan::ColumnarValue, @@ -124,7 +119,8 @@ async fn nyc() -> Result<()> { "tripdata", "file.csv", CsvReadOptions::new().schema(&schema), - )?; + ) + .await?; let logical_plan = ctx.create_logical_plan( "SELECT passenger_count, MIN(fare_amount), MAX(fare_amount) \ @@ -157,7 +153,7 @@ async fn nyc() -> Result<()> { #[tokio::test] async fn parquet_query() { let mut ctx = ExecutionContext::new(); - register_alltypes_parquet(&mut ctx); + register_alltypes_parquet(&mut ctx).await; // NOTE that string_col is actually a binary column and does not have the UTF8 logical type // so we need an explicit cast let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain"; @@ -185,6 +181,7 @@ async fn parquet_single_nan_schema() { let mut ctx = ExecutionContext::new(); let testdata = datafusion::test_util::parquet_test_data(); ctx.register_parquet("single_nan", &format!("{}/single_nan.parquet", testdata)) + .await .unwrap(); let sql = "SELECT mycol FROM single_nan"; let plan = ctx.create_logical_plan(sql).unwrap(); @@ -206,6 +203,7 @@ async fn parquet_list_columns() { "list_columns", &format!("{}/list_columns.parquet", testdata), ) + .await .unwrap(); let schema = Arc::new(Schema::new(vec![ @@ -299,7 +297,7 @@ async fn parquet_list_columns() { #[tokio::test] async fn csv_select_nested() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT o1, o2, c3 FROM ( SELECT c1 AS o1, c2 + 1 AS o2, c3 @@ -331,7 +329,7 @@ async fn csv_select_nested() -> Result<()> { #[tokio::test] async fn csv_count_star() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT COUNT(*), COUNT(1) AS c, COUNT(c1) FROM aggregate_test_100"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -348,7 +346,7 @@ async fn csv_count_star() -> Result<()> { #[tokio::test] async fn csv_query_with_predicate() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1, c12 FROM aggregate_test_100 WHERE c12 > 0.376 AND c12 < 0.4"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -366,7 +364,7 @@ async fn csv_query_with_predicate() -> Result<()> { #[tokio::test] async fn csv_query_with_negative_predicate() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1, c4 FROM aggregate_test_100 WHERE c3 < -55 AND -c4 > 30000"; let actual = 
execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -384,7 +382,7 @@ async fn csv_query_with_negative_predicate() -> Result<()> { #[tokio::test] async fn csv_query_with_negated_predicate() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT COUNT(1) FROM aggregate_test_100 WHERE NOT(c1 != 'a')"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -401,7 +399,7 @@ async fn csv_query_with_negated_predicate() -> Result<()> { #[tokio::test] async fn csv_query_with_is_not_null_predicate() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT COUNT(1) FROM aggregate_test_100 WHERE c1 IS NOT NULL"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -418,7 +416,7 @@ async fn csv_query_with_is_not_null_predicate() -> Result<()> { #[tokio::test] async fn csv_query_with_is_null_predicate() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT COUNT(1) FROM aggregate_test_100 WHERE c1 IS NULL"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -435,7 +433,7 @@ async fn csv_query_with_is_null_predicate() -> Result<()> { #[tokio::test] async fn csv_query_group_by_int_min_max() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c2, MIN(c12), MAX(c12) FROM aggregate_test_100 GROUP BY c2"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -456,7 +454,7 @@ async fn csv_query_group_by_int_min_max() -> Result<()> { #[tokio::test] async fn csv_query_group_by_float32() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_simple_csv(&mut ctx)?; + register_aggregate_simple_csv(&mut ctx).await?; let sql = "SELECT COUNT(*) as cnt, c1 FROM aggregate_simple GROUP BY c1 ORDER BY cnt DESC"; @@ -481,7 +479,7 @@ async fn csv_query_group_by_float32() -> Result<()> { #[tokio::test] async fn select_all() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_simple_csv(&mut ctx)?; + register_aggregate_simple_csv(&mut ctx).await?; let sql = "SELECT c1 FROM aggregate_simple order by c1"; let actual_no_all = execute(&mut ctx, sql).await; @@ -497,7 +495,7 @@ async fn select_all() -> Result<()> { #[tokio::test] async fn select_distinct() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_simple_csv(&mut ctx)?; + register_aggregate_simple_csv(&mut ctx).await?; let sql = "SELECT DISTINCT * FROM aggregate_simple"; let mut actual = execute(&mut ctx, sql).await; @@ -514,7 +512,7 @@ async fn select_distinct() -> Result<()> { #[tokio::test] async fn select_distinct_simple_1() { let mut ctx = ExecutionContext::new(); - register_aggregate_simple_csv(&mut ctx).unwrap(); + register_aggregate_simple_csv(&mut ctx).await.unwrap(); let sql = "SELECT DISTINCT c1 FROM aggregate_simple order by c1"; let actual = execute_to_batches(&mut ctx, sql).await; @@ -536,7 +534,7 @@ async fn select_distinct_simple_1() { #[tokio::test] async fn select_distinct_simple_2() { let mut ctx = ExecutionContext::new(); - register_aggregate_simple_csv(&mut ctx).unwrap(); + register_aggregate_simple_csv(&mut ctx).await.unwrap(); let sql = "SELECT DISTINCT c1, c2 FROM aggregate_simple 
order by c1"; let actual = execute_to_batches(&mut ctx, sql).await; @@ -558,7 +556,7 @@ async fn select_distinct_simple_2() { #[tokio::test] async fn select_distinct_simple_3() { let mut ctx = ExecutionContext::new(); - register_aggregate_simple_csv(&mut ctx).unwrap(); + register_aggregate_simple_csv(&mut ctx).await.unwrap(); let sql = "SELECT distinct c3 FROM aggregate_simple order by c3"; let actual = execute_to_batches(&mut ctx, sql).await; @@ -577,7 +575,7 @@ async fn select_distinct_simple_3() { #[tokio::test] async fn select_distinct_simple_4() { let mut ctx = ExecutionContext::new(); - register_aggregate_simple_csv(&mut ctx).unwrap(); + register_aggregate_simple_csv(&mut ctx).await.unwrap(); let sql = "SELECT distinct c1+c2 as a FROM aggregate_simple"; let actual = execute_to_batches(&mut ctx, sql).await; @@ -612,7 +610,7 @@ async fn projection_same_fields() -> Result<()> { #[tokio::test] async fn csv_query_group_by_float64() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_simple_csv(&mut ctx)?; + register_aggregate_simple_csv(&mut ctx).await?; let sql = "SELECT COUNT(*) as cnt, c2 FROM aggregate_simple GROUP BY c2 ORDER BY cnt DESC"; @@ -637,7 +635,7 @@ async fn csv_query_group_by_float64() -> Result<()> { #[tokio::test] async fn csv_query_group_by_boolean() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_simple_csv(&mut ctx)?; + register_aggregate_simple_csv(&mut ctx).await?; let sql = "SELECT COUNT(*) as cnt, c3 FROM aggregate_simple GROUP BY c3 ORDER BY cnt DESC"; @@ -659,7 +657,7 @@ async fn csv_query_group_by_boolean() -> Result<()> { #[tokio::test] async fn csv_query_group_by_two_columns() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1, c2, MIN(c3) FROM aggregate_test_100 GROUP BY c1, c2"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -700,7 +698,7 @@ async fn csv_query_group_by_two_columns() -> Result<()> { #[tokio::test] async fn csv_query_group_by_and_having() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1, MIN(c3) AS m FROM aggregate_test_100 GROUP BY c1 HAVING m < -100 AND MAX(c3) > 70"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -718,7 +716,7 @@ async fn csv_query_group_by_and_having() -> Result<()> { #[tokio::test] async fn csv_query_group_by_and_having_and_where() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1, MIN(c3) AS m FROM aggregate_test_100 WHERE c1 IN ('a', 'b') @@ -739,7 +737,7 @@ async fn csv_query_group_by_and_having_and_where() -> Result<()> { #[tokio::test] async fn all_where_empty() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT * FROM aggregate_test_100 WHERE 1=2"; @@ -752,7 +750,7 @@ async fn all_where_empty() -> Result<()> { #[tokio::test] async fn csv_query_having_without_group_by() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1, c2, c3 FROM aggregate_test_100 HAVING c2 >= 4 AND c3 > 90"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -773,7 +771,7 @@ async fn 
csv_query_having_without_group_by() -> Result<()> { #[tokio::test] async fn csv_query_avg_sqrt() -> Result<()> { let mut ctx = create_ctx()?; - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT avg(custom_sqrt(c12)) FROM aggregate_test_100"; let mut actual = execute(&mut ctx, sql).await; actual.sort(); @@ -788,7 +786,7 @@ async fn csv_query_avg_sqrt() -> Result<()> { #[tokio::test] async fn csv_query_custom_udf_with_cast() -> Result<()> { let mut ctx = create_ctx()?; - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT avg(custom_sqrt(c11)) FROM aggregate_test_100"; let actual = execute(&mut ctx, sql).await; let expected = vec![vec!["0.6584408483418833"]]; @@ -800,7 +798,7 @@ async fn csv_query_custom_udf_with_cast() -> Result<()> { #[tokio::test] async fn sqrt_f32_vs_f64() -> Result<()> { let mut ctx = create_ctx()?; - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; // sqrt(f32)'s plan passes let sql = "SELECT avg(sqrt(c11)) FROM aggregate_test_100"; let actual = execute(&mut ctx, sql).await; @@ -818,7 +816,7 @@ async fn sqrt_f32_vs_f64() -> Result<()> { async fn csv_query_error() -> Result<()> { // sin(utf8) should error let mut ctx = create_ctx()?; - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT sin(c1) FROM aggregate_test_100"; let plan = ctx.create_logical_plan(sql); assert!(plan.is_err()); @@ -829,7 +827,7 @@ async fn csv_query_error() -> Result<()> { #[tokio::test] async fn csv_query_sqrt_sqrt() -> Result<()> { let mut ctx = create_ctx()?; - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT sqrt(sqrt(c12)) FROM aggregate_test_100 LIMIT 1"; let actual = execute(&mut ctx, sql).await; // sqrt(sqrt(c12=0.9294097332465232)) = 0.9818650561397431 @@ -872,7 +870,7 @@ fn custom_sqrt(args: &[ColumnarValue]) -> Result { #[tokio::test] async fn csv_query_avg() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT avg(c12) FROM aggregate_test_100"; let mut actual = execute(&mut ctx, sql).await; actual.sort(); @@ -884,7 +882,7 @@ async fn csv_query_avg() -> Result<()> { #[tokio::test] async fn csv_query_group_by_avg() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1, avg(c12) FROM aggregate_test_100 GROUP BY c1"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -905,7 +903,7 @@ async fn csv_query_group_by_avg() -> Result<()> { #[tokio::test] async fn csv_query_group_by_avg_with_projection() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT avg(c12), c1 FROM aggregate_test_100 GROUP BY c1"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -926,7 +924,7 @@ async fn csv_query_group_by_avg_with_projection() -> Result<()> { #[tokio::test] async fn csv_query_avg_multi_batch() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT avg(c12) FROM aggregate_test_100"; let plan = ctx.create_logical_plan(sql).unwrap(); let plan = ctx.optimize(&plan).unwrap(); @@ -946,7 +944,7 @@ async fn csv_query_avg_multi_batch() -> Result<()> { 
#[tokio::test] async fn csv_query_nullif_divide_by_0() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c8/nullif(c7, 0) FROM aggregate_test_100"; let actual = execute(&mut ctx, sql).await; let actual = &actual[80..90]; // We just want to compare rows 80-89 @@ -969,7 +967,7 @@ async fn csv_query_nullif_divide_by_0() -> Result<()> { #[tokio::test] async fn csv_query_count() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT count(c12) FROM aggregate_test_100"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -986,7 +984,7 @@ async fn csv_query_count() -> Result<()> { #[tokio::test] async fn csv_query_approx_count() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -1004,7 +1002,7 @@ async fn csv_query_approx_count() -> Result<()> { #[tokio::test] async fn csv_query_window_with_empty_over() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "select \ c9, \ count(c5) over (), \ @@ -1033,7 +1031,7 @@ async fn csv_query_window_with_empty_over() -> Result<()> { #[tokio::test] async fn csv_query_window_with_partition_by() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "select \ c9, \ sum(cast(c4 as Int)) over (partition by c3), \ @@ -1063,7 +1061,7 @@ async fn csv_query_window_with_partition_by() -> Result<()> { #[tokio::test] async fn csv_query_window_with_order_by() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "select \ c9, \ sum(c5) over (order by c9), \ @@ -1096,7 +1094,7 @@ async fn csv_query_window_with_order_by() -> Result<()> { #[tokio::test] async fn csv_query_window_with_partition_by_order_by() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "select \ c9, \ sum(c5) over (partition by c4 order by c9), \ @@ -1129,7 +1127,7 @@ async fn csv_query_window_with_partition_by_order_by() -> Result<()> { #[tokio::test] async fn csv_query_group_by_int_count() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1, count(c12) FROM aggregate_test_100 GROUP BY c1"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -1150,7 +1148,7 @@ async fn csv_query_group_by_int_count() -> Result<()> { #[tokio::test] async fn csv_query_group_with_aliased_aggregate() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1, count(c12) AS count FROM aggregate_test_100 GROUP BY c1"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -1171,7 +1169,7 @@ async fn csv_query_group_with_aliased_aggregate() -> Result<()> { #[tokio::test] async fn csv_query_group_by_string_min_max() 
-> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1, MIN(c12), MAX(c12) FROM aggregate_test_100 GROUP BY c1"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ @@ -1194,7 +1192,7 @@ async fn csv_query_group_by_string_min_max() -> Result<()> { #[tokio::test] async fn csv_query_cast() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT CAST(c12 AS float) FROM aggregate_test_100 WHERE c12 > 0.376 AND c12 < 0.4"; let actual = execute(&mut ctx, sql).await; let expected = vec![vec!["0.39144436569161134"], vec!["0.38870280983958583"]]; @@ -1205,7 +1203,7 @@ async fn csv_query_cast() -> Result<()> { #[tokio::test] async fn csv_query_cast_literal() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c12, CAST(1 AS float) FROM aggregate_test_100 WHERE c12 > CAST(0 AS float) LIMIT 2"; let actual = execute(&mut ctx, sql).await; @@ -1412,7 +1410,7 @@ async fn union_all() -> Result<()> { #[tokio::test] async fn csv_union_all() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1 FROM aggregate_test_100 UNION ALL SELECT c1 FROM aggregate_test_100"; let actual = execute(&mut ctx, sql).await; @@ -1423,7 +1421,7 @@ async fn csv_union_all() -> Result<()> { #[tokio::test] async fn csv_query_limit() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1 FROM aggregate_test_100 LIMIT 2"; let actual = execute(&mut ctx, sql).await; let expected = vec![vec!["c"], vec!["d"]]; @@ -1434,7 +1432,7 @@ async fn csv_query_limit() -> Result<()> { #[tokio::test] async fn csv_query_limit_bigger_than_nbr_of_rows() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c2 FROM aggregate_test_100 LIMIT 200"; let actual = execute(&mut ctx, sql).await; let expected = vec![ @@ -1546,7 +1544,7 @@ async fn csv_query_limit_bigger_than_nbr_of_rows() -> Result<()> { #[tokio::test] async fn csv_query_limit_with_same_nbr_of_rows() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c2 FROM aggregate_test_100 LIMIT 100"; let actual = execute(&mut ctx, sql).await; let expected = vec![ @@ -1658,7 +1656,7 @@ async fn csv_query_limit_with_same_nbr_of_rows() -> Result<()> { #[tokio::test] async fn csv_query_limit_zero() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c1 FROM aggregate_test_100 LIMIT 0"; let actual = execute(&mut ctx, sql).await; let expected: Vec> = vec![]; @@ -2445,14 +2443,14 @@ async fn csv_explain() { "logical_plan", "Projection: #aggregate_test_100.c1\ \n Filter: #aggregate_test_100.c2 > Int64(10)\ - \n TableScan: aggregate_test_100 projection=Some([0, 1])" + \n TableScan: aggregate_test_100 projection=Some([0, 1]), filters=[#aggregate_test_100.c2 > Int64(10)]" ], vec!["physical_plan", "ProjectionExec: expr=[c1@0 as c1]\ \n CoalesceBatchesExec: target_batch_size=4096\ \n FilterExec: CAST(c2@1 AS 
Int64) > 10\ \n RepartitionExec: partitioning=RoundRobinBatch(NUM_CORES)\ - \n CsvExec: source=Path(ARROW_TEST_DATA/csv/aggregate_test_100.csv: [ARROW_TEST_DATA/csv/aggregate_test_100.csv]), has_header=true\ + \n CsvExec: files=[ARROW_TEST_DATA/csv/aggregate_test_100.csv], has_header=true, batch_size=8192, limit=None\ \n" ]]; assert_eq!(expected, actual); @@ -2756,7 +2754,7 @@ async fn csv_explain_plans() { "Explain [plan_type:Utf8, plan:Utf8]", " Projection: #aggregate_test_100.c1 [c1:Utf8]", " Filter: #aggregate_test_100.c2 > Int64(10) [c1:Utf8, c2:Int32]", - " TableScan: aggregate_test_100 projection=Some([0, 1]) [c1:Utf8, c2:Int32]", + " TableScan: aggregate_test_100 projection=Some([0, 1]), filters=[#aggregate_test_100.c2 > Int64(10)] [c1:Utf8, c2:Int32]", ]; let formatted = plan.display_indent_schema().to_string(); let actual: Vec<&str> = formatted.trim().lines().collect(); @@ -2771,7 +2769,7 @@ async fn csv_explain_plans() { "Explain", " Projection: #aggregate_test_100.c1", " Filter: #aggregate_test_100.c2 > Int64(10)", - " TableScan: aggregate_test_100 projection=Some([0, 1])", + " TableScan: aggregate_test_100 projection=Some([0, 1]), filters=[#aggregate_test_100.c2 > Int64(10)]", ]; let formatted = plan.display_indent().to_string(); let actual: Vec<&str> = formatted.trim().lines().collect(); @@ -2793,7 +2791,7 @@ async fn csv_explain_plans() { " 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]", " 4[shape=box label=\"Filter: #aggregate_test_100.c2 > Int64(10)\"]", " 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]", - " 5[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\"]", + " 5[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1]), filters=[#aggregate_test_100.c2 > Int64(10)]\"]", " 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]", " }", " subgraph cluster_6", @@ -2804,7 +2802,7 @@ async fn csv_explain_plans() { " 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]", " 9[shape=box label=\"Filter: #aggregate_test_100.c2 > Int64(10)\\nSchema: [c1:Utf8, c2:Int32]\"]", " 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]", - " 10[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\\nSchema: [c1:Utf8, c2:Int32]\"]", + " 10[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1]), filters=[#aggregate_test_100.c2 > Int64(10)]\\nSchema: [c1:Utf8, c2:Int32]\"]", " 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]", " }", "}", @@ -2953,7 +2951,7 @@ async fn csv_explain_verbose_plans() { "Explain [plan_type:Utf8, plan:Utf8]", " Projection: #aggregate_test_100.c1 [c1:Utf8]", " Filter: #aggregate_test_100.c2 > Int64(10) [c1:Utf8, c2:Int32]", - " TableScan: aggregate_test_100 projection=Some([0, 1]) [c1:Utf8, c2:Int32]", + " TableScan: aggregate_test_100 projection=Some([0, 1]), filters=[#aggregate_test_100.c2 > Int64(10)] [c1:Utf8, c2:Int32]", ]; let formatted = plan.display_indent_schema().to_string(); let actual: Vec<&str> = formatted.trim().lines().collect(); @@ -2968,7 +2966,7 @@ async fn csv_explain_verbose_plans() { "Explain", " Projection: #aggregate_test_100.c1", " Filter: #aggregate_test_100.c2 > Int64(10)", - " TableScan: aggregate_test_100 projection=Some([0, 1])", + " TableScan: aggregate_test_100 projection=Some([0, 1]), filters=[#aggregate_test_100.c2 > Int64(10)]", ]; let formatted = plan.display_indent().to_string(); let actual: Vec<&str> = formatted.trim().lines().collect(); @@ -2990,7 +2988,7 @@ async fn csv_explain_verbose_plans() { " 2 -> 3 [arrowhead=none, arrowtail=normal, 
dir=back]", " 4[shape=box label=\"Filter: #aggregate_test_100.c2 > Int64(10)\"]", " 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]", - " 5[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\"]", + " 5[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1]), filters=[#aggregate_test_100.c2 > Int64(10)]\"]", " 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]", " }", " subgraph cluster_6", @@ -3001,7 +2999,7 @@ async fn csv_explain_verbose_plans() { " 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]", " 9[shape=box label=\"Filter: #aggregate_test_100.c2 > Int64(10)\\nSchema: [c1:Utf8, c2:Int32]\"]", " 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]", - " 10[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\\nSchema: [c1:Utf8, c2:Int32]\"]", + " 10[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1]), filters=[#aggregate_test_100.c2 > Int64(10)]\\nSchema: [c1:Utf8, c2:Int32]\"]", " 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]", " }", "}", @@ -3040,7 +3038,7 @@ async fn explain_analyze_runs_optimizers() { // repro for https://github.com/apache/arrow-datafusion/issues/917 // where EXPLAIN ANALYZE was not correctly running optiimizer let mut ctx = ExecutionContext::new(); - register_alltypes_parquet(&mut ctx); + register_alltypes_parquet(&mut ctx).await; // This happens as an optimization pass where count(*) can be // answered using statistics only. @@ -3105,6 +3103,7 @@ async fn register_aggregate_csv_by_sql(ctx: &mut ExecutionContext) { ", testdata )) + .await .expect("Creating dataframe for CREATE EXTERNAL TABLE"); // Mimic the CLI and execute the resulting plan -- even though it @@ -3116,18 +3115,19 @@ async fn register_aggregate_csv_by_sql(ctx: &mut ExecutionContext) { ); } -fn register_aggregate_csv(ctx: &mut ExecutionContext) -> Result<()> { +async fn register_aggregate_csv(ctx: &mut ExecutionContext) -> Result<()> { let testdata = datafusion::test_util::arrow_test_data(); let schema = aggr_test_schema(); ctx.register_csv( "aggregate_test_100", &format!("{}/csv/aggregate_test_100.csv", testdata), CsvReadOptions::new().schema(&schema), - )?; + ) + .await?; Ok(()) } -fn register_aggregate_simple_csv(ctx: &mut ExecutionContext) -> Result<()> { +async fn register_aggregate_simple_csv(ctx: &mut ExecutionContext) -> Result<()> { // It's not possible to use aggregate_test_100, not enought similar values to test grouping on floats let schema = Arc::new(Schema::new(vec![ Field::new("c1", DataType::Float32, false), @@ -3139,27 +3139,30 @@ fn register_aggregate_simple_csv(ctx: &mut ExecutionContext) -> Result<()> { "aggregate_simple", "tests/aggregate_simple.csv", CsvReadOptions::new().schema(&schema), - )?; + ) + .await?; Ok(()) } -fn register_alltypes_parquet(ctx: &mut ExecutionContext) { +async fn register_alltypes_parquet(ctx: &mut ExecutionContext) { let testdata = datafusion::test_util::parquet_test_data(); ctx.register_parquet( "alltypes_plain", &format!("{}/alltypes_plain.parquet", testdata), ) + .await .unwrap(); } #[cfg(feature = "avro")] -fn register_alltypes_avro(ctx: &mut ExecutionContext) { +async fn register_alltypes_avro(ctx: &mut ExecutionContext) { let testdata = datafusion::test_util::arrow_test_data(); ctx.register_avro( "alltypes_plain", &format!("{}/avro/alltypes_plain.avro", testdata), AvroReadOptions::default(), ) + .await .unwrap(); } @@ -3892,7 +3895,7 @@ where #[tokio::test] async fn csv_between_expr() -> Result<()> { let mut ctx = ExecutionContext::new(); - 
register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c4 FROM aggregate_test_100 WHERE c12 BETWEEN 0.995 AND 1.0"; let mut actual = execute(&mut ctx, sql).await; actual.sort(); @@ -3904,7 +3907,7 @@ async fn csv_between_expr() -> Result<()> { #[tokio::test] async fn csv_between_expr_negated() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT c4 FROM aggregate_test_100 WHERE c12 NOT BETWEEN 0 AND 0.995"; let mut actual = execute(&mut ctx, sql).await; actual.sort(); @@ -4550,7 +4553,7 @@ async fn inner_join_nulls() { #[tokio::test] async fn qualified_table_references() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; for table_ref in &[ "aggregate_test_100", @@ -4567,7 +4570,7 @@ async fn qualified_table_references() -> Result<()> { #[tokio::test] async fn invalid_qualified_table_references() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; for table_ref in &[ "nonexistentschema.aggregate_test_100", @@ -4575,7 +4578,7 @@ async fn invalid_qualified_table_references() -> Result<()> { "way.too.many.namespaces.as.ident.prefixes.aggregate_test_100", ] { let sql = format!("SELECT COUNT(*) FROM {}", table_ref); - assert!(matches!(ctx.sql(&sql), Err(DataFusionError::Plan(_)))); + assert!(matches!(ctx.sql(&sql).await, Err(DataFusionError::Plan(_)))); } Ok(()) } @@ -4651,7 +4654,7 @@ async fn test_random_expression() -> Result<()> { async fn test_cast_expressions_error() -> Result<()> { // sin(utf8) should error let mut ctx = create_ctx()?; - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT CAST(c1 AS INT) FROM aggregate_test_100"; let plan = ctx.create_logical_plan(sql).unwrap(); let plan = ctx.optimize(&plan).unwrap(); @@ -4675,7 +4678,7 @@ async fn test_physical_plan_display_indent() { // Hard code target_partitions as it appears in the RepartitionExec output let config = ExecutionConfig::new().with_target_partitions(3); let mut ctx = ExecutionContext::with_config(config); - register_aggregate_csv(&mut ctx).unwrap(); + register_aggregate_csv(&mut ctx).await.unwrap(); let sql = "SELECT c1, MAX(c12), MIN(c12) as the_min \ FROM aggregate_test_100 \ WHERE c12 < 10 \ @@ -4698,7 +4701,7 @@ async fn test_physical_plan_display_indent() { " CoalesceBatchesExec: target_batch_size=4096", " FilterExec: c12@1 < CAST(10 AS Float64)", " RepartitionExec: partitioning=RoundRobinBatch(3)", - " CsvExec: source=Path(ARROW_TEST_DATA/csv/aggregate_test_100.csv: [ARROW_TEST_DATA/csv/aggregate_test_100.csv]), has_header=true", + " CsvExec: files=[ARROW_TEST_DATA/csv/aggregate_test_100.csv], has_header=true, batch_size=8192, limit=None", ]; let data_path = datafusion::test_util::arrow_test_data(); @@ -4722,7 +4725,7 @@ async fn test_physical_plan_display_indent_multi_children() { let config = ExecutionConfig::new().with_target_partitions(3); let mut ctx = ExecutionContext::with_config(config); // ensure indenting works for nodes with multiple children - register_aggregate_csv(&mut ctx).unwrap(); + register_aggregate_csv(&mut ctx).await.unwrap(); let sql = "SELECT c1 \ FROM (select c1 from aggregate_test_100) AS a \ JOIN\ @@ -4743,13 +4746,13 @@ async fn test_physical_plan_display_indent_multi_children() { " ProjectionExec: expr=[c1@0 as c1]", " ProjectionExec: 
expr=[c1@0 as c1]", " RepartitionExec: partitioning=RoundRobinBatch(3)", - " CsvExec: source=Path(ARROW_TEST_DATA/csv/aggregate_test_100.csv: [ARROW_TEST_DATA/csv/aggregate_test_100.csv]), has_header=true", + " CsvExec: files=[ARROW_TEST_DATA/csv/aggregate_test_100.csv], has_header=true, batch_size=8192, limit=None", " CoalesceBatchesExec: target_batch_size=4096", " RepartitionExec: partitioning=Hash([Column { name: \"c2\", index: 0 }], 3)", " ProjectionExec: expr=[c2@0 as c2]", " ProjectionExec: expr=[c1@0 as c2]", " RepartitionExec: partitioning=RoundRobinBatch(3)", - " CsvExec: source=Path(ARROW_TEST_DATA/csv/aggregate_test_100.csv: [ARROW_TEST_DATA/csv/aggregate_test_100.csv]), has_header=true", + " CsvExec: files=[ARROW_TEST_DATA/csv/aggregate_test_100.csv], has_header=true, batch_size=8192, limit=None", ]; let data_path = datafusion::test_util::arrow_test_data(); @@ -4770,7 +4773,7 @@ async fn test_physical_plan_display_indent_multi_children() { #[tokio::test] async fn test_aggregation_with_bad_arguments() -> Result<()> { let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx)?; + register_aggregate_csv(&mut ctx).await?; let sql = "SELECT COUNT(DISTINCT) FROM aggregate_test_100"; let logical_plan = ctx.create_logical_plan(sql)?; let physical_plan = ctx.create_physical_plan(&logical_plan).await; @@ -5001,7 +5004,7 @@ async fn join_tables_with_duplicated_column_name_not_in_on_constraint() -> Resul #[tokio::test] async fn avro_query() { let mut ctx = ExecutionContext::new(); - register_alltypes_avro(&mut ctx); + register_alltypes_avro(&mut ctx).await; // NOTE that string_col is actually a binary column and does not have the UTF8 logical type // so we need an explicit cast let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain"; @@ -5048,6 +5051,7 @@ async fn avro_query_multiple_files() { table_path.display().to_string().as_str(), AvroReadOptions::default(), ) + .await .unwrap(); // NOTE that string_col is actually a binary column and does not have the UTF8 logical type // so we need an explicit cast @@ -5089,6 +5093,7 @@ async fn avro_single_nan_schema() { &format!("{}/avro/single_nan.avro", testdata), AvroReadOptions::default(), ) + .await .unwrap(); let sql = "SELECT mycol FROM single_nan"; let plan = ctx.create_logical_plan(sql).unwrap(); @@ -5105,7 +5110,7 @@ async fn avro_single_nan_schema() { #[tokio::test] async fn avro_explain() { let mut ctx = ExecutionContext::new(); - register_alltypes_avro(&mut ctx); + register_alltypes_avro(&mut ctx).await; let sql = "EXPLAIN SELECT count(*) from alltypes_plain"; let actual = execute(&mut ctx, sql).await; @@ -5124,7 +5129,7 @@ async fn avro_explain() { \n CoalescePartitionsExec\ \n HashAggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1))]\ \n RepartitionExec: partitioning=RoundRobinBatch(NUM_CORES)\ - \n AvroExec: source=Path(ARROW_TEST_DATA/avro/alltypes_plain.avro: [ARROW_TEST_DATA/avro/alltypes_plain.avro]), batch_size=8192, limit=None\ + \n AvroExec: files=[ARROW_TEST_DATA/avro/alltypes_plain.avro], batch_size=8192, limit=None\ \n", ], ]; diff --git a/datafusion/tests/statistics.rs b/datafusion/tests/statistics.rs index 7a19aa7deb69..2934d7889215 100644 --- a/datafusion/tests/statistics.rs +++ b/datafusion/tests/statistics.rs @@ -211,7 +211,7 @@ async fn sql_basic() -> Result<()> { let (stats, schema) = fully_defined(); let mut ctx = init_ctx(stats.clone(), schema)?; - let df = ctx.sql("SELECT * from stats_table").unwrap(); + let df = ctx.sql("SELECT * from stats_table").await.unwrap(); let 
physical_plan = ctx .create_physical_plan(&df.to_logical_plan()) @@ -229,7 +229,10 @@ async fn sql_filter() -> Result<()> { let (stats, schema) = fully_defined(); let mut ctx = init_ctx(stats, schema)?; - let df = ctx.sql("SELECT * FROM stats_table WHERE c1 = 5").unwrap(); + let df = ctx + .sql("SELECT * FROM stats_table WHERE c1 = 5") + .await + .unwrap(); let physical_plan = ctx .create_physical_plan(&df.to_logical_plan()) @@ -247,7 +250,7 @@ async fn sql_limit() -> Result<()> { let (stats, schema) = fully_defined(); let mut ctx = init_ctx(stats.clone(), schema)?; - let df = ctx.sql("SELECT * FROM stats_table LIMIT 5").unwrap(); + let df = ctx.sql("SELECT * FROM stats_table LIMIT 5").await.unwrap(); let physical_plan = ctx .create_physical_plan(&df.to_logical_plan()) .await @@ -263,7 +266,10 @@ async fn sql_limit() -> Result<()> { physical_plan.statistics() ); - let df = ctx.sql("SELECT * FROM stats_table LIMIT 100").unwrap(); + let df = ctx + .sql("SELECT * FROM stats_table LIMIT 100") + .await + .unwrap(); let physical_plan = ctx .create_physical_plan(&df.to_logical_plan()) .await @@ -281,6 +287,7 @@ async fn sql_window() -> Result<()> { let df = ctx .sql("SELECT c2, sum(c1) over (partition by c2) FROM stats_table") + .await .unwrap(); let physical_plan = ctx diff --git a/datafusion/tests/user_defined_plan.rs b/datafusion/tests/user_defined_plan.rs index 27ad901d135d..adb83ac0a202 100644 --- a/datafusion/tests/user_defined_plan.rs +++ b/datafusion/tests/user_defined_plan.rs @@ -91,7 +91,7 @@ use datafusion::logical_plan::DFSchemaRef; /// Execute the specified sql and return the resulting record batches /// pretty printed as a String. async fn exec_sql(ctx: &mut ExecutionContext, sql: &str) -> Result { - let df = ctx.sql(sql)?; + let df = ctx.sql(sql).await?; let batches = df.collect().await?; pretty_format_batches(&batches).map_err(DataFusionError::ArrowError) } @@ -216,9 +216,9 @@ async fn topk_plan() -> Result<()> { let mut ctx = setup_table(make_topk_context()).await?; let expected = vec![ - "| logical_plan after topk | TopK: k=3 |", - "| | Projection: #sales.customer_id, #sales.revenue |", - "| | TableScan: sales projection=Some([0, 1]) |", + "| logical_plan after topk | TopK: k=3 |", + "| | Projection: #sales.customer_id, #sales.revenue |", + "| | TableScan: sales projection=Some([0, 1]) |", ].join("\n"); let explain_query = format!("EXPLAIN VERBOSE {}", QUERY); diff --git a/python/src/context.rs b/python/src/context.rs index 4c47058190d8..24a2cb813045 100644 --- a/python/src/context.rs +++ b/python/src/context.rs @@ -21,6 +21,8 @@ use std::{collections::HashSet, sync::Arc}; use rand::distributions::Alphanumeric; use rand::Rng; +use tokio::runtime::Runtime; + use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -53,11 +55,16 @@ impl ExecutionContext { } /// Returns a DataFrame whose plan corresponds to the SQL statement. 
- fn sql(&mut self, query: &str) -> PyResult<dataframe::DataFrame> { - let df = self - .ctx - .sql(query) - .map_err(|e| -> errors::DataFusionError { e.into() })?; + fn sql(&mut self, query: &str, py: Python) -> PyResult<dataframe::DataFrame> { + let rt = Runtime::new().unwrap(); + let df = py.allow_threads(|| { + rt.block_on(async { + self.ctx + .sql(query) + .await + .map_err(|e| -> errors::DataFusionError { e.into() }) + }) + })?; Ok(dataframe::DataFrame::new( self.ctx.state.clone(), df.to_logical_plan(), @@ -119,8 +126,13 @@ impl ExecutionContext { Ok(()) } - fn register_parquet(&mut self, name: &str, path: &str) -> PyResult<()> { - errors::wrap(self.ctx.register_parquet(name, path))?; + fn register_parquet(&mut self, name: &str, path: &str, py: Python) -> PyResult<()> { + let rt = Runtime::new().unwrap(); + py.allow_threads(|| { + rt.block_on(async { + errors::wrap(self.ctx.register_parquet(name, path).await) + }) + })?; Ok(()) } @@ -140,6 +152,7 @@ impl ExecutionContext { delimiter: &str, schema_infer_max_records: usize, file_extension: &str, + py: Python, ) -> PyResult<()> { let path = path .to_str() @@ -162,7 +175,12 @@ impl ExecutionContext { .file_extension(file_extension); options.schema = schema.as_ref(); - errors::wrap(self.ctx.register_csv(name, path, options))?; + let rt = Runtime::new().unwrap(); + py.allow_threads(|| { + rt.block_on(async { + errors::wrap(self.ctx.register_csv(name, path, options).await) + }) + })?; Ok(()) }
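
The Python binding changes above bridge the now-async DataFusion API back to synchronous pyo3 methods by spinning up a Tokio runtime and blocking on each call, with py.allow_threads releasing the GIL around the blocking section. Below is a condensed sketch of the same bridging idea as a plain Rust helper, leaving out the pyo3-specific GIL handling; the wrapper function name and the parquet path are illustrative, not taken from the patch:

    use datafusion::error::Result;
    use datafusion::prelude::*;
    use tokio::runtime::Runtime;

    /// Synchronous wrapper over the now-async registration call.
    fn register_parquet_blocking(
        ctx: &mut ExecutionContext,
        name: &str,
        path: &str,
    ) -> Result<()> {
        // A fresh runtime per call mirrors the patch and keeps the binding simple;
        // a long-lived runtime stored alongside the context would avoid the
        // repeated setup cost.
        let rt = Runtime::new().expect("failed to create Tokio runtime");
        rt.block_on(async { ctx.register_parquet(name, path).await })
    }

    fn main() -> Result<()> {
        let mut ctx = ExecutionContext::new();
        register_parquet_blocking(&mut ctx, "alltypes_plain", "data/alltypes_plain.parquet")?;
        Ok(())
    }

In the patch itself the runtime is created inside each #[pymethods] function and the block_on is wrapped in py.allow_threads, so other Python threads can make progress while DataFusion performs I/O.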
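More broadly, the test updates in datafusion/tests/sql.rs, statistics.rs and user_defined_plan.rs all stem from the same change: ExecutionContext::sql and the register_* methods now perform I/O (file listing, schema inference) and are therefore async. A minimal sketch of the new calling convention, with an illustrative file path and table name rather than anything from this patch:

    use datafusion::error::Result;
    use datafusion::prelude::*;

    #[tokio::main]
    async fn main() -> Result<()> {
        let mut ctx = ExecutionContext::new();

        // Registration now has to be awaited: it may list files and infer a schema.
        ctx.register_csv("example", "data/example.csv", CsvReadOptions::new())
            .await?;

        // sql() is async as well and returns a DataFrame; collect() executes the plan.
        let df = ctx.sql("SELECT count(*) FROM example").await?;
        let batches = df.collect().await?;
        println!("{} record batches", batches.len());
        Ok(())
    }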
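The expected plan strings also change shape in this patch: TableScan now carries the pushed-down filters, and the format-specific scans print as CsvExec: files=[...], has_header=..., batch_size=..., limit=None (similarly for AvroExec). For reference, a rough sketch of how such a rendering can be produced outside the test helpers, using the public displayable helper from datafusion::physical_plan; the table name, columns and path are placeholders and assume a readable CSV file:

    use datafusion::error::Result;
    use datafusion::physical_plan::displayable;
    use datafusion::prelude::*;

    #[tokio::main]
    async fn main() -> Result<()> {
        let mut ctx = ExecutionContext::new();
        ctx.register_csv("t", "data/t.csv", CsvReadOptions::new()).await?;

        let logical = ctx.create_logical_plan("SELECT c1 FROM t WHERE c2 > 10")?;
        let optimized = ctx.optimize(&logical)?;
        let physical = ctx.create_physical_plan(&optimized).await?;

        // Prints the indented plan tree, ending in a leaf such as:
        //   CsvExec: files=[data/t.csv], has_header=true, batch_size=8192, limit=None
        println!("{}", displayable(physical.as_ref()).indent());
        Ok(())
    }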