diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 35622661eaaa..25a385eaea0e 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -31,8 +31,6 @@ simd = ["datafusion/simd"] snmalloc = ["snmalloc-rs"] [dependencies] -arrow = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" } -parquet = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" } datafusion = { path = "../datafusion" } ballista = { path = "../ballista/rust/client" } structopt = { version = "0.3", default-features = false } diff --git a/benchmarks/src/bin/nyctaxi.rs b/benchmarks/src/bin/nyctaxi.rs index 005efca94885..b2a62a0d39f9 100644 --- a/benchmarks/src/bin/nyctaxi.rs +++ b/benchmarks/src/bin/nyctaxi.rs @@ -22,8 +22,9 @@ use std::path::PathBuf; use std::process; use std::time::Instant; -use arrow::datatypes::{DataType, Field, Schema}; -use arrow::util::pretty; +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion::arrow::util::pretty; + use datafusion::error::Result; use datafusion::execution::context::{ExecutionConfig, ExecutionContext}; diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index fd9f0525987d..543e84f33097 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -28,17 +28,21 @@ use std::{ use futures::StreamExt; -use arrow::datatypes::{DataType, Field, Schema}; -use arrow::util::pretty; use ballista::context::BallistaContext; + +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::arrow::util::pretty; + use datafusion::datasource::parquet::ParquetTable; use datafusion::datasource::{CsvFile, MemTable, TableProvider}; use datafusion::error::{DataFusionError, Result}; use datafusion::logical_plan::LogicalPlan; use datafusion::physical_plan::collect; use datafusion::prelude::*;
-use parquet::basic::Compression; -use parquet::file::properties::WriterProperties; + +use datafusion::parquet::basic::Compression; +use datafusion::parquet::file::properties::WriterProperties; use structopt::StructOpt; #[cfg(feature = "snmalloc")] @@ -149,9 +153,7 @@ async fn main() -> Result<()> { } } -async fn benchmark_datafusion( - opt: BenchmarkOpt, -) -> Result<Vec<arrow::record_batch::RecordBatch>> { +async fn benchmark_datafusion(opt: BenchmarkOpt) -> Result<Vec<RecordBatch>> { println!("Running benchmarks with the following options: {:?}", opt); let config = ExecutionConfig::new() .with_concurrency(opt.concurrency) @@ -186,7 +188,7 @@ async fn benchmark_datafusion( let mut millis = vec![]; // run benchmark - let mut result: Vec<arrow::record_batch::RecordBatch> = Vec::with_capacity(1); + let mut result: Vec<RecordBatch> = Vec::with_capacity(1); for i in 0..opt.iterations { let start = Instant::now(); let plan = create_logical_plan(&mut ctx, opt.query)?; @@ -299,7 +301,7 @@ async fn execute_query( ctx: &mut ExecutionContext, plan: &LogicalPlan, debug: bool, -) -> Result<Vec<arrow::record_batch::RecordBatch>> { +) -> Result<Vec<RecordBatch>> { if debug { println!("Logical plan:\n{:?}", plan); } @@ -523,9 +525,8 @@ mod tests { use std::env; use std::sync::Arc; - use arrow::array::*; - use arrow::record_batch::RecordBatch; - use arrow::util::display::array_value_to_string; + use datafusion::arrow::array::*; + use datafusion::arrow::util::display::array_value_to_string; use datafusion::logical_plan::Expr; use datafusion::logical_plan::Expr::Cast; diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 929bdf208305..0445f382a25e 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -29,7 +29,6 @@ publish = false [dev-dependencies] -arrow = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" } arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" } datafusion = { path = "../datafusion" } prost = "0.7" diff --git a/datafusion-examples/examples/csv_sql.rs b/datafusion-examples/examples/csv_sql.rs index 
63fd36d44ce4..76c87960d71d 100644 --- a/datafusion-examples/examples/csv_sql.rs +++ b/datafusion-examples/examples/csv_sql.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use arrow::util::pretty; +use datafusion::arrow::util::pretty; use datafusion::error::Result; use datafusion::prelude::*; @@ -27,7 +27,7 @@ async fn main() -> Result<()> { // create local execution context let mut ctx = ExecutionContext::new(); - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = datafusion::arrow::util::test_util::arrow_test_data(); // register csv file with the execution context ctx.register_csv( diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs index cba4d87f1e0d..dcf6bc32be6b 100644 --- a/datafusion-examples/examples/dataframe.rs +++ b/datafusion-examples/examples/dataframe.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use arrow::util::pretty; +use datafusion::arrow::util::pretty; use datafusion::error::Result; use datafusion::prelude::*; @@ -27,7 +27,7 @@ async fn main() -> Result<()> { // create local execution context let mut ctx = ExecutionContext::new(); - let testdata = arrow::util::test_util::parquet_test_data(); + let testdata = datafusion::arrow::util::test_util::parquet_test_data(); let filename = &format!("{}/alltypes_plain.parquet", testdata); diff --git a/datafusion-examples/examples/dataframe_in_memory.rs b/datafusion-examples/examples/dataframe_in_memory.rs index de8552a3bba7..0c65a7477e97 100644 --- a/datafusion-examples/examples/dataframe_in_memory.rs +++ b/datafusion-examples/examples/dataframe_in_memory.rs @@ -17,10 +17,10 @@ use std::sync::Arc; -use arrow::array::{Int32Array, StringArray}; -use arrow::datatypes::{DataType, Field, Schema}; -use arrow::record_batch::RecordBatch; -use arrow::util::pretty; +use datafusion::arrow::array::{Int32Array, StringArray}; +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::arrow::util::pretty; use datafusion::datasource::MemTable; use datafusion::error::Result; diff --git a/datafusion-examples/examples/flight_client.rs b/datafusion-examples/examples/flight_client.rs index 2c2954d5a029..53347826ff89 100644 --- a/datafusion-examples/examples/flight_client.rs +++ b/datafusion-examples/examples/flight_client.rs @@ -18,8 +18,8 @@ use std::convert::TryFrom; use std::sync::Arc; -use arrow::datatypes::Schema; -use arrow::util::pretty; +use datafusion::arrow::datatypes::Schema; +use datafusion::arrow::util::pretty; use arrow_flight::flight_descriptor; use arrow_flight::flight_service_client::FlightServiceClient; @@ -31,7 +31,7 @@ use arrow_flight::{FlightDescriptor, Ticket}; /// This example is run along-side the example `flight_server`. 
#[tokio::main] async fn main() -> Result<(), Box<dyn std::error::Error>> { - let testdata = arrow::util::test_util::parquet_test_data(); + let testdata = datafusion::arrow::util::test_util::parquet_test_data(); // Create Flight client let mut client = FlightServiceClient::connect("http://localhost:50051").await?; diff --git a/datafusion-examples/examples/flight_server.rs b/datafusion-examples/examples/flight_server.rs index 79660dd1871c..8496bcb18914 100644 --- a/datafusion-examples/examples/flight_server.rs +++ b/datafusion-examples/examples/flight_server.rs @@ -66,7 +66,7 @@ impl FlightService for FlightServiceImpl { let table = ParquetTable::try_new(&request.path[0], num_cpus::get()).unwrap(); - let options = arrow::ipc::writer::IpcWriteOptions::default(); + let options = datafusion::arrow::ipc::writer::IpcWriteOptions::default(); let schema_result = arrow_flight::utils::flight_schema_from_arrow_schema( table.schema().as_ref(), &options, @@ -87,7 +87,7 @@ impl FlightService for FlightServiceImpl { // create local execution context let mut ctx = ExecutionContext::new(); - let testdata = arrow::util::test_util::parquet_test_data(); + let testdata = datafusion::arrow::util::test_util::parquet_test_data(); // register parquet file with the execution context ctx.register_parquet( @@ -106,7 +106,7 @@ impl FlightService for FlightServiceImpl { } // add an initial FlightData message that sends schema - let options = arrow::ipc::writer::IpcWriteOptions::default(); + let options = datafusion::arrow::ipc::writer::IpcWriteOptions::default(); let schema_flight_data = arrow_flight::utils::flight_data_from_arrow_schema( &df.schema().clone().into(), diff --git a/datafusion-examples/examples/parquet_sql.rs b/datafusion-examples/examples/parquet_sql.rs index 8043d3296c87..f679b22ceb90 100644 --- a/datafusion-examples/examples/parquet_sql.rs +++ b/datafusion-examples/examples/parquet_sql.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use arrow::util::pretty; +use datafusion::arrow::util::pretty; use datafusion::error::Result; use datafusion::prelude::*; @@ -27,7 +27,7 @@ async fn main() -> Result<()> { // create local execution context let mut ctx = ExecutionContext::new(); - let testdata = arrow::util::test_util::parquet_test_data(); + let testdata = datafusion::arrow::util::test_util::parquet_test_data(); // register parquet file with the execution context ctx.register_parquet( diff --git a/datafusion-examples/examples/simple_udaf.rs b/datafusion-examples/examples/simple_udaf.rs index 8086dfc47de4..49d09ff43155 100644 --- a/datafusion-examples/examples/simple_udaf.rs +++ b/datafusion-examples/examples/simple_udaf.rs @@ -17,7 +17,7 @@ /// In this example we will declare a single-type, single return type UDAF that computes the geometric mean. /// The geometric mean is described here: https://en.wikipedia.org/wiki/Geometric_mean -use arrow::{ +use datafusion::arrow::{ array::Float32Array, array::Float64Array, datatypes::DataType, record_batch::RecordBatch, }; @@ -28,7 +28,7 @@ use std::sync::Arc; // create local execution context with an in-memory table fn create_context() -> Result<ExecutionContext> { - use arrow::datatypes::{Field, Schema}; + use datafusion::arrow::datatypes::{Field, Schema}; use datafusion::datasource::MemTable; // define a schema. let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, false)])); diff --git a/datafusion-examples/examples/simple_udf.rs b/datafusion-examples/examples/simple_udf.rs index bfef1089a634..0ffec44a3720 100644 --- a/datafusion-examples/examples/simple_udf.rs +++ b/datafusion-examples/examples/simple_udf.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use arrow::{ +use datafusion::arrow::{ array::{ArrayRef, Float32Array, Float64Array}, datatypes::DataType, record_batch::RecordBatch, @@ -28,7 +28,7 @@ use std::sync::Arc; // create local execution context with an in-memory table fn create_context() -> Result<ExecutionContext> { - use arrow::datatypes::{Field, Schema}; + use datafusion::arrow::datatypes::{Field, Schema}; use datafusion::datasource::MemTable; // define a schema. let schema = Arc::new(Schema::new(vec![ diff --git a/datafusion/src/lib.rs b/datafusion/src/lib.rs index 44a8a686a496..252d168114ad 100644 --- a/datafusion/src/lib.rs +++ b/datafusion/src/lib.rs @@ -183,7 +183,6 @@ //! //! you can find examples of each of them in examples section. -extern crate arrow; extern crate sqlparser: extern crate sqlparser; pub mod catalog; @@ -200,6 +199,10 @@ pub mod scalar; pub mod sql; pub mod variable; +// re-export dependencies from arrow-rs to minimise version maintenance for crate users +pub use arrow; +pub use parquet; + #[cfg(test)] pub mod test;